import pandas as pd
import os
# Standard imports
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import sklearn
import math
#from chow_test import chowtest as ct
import chowtest as ct
%matplotlib inline
from statsmodels.graphics import tsaplots
# Import Statsmodels
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic
##This code runs the Dickey-Fuller Test
# This tests for stationarity in our time-series
#define function for ADF test DF WORKS ONLY WHEN AR(1), ADF
from statsmodels.tsa.stattools import adfuller
def adf_test(timeseries):
#Perform Dickey-Fuller test:
print ('Results of Dickey-Fuller Test:')
dftest = adfuller(timeseries, autolag='AIC')
dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
for key,value in dftest[4].items():
dfoutput['Critical Value (%s)'%key] = value
print (dfoutput)
Data Preparation
Oil Futures Data
##Data Importation
# Data directory
DATA_DIR = "./Data"
globCO2_daily = pd.read_csv("CO2_data_daily.csv")
globCO2_monthly = pd.read_csv("CO2_data_monthly.csv")
globCO2_daily = globCO2_daily[['Date','smoothed','trend']]
globCO2_monthly = globCO2_daily[['Date','smoothed','trend']]
start_dt = "2005-01-03"
end_dt = "2009-01-02"
#Reading in Crude WTI Data
master = pd.read_csv('master.csv', parse_dates = ['Date'])
master.set_index(keys = 'Date', inplace = True)
BCOMCL = master['Oil_Future']
print(master.head())
BCOMCL.tail()
#import seaborn as sns
#BCOMCL
from datetime import datetime
import matplotlib as mpl
start_dt = "1992-01-03"
end_dt = "2020-12-02"
values = BCOMCL[start_dt:end_dt]
dates = pd.to_datetime(BCOMCL[start_dt:end_dt].index)
plt.plot(dates, values)
#plt.ylim(50,750)
#xtick_location = BCOMCL.index.astype('datetime64[s]').tolist()[::100]
#xtick_labels = BCOMCL.index.astype('datetime64[s]').tolist()[::100]
#plt.xticks(ticks=xtick_location, labels=xtick_labels, rotation=90, fontsize=12, alpha=.7)
plt.title("WTI Crude Oil Futures (1995 - 2020)" + ' \nRecessions Shaded Gray', fontsize=14)
plt.yticks(fontsize=12, alpha=.7)
plt.gca().spines["top"].set_alpha(.0)
plt.gca().spines["bottom"].set_alpha(.3)
plt.gca().spines["right"].set_alpha(.0)
plt.gca().spines["left"].set_alpha(.3)
# Add a vertical grey shaded region
plt.axvspan('2001-01-01', '2001-12-31', color='grey', alpha=0.6);
plt.axvspan('2007-11-01', '2009-07-31', color='grey', alpha=0.6);
plt.axvspan('1995-01-01', '2004-01-02', color='grey', alpha=0.2);
plt.axvspan('2004-01-02', '2014-01-02', color='grey', alpha=0.3);
plt.axvspan('2014-01-02', '2021-01-02', color='grey', alpha=0.2);
plt.annotate('Oil Futures Go Negative \nDate = 2020-04',
fontsize=7,
fontweight='bold',
xy=('2020-03-30', values['2020-03-30']),
xycoords='data',
xytext=(-100, -15),
textcoords='offset points',
arrowprops=dict(arrowstyle="->"))
plt.annotate('Pre-Financialization \n Regime',
fontsize = 8,
xy =('1996-01-02',800))
plt.annotate('Post-Financialization \n Regime',
fontsize = 8,
xy =('2006-01-02',800))
plt.annotate('Post-Shale \n Regime',
fontsize = 8,
xy =('2016-01-02',800))
plt.axvline('2020-03-30', color='grey', linestyle='--');
plt.ylabel('Crude Oil Futures Price ($ per barrel)', fontsize=12)
plt.xlabel('Date (Weekly increments)')
plt.grid(axis='y', alpha=.3)
plt.show()
#Reading in Brent Crude Oil Data
BCOMCO = pd.read_csv('BCOMCO.csv', parse_dates=['Date'])
BCOMCO1 = pd.read_csv('BCOMCO1.csv', parse_dates=['Date'])
BCOMCO2 = pd.read_csv('BCOMCO2.csv', parse_dates=['Date'])
BCOMCO3 = pd.read_csv('BCOMCO3.csv', parse_dates=['Date'])
BCOMCO5 = pd.read_csv('BCOMCO5.csv', parse_dates=['Date'])
BCOMCO6 = pd.read_csv('BCOMCO6.csv', parse_dates=['Date'])
BCOMCO.head()
#BCOMCO
#start_dt = "2005-01-03"
#end_dt = "2009-01-02"
values = BCOMCO['BCOMCO']
#values = values[start_dt:end_dt]
dates = pd.to_datetime(BCOMCO['Date'])
plt.plot(dates, values)
Gasoline Price
## Gasoline Data Importation
master = pd.read_csv('master.csv', index_col=0, parse_dates = ['Date'])
#dates = pd.to_datetime(master['Gas_Prices'])
#Gas_Weekly.set_index(keys = 'Date', inplace = True)
Gas_reg = master['Gas_Price']
dates = Gas_reg.index
print(dates)
values = Gas_reg
plt.plot(dates, values)
Gas_reg
##Converting Daily Oil Futures into Weekly oil Futures
# This data is re-adjusted by averaging across week containers [maybe similar to the roll function by python]
# The use of averaging is still questionable.
BCOMCL_Weekly = {}
Weeks = Gas_Weekly.index
print(type(BCOMCL['Date'][0]))
n = len(Weeks)
for i in range(n-1):
cont = BCOMCL[BCOMCL['Date'] < Weeks[i+1]]
cont = cont[cont['Date'] >= Weeks[i]]
cont_size = len(cont)
#if not cont.empty:
# print(sum(cont.BCOMCO))
# input()
if cont_size != 0:
value = sum(cont.BCOMCL)/cont_size
BCOMCL_Weekly[Weeks[i]] = value
BCOMCL_Weekly = pd.DataFrame.from_dict(BCOMCL_Weekly, orient='index',columns=['Week'])
#print(BCOMCL_Weekly)
values = BCOMCL_Weekly['Week']
dates = BCOMCL_Weekly.index
ax = BCOMCL_Weekly.plot(color='g') #green represents crude oil BCOMCO_Weekly
Gas_reg.plot(ax=ax, secondary_y=True)
#Writing weekly data into new csv file
start_dt = "1992-01-02"
end_dt = "2014-01-02"
data = Gas_Weekly[start_dt:end_dt]
data['RCRGP'].to_csv('Weekly_gas.csv')
values[start_dt:end_dt].to_csv('Weekly_oil.csv')
# The Daily price changes for both gasoline and crude oil
# The two axes are separate to indicate difference in scale
# Note that daily price change and returns are almost the same except with scale difference.
# It appears to be stationary.
ax = Gas_reg.pct_change().plot(color='g')
y = BCOMCL_Weekly.pct_change()
y.plot(ax=ax, secondary_y = True)
start_dt = '1998-01-02'
#print(Gas_reg[start_dt:end_dt].pct_change())
end_dt = '2020-01-02'
ax = Gas_reg[start_dt:end_dt].pct_change().plot(color='g')
#y = BCOMCL_Weekly[start_dt:end_dt].pct_change()
#y.plot(ax=ax, secondary_y = True)
diff_p = np.subtract(Gas_reg[start_dt:end_dt].pct_change().squeeze(), BCOMCL_Weekly[start_dt:end_dt].pct_change().squeeze())
print(diff_p)
# The Daily price changes for both gasoline and crude oil
# The two axes are separate to indicate difference in scale
# Note that daily price change and returns are almost the same except with scale difference.
# It appears to be stationary.
ax = Gas_reg.diff().plot(color='g', label = 'gas')
y = BCOMCL_Weekly.diff()
y.plot(ax=ax, secondary_y = True, label = 'crude oil')
Data Attributes
Stationarity Tests
We conduct stationarity tests using Dickey-Fuller test
adf_test(BCOMCL_Weekly)
adf_test(Gas_reg.dropna())
From the above results, we find that from the Adf_test that our time series are non-stationary. We now test if they are d(1) stationary by considering their daily returns
adf_test(BCOMCL_Weekly.pct_change()[1:])
Gas_change = Gas_reg.dropna().pct_change()
adf_test(Gas_change[1:])
Evidently, the returns pass the test to be stationary
adf_test(BCOMCL_Weekly.diff()[1:])
Gas_change = Gas_reg.dropna().diff()
adf_test(Gas_change[1:])
ACF (Autocorrelation Function + Partial Correlation Function) Observations
# We plot ACF and PCF graphs to try and find a good lag term to observe the data
# The below plot has too many days so the window is too large for meaningful observation
# We take a subsection of a non-turbulent time period observed below
Gas_Change = Gas_reg[start_dt:end_dt].dropna().pct_change()
x = pd.plotting.autocorrelation_plot(Gas_change[1:])
start_dt = "1991-01-21"
end_dt = "2000-01-02"
Gas_Change = Gas_reg[start_dt:end_dt].dropna().pct_change()
Gas_Change = Gas_Change[1:]
x = pd.plotting.autocorrelation_plot(Gas_Change)
tsaplots.plot_pacf(Gas_Change)
tsaplots.plot_pacf(BCOMCL_Weekly.pct_change()[1:])
Tentatively, we want to fit VAR(4,5)
VAR Model Fitting
## We combine both time series into one dataframe with the missing data removed
Gas_Change = Gas_reg[start_dt:end_dt].dropna().pct_change()
df = pd.concat([Gas_change,BCOMCL_Weekly.pct_change()[1:]], axis = 1)
df = df.dropna()
#print(df)
#start_dt = "2000-01-02"
#end_dt = "2014-01-02"
#df = df[start_dt:end_dt]
#We run VAR recursively to model test for best model
model = VAR(df)
for i in [1,2,3,4,5,6,7,8,9]:
result = model.fit(i)
print('Lag Order =', i)
print('AIC : ', result.aic)
print('BIC : ', result.bic)
print('FPE : ', result.fpe)
print('HQIC: ', result.hqic, '\n')
# We feature select lag term of 5 here
x = model.select_order(maxlags=12)
x.summary()
model_fitted = model.fit(3)
model_fitted.summary()
# Get the lag order
lag_order = model_fitted.k_ar
print(lag_order) #> 4
# Input data for forecasting
forecast_input = df.values[-lag_order:]
forecast_input
# Forecast
fc = model_fitted.forecast(y=forecast_input, steps=3)
df_forecast = pd.DataFrame(fc, index=df.index[-3:], columns=df.columns + '_2d')
df_forecast
GARCH Model Fitting
# # Initial Attempts for a GARCH model fit
# from arch import arch_model
# model = arch_model(Gas_change, mean='Zero', vol='ARCH', p=15)
# # We need to find mGARCH implementation - I can try to write this if this is not found.
# #(Mengmeng: found this, seems to work)
#from rpy2.robjects import pandas2ri
#import rpy2.robjects as objects
#import numpy as np
# # pd_rets - a pandas dataframe of daily returns, where the column names are the tickers of stocks and index is the trading days.
# # compute DCC-Garch in R using rmgarch package
#pandas2ri.activate()
#r_rets = pandas2ri.py2ri(pd_rets) # convert the daily returns from pandas dataframe in Python to dataframe in R
#r_dccgarch_code = """
# library('rmgarch')
# function(r_rets, n_days){
# univariate_spec <- ugarchspec(mean.model = list(armaOrder = c(0,0)),
# variance.model = list(garchOrder = c(1,1),
# variance.targeting = FALSE,
# model = "sGARCH"),
# distribution.model = "norm")
# n <- dim(r_rets)[2]
# dcc_spec <- dccspec(uspec = multispec(replicate(n, univariate_spec)),
# dccOrder = c(1,1),
# distribution = "mvnorm")
# dcc_fit <- dccfit(dcc_spec, data=r_rets)
# forecasts <- dccforecast(dcc_fit, n.ahead = n_days)
# list(dcc_fit, forecasts@mforecast$H)
# }
# """
#r_dccgarch = robjects.r(r_dccgarch_code)
#r_res = r_dccgarch(r_rets,n_days)
#pandas2ri.deactivate()
# # end of R
#r_dccgarch_model = r_res[0] # model parameters
#r_forecast_cov = r_res[1] # forecasted covariance matrices for n_days
# # access and transform the covariance matrices in R format
#n_cols = pd_rets.shape[1] # get the number of stocks in pd_rets
#n_elements = n_cols*n_cols # the number of elements in each covariance matrix
#n_matrix = int(len(r_forecast_cov[0])/(n_elements))
#print(n_matrix) # this should be equal to n_days
# # sum the daily forecasted covariance matrices
#cov_matrix = 0
#for i in range(n_matrix):
# i_matrix = np.array([v for v in r_forecast_cov[0][i*n_elements:(i+1)*n_elements]])
# i_matrix = i_matrix.reshape(n_cols,n_cols)
# cov_matrix += i_matrix
Heteroskedasticity Observations
start_dt = "1991-01-21"
end_dt = "2000-01-02"
Gas_Change = Gas_reg[start_dt:end_dt].dropna().pct_change()
Gas_Change = Gas_Change[1:]
plt.plot(Gas_Change.index, Gas_Change)
Stuctural Break Tests
#WRITTEN DOMESTICALLY
from pandas.plotting import autocorrelation_plot
autocorrelation_plot(values)
plt.show()
# fit an ARIMA model and plot residual errors
from pandas import datetime
from pandas import read_csv
from pandas import DataFrame
from statsmodels.tsa.arima.model import ARIMA
from matplotlib import pyplot
# fit model
model = ARIMA(Gas_reg, order=(5,1,0))
model_fit = model.fit()
# summary of fit model
print(model_fit.summary())
# line plot of residuals
residuals = DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()
# density plot of residuals
residuals.plot(kind='kde')
pyplot.show()
# summary stats of residuals
print(residuals.describe())
# # fit model
# model = ARIMA(values, order=(5,1,0))
# model_fit = model.fit()
# # summary of fit model
# print(model_fit.summary())
# # line plot of residuals
# residuals = DataFrame(model_fit.resid)
# residuals.plot()
# pyplot.show()
# # density plot of residuals
# residuals.plot(kind='kde')
# pyplot.show()
# # summary stats of residuals
# print(residuals.describe())
from statsmodels.tsa.stattools import grangercausalitytests
maxlag=12
test = 'ssr-chi2test'
def grangers_causality_matrix(X_train, variables, test = 'ssr_chi2test', verbose=False):
dataset = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
for c in dataset.columns:
for r in dataset.index:
test_result = grangercausalitytests(X_train[[r,c]], maxlag=maxlag, verbose=False)
p_values = [round(test_result[i+1][0][test][1],4) for i in range(maxlag)]
if verbose: print(f'Y = {r}, X = {c}, P Values = {p_values}')
min_p_value = np.min(p_values)
dataset.loc[r,c] = min_p_value
dataset.columns = [var + '_x' for var in variables]
dataset.index = [var + '_y' for var in variables]
return dataset
#print(BCOMCO_Weekly['Week'])
df = pd.concat([master['Oil_Future'], master['Gas_Price']], axis = 1)
df = df.dropna()
df = df.pct_change()['2009-01-02':'2016-01-02']
#print(df)
df.columns = ['Oil','Gas']
grangers_causality_matrix(df, variables = ['Oil','Gas'])
test_result = grangercausalitytests(X_train[[r,c]], maxlag=maxlag, verbose=False)
Gas_reg["1991-01-21":]
#BCOMCO_Weekly["1991-01-21":].Week
#from rpy2.robjects import pandas2ri
#import rpy2.robjects as objects
#import numpy as np
## pd_rets - a pandas dataframe of daily returns, where the column names are the tickers of stocks and index is the trading days.
## compute DCC-Garch in R using rmgarch package
#pandas2ri.activate()
#r_rets = pandas2ri.py2ri(pd_rets) # convert the daily returns from pandas dataframe in Python to dataframe in R
#r_dccgarch_code = """
# library('rmgarch')
# function(r_rets, n_days){
# univariate_spec <- ugarchspec(mean.model = list(armaOrder = c(0,0)),
# variance.model = list(garchOrder = c(1,1),
# variance.targeting = FALSE,
# model = "sGARCH"),
# distribution.model = "norm")
# n <- dim(r_rets)[2]
# dcc_spec <- dccspec(uspec = multispec(replicate(n, univariate_spec)),
# dccOrder = c(1,1),
# distribution = "mvnorm")
# dcc_fit <- dccfit(dcc_spec, data=r_rets)
# forecasts <- dccforecast(dcc_fit, n.ahead = n_days)
# list(dcc_fit, forecasts@mforecast$H)
# }
# """
#r_dccgarch = robjects.r(r_dccgarch_code)
#r_res = r_dccgarch(r_rets,n_days)
#pandas2ri.deactivate()
## end of R
#
#r_dccgarch_model = r_res[0] # model parameters
#r_forecast_cov = r_res[1] # forecsted covariance matrices for n_days
## access and transform the covariance matrices in R format
#n_cols = pd_rets.shape[1] # get the number of stocks in pd_rets
#n_elements = n_cols*n_cols # the number of elements in each covariance matrix
#n_matrix = int(len(r_forecast_cov[0])/(n_elements))
#print(n_matrix) # this should be equal to n_days
# sum the daily forecasted covariance matrices
#cov_matrix = 0
#for i in range(n_matrix):
# i_matrix = np.array([v for v in r_forecast_cov[0][i*n_elements:(i+1)*n_elements]])
# i_matrix = i_matrix.reshape(n_cols,n_cols)
# cov_matrix += i_matrix
Heteroskedastic Structural Break Test
from statsmodels.tsa.arima.model import ARIMA
def arma_fit (X, y, p, q):
#oil = BCOMCO_Weekly['Week']
#for i in range(1,lag):
#oil = pd.concat([oil, BCOMCO_Weekly['Week'].shift(-i)], axis = 1)
model = ARIMA(endog = y, exog = X, order=[p,0, 0])
model_fit = model.fit()
print(model_fit.summary())
return (model_fit.params)
#Gas_reg = Gas_reg.dropna()
#arma_fit(Gas_reg, 5,5).sigma2
print(BCOMCL_Weekly)
lag = 0
start_dt = "1994-01-02"
end_dt = "2010-01-02"
def MZ_Test (X, y, lag, t1, t2, p, q):
sigma0 = arma_fit(X, y, p, q).sigma2
X1= X[:t1]
y1 = y[:t1]
X2 = X[t2:]
y2 = y[t2:]
T = len(X)
T1 = len(X1)
T2 = len(X2)
sigma1 = arma_fit(X1, y1, p, q).sigma2
sigma2 = arma_fit(X2, y2, p, q).sigma2
MZ = (T- lag)*np.log(sigma0) - ((T1)*np.log(sigma1) + (T2)*(np.log(sigma2)))
return MZ
Gas = Gas_reg.dropna().pct_change()[1:]
oil = BCOMCL_Weekly.dropna().pct_change()[1:]
oil = oil[start_dt: end_dt]
break_start = "1995-01-02"
break_end = "1995-01-02"
Gas = Gas[start_dt:end_dt]
oil = oil[start_dt:end_dt]
#plt.plot(Gas)
#plt.plot(oil)
#oil.squeeze()
#Gas.squeeze()
MZ_Test(oil.squeeze(), Gas.squeeze(), 5, break_start, break_end, 5, 5)
CO2 Data
-234.97538573390648
import statsmodels.tsa.stattools as ts
#cointegration_result = ts.coint(BCOMCO_Weekly, Gas_reg)
# BCOMCO_Weekly
#print(BCOMCL_Weekly.head())
#print(BCOMCL_Weekly.tail())
# print(Gas_reg.head())
#print(Gas_reg.tail())
#print(type(BCOMCL_Weekly))
#print(type(Gas_reg))
result = pd.concat([master['Oil_Future'].dropna().pct_change(), master['Gas_Price'].dropna().pct_change()], axis = 1)
result = result[1:]
result = result['2009-01-02':'2020-01-02']
print(result)
result = result.dropna()
#x1=result['Close_x']
#y1=result['Close_y']
coin_result = ts.coint(result['Oil_Future'], result['Gas_Price'])
print(coin_result)