import pandas as pd
import os
# Standard imports
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import sklearn
import math
#from chow_test import chowtest as ct
import chowtest as ct
%matplotlib inline
from statsmodels.graphics import tsaplots
# Import Statsmodels
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic
##This code runs the Dickey-Fuller Test
# This tests for stationarity in our time-series
#define function for ADF test DF WORKS ONLY WHEN AR(1), ADF
from statsmodels.tsa.stattools import adfuller
def adf_test(timeseries):
#Perform Dickey-Fuller test:
print ('Results of Dickey-Fuller Test:')
dftest = adfuller(timeseries, autolag='AIC')
dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
for key,value in dftest[4].items():
dfoutput['Critical Value (%s)'%key] = value
print (dfoutput)
##Data Importation
# Data directory
DATA_DIR = "./Data"
globCO2_daily = pd.read_csv("CO2_data_daily.csv")
globCO2_monthly = pd.read_csv("CO2_data_monthly.csv")
globCO2_daily = globCO2_daily[['Date','smoothed','trend']]
globCO2_monthly = globCO2_daily[['Date','smoothed','trend']]
start_dt = "2005-01-03"
end_dt = "2009-01-02"
#Reading in Crude WTI Data
master = pd.read_csv('master.csv', parse_dates = ['Date'])
master.set_index(keys = 'Date', inplace = True)
BCOMCL = master['Oil_Future']
print(master.head())
BCOMCL.tail()
Oil_Future Gas_Price Gas_Inv CO2
Date
1995-01-09 67.8433 1.134 213545 359.60
1995-01-17 69.9090 1.126 215577 360.28
1995-01-23 69.4449 1.132 217341 360.42
1995-01-30 69.1496 1.131 220092 360.12
1995-02-06 70.0778 1.124 222808 360.07
#import seaborn as sns
#BCOMCL
from datetime import datetime
import matplotlib as mpl
start_dt = "1992-01-03"
end_dt = "2020-12-02"
values = BCOMCL[start_dt:end_dt]
dates = pd.to_datetime(BCOMCL[start_dt:end_dt].index)
plt.plot(dates, values)
#plt.ylim(50,750)
#xtick_location = BCOMCL.index.astype('datetime64[s]').tolist()[::100]
#xtick_labels = BCOMCL.index.astype('datetime64[s]').tolist()[::100]
#plt.xticks(ticks=xtick_location, labels=xtick_labels, rotation=90, fontsize=12, alpha=.7)
plt.title("WTI Crude Oil Futures (1995 - 2020)" + ' \nRecessions Shaded Gray', fontsize=14)
plt.yticks(fontsize=12, alpha=.7)
plt.gca().spines["top"].set_alpha(.0)
plt.gca().spines["bottom"].set_alpha(.3)
plt.gca().spines["right"].set_alpha(.0)
plt.gca().spines["left"].set_alpha(.3)
# Add a vertical grey shaded region
plt.axvspan('2001-01-01', '2001-12-31', color='grey', alpha=0.6);
plt.axvspan('2007-11-01', '2009-07-31', color='grey', alpha=0.6);
plt.axvspan('1995-01-01', '2004-01-02', color='grey', alpha=0.2);
plt.axvspan('2004-01-02', '2014-01-02', color='grey', alpha=0.3);
plt.axvspan('2014-01-02', '2021-01-02', color='grey', alpha=0.2);
plt.annotate('Oil Futures Go Negative \nDate = 2020-04',
fontsize=7,
fontweight='bold',
xy=('2020-03-30', values['2020-03-30']),
xycoords='data',
xytext=(-100, -15),
textcoords='offset points',
arrowprops=dict(arrowstyle="->"))
plt.annotate('Pre-Financialization \n Regime',
fontsize = 8,
xy =('1996-01-02',800))
plt.annotate('Post-Financialization \n Regime',
fontsize = 8,
xy =('2006-01-02',800))
plt.annotate('Post-Shale \n Regime',
fontsize = 8,
xy =('2016-01-02',800))
plt.axvline('2020-03-30', color='grey', linestyle='--');
plt.ylabel('Crude Oil Futures Price ($ per barrel)', fontsize=12)
plt.xlabel('Date (Weekly increments)')
plt.grid(axis='y', alpha=.3)
plt.show()
ConversionError: Failed to convert value(s) to axis units: '2020-03-30'
ConversionError: Failed to convert value(s) to axis units: '2020-03-30'
#Reading in Brent Crude Oil Data
BCOMCO = pd.read_csv('BCOMCO.csv', parse_dates=['Date'])
BCOMCO1 = pd.read_csv('BCOMCO1.csv', parse_dates=['Date'])
BCOMCO2 = pd.read_csv('BCOMCO2.csv', parse_dates=['Date'])
BCOMCO3 = pd.read_csv('BCOMCO3.csv', parse_dates=['Date'])
BCOMCO5 = pd.read_csv('BCOMCO5.csv', parse_dates=['Date'])
BCOMCO6 = pd.read_csv('BCOMCO6.csv', parse_dates=['Date'])
BCOMCO.head()
#BCOMCO
#start_dt = "2005-01-03"
#end_dt = "2009-01-02"
values = BCOMCO['BCOMCO']
#values = values[start_dt:end_dt]
dates = pd.to_datetime(BCOMCO['Date'])
plt.plot(dates, values)
## Gasoline Data Importation
master = pd.read_csv('master.csv', index_col=0, parse_dates = ['Date'])
#dates = pd.to_datetime(master['Gas_Prices'])
#Gas_Weekly.set_index(keys = 'Date', inplace = True)
Gas_reg = master['Gas_Price']
dates = Gas_reg.index
print(dates)
values = Gas_reg
plt.plot(dates, values)
Gas_reg
DatetimeIndex(['1995-01-09', '1995-01-17', '1995-01-23', '1995-01-30',
'1995-02-06', '1995-02-13', '1995-02-21', '1995-02-27',
'1995-03-06', '1995-03-13',
...
'2020-11-09', '2020-11-16', '2020-11-23', '2020-11-30',
'2020-12-07', '2020-12-14', '2020-12-21', '2020-12-28',
'2021-01-04', '2021-01-11'],
dtype='datetime64[ns]', name='Date', length=1357, freq=None)
##Converting Daily Oil Futures into Weekly oil Futures
# This data is re-adjusted by averaging across week containers [maybe similar to the roll function by python]
# The use of averaging is still questionable.
BCOMCL_Weekly = {}
Weeks = Gas_Weekly.index
print(type(BCOMCL['Date'][0]))
n = len(Weeks)
for i in range(n-1):
cont = BCOMCL[BCOMCL['Date'] < Weeks[i+1]]
cont = cont[cont['Date'] >= Weeks[i]]
cont_size = len(cont)
#if not cont.empty:
# print(sum(cont.BCOMCO))
# input()
if cont_size != 0:
value = sum(cont.BCOMCL)/cont_size
BCOMCL_Weekly[Weeks[i]] = value
BCOMCL_Weekly = pd.DataFrame.from_dict(BCOMCL_Weekly, orient='index',columns=['Week'])
#print(BCOMCL_Weekly)
values = BCOMCL_Weekly['Week']
dates = BCOMCL_Weekly.index
ax = BCOMCL_Weekly.plot(color='g') #green represents crude oil BCOMCO_Weekly
Gas_reg.plot(ax=ax, secondary_y=True)
<class 'pandas._libs.tslibs.timestamps.Timestamp'>
#Writing weekly data into new csv file
start_dt = "1992-01-02"
end_dt = "2014-01-02"
data = Gas_Weekly[start_dt:end_dt]
data['RCRGP'].to_csv('Weekly_gas.csv')
values[start_dt:end_dt].to_csv('Weekly_oil.csv')
# The Daily price changes for both gasoline and crude oil
# The two axes are separate to indicate difference in scale
# Note that daily price change and returns are almost the same except with scale difference.
# It appears to be stationary.
ax = Gas_reg.pct_change().plot(color='g')
y = BCOMCL_Weekly.pct_change()
y.plot(ax=ax, secondary_y = True)
start_dt = '1998-01-02'
#print(Gas_reg[start_dt:end_dt].pct_change())
end_dt = '2020-01-02'
ax = Gas_reg[start_dt:end_dt].pct_change().plot(color='g')
#y = BCOMCL_Weekly[start_dt:end_dt].pct_change()
#y.plot(ax=ax, secondary_y = True)
diff_p = np.subtract(Gas_reg[start_dt:end_dt].pct_change().squeeze(), BCOMCL_Weekly[start_dt:end_dt].pct_change().squeeze())
print(diff_p)
Date
1998-01-05 NaN
1998-01-12 0.019464
1998-01-19 0.017320
1998-01-26 -0.080959
1998-02-02 0.030339
...
2019-12-02 0.003015
2019-12-09 -0.031082
2019-12-16 -0.038586
2019-12-23 -0.011349
2019-12-30 0.009878
Length: 1148, dtype: float64
# The Daily price changes for both gasoline and crude oil
# The two axes are separate to indicate difference in scale
# Note that daily price change and returns are almost the same except with scale difference.
# It appears to be stationary.
ax = Gas_reg.diff().plot(color='g', label = 'gas')
y = BCOMCL_Weekly.diff()
y.plot(ax=ax, secondary_y = True, label = 'crude oil')
adf_test(BCOMCL_Weekly)
Results of Dickey-Fuller Test:
Test Statistic -2.289386
p-value 0.175414
#Lags Used 8.000000
Number of Observations Used 1579.000000
Critical Value (1%) -3.434498
Critical Value (5%) -2.863372
Critical Value (10%) -2.567745
dtype: float64
adf_test(Gas_reg.dropna())
Results of Dickey-Fuller Test:
Test Statistic -1.950032
p-value 0.308903
#Lags Used 19.000000
Number of Observations Used 1563.000000
Critical Value (1%) -3.434541
Critical Value (5%) -2.863391
Critical Value (10%) -2.567755
dtype: float64
adf_test(BCOMCL_Weekly.pct_change()[1:])
Results of Dickey-Fuller Test:
Test Statistic -1.561813e+01
p-value 1.745590e-28
#Lags Used 4.000000e+00
Number of Observations Used 1.582000e+03
Critical Value (1%) -3.434490e+00
Critical Value (5%) -2.863369e+00
Critical Value (10%) -2.567744e+00
dtype: float64
Gas_change = Gas_reg.dropna().pct_change()
adf_test(Gas_change[1:])
Results of Dickey-Fuller Test:
Test Statistic -1.274147e+01
p-value 8.925519e-24
#Lags Used 8.000000e+00
Number of Observations Used 1.573000e+03
Critical Value (1%) -3.434514e+00
Critical Value (5%) -2.863379e+00
Critical Value (10%) -2.567749e+00
dtype: float64
adf_test(BCOMCL_Weekly.diff()[1:])
Results of Dickey-Fuller Test:
Test Statistic -8.060461e+00
p-value 1.648333e-12
#Lags Used 2.400000e+01
Number of Observations Used 1.562000e+03
Critical Value (1%) -3.434543e+00
Critical Value (5%) -2.863392e+00
Critical Value (10%) -2.567756e+00
dtype: float64
Gas_change = Gas_reg.dropna().diff()
adf_test(Gas_change[1:])
Results of Dickey-Fuller Test:
Test Statistic -9.745373e+00
p-value 8.283125e-17
#Lags Used 1.800000e+01
Number of Observations Used 1.563000e+03
Critical Value (1%) -3.434541e+00
Critical Value (5%) -2.863391e+00
Critical Value (10%) -2.567755e+00
dtype: float64
# We plot ACF and PCF graphs to try and find a good lag term to observe the data
# The below plot has too many days so the window is too large for meaningful observation
# We take a subsection of a non-turbulent time period observed below
Gas_Change = Gas_reg[start_dt:end_dt].dropna().pct_change()
x = pd.plotting.autocorrelation_plot(Gas_change[1:])
start_dt = "1991-01-21"
end_dt = "2000-01-02"
Gas_Change = Gas_reg[start_dt:end_dt].dropna().pct_change()
Gas_Change = Gas_Change[1:]
x = pd.plotting.autocorrelation_plot(Gas_Change)
tsaplots.plot_pacf(Gas_Change)
tsaplots.plot_pacf(BCOMCL_Weekly.pct_change()[1:])
## We combine both time series into one dataframe with the missing data removed
Gas_Change = Gas_reg[start_dt:end_dt].dropna().pct_change()
df = pd.concat([Gas_change,BCOMCL_Weekly.pct_change()[1:]], axis = 1)
df = df.dropna()
#print(df)
#start_dt = "2000-01-02"
#end_dt = "2014-01-02"
#df = df[start_dt:end_dt]
#We run VAR recursively to model test for best model
model = VAR(df)
for i in [1,2,3,4,5,6,7,8,9]:
result = model.fit(i)
print('Lag Order =', i)
print('AIC : ', result.aic)
print('BIC : ', result.bic)
print('FPE : ', result.fpe)
print('HQIC: ', result.hqic, '\n')
Lag Order = 1
AIC : -13.15432851175539
BIC : -13.133954410011006
FPE : 1.9370807182366146e-06
HQIC: -13.14675815600872
Lag Order = 2
AIC : -13.167872338063152
BIC : -13.133898006117265
FPE : 1.911022161954444e-06
HQIC: -13.155248176659349
Lag Order = 3
AIC : -13.172149014027143
BIC : -13.124560427829627
FPE : 1.9028669316942915e-06
HQIC: -13.154465514356383
Lag Order = 4
AIC : -13.172687471009063
BIC : -13.111470582889002
FPE : 1.9018428455200215e-06
HQIC: -13.149939090879895
Lag Order = 5
AIC : -13.185027094249211
BIC : -13.11016783286013
FPE : 1.8785194069994605e-06
HQIC: -13.157208281865953
Lag Order = 6
AIC : -13.182957782125335
BIC : -13.094442052390255
FPE : 1.882411232703875e-06
HQIC: -13.150062976065403
Lag Order = 7
AIC : -13.185199048423323
BIC : -13.083012731479664
FPE : 1.8781977315031204e-06
HQIC: -13.147222677614497
Lag Order = 8
AIC : -13.187289056229067
BIC : -13.071418009373389
FPE : 1.8742773745790896e-06
HQIC: -13.144225539926687
Lag Order = 9
AIC : -13.188797648625224
BIC : -13.059227705257802
FPE : 1.8714532418589163e-06
HQIC: -13.140641396389318
/root/venv/lib/python3.7/site-packages/statsmodels/tsa/base/tsa_model.py:583: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
' ignored when e.g. forecasting.', ValueWarning)
# We feature select lag term of 5 here
x = model.select_order(maxlags=12)
x.summary()
model_fitted = model.fit(3)
model_fitted.summary()
# Get the lag order
lag_order = model_fitted.k_ar
print(lag_order) #> 4
# Input data for forecasting
forecast_input = df.values[-lag_order:]
forecast_input
3
# Forecast
fc = model_fitted.forecast(y=forecast_input, steps=3)
df_forecast = pd.DataFrame(fc, index=df.index[-3:], columns=df.columns + '_2d')
df_forecast
# # Initial Attempts for a GARCH model fit
# from arch import arch_model
# model = arch_model(Gas_change, mean='Zero', vol='ARCH', p=15)
# # We need to find mGARCH implementation - I can try to write this if this is not found.
# #(Mengmeng: found this, seems to work)
#from rpy2.robjects import pandas2ri
#import rpy2.robjects as objects
#import numpy as np
# # pd_rets - a pandas dataframe of daily returns, where the column names are the tickers of stocks and index is the trading days.
# # compute DCC-Garch in R using rmgarch package
#pandas2ri.activate()
#r_rets = pandas2ri.py2ri(pd_rets) # convert the daily returns from pandas dataframe in Python to dataframe in R
#r_dccgarch_code = """
# library('rmgarch')
# function(r_rets, n_days){
# univariate_spec <- ugarchspec(mean.model = list(armaOrder = c(0,0)),
# variance.model = list(garchOrder = c(1,1),
# variance.targeting = FALSE,
# model = "sGARCH"),
# distribution.model = "norm")
# n <- dim(r_rets)[2]
# dcc_spec <- dccspec(uspec = multispec(replicate(n, univariate_spec)),
# dccOrder = c(1,1),
# distribution = "mvnorm")
# dcc_fit <- dccfit(dcc_spec, data=r_rets)
# forecasts <- dccforecast(dcc_fit, n.ahead = n_days)
# list(dcc_fit, forecasts@mforecast$H)
# }
# """
#r_dccgarch = robjects.r(r_dccgarch_code)
#r_res = r_dccgarch(r_rets,n_days)
#pandas2ri.deactivate()
# # end of R
#r_dccgarch_model = r_res[0] # model parameters
#r_forecast_cov = r_res[1] # forecasted covariance matrices for n_days
# # access and transform the covariance matrices in R format
#n_cols = pd_rets.shape[1] # get the number of stocks in pd_rets
#n_elements = n_cols*n_cols # the number of elements in each covariance matrix
#n_matrix = int(len(r_forecast_cov[0])/(n_elements))
#print(n_matrix) # this should be equal to n_days
# # sum the daily forecasted covariance matrices
#cov_matrix = 0
#for i in range(n_matrix):
# i_matrix = np.array([v for v in r_forecast_cov[0][i*n_elements:(i+1)*n_elements]])
# i_matrix = i_matrix.reshape(n_cols,n_cols)
# cov_matrix += i_matrix
start_dt = "1991-01-21"
end_dt = "2000-01-02"
Gas_Change = Gas_reg[start_dt:end_dt].dropna().pct_change()
Gas_Change = Gas_Change[1:]
plt.plot(Gas_Change.index, Gas_Change)
#WRITTEN DOMESTICALLY
from pandas.plotting import autocorrelation_plot
autocorrelation_plot(values)
plt.show()
# fit an ARIMA model and plot residual errors
from pandas import datetime
from pandas import read_csv
from pandas import DataFrame
from statsmodels.tsa.arima.model import ARIMA
from matplotlib import pyplot
# fit model
model = ARIMA(Gas_reg, order=(5,1,0))
model_fit = model.fit()
# summary of fit model
print(model_fit.summary())
# line plot of residuals
residuals = DataFrame(model_fit.resid)
residuals.plot()
pyplot.show()
# density plot of residuals
residuals.plot(kind='kde')
pyplot.show()
# summary stats of residuals
print(residuals.describe())
/shared-libs/python3.7/py-core/lib/python3.7/site-packages/ipykernel_launcher.py:2: FutureWarning: The pandas.datetime class is deprecated and will be removed from pandas in a future version. Import from datetime module instead.
/root/venv/lib/python3.7/site-packages/statsmodels/tsa/base/tsa_model.py:527: ValueWarning: No frequency information was provided, so inferred frequency W-MON will be used.
% freq, ValueWarning)
/root/venv/lib/python3.7/site-packages/statsmodels/tsa/base/tsa_model.py:527: ValueWarning: No frequency information was provided, so inferred frequency W-MON will be used.
% freq, ValueWarning)
/root/venv/lib/python3.7/site-packages/statsmodels/tsa/base/tsa_model.py:527: ValueWarning: No frequency information was provided, so inferred frequency W-MON will be used.
% freq, ValueWarning)
SARIMAX Results
==============================================================================
Dep. Variable: RCRGP No. Observations: 1589
Model: ARIMA(5, 1, 0) Log Likelihood 2829.880
Date: Wed, 24 Feb 2021 AIC -5647.760
Time: 02:24:05 BIC -5615.539
Sample: 08-20-1990 HQIC -5635.791
- 01-25-2021
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.4629 0.010 47.042 0.000 0.444 0.482
ar.L2 0.0488 0.015 3.294 0.001 0.020 0.078
ar.L3 0.0995 0.015 6.720 0.000 0.071 0.129
ar.L4 0.0031 0.012 0.261 0.794 -0.020 0.027
ar.L5 -0.0410 0.016 -2.558 0.011 -0.072 -0.010
sigma2 0.0016 2.29e-05 71.125 0.000 0.002 0.002
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 18388.75
Prob(Q): 0.96 Prob(JB): 0.00
Heteroskedasticity (H): 13.34 Skew: 0.59
Prob(H) (two-sided): 0.00 Kurtosis: 19.63
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
0
count 1583.000000
mean 0.001003
std 0.050489
min -0.339512
25% -0.014979
50% -0.001071
75% 0.014493
max 1.191000
# # fit model
# model = ARIMA(values, order=(5,1,0))
# model_fit = model.fit()
# # summary of fit model
# print(model_fit.summary())
# # line plot of residuals
# residuals = DataFrame(model_fit.resid)
# residuals.plot()
# pyplot.show()
# # density plot of residuals
# residuals.plot(kind='kde')
# pyplot.show()
# # summary stats of residuals
# print(residuals.describe())
from statsmodels.tsa.stattools import grangercausalitytests
maxlag=12
test = 'ssr-chi2test'
def grangers_causality_matrix(X_train, variables, test = 'ssr_chi2test', verbose=False):
dataset = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
for c in dataset.columns:
for r in dataset.index:
test_result = grangercausalitytests(X_train[[r,c]], maxlag=maxlag, verbose=False)
p_values = [round(test_result[i+1][0][test][1],4) for i in range(maxlag)]
if verbose: print(f'Y = {r}, X = {c}, P Values = {p_values}')
min_p_value = np.min(p_values)
dataset.loc[r,c] = min_p_value
dataset.columns = [var + '_x' for var in variables]
dataset.index = [var + '_y' for var in variables]
return dataset
#print(BCOMCO_Weekly['Week'])
df = pd.concat([master['Oil_Future'], master['Gas_Price']], axis = 1)
df = df.dropna()
df = df.pct_change()['2009-01-02':'2016-01-02']
#print(df)
df.columns = ['Oil','Gas']
grangers_causality_matrix(df, variables = ['Oil','Gas'])
test_result = grangercausalitytests(X_train[[r,c]], maxlag=maxlag, verbose=False)
Gas_reg["1991-01-21":]
#BCOMCO_Weekly["1991-01-21":].Week
#from rpy2.robjects import pandas2ri
#import rpy2.robjects as objects
#import numpy as np
## pd_rets - a pandas dataframe of daily returns, where the column names are the tickers of stocks and index is the trading days.
## compute DCC-Garch in R using rmgarch package
#pandas2ri.activate()
#r_rets = pandas2ri.py2ri(pd_rets) # convert the daily returns from pandas dataframe in Python to dataframe in R
#r_dccgarch_code = """
# library('rmgarch')
# function(r_rets, n_days){
# univariate_spec <- ugarchspec(mean.model = list(armaOrder = c(0,0)),
# variance.model = list(garchOrder = c(1,1),
# variance.targeting = FALSE,
# model = "sGARCH"),
# distribution.model = "norm")
# n <- dim(r_rets)[2]
# dcc_spec <- dccspec(uspec = multispec(replicate(n, univariate_spec)),
# dccOrder = c(1,1),
# distribution = "mvnorm")
# dcc_fit <- dccfit(dcc_spec, data=r_rets)
# forecasts <- dccforecast(dcc_fit, n.ahead = n_days)
# list(dcc_fit, forecasts@mforecast$H)
# }
# """
#r_dccgarch = robjects.r(r_dccgarch_code)
#r_res = r_dccgarch(r_rets,n_days)
#pandas2ri.deactivate()
## end of R
#
#r_dccgarch_model = r_res[0] # model parameters
#r_forecast_cov = r_res[1] # forecsted covariance matrices for n_days
## access and transform the covariance matrices in R format
#n_cols = pd_rets.shape[1] # get the number of stocks in pd_rets
#n_elements = n_cols*n_cols # the number of elements in each covariance matrix
#n_matrix = int(len(r_forecast_cov[0])/(n_elements))
#print(n_matrix) # this should be equal to n_days
# sum the daily forecasted covariance matrices
#cov_matrix = 0
#for i in range(n_matrix):
# i_matrix = np.array([v for v in r_forecast_cov[0][i*n_elements:(i+1)*n_elements]])
# i_matrix = i_matrix.reshape(n_cols,n_cols)
# cov_matrix += i_matrix
from statsmodels.tsa.arima.model import ARIMA
def arma_fit (X, y, p, q):
#oil = BCOMCO_Weekly['Week']
#for i in range(1,lag):
#oil = pd.concat([oil, BCOMCO_Weekly['Week'].shift(-i)], axis = 1)
model = ARIMA(endog = y, exog = X, order=[p,0, 0])
model_fit = model.fit()
print(model_fit.summary())
return (model_fit.params)
#Gas_reg = Gas_reg.dropna()
#arma_fit(Gas_reg, 5,5).sigma2
print(BCOMCL_Weekly)
Week
1990-08-20 99.346720
1990-08-27 91.817780
1990-09-03 100.553325
1990-09-10 103.689820
1990-09-17 113.444780
... ...
2020-12-21 43.762400
2020-12-28 44.036125
2021-01-04 45.914040
2021-01-11 48.271880
2021-01-18 48.290575
[1588 rows x 1 columns]
lag = 0
start_dt = "1994-01-02"
end_dt = "2010-01-02"
def MZ_Test (X, y, lag, t1, t2, p, q):
sigma0 = arma_fit(X, y, p, q).sigma2
X1= X[:t1]
y1 = y[:t1]
X2 = X[t2:]
y2 = y[t2:]
T = len(X)
T1 = len(X1)
T2 = len(X2)
sigma1 = arma_fit(X1, y1, p, q).sigma2
sigma2 = arma_fit(X2, y2, p, q).sigma2
MZ = (T- lag)*np.log(sigma0) - ((T1)*np.log(sigma1) + (T2)*(np.log(sigma2)))
return MZ
Gas = Gas_reg.dropna().pct_change()[1:]
oil = BCOMCL_Weekly.dropna().pct_change()[1:]
oil = oil[start_dt: end_dt]
break_start = "1995-01-02"
break_end = "1995-01-02"
Gas = Gas[start_dt:end_dt]
oil = oil[start_dt:end_dt]
#plt.plot(Gas)
#plt.plot(oil)
#oil.squeeze()
#Gas.squeeze()
MZ_Test(oil.squeeze(), Gas.squeeze(), 5, break_start, break_end, 5, 5)
/root/venv/lib/python3.7/site-packages/statsmodels/tsa/base/tsa_model.py:527: ValueWarning: No frequency information was provided, so inferred frequency W-MON will be used.
% freq, ValueWarning)
/root/venv/lib/python3.7/site-packages/statsmodels/tsa/base/tsa_model.py:527: ValueWarning: No frequency information was provided, so inferred frequency W-MON will be used.
% freq, ValueWarning)
/root/venv/lib/python3.7/site-packages/statsmodels/tsa/base/tsa_model.py:527: ValueWarning: No frequency information was provided, so inferred frequency W-MON will be used.
% freq, ValueWarning)
/root/venv/lib/python3.7/site-packages/statsmodels/base/model.py:568: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
ConvergenceWarning)
SARIMAX Results
==============================================================================
Dep. Variable: RCRGP No. Observations: 835
Model: ARIMA(5, 0, 0) Log Likelihood 2161.150
Date: Wed, 24 Feb 2021 AIC -4306.299
Time: 02:24:15 BIC -4268.480
Sample: 01-03-1994 HQIC -4291.800
- 12-28-2009
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 0.0016 0.002 0.960 0.337 -0.002 0.005
Week -0.0837 0.015 -5.765 0.000 -0.112 -0.055
ar.L1 0.5374 0.018 30.187 0.000 0.503 0.572
ar.L2 0.0549 0.026 2.138 0.033 0.005 0.105
ar.L3 0.1128 0.026 4.391 0.000 0.062 0.163
ar.L4 -0.0195 0.022 -0.872 0.383 -0.063 0.024
ar.L5 -0.1021 0.026 -3.922 0.000 -0.153 -0.051
sigma2 0.0003 8.28e-06 39.914 0.000 0.000 0.000
===================================================================================
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 4811.02
Prob(Q): 0.96 Prob(JB): 0.00
Heteroskedasticity (H): 9.42 Skew: 0.54
Prob(H) (two-sided): 0.00 Kurtosis: 14.71
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/root/venv/lib/python3.7/site-packages/statsmodels/tsa/base/tsa_model.py:527: ValueWarning: No frequency information was provided, so inferred frequency W-MON will be used.
% freq, ValueWarning)
/root/venv/lib/python3.7/site-packages/statsmodels/tsa/base/tsa_model.py:527: ValueWarning: No frequency information was provided, so inferred frequency W-MON will be used.
% freq, ValueWarning)
/root/venv/lib/python3.7/site-packages/statsmodels/tsa/base/tsa_model.py:527: ValueWarning: No frequency information was provided, so inferred frequency W-MON will be used.
% freq, ValueWarning)
SARIMAX Results
==============================================================================
Dep. Variable: RCRGP No. Observations: 53
Model: ARIMA(5, 0, 0) Log Likelihood 186.116
Date: Wed, 24 Feb 2021 AIC -356.232
Time: 02:24:17 BIC -340.470
Sample: 01-03-1994 HQIC -350.170
- 01-02-1995
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
const 0.0019 0.002 0.928 0.353 -0.002 0.006
Week -0.0701 0.037 -1.909 0.056 -0.142 0.002
ar.L1 0.4452 0.164 2.712 0.007 0.123 0.767
ar.L2 0.2201 0.243 0.905 0.365 -0.256 0.697
ar.L3 -0.0316 0.283 -0.112 0.911 -0.586 0.522
ar.L4 -0.0080 0.190 -0.042 0.966 -0.381 0.365
ar.L5 -0.1704 0.147 -1.159 0.246 -0.459 0.118
sigma2 5.15e-05 1.15e-05 4.490 0.000 2.9e-05 7.4e-05
===================================================================================
Ljung-Box (L1) (Q): 0.03 Jarque-Bera (JB): 7.36
Prob(Q): 0.85 Prob(JB): 0.03
Heteroskedasticity (H): 2.98 Skew: -0.26
Prob(H) (two-sided): 0.03 Kurtosis: 4.75
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/root/venv/lib/python3.7/site-packages/statsmodels/tsa/base/tsa_model.py:527: ValueWarning: No frequency information was provided, so inferred frequency W-MON will be used.
% freq, ValueWarning)
/root/venv/lib/python3.7/site-packages/statsmodels/tsa/base/tsa_model.py:527: ValueWarning: No frequency information was provided, so inferred frequency W-MON will be used.
% freq, ValueWarning)
/root/venv/lib/python3.7/site-packages/statsmodels/tsa/base/tsa_model.py:527: ValueWarning: No frequency information was provided, so inferred frequency W-MON will be used.
% freq, ValueWarning)
KeyboardInterrupt:
-234.97538573390648
import statsmodels.tsa.stattools as ts
#cointegration_result = ts.coint(BCOMCO_Weekly, Gas_reg)
# BCOMCO_Weekly
#print(BCOMCL_Weekly.head())
#print(BCOMCL_Weekly.tail())
# print(Gas_reg.head())
#print(Gas_reg.tail())
#print(type(BCOMCL_Weekly))
#print(type(Gas_reg))
result = pd.concat([master['Oil_Future'].dropna().pct_change(), master['Gas_Price'].dropna().pct_change()], axis = 1)
result = result[1:]
result = result['2009-01-02':'2020-01-02']
print(result)
result = result.dropna()
#x1=result['Close_x']
#y1=result['Close_y']
coin_result = ts.coint(result['Oil_Future'], result['Gas_Price'])
Oil_Future Gas_Price
Date
2009-01-05 0.212963 0.040120
2009-01-12 -0.131021 0.056419
2009-01-20 -0.050084 0.034332
2009-01-26 0.069383 -0.004215
2009-02-02 -0.066667 0.028571
... ... ...
2019-12-02 -0.027450 -0.001871
2019-12-09 0.050639 -0.005624
2019-12-16 0.017857 -0.009427
2019-12-23 0.015681 -0.002284
2019-12-30 0.019108 0.014117
[574 rows x 2 columns]
print(coin_result)
(-10.858306448685779, 1.837427888845542e-18, array([-3.94986831, -3.36566319, -3.06490325]))