from dateutil.parser import parse
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
# Default figure size and resolution for every plot created below.
plt.rcParams.update({
    'figure.figsize': (10, 7),
    'figure.dpi': 120,
})

# Load the 'data' sheet into a DataFrame, parsing the Date column as datetimes.
df = pd.read_excel('Test 2.xlsx', sheet_name='data', parse_dates=['Date'])

# Notebook-style display expressions (no effect when run as a plain script).
df.head()
df

# Every column from the third one onwards is treated as a feature to analyse.
all_features = df.columns[2:].tolist()
# Two-sided ("mirror") area chart of every feature against Date: the series
# and its negation are filled symmetrically around a zero baseline.
x = df['Date'].values
# The date extent is the same for every feature, so compute it once.
date_min = df['Date'].min()
date_max = df['Date'].max()

for key in all_features:
    y1 = df[key].values

    # Plot
    fig, ax = plt.subplots(1, 1, figsize=(16, 5), dpi=120)
    ax.fill_between(x, y1=y1, y2=-y1, alpha=0.5, linewidth=2, color='seagreen')

    # The filled band spans [-|y|, |y|], so the symmetric limit must be the
    # largest magnitude (not the largest signed value, which inverts or clips
    # the axis for negative data). nanmax ignores missing observations.
    max_element = np.nanmax(np.abs(y1))
    if np.isfinite(max_element) and max_element > 0:
        ax.set_ylim(-max_element, max_element)

    ax.set_title(f'{key} vs Date (Two Side View)', fontsize=16)
    ax.hlines(y=0, xmin=date_min, xmax=date_max, linewidth=.5)
    plt.show()
from statsmodels.tsa.seasonal import seasonal_decompose

# Re-read the sheet with Date as the index so the series carry a time index.
df = pd.read_excel('Test 2.xlsx', sheet_name='data', parse_dates=['Date'], index_col='Date')


def _try_decompose(series, model):
    """Return the seasonal decomposition of *series*, or None if it fails.

    Decomposition is best-effort: a feature that cannot be decomposed (for
    example a multiplicative model on a series containing zeros or negative
    values) is reported and skipped instead of aborting the whole run.
    """
    try:
        return seasonal_decompose(series, model=model, extrapolate_trend='freq')
    except Exception as exc:  # deliberately broad, but never silent
        print(f'Skipping {model} decomposition for {series.name}: {exc}')
        return None


# Large canvas for the four-panel decomposition figures (set once, not per feature).
plt.rcParams.update({'figure.figsize': (20, 20)})

for key in all_features:
    # Multiplicative Decomposition
    result_mul = _try_decompose(df[key], 'multiplicative')
    # Additive Decomposition
    result_add = _try_decompose(df[key], 'additive')

    # Plot whichever decompositions succeeded; one failing model no longer
    # suppresses the other.
    if result_mul is not None:
        result_mul.plot().suptitle(f'Multiplicative Decompose for {key}', fontsize=22)
    if result_add is not None:
        result_add.plot().suptitle(f'Additive Decompose for {key}', fontsize=22)
    if result_mul is not None or result_add is not None:
        plt.show()
from statsmodels.tsa.stattools import adfuller, kpss

# Run the ADF and KPSS stationarity tests on every feature and print the
# statistic, p-value and critical values of each.
for col in all_features:
    print(f'Test for {col} : \n')
    try:
        # ADF Test
        result = adfuller(df[col].values, autolag='AIC')
        print(f'ADF Statistic: {result[0]}')
        print(f'p-value: {result[1]}')
        # Header printed once per test rather than once per critical value.
        print('Critical Values:')
        for level, value in result[4].items():
            print(f'{level}, {value}')

        # KPSS Test
        result = kpss(df[col].values, regression='c')
        print(f'\nKPSS Statistic: {result[0]:f}')
        print(f'p-value: {result[1]:f}')
        print('Critical Values:')
        for level, value in result[3].items():
            print(f' {level}, {value}')
        print('\n\n')
    except Exception as exc:
        # Best-effort: report why this feature was skipped instead of
        # swallowing the error (and KeyboardInterrupt) with a bare except.
        print(f'Could not run stationarity tests for {col}: {exc}\n')
# Overlay each feature with its 2-period rolling mean and standard deviation.
for col in all_features:
    series = pd.Series(df[col])

    # Determine rolling statistics
    rolmean = series.rolling(window=2).mean()
    rolstd = series.rolling(window=2).std()

    # Plot rolling statistics
    curves = (
        (df[col], 'blue', 'Original'),
        (rolmean, 'red', 'Rolling Mean'),
        (rolstd, 'black', 'Rolling Std'),
    )
    for data, colour, label in curves:
        plt.plot(data, color=colour, label=label)

    plt.legend(loc='best')
    plt.title(f'Rolling Mean & Standard Deviation for {col}')
    plt.show()
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# For each feature: plot the raw series, then its autocorrelation at
# differencing orders 0, 1 and 2.
for col in all_features:
    series = df[col]
    plt.plot(series)

    first_diff = series.diff()
    second_diff = first_diff.diff()
    for candidate in (series, first_diff.dropna(), second_diff.dropna()):
        plot_acf(candidate)

    print(col)
    plt.show()
# Show the 1st, 2nd and 3rd order differences of each feature side by side.
for col in all_features:
    plt.rcParams.update({'figure.figsize': (9, 3), 'figure.dpi': 120})
    fig, axes = plt.subplots(1, 3, sharex=True)

    # Each panel differences the previous panel's series once more.
    differenced = df[col]
    for panel in axes:
        differenced = differenced.diff()
        panel.plot(differenced)

    plt.show()
from statsmodels.tsa.arima.model import ARIMA

# Fit an ARIMA model with order (p=1, d=2, q=2) to the Oil series.
oil_model = ARIMA(df['Oil'], order=(1, 2, 2))
oil_model_fit = oil_model.fit()
print(oil_model_fit.summary())

# Residual diagnostics: residuals as a line plot (left) and their kernel
# density estimate (right).
residuals = pd.DataFrame(oil_model_fit.resid)
fig, ax = plt.subplots(1, 2)
panel_options = (
    {'title': "Residuals"},
    {'kind': 'kde', 'title': 'Density'},
)
for panel, options in zip(ax, panel_options):
    residuals.plot(ax=panel, **options)
plt.show()
from datetime import datetime, timedelta

# Predict Oil for 11 consecutive days starting at the base date.
base = datetime.strptime('2022-07-21', '%Y-%m-%d')
date_list = [base + timedelta(days=offset) for offset in range(11)]

# Request exactly one date per call (start == end). With `end` left unset the
# prediction window ends at the last in-sample observation, which fails for
# dates beyond the sample. `.iloc[0]` selects the value by position; plain
# `[0]` on a date-indexed Series relies on deprecated positional fallback.
predicted_oil = [
    round(oil_model_fit.predict(start=day, end=day).iloc[0])
    for day in date_list
]
predicted_oil
# Import ARIMA from the maintained module. The legacy
# `statsmodels.tsa.arima_model.ARIMA` has been removed from statsmodels and
# rebinding the name to it breaks every later `ARIMA(...)` call in this file.
from statsmodels.tsa.arima.model import ARIMA
import pmdarima as pm

# Stepwise search for the best non-seasonal ARIMA order for the Oil series.
model = pm.auto_arima(
    df['Oil'],
    start_p=1, start_q=1,
    test='adf',            # use adftest to find optimal 'd'
    max_p=3, max_q=3,      # maximum p and q
    m=1,                   # frequency of series
    d=None,                # let model determine 'd'
    seasonal=False,        # No Seasonality
    start_P=0,
    D=0,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
    stepwise=True,
)
print(model.summary())

model.plot_diagnostics(figsize=(7, 5))
plt.show()

# Refit the selected model on the Oil series and forecast the next 11 periods.
# NOTE: this rebinds `oil_model`, previously the statsmodels ARIMA instance.
oil_model = model.fit(df['Oil'])
oil_model.predict(n_periods=11)
# Bind ARIMA to the maintained implementation explicitly: an earlier import of
# the removed `statsmodels.tsa.arima_model.ARIMA` would otherwise make the
# constructor call below fail.
from statsmodels.tsa.arima.model import ARIMA

# Fit an ARIMA model with order (p=1, d=2, q=2) to the Gas series.
gas_model = ARIMA(df['Gas'], order=(1, 2, 2))
gas_model_fit = gas_model.fit()

# NOTE(review): `df_output` is not defined anywhere in the visible part of
# this file, so this line raises NameError unless it is created elsewhere.
# TODO confirm where the output frame is supposed to be built.
df_output['NGL'] = 0