Question 2 - Analyze Time Series

from dateutil.parser import parse
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

# Default figure size and resolution for every plot created after this point.
plt.rcParams.update(
    {
        'figure.figsize': (10, 7),
        'figure.dpi': 120,
    }
)

# Load the 'data' sheet of the workbook into a DataFrame, parsing the 'Date'
# column as datetimes.
df = pd.read_excel(
    'Test 2.xlsx',
    sheet_name='data',
    parse_dates=['Date'],
)

# Preview the first rows (rendered when run as a notebook cell).
df.head()

# Display the loaded frame (rendered when run as a notebook cell).
df

# Labels of every column from the third one onward; these are the series
# analysed below. The first two columns are skipped -- presumably 'Date' and
# one other non-feature column; confirm against the workbook.
all_features = df.columns[2:].tolist()

# Mirrored ("two side") area chart of each feature against the date axis:
# the band between +value and -value is filled, with a thin zero line.
x = df['Date'].values  # same for every feature, so read it once

for key in all_features:
    y1 = df[key].values

    fig, ax = plt.subplots(1, 1, figsize=(16, 5), dpi=120)
    ax.fill_between(x, y1=y1, y2=-y1, alpha=0.5, linewidth=2, color='seagreen')

    # The filled band spans +/-|value|, so the symmetric y-limits must come
    # from the largest magnitude (ignoring NaN), not from the largest signed
    # value -- otherwise a series dominated by negative values is clipped.
    y_limit = np.nanmax(np.abs(y1))
    ax.set_ylim(-y_limit, y_limit)

    ax.set_title(f'{key} vs Date (Two Side View)', fontsize=16)
    ax.hlines(y=0, xmin=df['Date'].min(), xmax=df['Date'].max(), linewidth=.5)
    plt.show()

from statsmodels.tsa.seasonal import seasonal_decompose

# Reload the sheet with 'Date' as the index so each column is a
# date-indexed series for the time-series routines below.
df = pd.read_excel(
    'Test 2.xlsx',
    sheet_name='data',
    parse_dates=['Date'],
    index_col='Date',
)

# Large canvas for the multi-panel decomposition figures.
plt.rcParams.update({'figure.figsize': (20, 20)})

# Plot a multiplicative and an additive decomposition for every feature.
# Each model is attempted independently, so a series that cannot be
# decomposed multiplicatively still gets its additive plot, and every
# failure is reported instead of being silently discarded.
for key in all_features:
    for label, model_kind in (
        ('Multiplicative', 'multiplicative'),
        ('Additive', 'additive'),
    ):
        try:
            result = seasonal_decompose(
                df[key], model=model_kind, extrapolate_trend='freq'
            )
        except Exception as exc:  # best-effort per column: report and move on
            print(f'Skipping {model_kind} decomposition for {key}: {exc}')
            continue
        result.plot().suptitle(f'{label} Decompose for {key}', fontsize=22)
    plt.show()

from statsmodels.tsa.stattools import adfuller, kpss

# Run the ADF and KPSS tests on every feature and print the statistic,
# p-value and critical values of each. A failure on one column is reported
# and the loop continues with the next column.
for col in all_features:
    print(f'Test for {col} : \n')
    try:
        # ADF Test
        result = adfuller(df[col].values, autolag='AIC')
        print(f'ADF Statistic: {result[0]}')
        print(f'p-value: {result[1]}')
        # Header printed once, before the entries (not once per entry).
        print('Critical Values:')
        for key, value in result[4].items():
            print(f'{key}, {value}')

        # KPSS Test
        result = kpss(df[col].values, regression='c')
        print(f'\nKPSS Statistic: {result[0]:f}')
        print(f'p-value: {result[1]:f}')
        print('Critical Values:')
        for key, value in result[3].items():
            print(f' {key}, {value}')
        print('\n\n')
    except Exception as exc:  # best-effort per column: say why it was skipped
        print(f'Stationarity tests failed for {col}: {exc}\n')

# For each feature, overlay the series with its rolling mean and rolling
# standard deviation computed over a 2-observation window.
for col in all_features:
    series = df[col]
    two_point_window = series.rolling(window=2)
    rolmean = two_point_window.mean()
    rolstd = two_point_window.std()

    # (data, line colour, legend label) for each curve, in drawing order.
    curves = (
        (series, 'blue', 'Original'),
        (rolmean, 'red', 'Rolling Mean'),
        (rolstd, 'black', 'Rolling Std'),
    )
    for data, line_color, legend_label in curves:
        plt.plot(data, color=line_color, label=legend_label)

    plt.legend(loc='best')
    plt.title(f'Rolling Mean & Standard Deviation for {col}')
    plt.show()

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# For each feature: plot the raw series, then the autocorrelation of the
# series itself, of its first difference and of its second difference.
for col in all_features:
    series = df[col]
    first_difference = series.diff()
    second_difference = first_difference.diff()

    plt.plot(series)
    plot_acf(series)
    plot_acf(first_difference.dropna())
    plot_acf(second_difference.dropna())

    print(col)
    plt.show()

# For each feature, draw the 1st, 2nd and 3rd order differences side by side
# on three panels that share an x-axis.
for col in all_features:
    plt.rcParams.update({'figure.figsize': (9, 3), 'figure.dpi': 120})
    fig, axes = plt.subplots(1, 3, sharex=True)

    # Difference once more per panel: panel k shows the (k+1)-th difference.
    differenced = df[col]
    for panel in axes:
        differenced = differenced.diff()
        panel.plot(differenced)

    plt.show()

from statsmodels.tsa.arima.model import ARIMA

# Fit an ARIMA model of order (p, d, q) = (1, 2, 2) to the 'Oil' series and
# print the fitted model's summary table.
oil_model = ARIMA(endog=df['Oil'], order=(1, 2, 2))
oil_model_fit = oil_model.fit()

oil_summary = oil_model_fit.summary()
print(oil_summary)

# Plot the residuals of the fitted oil model: the residual series on the
# left panel and its kernel density estimate on the right panel.
residuals = pd.DataFrame(oil_model_fit.resid)

fig, ax = plt.subplots(1, 2)
series_panel, density_panel = ax
residuals.plot(title="Residuals", ax=series_panel)
residuals.plot(kind='kde', title='Density', ax=density_panel)
plt.show()

from datetime import datetime, timedelta

# Eleven consecutive calendar days starting at 2022-07-21 (inclusive),
# i.e. 2022-07-21 through 2022-07-31.
base = datetime.strptime('2022-07-21', '%Y-%m-%d')
one_day = timedelta(days=1)
date_list = [base + one_day * offset for offset in range(11)]

# Rounded point prediction of the fitted oil model for each date in
# date_list.
# - `.iloc[0]` selects the first value by position. Plain `[0]` on a
#   date-indexed Series relies on a deprecated positional fallback and is
#   treated as a label lookup by newer pandas.
# - `end=day` limits each call to the single requested date instead of
#   predicting through to the end of the sample.
predicted_oil = [
    round(oil_model_fit.predict(start=day, end=day).iloc[0])
    for day in date_list
]

# Display the predictions (rendered when run as a notebook cell).
predicted_oil

# Import ARIMA from the maintained module. The legacy
# `statsmodels.tsa.arima_model.ARIMA` has been removed from statsmodels, and
# importing it here would rebind the name ARIMA for every later cell.
from statsmodels.tsa.arima.model import ARIMA
import pmdarima as pm

# Search for a non-seasonal ARIMA order for the 'Oil' series with a stepwise
# search, then print the summary of the selected model.
model = pm.auto_arima(
    df['Oil'],
    start_p=1,
    start_q=1,
    test='adf',              # use adftest to find optimal 'd'
    max_p=3,                 # maximum p and q
    max_q=3,
    m=1,                     # frequency of series
    d=None,                  # let model determine 'd'
    seasonal=False,          # No Seasonality
    start_P=0,
    D=0,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
    stepwise=True,
)
print(model.summary())

# Draw the diagnostic plots of the model selected by auto_arima on a
# (7, 5) figure and show them.
diagnostics_figsize = (7, 5)
model.plot_diagnostics(figsize=diagnostics_figsize)
plt.show()

# Fit `model` on the 'Oil' series, keep the fitted estimator as `oil_model`,
# and forecast the next 11 periods (value rendered when run as a notebook
# cell).
oil_series = df['Oil']
oil_model = model.fit(oil_series)
oil_model.predict(n_periods=11)

# Import the maintained ARIMA implementation here so this fit does not
# depend on whichever `ARIMA` an earlier import left bound; the legacy
# `statsmodels.tsa.arima_model.ARIMA` has been removed from statsmodels and
# cannot be instantiated.
from statsmodels.tsa.arima.model import ARIMA

# Fit an ARIMA model of order (p, d, q) = (1, 2, 2) to the 'Gas' series.
gas_model = ARIMA(df['Gas'], order=(1, 2, 2))
gas_model_fit = gas_model.fit()

# Set the 'NGL' entry of df_output to the constant 0.
# NOTE(review): df_output is not defined anywhere in the visible part of this
# file -- presumably an output DataFrame built elsewhere; confirm it exists
# before this statement runs.
df_output['NGL'] = 0