!pip install pandas-datareader==0.10.0
!pip install statsmodels==0.13.2
import os
import datetime as dt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pandas_datareader.data as web
from scipy import stats
import statsmodels.api as sm
import seaborn as sns
# Local directory used to cache the downloaded series as CSV files.
data_dir = "./data/oil_price_analysis"
os.makedirs(data_dir, exist_ok=True)

# Requested date window for the download.
start, end = dt.datetime(1950, 1, 1), dt.datetime(2022, 7, 15)

# Fetch the 'DCOILWTICO' series from the 'fred' data source.
wti_oil_price = web.DataReader(
    'DCOILWTICO', data_source='fred', start=start, end=end
)

# Write the raw download to disk, then show it (notebook display expression).
file_path = f"{data_dir}/DCOILWTICO.csv"
wti_oil_price.to_csv(file_path)
wti_oil_price

# Reload from the CSV so the rest of the analysis works from the cached copy;
# the DATE column becomes the (parsed) index.
wti_oil_price = pd.read_csv(file_path, parse_dates=True, index_col="DATE")
print(wti_oil_price.shape)
wti_oil_price.tail(3)
# Line chart of the full WTI series, drawn through the Axes interface.
wti_fig, wti_ax = plt.subplots(figsize=(10, 6))
wti_ax.plot(wti_oil_price)
wti_ax.grid(axis="y")  # horizontal grid lines only
wti_ax.set_xlabel("Date")
wti_ax.set_ylabel("Oil Price ($/Barrel)")
plt.show()
# Examples of label-based selection on the date index (the index was parsed
# as dates when the CSV was read). Each line is a notebook display expression.

# A single day, addressed by its date string.
wti_oil_price.loc['2020-04-20']
# A date range given as a label slice; with .loc both endpoints are included.
wti_oil_price.loc['2020-04-20':'2020-04-30']
# The same single day, addressed with a datetime object instead of a string.
wti_oil_price.loc[dt.datetime(2020, 4, 20)]
# Partial date string: every row in April 2020.
wti_oil_price.loc['2020-04']
# Partial date string: every row in the year 2020.
wti_oil_price.loc['2020']
# Same download window as used for the WTI series.
start, end = dt.datetime(1950, 1, 1), dt.datetime(2022, 7, 15)

# Fetch the 'DCOILBRENTEU' series from the 'fred' data source.
brent_oil_price = web.DataReader(
    'DCOILBRENTEU', data_source='fred', start=start, end=end
)

# Cache the raw download as CSV and continue from the on-disk copy,
# with the DATE column parsed and used as the index.
file_path = f"{data_dir}/DCOILBRENTEU.csv"
brent_oil_price.to_csv(file_path)
brent_oil_price = pd.read_csv(file_path, parse_dates=True, index_col="DATE")

print(brent_oil_price.shape)
brent_oil_price.tail(3)
brent_oil_price

# Discard rows that contain a missing value before any further processing.
brent_oil_price = brent_oil_price.dropna(axis=0, how="any")
# Line chart of the cleaned Brent series, drawn through the Axes interface.
brent_fig, brent_ax = plt.subplots(figsize=(10, 6))
brent_ax.plot(brent_oil_price)
brent_ax.grid(axis="y")  # horizontal grid lines only
brent_ax.set_xlabel("Date")
brent_ax.set_ylabel("Oil Price ($/Barrel)")
plt.show()
# Build a monthly series from the daily data by keeping, for every calendar
# month, the last available observation of that month.
#
# Grouping on (year, month) and taking the tail of each group does this in a
# single pass; it replaces a nested year x month loop that re-scanned the
# whole frame with a boolean mask for every combination.
brent_oil_price_monthly = brent_oil_price.groupby(
    [brent_oil_price.index.year, brent_oil_price.index.month]
).tail(1)

# Re-label each row with the first day of its month (YYYY-MM-01) so the
# monthly observations sit on a regular month-start index.
brent_oil_price_monthly.index = pd.to_datetime(
    brent_oil_price_monthly.index.strftime("%Y-%m"), format="%Y-%m"
)
brent_oil_price_monthly.tail()
# Overlay the daily series and its monthly down-sample on one chart.
cmp_fig, cmp_ax = plt.subplots(figsize=(10, 6))
for series, series_label in (
    (brent_oil_price, "Daily"),
    (brent_oil_price_monthly, "Monthly"),
):
    cmp_ax.plot(series, label=series_label)
cmp_ax.grid(axis="y")
cmp_ax.set_xlabel("Date")
cmp_ax.set_ylabel("Oil Price ($/Barrel)")
cmp_ax.legend(loc="upper left")
plt.show()
# Rolling means of the daily series over 50 and 200 observations.
brent_oil_price_moving_average_50 = brent_oil_price.rolling(window=50).mean()
brent_oil_price_moving_average_200 = brent_oil_price.rolling(window=200).mean()

# Plot the raw series together with both averages.
ma_fig, ma_ax = plt.subplots(figsize=(10, 6))
for series, series_label in (
    (brent_oil_price, "Original"),
    (brent_oil_price_moving_average_50, "50-Day Moving Average"),
    (brent_oil_price_moving_average_200, "200-Day Moving Average"),
):
    ma_ax.plot(series, label=series_label)
ma_ax.grid(axis="y")
ma_ax.set_xlabel("Date")
ma_ax.set_ylabel("Oil Price ($/Barrel)")
ma_ax.legend(loc="upper left")
# Zoom in on the period from 2019 up to the latest available observation.
ma_ax.set_xlim(dt.datetime(2019, 1, 1), brent_oil_price.index.max())
plt.show()
# Month-over-month price change: each value minus the previous month's value.
# The first row has no predecessor and is therefore NaN.
brent_oil_price_diff = brent_oil_price_monthly - brent_oil_price_monthly.shift()
brent_oil_price_diff.head()
# The built-in diff() gives the same result as the manual shift-and-subtract.
brent_oil_price_monthly.diff().head()

# Time series of the monthly changes.
plt.plot(brent_oil_price_diff)
plt.grid(axis="y")
plt.xlabel("Date")
plt.ylabel("Oil Price Change ($/Barrel)")
plt.show()

# Distribution of the monthly changes. The leading NaN is dropped before
# binning, as is done for the log-difference histogram and the correlograms
# elsewhere in this file, so the histogram never receives missing values.
plt.hist(brent_oil_price_diff.dropna(), bins=20)
plt.xlabel("Oil Price Change ($/Barrel)")
plt.xlim(-37, 37)  # symmetric window around zero
plt.show()
# Log-difference of the monthly series: log(price_t) - log(price_{t-1}).
brent_oil_price_logdiff = np.log(brent_oil_price_monthly).diff()

# The first row is NaN (no previous month); drop it once for both panels.
logdiff_valid = brent_oil_price_logdiff.dropna()

# Left panel: the changes over time. Right panel: their distribution.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))
series_ax, hist_ax = axes

series_ax.plot(logdiff_valid)
series_ax.grid(axis="y")
series_ax.set_xlabel("Date")
series_ax.set_ylabel("Oil Price Change")
series_ax.set_ylim(-1.5, 1.5)

hist_ax.hist(logdiff_valid, bins=20)
hist_ax.set_xlabel("Oil Price Change")
hist_ax.set_xlim(-1.5, 1.5)

plt.show()
# Autocorrelation of the monthly price level for lags up to 20.
# The bare name on the following line is a notebook display expression.
acf = sm.tsa.stattools.acf(brent_oil_price_monthly, nlags=20)
acf
# Partial autocorrelation of the monthly price level for lags up to 20,
# estimated with the 'ols' method.
pacf = sm.tsa.stattools.pacf(brent_oil_price_monthly, nlags=20, method='ols')
pacf
# Correlograms of the monthly price level: ACF on top, PACF below.
# The plot functions return the figure that owns the given axes; assigning
# the result back to `fig` keeps a notebook from rendering the figure twice.
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(brent_oil_price_monthly, lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
# method="ols" is passed explicitly so the plotted PACF uses the same
# estimator as the numeric pacf computed earlier in this file, instead of
# the library default (which statsmodels 0.13 warns about and which can
# yield values outside [-1, 1] on a trending series).
fig = sm.graphics.tsa.plot_pacf(
    brent_oil_price_monthly, lags=40, method="ols", ax=ax2
)

# Same pair of correlograms for the month-over-month price change; the
# leading NaN produced by differencing is dropped first.
fig = plt.figure(figsize=(12, 8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(brent_oil_price_diff.dropna(), lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(
    brent_oil_price_diff.dropna(), lags=40, method="ols", ax=ax2
)