!pip install pandas-datareader==0.10.0
!pip install fastdtw==0.3.4
import os
import datetime as dt
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pandas_datareader.data as web
from sklearn import preprocessing
from sklearn.metrics.pairwise import cosine_similarity
from fastdtw import fastdtw
# Local directory and CSV path used to keep a copy of the downloaded data.
data_dir = "./data/nasdaq_bear_analysis"
os.makedirs(data_dir, exist_ok=True)
nasdaq100_file_path = f"{data_dir}/nasdaq100.csv"

# Fetch the Nasdaq 100 index (^NDX) history from the 'yahoo' data source.
nasdaq_100 = web.DataReader(
    '^NDX',
    'yahoo',
    start='1980-01-01',
    end='2022-07-16',
)
print(nasdaq_100.shape)

# Write the download to disk, then reload it from the CSV so the rest of the
# analysis works on the saved copy, indexed by the parsed "Date" column.
nasdaq_100.to_csv(nasdaq100_file_path)
nasdaq_100 = pd.read_csv(
    nasdaq100_file_path,
    index_col="Date",
    parse_dates=True,
)
nasdaq_100.head(3)
# Line chart of the closing price over the whole loaded history.
close_prices = nasdaq_100["Close"]
fig, ax = plt.subplots(figsize=(15, 8))
ax.plot(close_prices.index, close_prices)
ax.set_title("Nasdaq 100 historical price")
ax.set_xlabel("Date")
ax.set_ylabel("Price")
ax.grid(axis="y")  # horizontal guide lines only
plt.show()
def _normalized_close(prices, start, end):
    """Return the "Close" column of *prices* from *start* to *end*, rebased.

    Every value in the ``start:end`` label slice is divided by the close
    looked up at ``prices.loc[start, "Close"]``, so the value at *start*
    becomes 1.0.

    Args:
        prices: DataFrame with a "Close" column, indexed by date.
        start: Label of the first row; also the row used as the divisor.
        end: Label of the last row of the slice.

    Returns:
        The sliced "Close" values divided by the close at *start*.
    """
    return prices.loc[start:end, "Close"] / prices.loc[start, "Close"]


# One rebased closing-price series per bear market. The date strings are the
# window boundaries of each event.
ts_black_monday = _normalized_close(nasdaq_100, "1987-10-05", "1987-10-26")
ts_1990_recession = _normalized_close(nasdaq_100, "1990-07-16", "1990-10-11")
ts_dotcom = _normalized_close(nasdaq_100, "2000-03-27", "2002-10-07")
ts_financial_crisis = _normalized_close(nasdaq_100, "2008-06-05", "2009-03-09")
# The end bound used to be written " 2020-03-20" (stray leading space).
ts_covid19_pandemic = _normalized_close(nasdaq_100, "2020-02-19", "2020-03-20")
ts_current_bear = _normalized_close(nasdaq_100, "2021-11-19", "2022-07-16")
ts_current_bear
# (series, line opacity, legend label) for every curve, in drawing order.
bear_market_curves = [
    (ts_black_monday, 1, "Black Monday"),
    (ts_1990_recession, 0.5, "early 1990s recession"),
    (ts_dotcom, 0.5, "Dotcom Bubble"),
    (ts_financial_crisis, 0.5, "Financial Crisis"),
    (ts_covid19_pandemic, 1, "COVID-19 recession"),
    (ts_current_bear, 0.7, "current bear market"),
]

fig, ax = plt.subplots(figsize=(15, 8))
# Plot against the observation count rather than the date so that all events
# start at x = 0 and can be compared by elapsed trading days.
for curve, opacity, legend_label in bear_market_curves:
    ax.plot(range(len(curve)), curve, alpha=opacity, label=legend_label)
# NOTE(review): single red "x" at (211.8, 1 - 0.5246); where these two numbers
# come from is not shown in this file -- confirm with the author.
ax.scatter([211.8], [1 - 0.5246], marker="x", color="red")
ax.set_ylim(0, 1)
ax.set_title("Nasdaq 100 historical bear market")
ax.set_xlabel("Duration(in trading days)")
ax.set_ylabel("Price")
ax.grid(axis="y")
ax.legend()
plt.show()
# Display names of the six bear markets. Order matters: the per-event lists
# built elsewhere in this file are paired with these names by position.
# Two labels were misspelled ("Crirsis", "Pandmic") and are corrected here;
# they are shown in every table, legend and heatmap that uses this list.
event_name_list = [
    "Black Monday",
    "Early 1990s Recession",
    "Dotcom Bubble",
    "Financial Crisis",
    "COVID19 Pandemic",
    "Current Bear",
]
# Largest decline of a series relative to a starting level of 1.
def calculate_downside(data):
    """Return ``1 - data.min()``.

    For a price series that has been rebased to 1.0 at its start, this is the
    fractional drop from the starting level to the lowest value.

    Args:
        data: Object exposing ``.min()`` (e.g. a pandas Series or NumPy array).

    Returns:
        One minus the minimum of *data*.
    """
    return 1 - data.min()
# Feature 1: downside of every event, in the same order as event_name_list.
decrease_rate = [
    calculate_downside(event_series)
    for event_series in (
        ts_black_monday,
        ts_1990_recession,
        ts_dotcom,
        ts_financial_crisis,
        ts_covid19_pandemic,
        ts_current_bear,
    )
]
df_feature1 = pd.DataFrame(
    {
        "Name": event_name_list,
        "Decrease Rate": decrease_rate,
    }
)
df_feature1
def calculate_slope(data):
    """Return the downside of *data* divided by its number of observations.

    Computes ``(1 - data.min()) / len(data)``. Note that the divisor is the
    full length of the series, not the position at which the minimum occurs.

    Args:
        data: Sized object exposing ``.min()`` (e.g. a pandas Series).

    Returns:
        One minus the minimum of *data*, divided by ``len(data)``.
    """
    return (1 - data.min()) / len(data)
# Feature 2: downside per observation for every event, in the same order as
# event_name_list.
decrease_speed = [
    calculate_slope(event_series)
    for event_series in (
        ts_black_monday,
        ts_1990_recession,
        ts_dotcom,
        ts_financial_crisis,
        ts_covid19_pandemic,
        ts_current_bear,
    )
]
df_feature2 = pd.DataFrame(
    {
        "Name": event_name_list,
        "Decrease Speed": decrease_speed,
    }
)
df_feature2
# Feature 3: number of observations in each event's series, in the same order
# as event_name_list.
duration = list(
    map(
        len,
        (
            ts_black_monday,
            ts_1990_recession,
            ts_dotcom,
            ts_financial_crisis,
            ts_covid19_pandemic,
            ts_current_bear,
        ),
    )
)
df_feature3 = pd.DataFrame({"Name": event_name_list, "Duration": duration})
df_feature3

# Feature 4: decrease rate and duration side by side.
df_feature4 = pd.DataFrame(
    {
        "Name": event_name_list,
        "Decrease Rate": decrease_rate,
        "Duration": duration,
    }
)
df_feature4
# Put both features on a comparable scale with sklearn's preprocessing.scale
# before plotting them against each other.
decrease_rate_scaled = preprocessing.scale(decrease_rate)
duration_scaled = preprocessing.scale(duration)

fig, ax = plt.subplots(figsize=(8, 8))
# One diamond marker per event, labelled with the event name for the legend.
for event, rate_value, duration_value in zip(
    event_name_list, decrease_rate_scaled, duration_scaled
):
    ax.scatter(rate_value, duration_value, marker="D", label=event)
ax.set_xlim(-2.5, 2.5)
ax.set_ylim(-2.5, 2.5)
ax.set_xlabel("Decrease Rate")
ax.set_ylabel("Duration(trading days)")
ax.grid(linestyle="--")
ax.legend()
plt.show()
# Scaled version of the decrease-rate / duration feature table.
df_feature4_1 = pd.DataFrame(
    {"Name": event_name_list, "Decrease Rate": decrease_rate_scaled, "Duration": duration_scaled})
df_feature4_1

# Cosine similarity between every event's scaled feature vector and the
# reference event. The reference is the last row of the table (the final
# entry of event_name_list) instead of the hard-coded row label 5, and all
# rows are compared in a single cosine_similarity call rather than one call
# per row.
feature_columns = ["Decrease Rate", "Duration"]
feature_matrix = df_feature4_1[feature_columns]
reference_row = feature_matrix.iloc[[-1]]  # double brackets keep it 2-D
# cosine_similarity returns an (n_events, 1) array; column 0 holds the values.
similarity_list = cosine_similarity(feature_matrix, reference_row)[:, 0].tolist()
df_feature4_1["Similarity"] = similarity_list
df_feature4_1
fig, ax = plt.subplots(figsize=(8, 8))
# Same scatter of scaled features as before: one diamond per event.
for event, rate_value, duration_value in zip(
    event_name_list, decrease_rate_scaled, duration_scaled
):
    ax.scatter(rate_value, duration_value, marker="D", label=event)

# Lines from the origin to the points at positions 5 (solid) and 4 (dotted),
# making the angle between those two feature vectors visible.
solid_end = (decrease_rate_scaled[5], duration_scaled[5])
dotted_end = (decrease_rate_scaled[4], duration_scaled[4])
ax.plot([0, solid_end[0]], [0, solid_end[1]], color="black", alpha=0.6)
ax.plot([0, dotted_end[0]], [0, dotted_end[1]], color="black", linestyle="dotted", alpha=0.6)

ax.set_xlim(-2.5, 2.5)
ax.set_ylim(-2.5, 2.5)
ax.set_xlabel("Decrease Rate")
ax.set_ylabel("Duration(trading days)")
ax.grid(linestyle="--")
ax.legend()
plt.show()
def calc_dtw(l_sr):
    """Compute the fastdtw distance for every ordered pair of labelled series.

    All ordered pairs are evaluated, including each series against itself and
    both (a, b) and (b, a).

    Args:
        l_sr: Iterable of items whose element 0 is a hashable label and whose
            element 1 is the sequence passed to ``fastdtw``.

    Returns:
        dict mapping ``(label_a, label_b)`` to the distance returned by
        ``fastdtw``. Keys are inserted in ``itertools.product`` order
        (row-major), which callers that reshape the values into a square
        matrix rely on. Duplicate labels overwrite each other.
    """
    # fastdtw returns (distance, path); only the distance is kept.
    return {
        (first[0], second[0]): fastdtw(first[1], second[1])[0]
        for first, second in itertools.product(l_sr, repeat=2)
    }
# Pairwise DTW distances between all events.
# The series must be listed in the same order as event_name_list because the
# two are paired by position. The first two series used to be swapped
# (1990 recession before Black Monday), which attached each of those two
# events to the other's label.
dict_dtw = calc_dtw(list(zip(event_name_list,
                             [ts_black_monday,
                              ts_1990_recession,
                              ts_dotcom,
                              ts_financial_crisis,
                              ts_covid19_pandemic,
                              ts_current_bear])))
dict_dtw

# calc_dtw fills the dict in row-major pair order, so its values reshape
# directly into a square matrix; size it from the name list instead of a
# hard-coded 6.
n_events = len(event_name_list)
dist_matrix = np.array(list(dict_dtw.values())).reshape(n_events, n_events)
df_dist_matrix = pd.DataFrame(data=dist_matrix, index=event_name_list, columns=event_name_list)
plt.figure(figsize=(8, 6))
sns.heatmap(df_dist_matrix, square=True, annot=True, cmap='Blues')
plt.show()
# Same DTW comparison with the third event (index 2 of event_name_list, the
# dot-com series) left out.
event_name_list_2 = event_name_list[:2] + event_name_list[3:]
# The series must follow the order of event_name_list_2 because the two are
# paired by position. The first two series used to be swapped, which
# mislabelled them.
dict_dtw_2 = calc_dtw(list(zip(event_name_list_2,
                               [ts_black_monday,
                                ts_1990_recession,
                                ts_financial_crisis,
                                ts_covid19_pandemic,
                                ts_current_bear])))

# Size the square matrix from the name list instead of a hard-coded 5.
n_events_2 = len(event_name_list_2)
dist_matrix_2 = np.array(list(dict_dtw_2.values())).reshape(n_events_2, n_events_2)
df_dist_matrix_2 = pd.DataFrame(data=dist_matrix_2, index=event_name_list_2, columns=event_name_list_2)
plt.figure(figsize=(8, 6))
sns.heatmap(df_dist_matrix_2, square=True, annot=True, cmap='Blues')
plt.show()