Lab 6 Team S01B-3
Problem Statement: To determine whether Chinese New Year (CNY) affects the performance of the Chinese stock market.
Null Hypothesis 1: There is no significant difference between December/January daily returns and daily returns in the week before CNY.
Null Hypothesis 2: There is no significant difference between December/January trading volume and trading volume in the week before CNY.
#Installing Yahoo Finance
!pip install yfinance --upgrade --no-cache-dir
#Importing Packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import yfinance as yf
# Download daily data for the Shanghai Stock Exchange Composite index and
# derive the per-day columns used by the rest of the analysis.
symbols_list = ['000001.SS']
start = dt.datetime(2014, 12, 1)
end = dt.datetime(2021, 4, 1)
# auto_adjust=False asks yfinance to keep the separate 'Adj Close' column
# selected below (recent releases adjust prices by default and omit it).
data = yf.download(symbols_list, start=start, end=end, auto_adjust=False)
# Depending on the yfinance version, the result may carry (field, ticker)
# MultiIndex columns. Only one ticker is requested, so collapse the columns
# to plain field names to keep df['Date'], df['Adj Close'], ... as Series.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)
data.info()
data.head()
df = data.reset_index()  # move the date index into a regular 'Date' column
# .copy() so the column assignments below write to an independent frame
# instead of a slice of the downloaded data.
df = df[['Date', 'Adj Close', 'Volume']].copy()
df['y_m_d'] = df['Date'].dt.date  # plain datetime.date, compared against dt.date bounds later
df['month_of_year'] = df['Date'].dt.month  # 1 = January ... 12 = December
df['day_of_week'] = df['Date'].dt.dayofweek
df['return'] = df['Adj Close'].pct_change()  # simple daily return; first row is NaN
# NOTE: despite the column name, this is the rolling 252-row standard
# deviation of *daily* returns and is not scaled by sqrt(252). The name is
# kept because later cells read this column.
df['annualized_volatility'] = df['return'].rolling(252).std()
df.tail()
# Pre-CNY windows: for each year, the seven calendar days that end the day
# before Chinese New Year's Eve. Non-trading days inside a window simply have
# no rows in df.

# 2015 CNY (19 Feb 2015)
start_date2015 = dt.date(2015, 2, 11)
end_date2015 = dt.date(2015, 2, 17)
mask2015 = (df['y_m_d'] >= start_date2015) & (df['y_m_d'] <= end_date2015)
# 2016 CNY (8 Feb 2016)
start_date2016 = dt.date(2016, 1, 31)
end_date2016 = dt.date(2016, 2, 6)
mask2016 = (df['y_m_d'] >= start_date2016) & (df['y_m_d'] <= end_date2016)
# 2017 CNY (28 Jan 2017). The window was previously 21-27 Feb 2017, which is
# weeks after the holiday; it now follows the same rule as the other years.
start_date2017 = dt.date(2017, 1, 20)
end_date2017 = dt.date(2017, 1, 26)
mask2017 = (df['y_m_d'] >= start_date2017) & (df['y_m_d'] <= end_date2017)
# 2018 CNY (16 Feb 2018)
start_date2018 = dt.date(2018, 2, 8)
end_date2018 = dt.date(2018, 2, 14)
mask2018 = (df['y_m_d'] >= start_date2018) & (df['y_m_d'] <= end_date2018)
# 2019 CNY (5 Feb 2019)
start_date2019 = dt.date(2019, 1, 28)
end_date2019 = dt.date(2019, 2, 3)
mask2019 = (df['y_m_d'] >= start_date2019) & (df['y_m_d'] <= end_date2019)

# Per-year pre-CNY frames and their combination.
df2015 = df.loc[mask2015]
df2016 = df.loc[mask2016]
df2017 = df.loc[mask2017]
df2018 = df.loc[mask2018]
df2019 = df.loc[mask2019]
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
# the original row index is preserved exactly as append did.
df_cny = pd.concat([df2015, df2016, df2017, df2018, df2019])
df_cny

# Comparison samples: every December / January row in df.
# Rows without a return (the very first row, where pct_change is NaN) are
# dropped explicitly. The previous positional [1:] slice also discarded the
# first January observation, which has a valid return.
december = df.query('month_of_year == 12').dropna(subset=['return'])
january = df.query('month_of_year == 1').dropna(subset=['return'])
1. ARE RETURNS ON BOTH DATES SIGNIFICANTLY DIFFERENT?
# Distribution of daily returns in the pre-CNY windows collected in df_cny.
df_cny['return'].hist(bins=20, color='g', alpha=0.5)
df_cny['return'].describe()
# Distribution of daily returns for the December rows of the sample.
december['return'].hist(bins=100, color='r', alpha=0.5)
december['return'].describe()
# Distribution of daily returns for the January rows of the sample.
january['return'].hist(bins=100, color='b', alpha=0.5)
january['return'].describe()

# Welch's t-test (equal_var=False) of pre-CNY returns against each month.
import scipy.stats as stats

alpha = 0.05  # significance level
for month_label, month_returns in (('january', january['return']),
                                   ('December', december['return'])):
    print("Difference in mean return: ")
    # x100 expresses the difference in percentage points
    print((df_cny['return'].mean() - month_returns.mean())*100)
    stat, p = stats.ttest_ind(df_cny['return'], month_returns, equal_var=False)
    # interpret p-value
    print("p value is " + str(p))
    if p <= alpha:
        print('The difference in mean between ' + month_label + ' return and CNY return is significantly different (reject H0)')
    else:
        print('The difference in mean between ' + month_label + ' return and CNY return is not significantly different (fail to reject H0)')
2. ARE TRADING VOLUMES ON BOTH DATES SIGNIFICANTLY DIFFERENT?
# Distribution of daily trading volume in the pre-CNY windows collected in df_cny.
df_cny['Volume'].hist(bins=20, color='g', alpha=0.5)
df_cny['Volume'].describe()
# Distribution of daily trading volume for the December rows of the sample.
december['Volume'].hist(bins=100, color='r', alpha=0.5)
december['Volume'].describe()
# Distribution of daily trading volume for the January rows of the sample.
january['Volume'].hist(bins=100, color='b', alpha=0.5)
january['Volume'].describe()

# Welch's t-test (equal_var=False) of pre-CNY volume against each month.
import scipy.stats as stats

alpha = 0.05  # significance level
for month_label, month_volume in (('December', december['Volume']),
                                  ('january', january['Volume'])):
    # The label previously said "return" and the value was multiplied by 100;
    # volume is a raw count, so the difference is reported unscaled.
    print("Difference in mean volume: ")
    print(df_cny['Volume'].mean() - month_volume.mean())
    stat, p = stats.ttest_ind(df_cny['Volume'], month_volume, equal_var=False)
    # interpret p-value
    print("p value is " + str(p))
    if p <= alpha:
        print('The difference in mean between ' + month_label + ' Volume and CNY Volume is significantly different (reject H0)')
    else:
        print('The difference in mean between ' + month_label + ' Volume and CNY Volume is not significantly different (fail to reject H0)')
3. IS RETURN VOLATILITY IN BOTH PERIODS SIGNIFICANTLY DIFFERENT?
from matplotlib import pyplot

# Compare, for 2016-2019, three measures of daily return dispersion:
#   x - std of daily returns in the pre-CNY window of that year
#   y - mean of the rolling 252-row daily std observed during that window
#   d - std of daily returns in December of that year

# December date ranges and row masks, one per year.
start_date2016_DEC = dt.date(2016, 12, 1)
end_date2016_DEC = dt.date(2016, 12, 31)
mask2016_DEC = (df['y_m_d'] >= start_date2016_DEC) & (df['y_m_d'] <= end_date2016_DEC)
start_date2017_DEC = dt.date(2017, 12, 1)
end_date2017_DEC = dt.date(2017, 12, 31)
mask2017_DEC = (df['y_m_d'] >= start_date2017_DEC) & (df['y_m_d'] <= end_date2017_DEC)
start_date2018_DEC = dt.date(2018, 12, 1)
end_date2018_DEC = dt.date(2018, 12, 31)
mask2018_DEC = (df['y_m_d'] >= start_date2018_DEC) & (df['y_m_d'] <= end_date2018_DEC)
start_date2019_DEC = dt.date(2019, 12, 2)
end_date2019_DEC = dt.date(2019, 12, 31)
mask2019_DEC = (df['y_m_d'] >= start_date2019_DEC) & (df['y_m_d'] <= end_date2019_DEC)

df2016_DEC = df.loc[mask2016_DEC]
df2017_DEC = df.loc[mask2017_DEC]
df2018_DEC = df.loc[mask2018_DEC]
df2019_DEC = df.loc[mask2019_DEC]

# Std of daily returns within each December.
d2016 = df2016_DEC['return'].std()
d2017 = df2017_DEC['return'].std()
d2018 = df2018_DEC['return'].std()
d2019 = df2019_DEC['return'].std()
d = [d2016, d2017, d2018, d2019]

# Std of daily returns within each pre-CNY window.
x2016 = df2016['return'].std()
x2017 = df2017['return'].std()
x2018 = df2018['return'].std()
x2019 = df2019['return'].std()
x = [x2016, x2017, x2018, x2019]

# Mean rolling daily std during each pre-CNY window. Each year now reads its
# own frame; previously all four values were taken from df2016, so the
# series was a single number repeated.
y2016 = df2016['annualized_volatility'].mean()
y2017 = df2017['annualized_volatility'].mean()
y2018 = df2018['annualized_volatility'].mean()
y2019 = df2019['annualized_volatility'].mean()
y = [y2016, y2017, y2018, y2019]

z = ['2016', '2017', '2018', '2019']
# Plot order must match the legend label order below.
plt.scatter(z, x)
plt.scatter(z, y)
plt.scatter(z, d)
plt.legend(['daily_std_1week_beforeCNY','daily_std_whole_year','daily_std_December'],bbox_to_anchor=(1.1, 1.05))
plt.show()