Lab 6 Team S01B-3
Problem Statement: To discover if Chinese New Year will have impacts on the Chinese stock markets' performances.
Null Hypothesis 1: There is no significant difference in December & January returns compared to CNY returns.
Null Hypothesis 2: There is no significant difference in December & January trading volume compared to CNY trading volume.
#Installing Yahoo Finance
!pip install yfinance --upgrade --no-cache-dir
Requirement already satisfied: yfinance in /root/venv/lib/python3.7/site-packages (0.1.63)
Requirement already satisfied: requests>=2.20 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from yfinance) (2.26.0)
Requirement already satisfied: multitasking>=0.0.7 in /root/venv/lib/python3.7/site-packages (from yfinance) (0.0.9)
Requirement already satisfied: numpy>=1.15 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from yfinance) (1.19.5)
Requirement already satisfied: pandas>=0.24 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from yfinance) (1.2.5)
Requirement already satisfied: lxml>=4.5.1 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from yfinance) (4.6.3)
Requirement already satisfied: pytz>=2017.3 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from pandas>=0.24->yfinance) (2021.1)
Requirement already satisfied: python-dateutil>=2.7.3 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from pandas>=0.24->yfinance) (2.8.2)
Requirement already satisfied: six>=1.5 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from python-dateutil>=2.7.3->pandas>=0.24->yfinance) (1.16.0)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from requests>=2.20->yfinance) (1.26.7)
Requirement already satisfied: certifi>=2017.4.17 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from requests>=2.20->yfinance) (2021.5.30)
Requirement already satisfied: charset-normalizer~=2.0.0 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from requests>=2.20->yfinance) (2.0.6)
Requirement already satisfied: idna<4,>=2.5 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from requests>=2.20->yfinance) (3.2)
#Importing Packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import yfinance as yf
#import Shanghai Stock Exchange Composite's Data into the DataFrame
symbols_list = ['000001.SS']
start = dt.datetime(2014,12,1)
end = dt.datetime(2021,4,1)
data = yf.download(symbols_list, start=start, end=end)
data.info()
[*********************100%***********************] 1 of 1 completed
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1540 entries, 2014-12-01 to 2021-03-31
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Open 1540 non-null float64
1 High 1540 non-null float64
2 Low 1540 non-null float64
3 Close 1540 non-null float64
4 Adj Close 1540 non-null float64
5 Volume 1540 non-null int64
dtypes: float64(5), int64(1)
memory usage: 84.2 KB
data.head()
df = data.reset_index() #resets the index, brings the date to become a variable
df = df[['Date','Adj Close', 'Volume']]
df['y_m_d'] = df['Date'].dt.date
df['month_of_year'] = df['Date'].dt.month #new column with the month of year
df['day_of_week'] = df['Date'].dt.dayofweek
df['return'] = df['Adj Close'].pct_change()
df['annualized_volatility'] = (df['return'].rolling(252).std())
df.tail()
#2015CNY
start_date2015 = dt.date(2015,2,11)
end_date2015 = dt.date(2015,2,17)
mask2015 = (df['y_m_d'] >= start_date2015) & (df['y_m_d'] <= end_date2015)
#2016CNY
start_date2016 = dt.date(2016,1,31)
end_date2016 = dt.date(2016,2,6)
mask2016 = (df['y_m_d'] >= start_date2016) & (df['y_m_d'] <= end_date2016)
#2017CNY
start_date2017 = dt.date(2017,2,21)
end_date2017 = dt.date(2017,2,27)
mask2017 = (df['y_m_d'] >= start_date2017) & (df['y_m_d'] <= end_date2017)
#2018CNY
start_date2018 = dt.date(2018,2,8)
end_date2018 = dt.date(2018,2,14)
mask2018 = (df['y_m_d'] >= start_date2018) & (df['y_m_d'] <= end_date2018)
#2019CNY
start_date2019 = dt.date(2019,1,28)
end_date2019 = dt.date(2019,2,3)
mask2019 = (df['y_m_d'] >= start_date2019) & (df['y_m_d'] <= end_date2019)
#CNY dataframes and merger of dataframes
df2015 = df.loc[mask2015]
df2016 = df.loc[mask2016]
df2017 = df.loc[mask2017]
df2018 = df.loc[mask2018]
df2019 = df.loc[mask2019]
df_cny = df2015.append([df2016,df2017,df2018,df2019])
df_cny
december= df.query('''month_of_year == 12''')[1:]
january = df.query('''month_of_year == 1''')[1:]
1. ARE RETURNS ON BOTH DATES SIGNIFICANTLY DIFFERENT?
#this demonstrates the returns for every week before Chinese New Year from 2015 - 2019
df_cny['return'].hist(bins=20, color='g', alpha=0.5)
df_cny['return'].describe()
#this demostrates the returns for every December from 2015 - 2019
december['return'].hist(bins=100, color='r', alpha=0.5)
december['return'].describe()
#this demostrates the returns for every January from 2015 - 2019
january['return'].hist(bins=100, color='b', alpha=0.5)
january['return'].describe()
#p-value calculation
import scipy.stats as stats
print("Difference in mean return: ")
print((df_cny['return'].mean() - january['return'].mean())*100)
stat, p = stats.ttest_ind(df_cny['return'], january['return'], equal_var=False)
# interpret p-value
alpha = 0.05
print("p value is " + str(p))
if p <= alpha:
print('The difference in mean between january return and CNY return is significantly different (reject H0)')
else:
print('The difference in mean between january return and CNY return is not significantly different (fail to reject H0)')
Difference in mean return:
0.1953577975194575
p value is 0.5033780921266096
The difference in mean between january return and CNY return is not significantly different (fail to reject H0)
#p-value calculation
import scipy.stats as stats
print("Difference in mean return: ")
print((df_cny['return'].mean() - december['return'].mean())*100)
stat, p = stats.ttest_ind(df_cny['return'], december['return'], equal_var=False)
# interpret p-value
alpha = 0.05
print("p value is " + str(p))
if p <= alpha:
print('The difference in mean between December return and CNY return is significantly different (reject H0)')
else:
print('The difference in mean between December return and CNY return is not significantly different (fail to reject H0)')
Difference in mean return:
-0.09481290125977983
p value is 0.7264140587987047
The difference in mean between December return and CNY return is not significantly different (fail to reject H0)
2. ARE TRADING VOLUMES ON BOTH DATES SIGNIFICANTLY DIFFERENT?
#this demostrates the trading volumes for every week before Chinese New Year from 2015 - 2019
df_cny['Volume'].hist(bins=20, color='g', alpha=0.5)
df_cny['Volume'].describe()
#this demostrates the trading volumes for every December from 2015 - 2019
december['Volume'].hist(bins=100, color='r', alpha=0.5)
december['Volume'].describe()
#this demostrates the trading volumes for every January from 2015 - 2019
january['Volume'].hist(bins=100, color='b', alpha=0.5)
january['Volume'].describe()
#p-value calculation
import scipy.stats as stats
print("Difference in mean return: ")
print((df_cny['Volume'].mean() - december['Volume'].mean())*100)
stat, p = stats.ttest_ind(df_cny['Volume'], december['Volume'], equal_var=False)
# interpret p-value
alpha = 0.05
print("p value is " + str(p))
if p <= alpha:
print('The difference in mean between December Volume and CNY Volume is significantly different (reject H0)')
else:
print('The difference in mean between December Volume and CNY Volume is not significantly different (fail to reject H0)')
Difference in mean return:
-5964018.421052632
p value is 1.4213252826836245e-05
The difference in mean between December Volume and CNY Volume is significantly different (reject H0)
#p-value calculation
import scipy.stats as stats
print("Difference in mean return: ")
print((df_cny['Volume'].mean() - january['Volume'].mean())*100)
stat, p = stats.ttest_ind(df_cny['Volume'], january['Volume'], equal_var=False)
# interpret p-value
alpha = 0.05
print("p value is " + str(p))
if p <= alpha:
print('The difference in mean between january Volume and CNY Volume is significantly different (reject H0)')
else:
print('The difference in mean between january Volume and CNY Volume is not significantly different (fail to reject H0)')
Difference in mean return:
-5636385.401459853
p value is 1.7670693929696055e-06
The difference in mean between january Volume and CNY Volume is significantly different (reject H0)
3. ARE TRADING VOLATILITY ON BOTH DATES SIGNIFICANTLY DIFFERENT?
from matplotlib import pyplot
start_date2016_DEC = dt.date(2016,12,1)
end_date2016_DEC = dt.date(2016,12,31)
mask2016_DEC = (df['y_m_d'] >= start_date2016_DEC) & (df['y_m_d'] <= end_date2016_DEC)
start_date2017_DEC = dt.date(2017,12,1)
end_date2017_DEC = dt.date(2017,12,31)
mask2017_DEC = (df['y_m_d'] >= start_date2017_DEC) & (df['y_m_d'] <= end_date2017_DEC)
start_date2018_DEC = dt.date(2018,12,1)
end_date2018_DEC = dt.date(2018,12,31)
mask2018_DEC = (df['y_m_d'] >= start_date2018_DEC) & (df['y_m_d'] <= end_date2018_DEC)
start_date2019_DEC = dt.date(2019,12,2)
end_date2019_DEC = dt.date(2019,12,31)
mask2019_DEC = (df['y_m_d'] >= start_date2019_DEC) & (df['y_m_d'] <= end_date2019_DEC)
df2016_DEC = df.loc[mask2016_DEC]
df2017_DEC = df.loc[mask2017_DEC]
df2018_DEC = df.loc[mask2018_DEC]
df2019_DEC = df.loc[mask2019_DEC]
d2016=df2016_DEC['return'].std()
d2017=df2017_DEC['return'].std()
d2018=df2018_DEC['return'].std()
d2019=df2019_DEC['return'].std()
d=[d2016,d2017,d2018,d2019]
x2016=df2016['return'].std()
x2017=df2017['return'].std()
x2018=df2018['return'].std()
x2019=df2019['return'].std()
x=[x2016,x2017,x2018,x2019]
y2016 = df2016['annualized_volatility'].mean()
y2017 = df2016['annualized_volatility'].mean()
y2018 = df2016['annualized_volatility'].mean()
y2019 = df2016['annualized_volatility'].mean()
y=[y2016,y2017,y2018,y2019]
z = ['2016','2017','2018','2019']
plt.scatter(z,x)
plt.scatter(z,y)
plt.scatter(z,d)
plt.legend(['daily_std_1week_beforeCNY','daily_std_whole_year','daily_std_December'],bbox_to_anchor=(1.1, 1.05))
plt.show()