!pip install yfinance --upgrade --no-cache-dir
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import yfinance as yf
symbols_list = ['SPY']
start = dt.datetime(2015, 9, 24)
end = dt.datetime(2021, 9, 24)

# auto_adjust=False is requested explicitly so that the separate 'Adj Close'
# column used below is present.
# NOTE(review): newer yfinance releases reportedly default to auto_adjust=True
# and drop that column - confirm against the installed version.
data = yf.download(symbols_list, start=start, end=end, auto_adjust=False)

# Some yfinance releases return (field, ticker) MultiIndex columns even for a
# single ticker; collapse them to plain field names so 'Adj Close' and
# 'Volume' can be selected directly.
if isinstance(data.columns, pd.MultiIndex) and len(symbols_list) == 1:
    data.columns = data.columns.get_level_values(0)
data.info()

## Reset the index so the dates become a regular column of the dataframe.
df = data.reset_index()
df.info()

## Keep only the columns needed for the analysis.
# .copy() gives an independent frame, so adding columns to it later does not
# operate on a slice of the downloaded data.
df = df[['Date', 'Adj Close', 'Volume']].copy()
df.info()
Compare the U.S. stock market in December and January. We hypothesize that the stock's return, volume, and volatility are higher in January than in December.
## Derive three helper columns:
##   months                - calendar month of each date
##   return                - daily percentage change of the adjusted close
##   annualized_volatility - rolling 252-row standard deviation of the daily
##                           return, scaled by the square root of 252
daily_return = df['Adj Close'].pct_change()
rolling_std = daily_return.rolling(252).std()

df['months'] = df['Date'].dt.month
df['return'] = daily_return
df['annualized_volatility'] = rolling_std * 252 ** (1 / 2)
df.tail()
## Select the December and the January rows.
## The first matching row of each month subset is skipped.
december_rows = df.query('''months == 12''')
january_rows = df.query('''months == 1''')

Dec_returns = december_rows.iloc[1:]
Jan_returns = january_rows.iloc[1:]

print(Dec_returns.head())
Jan_returns.head()
## Overlay the daily-return histograms of both months
## (December in red, January in green).
for month_frame, colour in ((Dec_returns, 'r'), (Jan_returns, 'g')):
    month_frame['return'].hist(bins=100, color=colour, alpha=0.5)

## Summary statistics of the daily returns per month.
Dec_returns['return'].describe()
Jan_returns['return'].describe()
## Two-sample t-test (unequal variances) on the daily returns of
## January versus December.
import scipy.stats as stats

jan_ret = Jan_returns['return']
dec_ret = Dec_returns['return']

print("Difference in mean return: ")
print((jan_ret.mean() - dec_ret.mean()) * 100)

stat, p = stats.ttest_ind(jan_ret, dec_ret, equal_var=False)

# Compare the p-value with the chosen significance level.
alpha = 0.05
print(f"p value is {p}")
verdict = (
    'The difference in mean return is significantly different (reject H0)'
    if p <= alpha
    else 'The difference in mean return is not significantly different (fail to reject H0)'
)
print(verdict)
## Plot the annualized volatility for December and January.
# Use the same colour coding and ordering as the return and volume
# histograms (December red, January green) so the unlabeled plots can be
# read consistently.
Dec_returns['annualized_volatility'].hist(bins=100, color='r', alpha=0.5)
Jan_returns['annualized_volatility'].hist(bins=100, color='g', alpha=0.5)

## Summary statistics of the annualized volatility per month.
Dec_returns['annualized_volatility'].describe()
Jan_returns['annualized_volatility'].describe()
## Two-sample t-test (unequal variances) on the annualized volatility of
## January versus December.
import scipy.stats as stats

# Drop the missing volatility values before computing anything.
# .notna() must not be used here: it returns True/False flags, so the mean
# and the t-test would compare the share of non-missing rows instead of the
# volatility itself.
jan_vol = Jan_returns['annualized_volatility'].dropna()
dec_vol = Dec_returns['annualized_volatility'].dropna()

print("Difference in mean return volatility: ")
print((jan_vol.mean() - dec_vol.mean()) * 100)

stat, p = stats.ttest_ind(jan_vol, dec_vol, equal_var=False)

# interpret p-value
alpha = 0.05
print("p value is " + str(p))
if p <= alpha:
    print('The difference in mean return volatility is significantly different (reject H0)')
else:
    print('The difference in mean return volatility is not significantly different (fail to reject H0)')
## Overlay the trading-volume histograms of both months
## (December in red, January in green).
dec_volume = Dec_returns['Volume']
jan_volume = Jan_returns['Volume']

dec_volume.hist(bins=100, color='r', alpha=0.5)
jan_volume.hist(bins=100, color='g', alpha=0.5)

## Summary statistics of the trading volume per month.
dec_volume.describe()
jan_volume.describe()
## Two-sample t-test (unequal variances) on the trading volume of
## January versus December.
import scipy.stats as stats

jan_vlm = Jan_returns['Volume']
dec_vlm = Dec_returns['Volume']

print("Difference in mean trading volume: ")
print(jan_vlm.mean() - dec_vlm.mean())

stat, p = stats.ttest_ind(jan_vlm, dec_vlm, equal_var=False)

# Compare the p-value with the chosen significance level.
alpha = 0.05
print(f"p value is {p}")
outcome = (
    'The difference in mean trading volume is significantly different (reject H0)'
    if p <= alpha
    else 'The difference in mean trading volume is not significantly different (fail to reject H0)'
)
print(outcome)
In conclusion, the U.S. stock's returns, trading volumes, and volatility do not differ significantly between December and January.
Chi-square test of the independence of the stock returns in December and January.
## Define function that returns the contingency table.
def Contingency_Table(df1, df2, date="Date", month1 = 12, month2 = 1):
    """Pair the rows of two return tables by position and compute degrees of freedom.

    Each input frame is numbered 1..n in its existing row order, and the two
    frames are then inner-joined on that row number. Only as many rows as the
    shorter frame has survive the join.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames containing a date column (named by ``date``) and a ``"return"``
        column. ``df1`` supplies the ``*_dec`` columns of the result and
        ``df2`` the ``*_jan`` columns. Neither frame is modified.
    date : str, default "Date"
        Name of the column the year is extracted from.
    month1, month2 : int
        Not used by the computation; kept so existing calls keep working.

    Returns
    -------
    con_tab : pandas.DataFrame
        Indexed by ``row_num`` with the columns ``Year_dec``, ``Year_jan``,
        ``return_dec`` and ``return_jan``.
    dof : int
        ``(number of input frames - 1) * (number of paired rows - 1)``.
    """
    prepared = []
    for frame in (df1, df2):
        # Work on a copy so the caller's DataFrames are left untouched.
        part = frame[[date, "return"]].copy()
        part["Year"] = pd.DatetimeIndex(part[date]).year
        # 1-based positional key used to line the two frames up row by row.
        part["row_num"] = range(1, len(part) + 1)
        prepared.append(part)

    ## Merge to Contingency Table
    con_tab = prepared[0].merge(prepared[1], on="row_num", suffixes=("_dec", "_jan"))
    con_tab = con_tab[['row_num', 'Year_dec', 'Year_jan', 'return_dec', 'return_jan']]
    con_tab = con_tab.set_index('row_num')

    dof = (len(prepared) - 1) * (con_tab['Year_dec'].count() - 1)
    return con_tab, dof
## Build the paired December/January table together with its degrees of
## freedom and keep them as "CT" and "dof".
contingency_result = Contingency_Table(Dec_returns, Jan_returns)
CT = contingency_result[0]
dof = contingency_result[1]
print(f'Degrees of Freedom: {dof}')
CT.head()
## Chi²-Test: empirical test statistic
## Significance level used for the test:
alpha = 0.05

## Per-row contribution: squared December/January difference divided by the
## January return; the statistic is the sum over all rows.
# NOTE(review): the divisor is a signed return, so individual terms can be
# negative or undefined when it is zero - confirm this is intended.
dec_minus_jan = CT['return_dec'].sub(CT['return_jan'])
CT['Chi_sq_i'] = dec_minus_jan.pow(2).div(CT['return_jan'])
Chi_sq_emp = CT['Chi_sq_i'].sum()
print(f"The emperical Chi²-Value is: {Chi_sq_emp}")
##Get theoretical Chi²-Value:
from scipy.stats import chisquare