!pip install yfinance --upgrade --no-cache-dir

import datetime as dt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf

# Download daily SPY price history for 2015-09-24 through 2021-09-24.
symbols_list = ['SPY']
start = dt.datetime(year=2015, month=9, day=24)
end = dt.datetime(year=2021, month=9, day=24)
data = yf.download(symbols_list, start=start, end=end)

# Print the column/dtype summary of the downloaded frame.
data.info()

## Move the index into an ordinary column so the dates can be used
## like any other column of the dataframe.
df = data.reset_index(drop=False)
df.info()

## Choose relevant data for the analysis.
# Take an explicit copy: new columns are assigned to this frame in a later
# cell, and assigning to a bare column selection triggers pandas'
# SettingWithCopyWarning.
df = df[['Date', 'Adj Close', 'Volume']].copy()
df.info()

Compare the U.S. stock market in December and January. We hypothesize that the stock's return, volume, and volatility are higher in January than in December.

## Pull the calendar month out of each date.
df['months'] = df['Date'].dt.month

## Daily return: percentage change of the adjusted close from the previous row.
daily_returns = df['Adj Close'].pct_change()
df['return'] = daily_returns

## Annualized volatility: 252-row rolling standard deviation of the daily
## returns, scaled by the square root of 252. Rows without a full window are NaN.
rolling_std = daily_returns.rolling(252).std()
df['annualized_volatility'] = rolling_std * (252) ** (1 / 2)

df.tail()

## Keep only the December (12) and January (1) rows.
december_rows = df.query('months == 12')
january_rows = df.query('months == 1')

# The first row of each subset is skipped.
# NOTE(review): the reason for dropping exactly one row is not stated here - confirm.
Dec_returns = december_rows.iloc[1:]
Jan_returns = january_rows.iloc[1:]

# Preview the first five rows of each monthly subset.
print(Dec_returns.head(5))
Jan_returns.head(5)

## Overlay the daily-return histograms of the two months for comparison
## (December in red, January in green).
return_hist_style = dict(bins=100, alpha=0.5)
Dec_returns['return'].hist(color='r', **return_hist_style)
Jan_returns['return'].hist(color='g', **return_hist_style)

# Summary statistics of the December daily returns.
Dec_returns['return'].describe()

# Summary statistics of the January daily returns.
Jan_returns['return'].describe()

## Perform T-Test on the daily returns (January vs. December).
import scipy.stats as stats

jan_daily = Jan_returns['return']
dec_daily = Dec_returns['return']

print("Difference in mean return: ")
print((jan_daily.mean() - dec_daily.mean())*100)

# equal_var=False: the two samples are not assumed to share a variance.
stat, p = stats.ttest_ind(jan_daily, dec_daily, equal_var=False)

# interpret p-value
alpha = 0.05
print("p value is " + str(p))
if p > alpha:
    print('The difference in mean return is not significantly different (fail to reject H0)')
else:
    print('The difference in mean return is significantly different (reject H0)')

## Plot the annualized volatility for December and January.
# Use the same colour mapping as the return and volume histograms
# (December red, January green); the plots carry no legend, so a swapped
# mapping in this one cell would mislead the reader.
Dec_returns['annualized_volatility'].hist(bins=100, color='r', alpha=0.5)
Jan_returns['annualized_volatility'].hist(bins=100, color='g', alpha=0.5)

# Summary statistics of the January annualized volatility.
Jan_returns['annualized_volatility'].describe()

# Summary statistics of the December annualized volatility.
Dec_returns['annualized_volatility'].describe()

## Perform T-Test on the annualized volatility (January vs. December).
import scipy.stats as stats

# Drop the NaN rows left by the 252-row rolling window. Using .notna() here
# would produce a True/False mask, so the mean difference and the t-test
# would compare "is a value present" flags instead of the volatilities.
jan_vol = Jan_returns['annualized_volatility'].dropna()
dec_vol = Dec_returns['annualized_volatility'].dropna()

print("Difference in mean return volatility: ")
print((jan_vol.mean() - dec_vol.mean())*100)

# equal_var=False: the two samples are not assumed to share a variance.
stat, p = stats.ttest_ind(jan_vol, dec_vol, equal_var=False)

# interpret p-value
alpha = 0.05
print("p value is " + str(p))
if p <= alpha:
    print('The difference in mean return volatility is significantly different (reject H0)')
else:
    print('The difference in mean return volatility is not significantly different (fail to reject H0)')

## Overlay the trading-volume histograms (December in red, January in green).
volume_hist_style = dict(bins=100, alpha=0.5)
Dec_returns['Volume'].hist(color='r', **volume_hist_style)
Jan_returns['Volume'].hist(color='g', **volume_hist_style)

# Summary statistics of the December trading volume.
Dec_returns['Volume'].describe()

# Summary statistics of the January trading volume.
Jan_returns['Volume'].describe()

## Perform T-Test on the trading volume (January vs. December).
import scipy.stats as stats

jan_volume = Jan_returns['Volume']
dec_volume = Dec_returns['Volume']

print("Difference in mean trading volume: ")
print(jan_volume.mean() - dec_volume.mean())

# equal_var=False: the two samples are not assumed to share a variance.
stat, p = stats.ttest_ind(jan_volume, dec_volume, equal_var=False)

# interpret p-value
alpha = 0.05
print("p value is " + str(p))
if p > alpha:
    print('The difference in mean trading volume is not significantly different (fail to reject H0)')
else:
    print('The difference in mean trading volume is significantly different (reject H0)')

In conclusion, the U.S. stock's returns, trading volumes, and volatility do not differ significantly between December and January.

Chi-square test of the independence of the stock returns in December and January.

## Define function that returns the contingency table.
def Contingency_Table(df1, df2, date="Date", month1=12, month2=1):
    """Pair the rows of two return tables by position and compute degrees of freedom.

    Each input is reduced to its date and ``return`` columns, tagged with the
    calendar year and month of the date, and numbered 1..n in row order. The
    two tables are then inner-joined on that row number, so the k-th row of
    ``df1`` is paired with the k-th row of ``df2`` and surplus rows of the
    longer table are dropped.

    The inputs are not modified.

    Args:
        df1: DataFrame with a datetime-like ``date`` column and a ``return``
            column. Its columns receive the ``_dec`` suffix in the result.
        df2: DataFrame with the same layout as ``df1``. Its columns receive
            the ``_jan`` suffix in the result.
        date: Name of the date column in both inputs.
        month1: Unused; kept so existing callers keep working.
        month2: Unused; kept so existing callers keep working.

    Returns:
        A tuple ``(con_tab, dof)``. ``con_tab`` is indexed by ``row_num`` and
        has the columns ``Year_dec``, ``Year_jan``, ``return_dec`` and
        ``return_jan``. ``dof`` is ``(number of tables - 1)`` times
        ``(non-null Year_dec count - 1)``.
    """
    prepared = []
    for frame in (df1, df2):
        # Work on a copy so the caller's frames do not gain extra columns.
        table = frame[[date, "return"]].copy()
        stamps = pd.DatetimeIndex(table[date])
        table["Year"] = stamps.year
        table["Month"] = stamps.month
        # 1-based position within the table; used as the join key below.
        table["row_num"] = range(1, len(table) + 1)
        prepared.append(table)

    ## Merge to Contingency Table
    con_tab = prepared[0].merge(prepared[1], on="row_num", suffixes=("_dec", "_jan"))
    con_tab = con_tab[["row_num", "Year_dec", "Year_jan", "return_dec", "return_jan"]]
    con_tab = con_tab.set_index("row_num")

    dof = (len(prepared) - 1) * (con_tab["Year_dec"].count() - 1)
    return con_tab, dof

## Build the contingency table and its degrees of freedom from the December
## and January subsets; keep them as "CT" and "dof".
CT, dof = Contingency_Table(df1=Dec_returns, df2=Jan_returns)
print(f'Degrees of Freedom: {dof}')
CT.head(5)

## Perform Chi²-Test
## Set confidence level:
alpha = 0.05

##Calculate Test-Statistics
# Per-row term: squared gap between the paired returns divided by the January return.
# NOTE(review): the divisor is a return, which can be zero or negative, so the
# summed value is not guaranteed to be a non-negative chi-square statistic - confirm intent.
squared_gap = (CT['return_dec'] - CT['return_jan']) ** 2
CT['Chi_sq_i'] = squared_gap / CT['return_jan']
Chi_sq_emp = CT['Chi_sq_i'].sum()
print("The emperical Chi²-Value is: " + str(Chi_sq_emp))

##Get theoretical Chi²-Value:
from scipy.stats import chisquare

Compare the U.S. stock market in December and January. We hypothesize that the stock's return, volume, and volatility are higher in January than in December.

In conclusion, the U.S. stock's returns, trading volumes, and volatility do not differ significantly between December and January.

Chi-square test of the independence of the stock returns in December and January.

Compare the U.S. stock market in December and January. We hypothesize that the stock's return, volume, and volatility are higher in January than in December.