September Effect
!pip install yfinance --upgrade --no-cache-dir
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import yfinance as yf
# Question: find the Yahoo Finance symbol (i.e., search for the instrument name + 'yahoo finance') of any data series you are interested in,
# e.g., a market/sector index ETF for your chosen country, or other asset classes (e.g., Comex Gold's symbol is 'GC=F')
# e.g., SPY (https://finance.yahoo.com/quote/SPY/)
# SPY tracks the S&P 500 index
# IWM tracks the Russell 2000 index of small-cap companies
# DJIA is the Dow Jones Industrial Average index
# Yahoo Finance symbols, one single-element list per instrument.
symbols_list_SPY = ['SPY']
# NOTE(review): Yahoo Finance usually lists the Dow Jones Industrial Average
# index under '^DJI'; confirm that 'DJIA' resolves to the intended series.
symbols_list_DJ = ['DJIA']
symbols_list_IWM = ['IWM']

# Sample window passed to every download call below.
start = dt.datetime(2015, 9, 1)
end = dt.datetime(2020, 10, 31)


def _download_prices(symbols):
    """Download daily price data for *symbols* over the ``start``/``end`` window.

    ``auto_adjust=False`` is requested explicitly so that the result keeps a
    separate 'Adj Close' column, which the rest of the analysis selects by
    name. Newer yfinance releases adjust prices by default and then omit
    that column.

    Some yfinance versions return two-level (field, ticker) columns even for
    a single symbol; in that case the columns are flattened to the field
    names so that plain labels such as 'Adj Close' and 'Volume' keep working.
    """
    frame = yf.download(symbols, start=start, end=end, auto_adjust=False)
    if isinstance(frame.columns, pd.MultiIndex):
        frame.columns = frame.columns.get_level_values(0)
    return frame


data_SPY = _download_prices(symbols_list_SPY)
data_DJ = _download_prices(symbols_list_DJ)
data_IWM = _download_prices(symbols_list_IWM)
# Inspect the raw downloads: structure first, then the first rows.
# The heads are printed explicitly because a bare `.head()` expression
# produces no output when this runs as a script.
for raw_frame in (data_SPY, data_DJ, data_IWM):
    raw_frame.info()
for raw_frame in (data_SPY, data_DJ, data_IWM):
    print(raw_frame.head())

# Turn the index into an ordinary column; the 'Date' column selected below
# comes from this step.
df_SPY = data_SPY.reset_index()
df_DJ = data_DJ.reset_index()
df_IWM = data_IWM.reset_index()
for indexed_frame in (df_SPY, df_DJ, df_IWM):
    indexed_frame.info()

# Keep only the date, adjusted close and volume.
# `.copy()` makes each result an independent frame, so the columns added to
# it later do not trigger pandas' SettingWithCopyWarning.
kept_columns = ['Date', 'Adj Close', 'Volume']
df_SPY = df_SPY[kept_columns].copy()
df_DJ = df_DJ[kept_columns].copy()
df_IWM = df_IWM[kept_columns].copy()
for trimmed_frame in (df_SPY, df_DJ, df_IWM):
    trimmed_frame.info()
# Derived columns, added in place to each index frame:
#   month_<index>          calendar month of 'Date'
#   return                 day-over-day percentage change of 'Adj Close'
#   annualized_volatility  rolling 252-row standard deviation of 'return',
#                          scaled by sqrt(252)
# NOTE(review): the original comment here mentioned dividing 252 trading days
# by 12 (= 21) for the September/October comparison, but the rolling window
# used in the code is 252 rows - confirm which window is intended.
for frame, month_column in (
    (df_SPY, 'month_SPY'),
    (df_DJ, 'month_DJ'),
    (df_IWM, 'month_IWM'),
):
    frame[month_column] = frame['Date'].dt.month
    frame['return'] = frame['Adj Close'].pct_change()
    rolling_std = frame['return'].rolling(252).std()
    frame['annualized_volatility'] = rolling_std * (252) ** (1 / 2)

df_SPY.tail()
df_DJ.tail()
df_IWM.tail()
# Create dataframes containing the September and October rows of each index.


def _month_slice(frame, month_column, month):
    """Return the rows of *frame* in calendar *month* that have a return.

    Rows whose 'return' is missing are removed by value with
    ``dropna(subset=['return'])``. This replaces two earlier constructs:
    a positional ``[1:]`` slice, which also discarded the first October row
    even though its return is valid, and bare ``dropna()`` calls whose
    results were never assigned and therefore changed nothing.

    'annualized_volatility' is deliberately not part of the filter: it is
    missing until the 252-row rolling window is full, and dropping those
    rows here would also remove them from the return and volume samples.
    Code that reads 'annualized_volatility' must drop the missing values of
    that column itself.
    """
    in_month = frame[frame[month_column] == month]
    return in_month.dropna(subset=['return'])


September_returns_SPY = _month_slice(df_SPY, 'month_SPY', 9)
September_returns_DJ = _month_slice(df_DJ, 'month_DJ', 9)
September_returns_IWM = _month_slice(df_IWM, 'month_IWM', 9)
October_returns_SPY = _month_slice(df_SPY, 'month_SPY', 10)
October_returns_DJ = _month_slice(df_DJ, 'month_DJ', 10)
October_returns_IWM = _month_slice(df_IWM, 'month_IWM', 10)
# Daily-return histograms: September (red) overlaid with October (green).
# Each index gets its own figure; without `plt.figure()` every histogram
# would be drawn onto the same axes and only the last title would remain.
for index_label, september_frame, october_frame in (
    ('SPY', September_returns_SPY, October_returns_SPY),
    ('DJ', September_returns_DJ, October_returns_DJ),
    ('IWM', September_returns_IWM, October_returns_IWM),
):
    plt.figure()
    september_frame['return'].hist(bins=80, color='r', alpha=0.5, label='September')
    october_frame['return'].hist(bins=80, color='g', alpha=0.5, label='October')
    plt.title(index_label + ' Return')
    plt.legend()
    plt.show()

# Summary statistics of the daily returns, one table per month and index.
# The third September table previously repeated DJ instead of showing IWM.
for sample_label, sample_frame in (
    ('September SPY', September_returns_SPY),
    ('September DJ', September_returns_DJ),
    ('September IWM', September_returns_IWM),
    ('October SPY', October_returns_SPY),
    ('October DJ', October_returns_DJ),
    ('October IWM', October_returns_IWM),
):
    print(sample_label + ' return summary:')
    print(sample_frame['return'].describe())
import scipy.stats as stats

# For each index: print the September-minus-October difference in mean daily
# return (multiplied by 100), run a two-sample t-test that does not assume
# equal variances (equal_var=False), and report the outcome at the 5% level.
alpha = 0.05
for ticker, september_frame, october_frame in (
    ('SPY', September_returns_SPY, October_returns_SPY),
    ('DJ', September_returns_DJ, October_returns_DJ),
    ('IWM', September_returns_IWM, October_returns_IWM),
):
    september_sample = september_frame['return']
    october_sample = october_frame['return']

    print("Difference in mean return of " + ticker + ": ")
    print((september_sample.mean() - october_sample.mean()) * 100)

    stat, p = stats.ttest_ind(september_sample, october_sample, equal_var=False)

    # interpret p-value
    print("p value is " + str(p))
    if p <= alpha:
        print('The difference in mean return of ' + ticker + ' is significantly different (reject H0)')
    else:
        print('The difference in mean return of ' + ticker + ' is not significantly different (fail to reject H0)')
All 3 indexes
# Annualized-volatility histograms: September (red) overlaid with October
# (green). Each index gets its own figure; without `plt.figure()` every
# histogram would be drawn onto the same axes and only the last title would
# remain.
for index_label, september_frame, october_frame in (
    ('SPY', September_returns_SPY, October_returns_SPY),
    ('DJ', September_returns_DJ, October_returns_DJ),
    ('IWM', September_returns_IWM, October_returns_IWM),
):
    plt.figure()
    september_frame['annualized_volatility'].hist(bins=80, color='r', alpha=0.5, label='September')
    october_frame['annualized_volatility'].hist(bins=80, color='g', alpha=0.5, label='October')
    plt.title(index_label + ' Volatility')
    plt.legend()
    plt.show()

# Summary statistics of the annualized volatility, one table per month and
# index. They are printed explicitly because bare `.describe()` expressions
# produce no output when this runs as a script.
for sample_label, sample_frame in (
    ('September SPY', September_returns_SPY),
    ('September DJ', September_returns_DJ),
    ('September IWM', September_returns_IWM),
    ('October SPY', October_returns_SPY),
    ('October DJ', October_returns_DJ),
    ('October IWM', October_returns_IWM),
):
    print(sample_label + ' annualized volatility summary:')
    print(sample_frame['annualized_volatility'].describe())
import scipy.stats as stats

# For each index: print the September-minus-October difference in mean
# annualized volatility (multiplied by 100), run a two-sample t-test that
# does not assume equal variances (equal_var=False), and report the outcome
# at the 5% level.
#
# Missing volatilities are removed with `.dropna()`. The earlier `.notna()`
# returned a boolean mask, so both the printed difference and the t-test
# compared the share of non-missing rows rather than the volatility values.
alpha = 0.05
for ticker, september_frame, october_frame in (
    ('SPY', September_returns_SPY, October_returns_SPY),
    ('DJ', September_returns_DJ, October_returns_DJ),
    ('IWM', September_returns_IWM, October_returns_IWM),
):
    september_vol = september_frame['annualized_volatility'].dropna()
    october_vol = october_frame['annualized_volatility'].dropna()

    print("Difference in mean return of " + ticker + " volatility: ")
    print((september_vol.mean() - october_vol.mean()) * 100)

    stat, p = stats.ttest_ind(september_vol, october_vol, equal_var=False)

    # interpret p-value
    print("p value is " + str(p))
    if p <= alpha:
        print('The difference in mean return ' + ticker + ' volatility is significantly different (reject H0)')
    else:
        print('The difference in mean return ' + ticker + ' volatility is not significantly different (fail to reject H0)')

# NOTE(review): the check below compares the full SPY volatility series
# (which still contains missing values) with its own boolean not-null mask.
# It is kept exactly as written because its purpose is unclear - confirm the
# intent or remove it.
stat, p = stats.ttest_ind(df_SPY['annualized_volatility'], df_SPY['annualized_volatility'].notna(), equal_var=False)
print("p value is " + str(p))
Conclusion from our test: As the p-values show, we failed to reject H0 for all three indexes. This means that from 2015 to 2020 there was no statistically significant difference between the September and October return volatilities. This suggests that the September effect may be a superstition rather than a real market pattern.
# Trading-volume histograms: September (red) overlaid with October (green).
# Each index gets its own figure; without `plt.figure()` every histogram
# would be drawn onto the same axes and only the last title would remain.
for index_label, september_frame, october_frame in (
    ('SPY', September_returns_SPY, October_returns_SPY),
    ('DJ', September_returns_DJ, October_returns_DJ),
    ('IWM', September_returns_IWM, October_returns_IWM),
):
    plt.figure()
    september_frame['Volume'].hist(bins=100, color='r', alpha=0.5, label='September')
    october_frame['Volume'].hist(bins=100, color='g', alpha=0.5, label='October')
    plt.title(index_label + ' Volume')
    plt.legend()
    plt.show()

# Summary statistics of the trading volume, one table per month and index.
# They are printed explicitly because bare `.describe()` expressions produce
# no output when this runs as a script.
for sample_label, sample_frame in (
    ('September SPY', September_returns_SPY),
    ('September DJ', September_returns_DJ),
    ('September IWM', September_returns_IWM),
    ('October SPY', October_returns_SPY),
    ('October DJ', October_returns_DJ),
    ('October IWM', October_returns_IWM),
):
    print(sample_label + ' volume summary:')
    print(sample_frame['Volume'].describe())
import scipy.stats as stats

# For each index: print the September-minus-October difference in mean
# trading volume, run a two-sample t-test that does not assume equal
# variances (equal_var=False), and report the outcome at the 5% level.
#
# The heading names the index actually being tested; the DJ and IWM results
# were previously printed under an "SPY" heading.
alpha = 0.05
for ticker, september_frame, october_frame in (
    ('SPY', September_returns_SPY, October_returns_SPY),
    ('DJ', September_returns_DJ, October_returns_DJ),
    ('IWM', September_returns_IWM, October_returns_IWM),
):
    september_volume = september_frame['Volume']
    october_volume = october_frame['Volume']

    print("Difference in mean trading volume of " + ticker + ": ")
    print(september_volume.mean() - october_volume.mean())

    stat, p = stats.ttest_ind(september_volume, october_volume, equal_var=False)

    # interpret p-value
    print("p value is " + str(p))
    if p <= alpha:
        print('The difference in mean trading volume of ' + ticker + ' is significantly different (reject H0)')
    else:
        print('The difference in mean trading volume of ' + ticker + ' is not significantly different (fail to reject H0)')
Conclusion from our test: As the p-values show, we failed to reject H0 for all three indexes. This means that from 2015 to 2020 there was no statistically significant difference between the September and October trading volumes. This suggests that the September effect may be a superstition rather than a real market pattern.