Lab 4
# math operations
from numpy import inf
# time operations
from datetime import timedelta
# for numerical analysis
import numpy as np
# to store and process data in dataframe
import pandas as pd
# basic visualization package
import matplotlib.pyplot as plt
# advanced ploting
import seaborn as sns
# interactive visualization
import plotly.express as px
import plotly.graph_objs as go
#from plotly.offline import plot, iplot, init_notebook_mode
#init_notebook_mode(connected=True)
# hide warnings
import warnings
warnings.filterwarnings("ignore")
# Modules for API calls
import requests
import io
import json
from datetime import datetime
## OWID Covid-19 Data (Source: https://github.com/owid/covid-19-data/tree/master/public/data)
url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'
download = requests.get(url).content
covid = pd.read_csv(io.StringIO(download.decode('utf-8')), parse_dates=['date'])
covid = covid.fillna(0)
covid = covid[covid["location"]=="Singapore"]
print(covid.info())
1. Singapore's Covid-19 situation
cnf, dth, rec, act = "#393e46", "#ff2e63", "#21bf73", "#fe9801"
temp = covid[["date","new_cases", "new_cases_smoothed"]]
temp.head()
# Plot daily new cases against the 7-day smoothed series
fig = px.line(
temp,
x="date",
y=["new_cases", "new_cases_smoothed"],
height=600,
width=700,
title="Cases over time",
color_discrete_sequence=[rec, dth, act],
)
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()
The new_cases and new_cases_smoothed (7-day average) series are plotted above. Together they illustrate both the trend in new Covid-19 cases and the short-term situation: when daily new_cases falls below new_cases_smoothed, the situation is improving, and vice versa. The graph shows that after a period of case-count stabilisation between September 2020 and early July 2021, new cases spiked sharply from mid-July, and new_cases currently sits above new_cases_smoothed. Based on case counts alone, the situation has recently been worsening.
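As a quick check on this reading, a minimal sketch (using the temp dataframe built above) that flags whether the latest daily count sits above its 7-day average:
# hedged sketch: compare the most recent daily count with its 7-day smoothed value
latest = temp.dropna(subset=["new_cases", "new_cases_smoothed"]).iloc[-1]
if latest["new_cases"] > latest["new_cases_smoothed"]:
    print(f"{latest['date'].date()}: new cases above the 7-day average (worsening)")
else:
    print(f"{latest['date'].date()}: new cases at or below the 7-day average (improving)")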
cnf, dth, rec, act = "#393e46", "#ff2e63", "#21bf73", "#fe9801"
temp1 = covid[["date", "reproduction_rate"]].copy()
temp1["historical_average_r"] = temp1["reproduction_rate"].mean()
temp1.head()
# Plot the daily reproduction rate against its historical average
fig = px.line(
temp1,
x="date",
y=["reproduction_rate", "historical_average_r"],
height=600,
width=700,
title="R over time",
color_discrete_sequence=[rec, dth, act],
)
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()
According to Lawrence Wong, head of the Multi-Ministry Covid-19 Task Force, the reproduction rate R measures how quickly Covid-19 is spreading in Singapore, and the task force monitors it closely. The historical average of R is therefore plotted against the daily value to show whether the current R is above or below it. The graph shows that since mid-August 2021, R has risen sharply above its average, again suggesting the situation has worsened. As a side note, it is worth watching what level of R the Singapore government tolerates before stepping up its Covid-19 measures.
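The same comparison can be made programmatically; a minimal sketch using temp1 from above (note that the upstream fillna(0) can leave trailing zeros where R is not yet reported):
# hedged sketch: compare the latest R against its historical average
latest_r = temp1["reproduction_rate"].iloc[-1]
avg_r = temp1["historical_average_r"].iloc[-1]
print(f"Latest R = {latest_r:.2f} vs historical average R = {avg_r:.2f}")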
2. Evaluate the potential hidden cases (e.g., case positivity rate) and deaths (e.g., estimated infection fatality rate, excess death)
cnf, dth, rec, act = "#393e46", "#ff2e63", "#21bf73", "#fe9801"
temp2 = covid[["date","positive_rate"]]
temp2.head()
# Plot the test positivity rate over time
fig = px.line(
temp2,
x="date",
y=["positive_rate"],
height=600,
width=700,
title="Case positivity over time",
color_discrete_sequence=[rec, dth, act],
)
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()
The line graph above shows a positivity rate near zero since October 2020, suggesting that Singapore's testing capacity has kept pace with the outbreak and that the probability of a large number of hidden cases is low. Note, however, that the fillna(0) step above turns unreported positivity values into zeros, which can make the series look lower than it really is.
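To gauge that caveat, a minimal sketch that re-reads the raw download without the fill and checks how much of the series is genuinely reported:
# hedged sketch: inspect reported (non-missing) positivity before the fillna(0) step
raw_sg = pd.read_csv(io.StringIO(download.decode("utf-8")), parse_dates=["date"]).query('location == "Singapore"')
reported = raw_sg["positive_rate"].dropna()
if len(reported):
    print(f"{len(reported)} of {len(raw_sg)} days report a positivity rate; latest reported = {reported.iloc[-1]:.4f}")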
cnf, dth, rec, act = "#393e46", "#ff2e63", "#21bf73", "#fe9801"
temp2 = covid[["date","new_deaths","excess_mortality"]]
temp2.head()
# Plot daily new deaths against excess mortality
fig = px.line(
temp2,
x="date",
y=["new_deaths","excess_mortality"],
height=600,
width=700,
title="New deaths over time",
color_discrete_sequence=[rec, dth, act],
)
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()
3. Explore the relationship between the country's Covid-19 cases and deaths and government health intervention policies (e.g., vaccination rate, closures), as well as Google community mobility reports.
mobility = pd.read_csv("/work/2020_SG_Region_Mobility_Report.csv")
mobility = mobility.drop_duplicates()
mobility.head()
mobility.tail()
sg_mobility_1 = mobility.iloc[:, 8:].copy()
sg_mobility_1["date"] = pd.to_datetime(sg_mobility_1["date"])
sg_mobility_1.tail()
sg_mobility_1 = sg_mobility_1.rename(columns={
    'retail_and_recreation_percent_change_from_baseline': 'Retail & Recreation',
    'grocery_and_pharmacy_percent_change_from_baseline': 'Grocery & Pharma',
    'parks_percent_change_from_baseline': 'Parks',
    'transit_stations_percent_change_from_baseline': 'Transit Stations',
    'workplaces_percent_change_from_baseline': 'Workplace',
    'residential_percent_change_from_baseline': 'Residential',
})
sg_mobility_1.head()
cnf, dth, rec, act = "#ff0000", "#00ff00", "#0000ff", "#ffff00"
fig = px.line(
sg_mobility_1,
x="date",
y=["Retail & Recreation","Grocery & Pharma","Transit Stations","Workplace"],
height=600,
width=700,
title="Mobility Over Time",
color_discrete_sequence=[cnf, dth, rec, act]
)
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()
# 7-day simple moving averages of each mobility category (named columns are safer than positional iloc)
sg_mobility_1['retail_SMA_7'] = sg_mobility_1['Retail & Recreation'].rolling(window=7).mean()
sg_mobility_1['grocery_SMA_7'] = sg_mobility_1['Grocery & Pharma'].rolling(window=7).mean()
sg_mobility_1['parks_SMA_7'] = sg_mobility_1['Parks'].rolling(window=7).mean()
sg_mobility_1['transit_stations_SMA_7'] = sg_mobility_1['Transit Stations'].rolling(window=7).mean()
sg_mobility_1['workplace_SMA_7'] = sg_mobility_1['Workplace'].rolling(window=7).mean()
sg_mobility_1['residential_SMA_7'] = sg_mobility_1['Residential'].rolling(window=7).mean()
sg_mobility_1.tail(10)
cnf, dth, rec, act = "#ff0000", "#00ff00", "#0000ff", "#ffff00"
# Plot the 7-day moving averages of the mobility categories
fig = px.line(
sg_mobility_1,
x="date",
y=["retail_SMA_7","grocery_SMA_7","transit_stations_SMA_7","workplace_SMA_7"],
labels={"value": "% change from baseline"},
height=600,
width=700,
title="Mobility in Singapore Over Time",
color_discrete_sequence=[cnf, dth, rec, act],
)
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()
Data from the Google Mobility Reports indicate a recovery from a trough in mid-2020; more recent data, however, show a slight downward trend as cases rise and people grow more hesitant to move about.
relationship = covid.merge(sg_mobility_1,on = "date", how = "right")[["total_cases","new_cases","new_deaths","Retail & Recreation","Grocery & Pharma","Transit Stations","Workplace"]]
sns.pairplot(relationship)
The pairplot above suggests that daily new cases correlate negatively with mobility in Singapore, most plausibly because the government tightens restrictions in response to higher daily case counts.
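To put numbers on the pairplot, a minimal sketch that prints the correlation of each mobility category with daily new cases:
# hedged sketch: quantify the case-mobility relationship shown in the pairplot
print(relationship.dropna().corr()["new_cases"].sort_values())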
Responsiveness of government to daily new cases
a) Singapore
stringency = covid[["date","stringency_index"]]
stringency.head()
newcases_vs_stringency = covid[["new_cases","stringency_index"]].dropna().corr()
fig, ax = plt.subplots(figsize=(10,5))
sns.heatmap(newcases_vs_stringency, annot=True, ax = ax)
b) Malaysia
covid_my = pd.read_csv(io.StringIO(download.decode('utf-8')), parse_dates=['date'])
covid_my = covid_my.fillna(0)
covid_my = covid_my.query('location == "Malaysia"')
covid_my.head()
newcases_vs_stringency_my = covid_my[["new_cases","stringency_index"]].dropna().corr()
fig, ax = plt.subplots(figsize=(10,5))
sns.heatmap(newcases_vs_stringency_my, annot=True, ax = ax)
c) Australia
covid_aus = pd.read_csv(io.StringIO(download.decode('utf-8')), parse_dates=['date'])
covid_aus = covid_aus.fillna(0)
covid_aus = covid_aus.query('location == "Australia"')
covid_aus.head()
newcases_vs_stringency_aus = covid_aus[["new_cases","stringency_index"]].dropna().corr()
fig, ax = plt.subplots(figsize=(10,5))
sns.heatmap(newcases_vs_stringency_aus, annot=True, ax = ax)
d) USA
covid_usa = pd.read_csv(io.StringIO(download.decode('utf-8')), parse_dates=['date'])
covid_usa = covid_usa.fillna(0)
covid_usa = covid_usa.query('location == "United States"')
covid_usa.head()
newcases_vs_stringency_usa = covid_usa[["new_cases","stringency_index"]].dropna().corr()
fig, ax = plt.subplots(figsize=(10,5))
sns.heatmap(newcases_vs_stringency_usa, annot=True, ax = ax)
In conclusion, of the four countries examined (Singapore, Malaysia, Australia, USA), Singapore's government is the most responsive to rises in daily new cases, with a correlation coefficient of 0.4 between stringency and new cases; the figure is -0.58 for Malaysia, 0.076 for Australia, and 0.32 for the USA.
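For readability, the four coefficients can be collected into one ranked table; a minimal sketch reusing the matrices computed above:
# hedged sketch: rank countries by the new-cases/stringency correlation
responsiveness = pd.Series({
    "Singapore": newcases_vs_stringency.loc["new_cases", "stringency_index"],
    "Malaysia": newcases_vs_stringency_my.loc["new_cases", "stringency_index"],
    "Australia": newcases_vs_stringency_aus.loc["new_cases", "stringency_index"],
    "United States": newcases_vs_stringency_usa.loc["new_cases", "stringency_index"],
})
print(responsiveness.sort_values(ascending=False))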
Level of citizens' compliance with government restrictions
Metric of compliance: the closer the correlation between mobility and stringency is to -1, the more compliant the citizens are with government restrictions (pay attention to the dark-coloured boxes).
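Since the same mobility-vs-stringency correlation is computed four times below, the metric can be expressed once as a helper; a minimal sketch, assuming each mobility frame has been renamed as above and each stringency frame has date and stringency_index columns (the per-country cells that follow are kept as-is):
mobility_cols = ["Retail & Recreation", "Grocery & Pharma", "Transit Stations", "Workplace"]

def compliance_corr(mobility_df, stringency_df):
    # correlation of each mobility category with the stringency index
    # (closer to -1 = more compliant)
    merged = mobility_df.merge(stringency_df, on="date", how="right")
    return merged[mobility_cols + ["stringency_index"]].dropna().corr()["stringency_index"]
For example, compliance_corr(sg_mobility_1, stringency) should reproduce the stringency_index column of the Singapore heatmap below.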
a) Singapore
relationship1 = sg_mobility_1.merge(stringency,on = "date", how = "right")[["Retail & Recreation","Grocery & Pharma","Transit Stations","Workplace","stringency_index"]]
relationship1_corr = relationship1[["stringency_index", "Retail & Recreation","Grocery & Pharma","Transit Stations","Workplace"]].dropna().corr()
fig, ax = plt.subplots(figsize=(10,5))
sns.heatmap(relationship1_corr, annot=True, ax = ax)
b) Malaysia
mobility_my = pd.read_csv("/work/2020_MY_Region_Mobility_Report.csv")
mobility_my = mobility_my.drop_duplicates()
mobility_my["date"] = pd.to_datetime(mobility_my["date"])
mobility_my = mobility_my.rename(columns={
    'retail_and_recreation_percent_change_from_baseline': 'Retail & Recreation',
    'grocery_and_pharmacy_percent_change_from_baseline': 'Grocery & Pharma',
    'parks_percent_change_from_baseline': 'Parks',
    'transit_stations_percent_change_from_baseline': 'Transit Stations',
    'workplaces_percent_change_from_baseline': 'Workplace',
    'residential_percent_change_from_baseline': 'Residential',
})
mobility_my.tail()
stringency_my = covid_my[["date","stringency_index"]]
relationship2 = mobility_my.merge(stringency_my,on = "date", how = "right")[["Retail & Recreation","Grocery & Pharma","Transit Stations","Workplace","stringency_index"]]
relationship2_corr = relationship2[["stringency_index", "Retail & Recreation","Grocery & Pharma","Transit Stations","Workplace"]].dropna().corr()
fig, ax = plt.subplots(figsize=(10,5))
sns.heatmap(relationship2_corr, annot=True, ax = ax)
c) Australia
mobility_aus = pd.read_csv("/work/2020_AU_Region_Mobility_Report.csv")
mobility_aus = mobility_aus.drop_duplicates()
mobility_aus["date"] = pd.to_datetime(mobility_aus["date"])
mobility_aus = mobility_aus.rename(columns={
    'retail_and_recreation_percent_change_from_baseline': 'Retail & Recreation',
    'grocery_and_pharmacy_percent_change_from_baseline': 'Grocery & Pharma',
    'parks_percent_change_from_baseline': 'Parks',
    'transit_stations_percent_change_from_baseline': 'Transit Stations',
    'workplaces_percent_change_from_baseline': 'Workplace',
    'residential_percent_change_from_baseline': 'Residential',
})
mobility_aus.tail()
stringency_aus = covid_aus[["date","stringency_index"]]
relationship3 = mobility_aus.merge(stringency_aus,on = "date", how = "right")[["Retail & Recreation","Grocery & Pharma","Transit Stations","Workplace","stringency_index"]]
relationship3_corr = relationship3[["stringency_index", "Retail & Recreation","Grocery & Pharma","Transit Stations","Workplace"]].dropna().corr()
fig, ax = plt.subplots(figsize=(10,5))
sns.heatmap(relationship3_corr, annot=True, ax = ax)
d) USA
mobility_us = pd.read_csv("/work/2020_US_Region_Mobility_Report.csv")
mobility_us = mobility_us.drop_duplicates()
mobility_us["date"] = pd.to_datetime(mobility_us["date"])
mobility_us = mobility_us.rename(columns={
    'retail_and_recreation_percent_change_from_baseline': 'Retail & Recreation',
    'grocery_and_pharmacy_percent_change_from_baseline': 'Grocery & Pharma',
    'parks_percent_change_from_baseline': 'Parks',
    'transit_stations_percent_change_from_baseline': 'Transit Stations',
    'workplaces_percent_change_from_baseline': 'Workplace',
    'residential_percent_change_from_baseline': 'Residential',
})
mobility_us.tail()
stringency_us = covid_usa[["date","stringency_index"]]
relationship4 = mobility_us.merge(stringency_us,on = "date", how = "right")[["Retail & Recreation","Grocery & Pharma","Transit Stations","Workplace","stringency_index"]]
relationship4_corr = relationship4[["stringency_index", "Retail & Recreation","Grocery & Pharma","Transit Stations","Workplace"]].dropna().corr()
fig, ax = plt.subplots(figsize=(10,5))
sns.heatmap(relationship4_corr, annot=True, ax = ax)
Conclusion: Among the selected countries, Singapore shows the highest level of compliance with government restrictions, followed by Australia, the USA, and Malaysia, in that order.
4. Relationship between STI, selected Singaporean stocks and pandemic
!pip install yfinance --upgrade --no-cache-dir
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import yfinance as yf
symbols_list1 = ['^STI','OV8.SI','1337.HK','SE']
start1 = dt.datetime(2020,1,1)
end1 = dt.datetime(2021,9,15)
data1 = yf.download(symbols_list1, start=start1, end=end1)
data1 = data1["Adj Close"]
data1.head()
# Calculate daily returns
data1['STI_ret'] = data1['^STI'].pct_change()[1:]
data1['OV8.SI_ret'] = data1['OV8.SI'].pct_change()[1:]
data1['1337.HK_ret'] = data1['1337.HK'].pct_change()[1:]
data1['SE_ret'] = data1['SE'].pct_change()[1:]
data1 = data1.ffill()
data1 = data1.reset_index()
data1 = data1.rename(columns={"Date":"date"})
data1.head()
data2 = data1.merge(stringency, on='date',how='right')
data2.head()
data2 = data2.merge(sg_mobility_1, on='date',how='right')
data2.head()
data2 = pd.merge(data2,covid[['date','new_cases']],on='date', how='left')
data2.head()
data2_corr = data2[['STI_ret','OV8.SI_ret','1337.HK_ret','SE_ret','Retail & Recreation','Workplace','new_cases','stringency_index']].dropna().corr()
fig, ax = plt.subplots(figsize=(20,10))
sns.heatmap(data2_corr, annot=True, ax = ax)
The heatmap above shows correlations between the selected Singapore-related equities, mobility, daily new cases, and stringency of movement restrictions. The STI is largely uncorrelated with mobility, daily new cases, and stringency. Of the four stocks, Sheng Siong (OV8.SI) and Sea Group (SE) show the highest correlations with the stringency index, at roughly 0.10 and 0.11 respectively.
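A minimal sketch to read those stringency correlations directly off the matrix rather than from the heatmap:
# hedged sketch: each return series' correlation with the stringency index
print(data2_corr.loc[["STI_ret", "OV8.SI_ret", "1337.HK_ret", "SE_ret"], "stringency_index"].sort_values(ascending=False))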
Lab 5
1. Based on your investment strategy (i.e., chosen stocks, industries, asset classes), identify the maximum Sharpe ratio and minimum volatility portfolios.
Our strategy is to invest in the following stocks: Amazon (AMZN), Sheng Siong (OV8.SI), Moderna (MRNA), Novavax (NVAX), and Sea Group (SE)
# find the symbol (i.e., google the instrument + 'yahoo finance') to any data series you are interested at
# e.g., market/sector index ETF for your chosen country and various asset classes (e.g., Comex Gold's symbol is 'GC=F')
# e.g., SPY (https://finance.yahoo.com/quote/SPY/)
symbols_list = ['AMZN', 'OV8.SI', 'MRNA','NVAX','SE']
start = dt.datetime(2019,12,1)
end = dt.datetime(2021,9,15)
data = yf.download(symbols_list, start=start, end=end)
# filter column adjusted close
df = data['Adj Close']
df = df.ffill()
df.head()
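# OV8.SI trades in SGD; the 0.74 factor below is an approximate fixed SGD->USD rate
# (a simplifying assumption; a dated FX series would be more accurate)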
df["OV8.SI_USD"] = df["OV8.SI"] * 0.74
df = df.drop("OV8.SI", axis=1)
df.head()
!pip install PyPortfolioOpt==1.2.1
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from pypfopt import cla
from pypfopt.plotting import Plotting
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices
from matplotlib.ticker import FuncFormatter
import seaborn as sns
# Check NaN values in the data
# (symbols_list still names 'OV8.SI', which was renamed to 'OV8.SI_USD' above, so check df directly)
print(df.isnull().sum())
# Calculate portfolio mean return
mu = expected_returns.mean_historical_return(df)
print(mu)
# Calculate portfolio return variance
sigma = risk_models.sample_cov(df)
print(sigma)
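Before handing mu and sigma to the optimizer, it helps to see the quantity being maximized; a minimal sketch that evaluates return, volatility, and Sharpe ratio for illustrative equal weights (the weights and the 0.008 risk-free rate here are assumptions mirroring the max_sharpe call below):
# hedged sketch: Sharpe = (w.mu - rf) / sqrt(w' sigma w), using mu and sigma from above
w = np.repeat(1 / len(mu), len(mu))  # illustrative equal weights, not the optimum
rf = 0.008                           # same risk-free rate as used in max_sharpe below
port_ret = np.dot(w, mu)
port_vol = np.sqrt(np.dot(w, np.dot(sigma, w)))
print(f"equal-weight portfolio: return={port_ret:.3f}, vol={port_vol:.3f}, Sharpe={(port_ret - rf) / port_vol:.3f}")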
2. Maximum Sharpe Portfolio
# Note max sharpe ratio is the tangency portfolio
# weight bounds in negative allows shorting of stocks
ef = EfficientFrontier(mu, sigma, weight_bounds=(-1,1))
# optional constraints possible, read pypfopt documentation.
sharpe_portfolio=ef.max_sharpe(risk_free_rate=0.008)
sharpe_portfolio_wt=ef.clean_weights()
print(sharpe_portfolio_wt)
Plotting.plot_weights(sharpe_portfolio_wt)
latest_prices = get_latest_prices(df)
# Allocate Portfolio Value in $ as required to show number of shares/stocks to buy,
# also bounds for shorting will affect allocation
# Maximum Sharpe Portfolio Allocation $1000000
da = DiscreteAllocation(sharpe_portfolio_wt, latest_prices, total_portfolio_value=1000000)
allocation, leftover = da.greedy_portfolio()
print(allocation)
print("Leftover Fund value for the maximum Sharpe portfolio is ${:.2f}".format(leftover))
a) If you have USD 1 million now, how much should you invest in each stock?
{'SE': 1281, 'MRNA': 609, 'OV8.SI_USD': 140594, 'NVAX': 480, 'AMZN': 12} Leftover Fund value for the maximum Sharpe portfolio is $3368.94
max_sharpe_cla = cla.CLA(mu, sigma)
max_sharpe_cla.max_sharpe()
Plotting.plot_efficient_frontier(max_sharpe_cla, show_assets=True)
sharpe_portfolio_wt_list = list(sharpe_portfolio_wt.values())
ret_data = df.pct_change()[1:]
weighted_returns = (sharpe_portfolio_wt_list * ret_data)
portfolio_ret = pd.DataFrame(weighted_returns.sum(axis=1))
ret_data = ret_data.merge(portfolio_ret, on="Date", how="left")
ret_data = ret_data.rename(columns={0: "portfolio_ret"})
ret_data.head()
ret_data['cumulative_portfolio_ret'] = (ret_data['portfolio_ret'] + 1).cumprod()
# SPY itself was not downloaded; AMZN's cumulative return is used here as a rough market proxy
ret_data['cumulative_benchmark_ret'] = (ret_data['AMZN'] + 1).cumprod()
ret_data.head()
b) Plot a graph showing how your portfolios (maximum Sharpe and minimum volatility portfolios) perform relative to the market (i.e., SPY).
sns.scatterplot(x='Date', y='cumulative_portfolio_ret', data=ret_data.reset_index())
sns.scatterplot(x='Date', y='cumulative_benchmark_ret', data=ret_data.reset_index())
plt.legend(labels=["Our Portfolio", "AMZN (market proxy)"])
def cdf(data):
    # return sorted values x and their empirical cumulative probabilities y
    n = len(data)
    x = np.sort(data)
    y = np.arange(1, n + 1) / n
    return x, y
mean = np.mean(ret_data['cumulative_portfolio_ret'])
# cumulative distribution
x,y = cdf(ret_data['cumulative_portfolio_ret'])
# random sample from an exponential distribution calibrated with sample mean (parameter for exponential distribution)
samples_exp = np.random.exponential(mean, size=10000)
# cumulative distribution from the random sample drawn from an exponential distribution
x_theor_exp,y_theor_exp = cdf(samples_exp)
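The ECDF arrays above are computed but never drawn; a minimal sketch to overlay the empirical CDF against its exponential calibration:
# hedged sketch: empirical CDF vs CDF of the calibrated exponential sample
plt.plot(x, y, marker='.', linestyle='none', label='empirical')
plt.plot(x_theor_exp, y_theor_exp, label='exponential fit')
plt.xlabel('cumulative portfolio return')
plt.ylabel('CDF')
plt.legend()
plt.show()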
c) Bonus - Histogram and confidence intervals of portfolio returns
# plot histogram of random sample
plt.hist(samples_exp, bins = 50, density = True, histtype = 'step', color = 'red')
plt.xlabel('portfolio return')
plt.ylabel('f(x)')
plt.title('Probability Density Function')
# note: unlike drawing from the population, bootstrapping draws random sample (with replacement) from the actual data
def bootstrap_replicate_1d(data, func):
    # bootstrap from the actual data: draw a random sample with replacement,
    # with sample size equal to the number of observations
    bs_sample = np.random.choice(data, len(data))
    # return func (e.g., np.mean) applied to the draw
    return func(bs_sample)
# draw bootstrap replicates
def draw_bs_reps(data, func, size=1):
    # initialize a numpy array of dimension size
    bs_replicates = np.empty(size)
    # fill it by repeatedly calling bootstrap_replicate_1d
    for i in range(size):
        bs_replicates[i] = bootstrap_replicate_1d(data, func)
    # return a numpy array of the statistics of the draws
    return bs_replicates
# bootstrap from actual samples
# note that this is not sample randomly drawn from a theoretical population
bs_replicates = draw_bs_reps(ret_data['cumulative_portfolio_ret'], np.mean, size=10000)
plt.hist(bs_replicates, bins=50, density=True, alpha = .8, color = 'red')
plt.xlabel('portfolio return')
plt.ylabel('P(T = t)')
plt.title('Probability Density Function')
# Bootstrapped confidence interval
conf_int = np.percentile(bs_replicates, [2.5, 97.5])
print('95% bootstrapped confidence interval =', conf_int)
# Theoretical confidence interval (normal approximation)
conf_int_actual_upper = ret_data['cumulative_portfolio_ret'].mean() + ret_data['cumulative_portfolio_ret'].std() / np.sqrt(ret_data['cumulative_portfolio_ret'].count()) * 1.96
conf_int_actual_lower = ret_data['cumulative_portfolio_ret'].mean() - ret_data['cumulative_portfolio_ret'].std() / np.sqrt(ret_data['cumulative_portfolio_ret'].count()) * 1.96
conf_int_actual = [conf_int_actual_lower, conf_int_actual_upper]
print('-' * 120)
print('95% theoretical confidence interval =', conf_int_actual)
3. Minimum Volatility Portfolio
# Optionally, add an objective to enforce a minimum zero weighting for individual stocks
# Note: this re-uses the ef object solved above for max Sharpe; newer pypfopt versions
# require re-instantiating EfficientFrontier before running a second optimization
min_vol_portfolio = ef.min_volatility()
min_vol_portfolio_wt = ef.clean_weights()
print(min_vol_portfolio_wt)
Plotting.plot_weights(min_vol_portfolio_wt)
# Allocate portfolio value in $ to show the number of shares/stocks to buy;
# the bounds allowing shorting will also affect the allocation
# Minimum Volatility Portfolio Allocation $1000000
da = DiscreteAllocation(min_vol_portfolio_wt, latest_prices, total_portfolio_value=1000000)
allocation, leftover = da.greedy_portfolio()
print(allocation)
print("Leftover Fund value for the minimum volatility portfolio is ${:.2f}".format(leftover))
a) If you have USD 1 million now, how much should you invest in each stock?
{'OV8.SI_USD': 317812, 'SE': 704, 'AMZN': 52, 'MRNA': 396, 'NVAX': 271} Leftover Fund value for the minimum volatility portfolio is $2495.55
min_vol_cla = cla.CLA(mu, sigma)
min_vol_cla.min_volatility()
Plotting.plot_efficient_frontier(min_vol_cla, show_assets=True)
ret_data1 = df.pct_change()[1:]
min_vol_portfolio_wt_list = list(min_vol_portfolio_wt.values())
weighted_returns1 = (min_vol_portfolio_wt_list * ret_data1)
portfolio_ret1 = pd.DataFrame(weighted_returns1.sum(axis=1))
ret_data1 = ret_data1.merge(portfolio_ret1, on="Date", how="left")
ret_data1 = ret_data1.rename(columns={0: "portfolio_ret"})
ret_data1.head()
ret_data1['cumulative_portfolio_ret'] = (ret_data1['portfolio_ret'] + 1).cumprod()
# As above, AMZN's cumulative return stands in for the market benchmark
ret_data1['cumulative_benchmark_ret'] = (ret_data1['AMZN'] + 1).cumprod()
ret_data1.head()
b) Plot a graph showing how your portfolio (minimum volatility portfolio) performs relative to the market (i.e., SPY).
sns.scatterplot(x='Date', y='cumulative_portfolio_ret', data=ret_data1.reset_index())
sns.scatterplot(x='Date', y='cumulative_benchmark_ret', data=ret_data1.reset_index())
plt.legend(labels=["Our Portfolio", "AMZN (market proxy)"])
mean = np.mean(ret_data1['cumulative_portfolio_ret'])
x,y = cdf(ret_data1['cumulative_portfolio_ret'])
# random sample from an exponential distribution calibrated with sample mean (parameter for exponential distribution)
samples_exp = np.random.exponential(mean, size=10000)
# cumulative distribution from the random sample drawn from an exponential distribution
x_theor_exp,y_theor_exp = cdf(samples_exp)
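As in section 2, the ECDF pair can be overlaid; a minimal sketch:
plt.plot(x, y, marker='.', linestyle='none', label='empirical')
plt.plot(x_theor_exp, y_theor_exp, label='exponential fit')
plt.xlabel('cumulative portfolio return')
plt.ylabel('CDF')
plt.legend()
plt.show()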
c) Bonus: Histogram and confidence intervals of portfolio returns
# plot histogram of random sample
plt.hist(samples_exp, bins = 50, density = True, histtype = 'step', color = 'red')
plt.xlabel('portfolio return')
plt.ylabel('f(x)')
plt.title('Probability Density Function')
# bootstrap helpers bootstrap_replicate_1d and draw_bs_reps are defined in section 2c above
# bootstrap from actual samples
# note that this is not sample randomly drawn from a theoretical population
bs_replicates1 = draw_bs_reps(ret_data1['cumulative_portfolio_ret'], np.mean, size=10000)
plt.hist(bs_replicates1, bins=50, density=True, alpha = .8, color = 'red')
plt.xlabel('portfolio return')
plt.ylabel('P(T = t)')
plt.title('Probability Density Function')
# Bootstrapped confidence interval
conf_int1 = np.percentile(bs_replicates1, [2.5, 97.5])
print('95% bootstrapped confidence interval =', conf_int1)
# Theoretical confidence interval (normal approximation)
conf_int_actual_upper1 = ret_data1['cumulative_portfolio_ret'].mean() + ret_data1['cumulative_portfolio_ret'].std() / np.sqrt(ret_data1['cumulative_portfolio_ret'].count()) * 1.96
conf_int_actual_lower1 = ret_data1['cumulative_portfolio_ret'].mean() - ret_data1['cumulative_portfolio_ret'].std() / np.sqrt(ret_data1['cumulative_portfolio_ret'].count()) * 1.96
conf_int_actual1 = [conf_int_actual_lower1, conf_int_actual_upper1]
print('-' * 120)
print('95% theoretical confidence interval =', conf_int_actual1)
df = df.stack()
df.head()
df = df.reset_index()
df = df.rename(columns={"level_1": "ticker", 0: "price"})
df.head()
df = df.set_index('Date')
df['ret'] = df.groupby('ticker')['price'].pct_change()
df = df.dropna()
df.head()
sns.scatterplot(x='Date', y='price', hue='ticker', data=df.reset_index())