Lab4: 1. Pick a country and evaluate the COVID-19 situation in that country.
# Import modules for API calls
import requests
import io
import pandas as pd
import requests
import json
from datetime import datetime
import matplotlib.pyplot as plt
# Import module for plotting
import seaborn as sns
## JHU Vaccination Rates (Taken From: https://github.com/owid/covid-19-data/tree/master/public/data)
# OWID compiled COVID-19 dataset (cases, deaths, tests, vaccinations per country).
url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'
download = requests.get(url).content
covid = pd.read_csv(io.StringIO(download.decode('utf-8')), parse_dates=['date'])

# Compare China's daily new cases against a selection of comparable countries.
namelist = ['China', 'Singapore', 'United Arab Emirates', 'Japan']
selected_case = covid[covid['location'].isin(namelist)]
# seaborn >= 0.12 removed positional x/y arguments; pass them as keywords.
sns.scatterplot(x='date', y='new_cases', data=selected_case, hue='location')
plt.title('China against a selection of countries')

# Separate plot for comparison to United States and United Kingdom since these values dwarf the
# values for China and other countries.
plt.figure()  # new figure so the first plot (and its title) is not overwritten when run as a script
namelist2 = ['China', 'United States', 'United Kingdom']
selected_case = covid[covid['location'].isin(namelist2)]
sns.scatterplot(x='date', y='new_cases', data=selected_case, hue='location')
plt.title('China against the United States and United Kingdom')
2. Evaluate the potential hidden cases (e.g., case positivity rate) and deaths (e.g., estimated infection fatality rate, excess deaths).
# Isolate China's time series, indexed by date, with only the columns used below.
# .copy() materialises the slice so later column plots/assignments do not trigger
# pandas' SettingWithCopyWarning on a view of `covid`.
China_data = (
    covid[covid['location'] == "China"]
    .set_index('date')[
        ['total_cases', 'new_cases', 'total_deaths', 'new_deaths',
         'total_tests', 'new_tests', 'positive_rate', 'tests_per_case',
         'total_vaccinations', 'new_vaccinations']
    ]
    .copy()
)
# Vaccination roll-out over time.
China_data['total_vaccinations'].plot()
China_data.tail()
3. Explore the relationship between the country's COVID-19 cases and deaths and government health intervention policies (e.g., vaccination rate, closure), as well as Google community mobility reports.
# new_vaccinations, people_fully_vaccinated, tests_per_case
# Plot testing intensity and vaccination pace, each against daily new cases.
for pair in (['tests_per_case', 'new_cases'], ['new_vaccinations', 'new_cases']):
    China_data[pair].plot()
Lab5: 1. Based on your investment strategy (i.e., chosen stocks, industries, asset classes), identify the maximum Sharpe ratio and minimum volatility portfolios.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
import yfinance as yf
# find the symbol (i.e., google the instrument + 'yahoo finance') to any data series you are interested at
# e.g., market/sector index ETF for your chosen country and various asset classes (e.g., Comex Gold's symbol is 'GC=F')
# e.g., SPY (https://finance.yahoo.com/quote/SPY/)
symbols_list = ['000001.SS', '510710.SS', '510500.SS', '510300.SS',
                '002049.SZ', '600196.SS', '002594.SZ', '300750.SZ']
start = dt.datetime(2020, 4, 1)
end = dt.datetime(2021, 9, 17)
# auto_adjust=False keeps the 'Adj Close' column — newer yfinance releases default
# to auto-adjusting prices in place, which would drop that column. TODO confirm
# against the installed yfinance version.
data = yf.download(symbols_list, start=start, end=end, auto_adjust=False)
# filter column adjusted close
df = data['Adj Close']
# The ticker order of the returned columns is not guaranteed to match symbols_list,
# so rename by an explicit ticker -> name mapping rather than by position.
ticker_names = {
    '000001.SS': 'SSE',      # Shanghai Composite (market index, kept as benchmark)
    '510710.SS': 'SSE50',
    '510500.SS': 'CSI500',
    '510300.SS': 'CSI300',
    '002049.SZ': 'Unigroup',
    '600196.SS': 'Fosun',
    '002594.SZ': 'BYD',
    '300750.SZ': 'Amperex',
}
df = df.rename(columns=ticker_names)
df = df.ffill()  # carry last known price across non-trading days / missing quotes
df0 = df.copy()  # keep a copy that still contains the SSE benchmark
# The portfolio is built from the individual instruments only — drop the market index.
df = df[['SSE50', 'CSI500', 'CSI300', 'Unigroup', 'Fosun', 'BYD', 'Amperex']]
df.head()
df.info()
!pip install PyPortfolioOpt==1.2.1
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from pypfopt import cla
from pypfopt.plotting import Plotting
from pypfopt.discrete_allocation import DiscreteAllocation, get_latest_prices
from matplotlib.ticker import FuncFormatter
import seaborn as sns
# Check NaN values in the data
#nullin_df = pd.DataFrame(df,columns=symbols_list)
#print(nullin_df.isnull().sum())
# Calculate portfolio mean return
# (annualised mean historical return per asset — the expected-return vector
# fed into the optimiser below)
mu = expected_returns.mean_historical_return(df)
print(mu)
# Calculate portfolio return variance
# (annualised sample covariance matrix of the asset returns)
sigma = risk_models.sample_cov(df)
print(sigma)
2+3. If you have USD 1 million now, how much should you invest in each? Plot a graph showing how your portfolios (maximum Sharpe and minimum volatility portfolios) perform relative to the market.
# Note max sharpe ratio is the tangency portfolio
# weight bounds in negative allows shorting of stocks
ef = EfficientFrontier(mu, sigma, weight_bounds=(-1, 1))  # if don't short, put (0, 1)
# optional constraints possible, read pypfopt documentation.
# Risk-free rate: over the last year the Chinese government bond ranged 2.21-2.7%,
# so an average of 2.45% is used.
sharpe_portfolio = ef.max_sharpe(risk_free_rate=0.0245)
sharpe_portfolio_wt = ef.clean_weights()  # round tiny weights to zero for readability
print(sharpe_portfolio_wt)
Plotting.plot_weights(sharpe_portfolio_wt)

latest_prices = get_latest_prices(df)
# Allocate Portfolio Value in $ as required to show number of shares/stocks to buy,
# also bounds for shorting will affect allocation
# Maximum Sharpe Portfolio Allocation $1000000
da = DiscreteAllocation(sharpe_portfolio_wt, latest_prices, total_portfolio_value=1000000)
allocation, leftover = da.greedy_portfolio()
print(allocation)
print("Leftover Fund value for the maximum Sharpe portfolio is ${:.2f}".format(leftover))

# Trace the efficient frontier with the critical line algorithm and mark the assets.
max_sharpe_cla = cla.CLA(mu, sigma)
max_sharpe_cla.max_sharpe()
# show_assets expects a boolean; the string "True" only worked because any
# non-empty string is truthy.
Plotting.plot_efficient_frontier(max_sharpe_cla, show_assets=True)
# Realised daily performance of the max-Sharpe portfolio.
sharpe_portfolio_wt_list = list(sharpe_portfolio_wt.values())
ret_data = df.pct_change()[1:]  # drop the first row to avoid NaN returns
# Portfolio daily return = weighted sum of the individual asset returns.
weighted_returns = (sharpe_portfolio_wt_list * ret_data)
portfolio_ret = pd.DataFrame(weighted_returns.sum(axis=1))
ret_data = ret_data.merge(portfolio_ret, on="Date", how="left")
ret_data = ret_data.rename(columns={0: "portfolio_ret"})
# Benchmark: daily return of the SSE market index (kept aside in df0).
ret_data['SSE'] = df0["SSE"].pct_change()[1:]
ret_data.head()
ret_data['portfolio_ret'].hist(bins=50, density=True, histtype='step', color='red')
# density=True: the y-axis is a probability density; the daily return is on the x-axis.
plt.xlabel('Return')
plt.ylabel('Density')
plt.title('Daily Return for Max-Sharpe Portfolio')
# Bootstrapping resamples the observed data itself (with replacement), rather than
# drawing from an assumed theoretical population.
def bootstrap_replicate_1d(data, func):
    """Return one bootstrap replicate of *data* summarised by *func*.

    Draws a random sample with replacement, of the same size as *data*,
    and applies the statistic *func* (e.g. np.mean) to it.
    """
    resample = np.random.choice(data, len(data))
    return func(resample)
# Repeatedly draw bootstrap replicates.
def draw_bs_reps(data, func, size=1):
    """Return a numpy array of *size* bootstrap replicates of *func* over *data*."""
    return np.array([bootstrap_replicate_1d(data, func) for _ in range(size)])
# bootstrap from actual samples
# note that this is not sample randomly drawn from a theoretical population
data = ret_data.copy()
# 10,000 bootstrap replicates of the mean daily portfolio return
bs_replicates = draw_bs_reps(data['portfolio_ret'], np.mean, size=10000)
# 95% bootstrap CI: the 2.5th and 97.5th percentiles of the replicate distribution
conf_int = np.percentile(bs_replicates,[2.5, 97.5])
print('95% bootstrapped confidence interval for portfolio is', conf_int)
# Theoretical confidence intervals
# (normal approximation: mean +/- 1.96 standard errors of the mean)
conf_int_actual_upper = data['portfolio_ret'].mean() + data['portfolio_ret'].std()/np.sqrt(data['portfolio_ret'].count())*1.96
conf_int_actual_lower = data['portfolio_ret'].mean() - data['portfolio_ret'].std()/np.sqrt(data['portfolio_ret'].count())*1.96
conf_int_actual = [conf_int_actual_lower, conf_int_actual_upper]
print('-'*120)
print('95% theoretical confidence interval for portfolio is=', conf_int_actual,)
# Cumulative growth of $1: compound the daily returns of the portfolio and the
# SSE benchmark.
ret_data['cumulative_portfolio_ret'] = (ret_data['portfolio_ret'] + 1).cumprod()
ret_data['cumulative_SSE_ret'] = (ret_data['SSE'] + 1).cumprod()
ret_data.tail()
# seaborn >= 0.12 removed positional x/y arguments; both series share one set of axes.
sns.scatterplot(x='Date', y='cumulative_portfolio_ret', data=ret_data)
sns.scatterplot(x='Date', y='cumulative_SSE_ret', data=ret_data)
plt.title('Cumulative portfolio ret against time')
# May use add_objective to ensure minimum zero weighting to individual stocks.
# A solved EfficientFrontier cannot be re-optimised: `ef` above was already consumed
# by max_sharpe(), so build a fresh problem for the minimum-volatility portfolio.
ef_minvol = EfficientFrontier(mu, sigma, weight_bounds=(-1, 1))
min_vol_portfolio = ef_minvol.min_volatility()
min_vol_portfolio_wt = ef_minvol.clean_weights()
print(min_vol_portfolio_wt)
Plotting.plot_weights(min_vol_portfolio_wt)

# Allocate Portfolio Value in $ as required to show number of shares/stocks to buy,
# also bounds for shorting will affect allocation
# Minimum Volatility Portfolio Allocation $1000000
da = DiscreteAllocation(min_vol_portfolio_wt, latest_prices, total_portfolio_value=1000000)
allocation, leftover = da.greedy_portfolio()
print(allocation)
# Message fixed: this is the minimum-volatility portfolio, not the max-Sharpe one.
print("Leftover Fund value for the minimum volatility portfolio is ${:.2f}".format(leftover))

min_vol_cla = cla.CLA(mu, sigma)
min_vol_cla.min_volatility()
# show_assets expects a boolean, not the string "True".
Plotting.plot_efficient_frontier(min_vol_cla, show_assets=True)
# Realised daily performance of the minimum-volatility portfolio
# (same pipeline as for the max-Sharpe portfolio above).
min_vol_portfolio_wt_list = list(min_vol_portfolio_wt.values())
ret_data = df.pct_change()[1:]  # drop the first row to avoid NaN returns
weighted_returns = (min_vol_portfolio_wt_list * ret_data)
portfolio_ret = pd.DataFrame(weighted_returns.sum(axis=1))
ret_data = ret_data.merge(portfolio_ret, on="Date", how="left")
ret_data = ret_data.rename(columns={0: "portfolio_ret"})
ret_data['SSE'] = df0["SSE"].pct_change()[1:]
ret_data.head()
# Cumulative growth of $1 for portfolio and SSE benchmark.
ret_data['cumulative_portfolio_ret'] = (ret_data['portfolio_ret'] + 1).cumprod()
ret_data['cumulative_SSE_ret'] = (ret_data['SSE'] + 1).cumprod()
ret_data.tail()
ret_data['portfolio_ret'].hist(bins=50, density=True, histtype='step', color='red')
# density=True: the y-axis is a probability density; the daily return is on the x-axis.
plt.xlabel('Return')
plt.ylabel('Density')
plt.title('Daily Return for Min-Vol Portfolio')
# Bootstrap CI for the MIN-VOL portfolio. The replicates must be redrawn from the
# current ret_data: the original reused `bs_replicates` and `data` computed for the
# max-Sharpe portfolio, so both printed intervals described the wrong portfolio.
data = ret_data.copy()
bs_replicates = draw_bs_reps(data['portfolio_ret'], np.mean, size=10000)
# 95% bootstrap CI: the 2.5th and 97.5th percentiles of the replicate distribution
conf_int = np.percentile(bs_replicates, [2.5, 97.5])
print('95% bootstrapped confidence interval for portfolio is', conf_int)
# Theoretical confidence intervals
# (normal approximation: mean +/- 1.96 standard errors of the mean)
conf_int_actual_upper = data['portfolio_ret'].mean() + data['portfolio_ret'].std()/np.sqrt(data['portfolio_ret'].count())*1.96
conf_int_actual_lower = data['portfolio_ret'].mean() - data['portfolio_ret'].std()/np.sqrt(data['portfolio_ret'].count())*1.96
conf_int_actual = [conf_int_actual_lower, conf_int_actual_upper]
print('-'*120)
print('95% theoretical confidence interval for portfolio is=', conf_int_actual,)
# Min-vol portfolio vs. SSE benchmark, cumulative growth of $1 over time.
# seaborn >= 0.12 removed positional x/y arguments; pass them as keywords.
sns.scatterplot(x='Date', y='cumulative_portfolio_ret', data=ret_data)
sns.scatterplot(x='Date', y='cumulative_SSE_ret', data=ret_data)
plt.title('Cumulative portfolio ret against time')
# Reshape the price table to long format: one row per (Date, ticker) with a single
# 'price' column, convenient for hue-grouped plotting.
df = df.stack()
df.head()
df = df.reset_index()
df = df.rename(columns={"level_1": "ticker", 0: "price"})
df.head()
df = df.set_index('Date')
# Per-ticker daily returns. Select the 'price' column explicitly:
# DataFrameGroupBy.pct_change() over the whole frame is deprecated in recent pandas.
df['ret'] = df.groupby('ticker')['price'].pct_change()
df = df.dropna()  # drop each ticker's first row, whose return is NaN
df.head()
# seaborn >= 0.12 removed positional x/y arguments; pass them as keywords.
sns.scatterplot(x='Date', y='price', data=df, hue='ticker')
plt.title('Price against date')