# Install yahoo finance module
!pip install yfinance --upgrade --no-cache-dir
# Import the necessary packages
import yfinance as yf
import datetime as dt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Place the list of stocks that form your all-weather portfolio in Data Lab 2
# Draw data for the past five years
symbols_list = ["AAPL", "AMZN", "MSFT", "DIS", "BA", "CVX", "TSLA", "AON", "KO", "C", "V", "PG", "GM"]
start = dt.datetime(2017,8,31)
end = dt.datetime(2022,8,31) # not included
# Download OHLCV history for all tickers in one call (columns are a
# MultiIndex of field x ticker)
data = yf.download(symbols_list, start=start, end=end)
# Keep only the adjusted close in the dataframe
# Note that the date is in the index
# NOTE(review): newer yfinance versions default to auto_adjust=True, which
# drops the "Adj Close" column — confirm the installed version, or pass
# auto_adjust=False to yf.download.
price = data["Adj Close"]
# Calculate return using method pct_change
# Find out more about .pct_change with help!
# The first row is NaN because there is no prior price to compare against.
daily_returns = price.pct_change()
daily_returns
# deep dive into the first five rows of observations in the dataframe
# ([1:] skips the NaN first row)
price.pct_change()[1:].head()
# Build the daily and cumulative return series for a weighted portfolio.
def portfolio_return_series(daily_returns, weights):
    '''
    INPUTS
    daily_returns: dataframe of daily returns, one ticker per column
    weights: numpy array with the portfolio weight of each ticker column
             (same order as the columns, i.e. tickers sorted ascending)
    OUTPUTS
    portfolio_daily_returns: series of daily portfolio returns
    portfolio_cumulative_returns: series of cumulative portfolio returns
    '''
    # Scale each ticker's return by its weight, then aggregate across
    # tickers to get one portfolio return per day.
    weighted_returns = daily_returns.mul(weights)
    daily_series = weighted_returns.sum(axis=1)
    # Compound the daily growth factors (1 + r) over time, then convert
    # the running product back into a cumulative return.
    growth_factors = 1 + daily_series
    cumulative_series = growth_factors.cumprod() - 1
    return daily_series, cumulative_series
# Install PyPortfolioOpt package
!pip install PyPortfolioOpt
# Import the packages
from pypfopt import risk_models
from pypfopt import expected_returns
from pypfopt.efficient_frontier import EfficientFrontier
The Maximum Sharpe Portfolio
# Calculate expected returns mu (annualized mean historical returns per ticker)
mu = expected_returns.mean_historical_return(price)
# Calculate the covariance matrix S (annualized sample covariance of returns)
sigma = risk_models.sample_cov(price)
# Obtain the efficient frontier
ef = EfficientFrontier(mu, sigma)
# Calculate weights for the maximum Sharpe ratio portfolio
weights_maxsharpe = ef.max_sharpe()
# Round tiny weights to zero for readability
cleaned_weights_maxsharpe = ef.clean_weights()
# Convert the ticker -> weight mapping into a plain array.
# NOTE(review): assumes the dict's value order matches the price columns —
# verify, since portfolio_return_series relies on that alignment.
weights_list = list(cleaned_weights_maxsharpe.values())
weights_maxsharpe_array = np.array(weights_list)
# Inspect the calculated weights
print(cleaned_weights_maxsharpe)
# Print expected return, volatility and Sharpe ratio of this portfolio
ef.portfolio_performance(verbose=True)
The minimum volatility portfolio
# Obtain a fresh efficient frontier (each EfficientFrontier instance can
# only be optimized once, so re-create it for the next objective)
ef = EfficientFrontier(mu, sigma)
# Calculate weights for the minimum volatility portfolio
weights_minvol = ef.min_volatility()
# Round tiny weights to zero for readability
cleaned_weights_minvol = ef.clean_weights()
# Convert the ticker -> weight mapping into a plain array.
# NOTE(review): assumes value order matches the price columns — verify.
weights_list = list(cleaned_weights_minvol.values())
weights_minvol_array = np.array(weights_list)
# Inspect the calculated weights
print(cleaned_weights_minvol)
# Print expected return, volatility and Sharpe ratio of this portfolio
ef.portfolio_performance(verbose=True)
L2 regularization with maximum Sharpe
from pypfopt import objective_functions
# Obtain a fresh efficient frontier for the regularized optimization
ef = EfficientFrontier(mu, sigma)
# Add an L2 regularization penalty (gamma controls its strength); this
# discourages extreme concentrated weights and spreads the allocation
ef.add_objective(objective_functions. L2_reg, gamma=2)
# Calculate weights for the maximum sharpe portfolio
ef.max_sharpe()
# Round tiny weights to zero for readability
cleaned_weights_L2 = ef.clean_weights()
# Convert the ticker -> weight mapping into a plain array.
# NOTE(review): assumes value order matches the price columns — verify.
weights_list = list(cleaned_weights_L2.values())
weights_L2_array_maxSharpe = np.array(weights_list)
# Inspect the calculated weights
print(cleaned_weights_L2)
# Print expected return, volatility and Sharpe ratio of this portfolio
ef.portfolio_performance(verbose=True)
L2 regularization with minimum volatility
from pypfopt import objective_functions
# Obtain a fresh efficient frontier for the regularized optimization
ef = EfficientFrontier(mu, sigma)
# Add the same L2 regularization penalty as in the max-Sharpe case
ef.add_objective(objective_functions. L2_reg, gamma=2)
# Calculate weights for the minimum volatility portfolio
ef.min_volatility()
# Round tiny weights to zero for readability
# (note: this reuses/overwrites the cleaned_weights_L2 name from the
# max-Sharpe cell above)
cleaned_weights_L2 = ef.clean_weights()
# Convert the ticker -> weight mapping into a plain array.
# NOTE(review): assumes value order matches the price columns — verify.
weights_list = list(cleaned_weights_L2.values())
weights_L2_array_minvol= np.array(weights_list)
# Inspect the calculated weights
print(cleaned_weights_L2)
# Print expected return, volatility and Sharpe ratio of this portfolio
ef.portfolio_performance(verbose=True)
# Collect the daily and cumulative return series of all five portfolio
# construction methods into two dataframes, one column per method.
port_daily_returns = pd.DataFrame(index=price.index, columns=['equally-weights','maxSharpe','minVolatility','maxsharpe_L2_regularization','minvolatility_L2_regularization'])
port_cumulative_returns = pd.DataFrame(index=price.index, columns=['equally-weights','maxSharpe','minVolatility','maxsharpe_L2_regularization','minvolatility_L2_regularization'])
# 1. Daily portfolio returns for the equally-weighted portfolio
len_columns = len(price.columns)
# Each of the N tickers gets weight 1/N
equally_weights = np.full(len_columns, 1/len_columns)
# Extract the first element from the function output for daily returns
# Extract the second element from the function output for cumulative returns
port_daily_returns['equally-weights'], port_cumulative_returns['equally-weights'] = portfolio_return_series(daily_returns, equally_weights)
# 2. Daily portfolio returns for the maximum Sharpe portfolio
# Extract the first element from the function output for daily returns
# Extract the second element from the function output for cumulative returns
port_daily_returns['maxSharpe'], port_cumulative_returns['maxSharpe'] = portfolio_return_series(daily_returns, weights_maxsharpe_array)
# 3. Daily portfolio returns for the minimum volatility portfolio
# Extract the first element from the function output for daily returns
# Extract the second element from the function output for cumulative returns
port_daily_returns['minVolatility'], port_cumulative_returns['minVolatility'] = portfolio_return_series(daily_returns, weights_minvol_array)
# 4. Daily portfolio returns for L2 regularization
# Extract the first element from the function output for daily returns
# Extract the second element from the function output for cumulative returns
port_daily_returns['maxsharpe_L2_regularization'], port_cumulative_returns['maxsharpe_L2_regularization'] = portfolio_return_series(daily_returns, weights_L2_array_maxSharpe)
port_daily_returns['minvolatility_L2_regularization'], port_cumulative_returns['minvolatility_L2_regularization'] = portfolio_return_series(daily_returns, weights_L2_array_minvol)
# Merge the series side-by-side into dataframes:
# "basic" holds the three unregularized methods, "all" holds all five.
merged_basic_port_daily_returns = pd.DataFrame(index=price.index)
merged_all_port_daily_returns = pd.DataFrame(index=price.index)
for column in port_daily_returns:
    merged_all_port_daily_returns[column] = port_daily_returns[column]
    # The basic dataframe excludes the two L2-regularized variants
    if column != 'minvolatility_L2_regularization' and column != 'maxsharpe_L2_regularization':
        merged_basic_port_daily_returns[column] = port_daily_returns[column]
# Print basic portfolio returns
print("basic_portfolio_returns")
print(merged_basic_port_daily_returns)
# Print all portfolio returns
print("all_portfolio_returns")
print(merged_all_port_daily_returns)
# Preview the basic returns, skipping the NaN-derived first row
merged_basic_port_daily_returns[1:].head()
2. Examine visually the drawdown behavior of each portfolio.
# Compute the percentage drawdown series of a portfolio from its
# cumulative returns.
def portfolio_drawdown(cumulative_portfolio_returns):
    '''
    INPUTS
    cumulative_portfolio_returns: the portfolio cumulative return series,
        expressed as cumprod(1 + r) - 1 (i.e. 0.10 means +10% since start)
    OUTPUTS
    drawdown: percentage decline from the running peak (0 at a peak,
        negative below it)
    '''
    # Drawdown is defined on the wealth index, not on the cumulative
    # return itself: convert returns back to wealth (starts near 1.0).
    # (The original computed the running max on the cumprod-1 series and
    # clamped it to 1, which made the "drawdown" ~cum-1 whenever
    # cumulative returns were below 100% — a bug.)
    wealth = cumulative_portfolio_returns + 1
    # Calculate the running maximum of wealth reached so far
    running_max = np.maximum.accumulate(wealth)
    # The peak can never be below the initial investment of 1
    running_max[running_max < 1] = 1
    # Percentage distance of current wealth below the running peak
    drawdown = wealth / running_max - 1
    return drawdown
import matplotlib.pyplot as plt
# Plot the drawdown series of each portfolio on one set of axes,
# so the depth and duration of losses can be compared visually.
# Drawdown for the equally-weighted portfolio
# print(portfolio_drawdown(port_daily_returns['equally-weights']))
portfolio_drawdown(port_cumulative_returns['equally-weights']).plot()
# Drawdown for the maximum Sharpe portfolio
portfolio_drawdown(port_cumulative_returns['maxSharpe']).plot()
# Drawdown for the minimum volatility portfolio
portfolio_drawdown(port_cumulative_returns['minVolatility']).plot()
# Drawdowns for the two L2-regularized portfolios
portfolio_drawdown(port_cumulative_returns['maxsharpe_L2_regularization']).plot()
portfolio_drawdown(port_cumulative_returns['minvolatility_L2_regularization']).plot()
plt.ylabel('Drawdown')
plt.legend(loc = "best", fontsize=7)
plt.show()
3. Calculate the Value-at-Risk of each portfolio at 99% using the empirical (historical) distribution and the theoretical distribution. Create graphs that show the empirical distribution with two vertical lines, representing the Value-at-Risk at 99% derived from the empirical and theoretical distribution, respectively.
# Import norm from scipy.stats
from scipy.stats import norm
# Compute and visualize the Value-at-Risk (VaR) of a daily portfolio
# return series.
def portfolio_var(daily_portfolio_returns, level):
    '''
    INPUTS
    daily_portfolio_returns: the portfolio daily return series
    level: confidence level in percent, e.g. 90, 95, or 99
    OUTPUTS
    tail_risk: the tail probability in percent (100 - level)
    var_empirical: VaR from the empirical (historical) distribution
    var_theoretical: VaR from a normal distribution fitted to the sample
    Also draws a histogram of the returns with two vertical lines marking
    the empirical (red) and theoretical (green) VaR at the given level.
    '''
    # Tail probability in percent (e.g. level=99 -> 1% left tail)
    tail_risk = 100 - level
    # Empirical VaR: the tail_risk-th percentile of historical returns
    var_empirical = np.percentile(daily_portfolio_returns, tail_risk)
    # Theoretical VaR: the same quantile of a normal distribution with the
    # sample mean and standard deviation.
    # Note: norm.ppf expects the tail probability as a fraction
    # (0.01 for the 1% tail, not 99).
    mu = np.mean(daily_portfolio_returns)
    std = np.std(daily_portfolio_returns)
    tail_probability = tail_risk / 100
    var_theoretical = norm.ppf(tail_probability, mu, std)
    # Sort the returns for plotting
    sorted_returns = sorted(daily_portfolio_returns)
    # Plot the empirical distribution of returns
    plt.hist(sorted_returns, density=True, stacked=True)
    # Mark both VaR estimates with distinct labels (the original gave both
    # lines the same label, making them indistinguishable in the legend).
    plt.axvline(x=var_empirical, color='r', linestyle='-', label="Empirical VaR : {}%".format(level))
    plt.axvline(x=var_theoretical, color='g', linestyle='-', label="Theoretical VaR : {}%".format(level))
    plt.legend()
    plt.show()
    return tail_risk, var_empirical, var_theoretical
# Call the function to calculate and plot VaR for the Maximum Sharpe Portfolio at 99%
portfolio_var(port_daily_returns['maxSharpe'], 99)
# Call the function to calculate and plot VaR for the Minimum Volatility Portfolio at 99%
portfolio_var(port_daily_returns['minVolatility'], 99)
# Call the function to calculate and plot VaR for the Equally Weighted Portfolio at 99%
portfolio_var(port_daily_returns['equally-weights'], 99)
# Call the function to calculate and plot VaR for the Maximum sharpe L2 regularization Portfolio at 99%
# NOTE(review): the minvolatility_L2_regularization portfolio is never
# passed to portfolio_var — confirm whether that omission is intentional.
portfolio_var(port_daily_returns['maxsharpe_L2_regularization'], 99)
4. Is the portfolio return for the maximum Sharpe portfolio statistically different from that of the equally-weighted portfolio?
!pip install pingouin
import pingouin as pg
dir(pg)
# Deep dive into pg.ttest
pg.ttest
# Conduct a independent t-test on equally-weighted portfolio and max sharpe portfolio
# The null hypothesis is that their mean returns are equal
ttest_result = pg.ttest(port_daily_returns['equally-weights'], port_daily_returns['maxSharpe'])
# Print the paired test results
print(ttest_result)
t_value = ttest_result['T'][0]
p_value = ttest_result['p-val'][0]
print('-------------info------------------')
print('t value = ' + str(t_value))
print('p value = ' + str(p_value))
print('---------------conclusion----------')
if p_value <= 0.05:
print("Their mean portfolio returns are significantly.")
else:
print("Their mean portfolio returns are not significantly different.")
# Deep dive into the ANOVA test
pg.anova
# Reshape the dataframe into long form: one row per (portfolio type, date)
basic_portfolio = pd.DataFrame(columns=['equally-weights','maxSharpe','minVolatility'])
basic_portfolio['equally-weights'] = port_daily_returns['equally-weights']
basic_portfolio['maxSharpe'] = port_daily_returns['maxSharpe']
basic_portfolio['minVolatility'] = port_daily_returns['minVolatility']
# Stack the three columns into one long series with a
# (portfolio type, date) MultiIndex
basic_portfolio = basic_portfolio.unstack()
# Inspect the dataframe
basic_portfolio
# Turn multiindex into columns
basic_daily_returns_df = basic_portfolio.reset_index()
basic_daily_returns_df
# Rename column names (also fixes the original "profolio" typo; the column
# is created and consumed entirely within this cell)
basic_daily_returns_df = basic_daily_returns_df.rename(columns={0: "daily returns"})
basic_daily_returns_df = basic_daily_returns_df.rename(columns={"level_0": "portfolio type"})
# Inspect dataframe
basic_daily_returns_df
# Test the equality of portfolio mean returns across three portfolio construction methods
# The null hypothesis is that their mean returns are equal
aov = pg.anova(data=basic_daily_returns_df, dv="daily returns", between="portfolio type", detailed=True)
print(aov)
# Positional .iloc avoids pandas' deprecated integer-label fallback
p_value = aov['p-unc'].iloc[0]
print('---------------conclusion------------------')
if p_value <= 0.05:
    print("Their mean portfolio returns are significantly different.")
else:
    print("Their mean portfolio returns are not significantly different.")
# Reshape the dataframe into long form: one row per (portfolio type, date),
# this time including the two L2-regularized portfolios.
with_L2_portfolio = port_daily_returns.unstack()
# Inspect the dataframe
with_L2_portfolio
# Turn multiindex into columns
with_L2_daily_returns_df = with_L2_portfolio.reset_index()
with_L2_daily_returns_df
# Rename column names (also fixes the original "profolio" typo; the column
# is created and consumed entirely within this cell)
with_L2_daily_returns_df = with_L2_daily_returns_df.rename(columns={0: "daily returns"})
with_L2_daily_returns_df = with_L2_daily_returns_df.rename(columns={"level_0": "portfolio type"})
# Inspect dataframe
with_L2_daily_returns_df
# Test the equality of portfolio mean returns across five portfolio construction methods
# The null hypothesis is that their mean returns are equal
aov = pg.anova(data=with_L2_daily_returns_df, dv="daily returns", between="portfolio type", detailed=True)
print('------------anova info--------------------')
print(aov)
# Positional .iloc avoids pandas' deprecated integer-label fallback
p_value = aov['p-unc'].iloc[0]
print('---------------conclusion------------------')
if p_value <= 0.05:
    print("Their mean portfolio returns are significantly different.")
else:
    print("Their mean portfolio returns are not significantly different.")
5. Optional Bonus. What other hypotheses would you like to test? How do you test them? What do you find?
# Inspect the daily portfolio return series
port_daily_returns
## Data Preprocessing
# Split the sample into pre- and post-COVID sub-periods.
# NOTE(review): these slices drop row 0 (NaN first day), row 629 and the
# final row; the paired t-tests below require both halves to have the same
# number of rows — verify the lengths match for this date range.
port_daily_returns_before = port_daily_returns[1:629]
port_daily_returns_after = port_daily_returns[630:-1]
# For each portfolio construction method, test whether the mean daily
# return before the COVID-19 pandemic equals the mean return after it.
# The null hypothesis is that the mean returns are equal.
# (column name, label used in the printed section header — kept identical
# to the original per-section output)
covid_tests = [
    ('equally-weights', 'equally weights'),
    ('maxSharpe', 'maxSharpe'),
    ('minVolatility', 'minVolatility'),
    ('maxsharpe_L2_regularization', 'maxsharpe_L2_regularization'),
    ('minvolatility_L2_regularization', 'minvolatility_L2_regularization'),
]
for column, label in covid_tests:
    ttest_result = pg.ttest(port_daily_returns_before[column],
                            port_daily_returns_after[column], paired=True)
    print('-----------ttest for {}------------'.format(label))
    print(ttest_result)
    # Positional .iloc avoids pandas' deprecated integer-label fallback
    # that ttest_result['p-val'][0] relied on.
    p_value = ttest_result['p-val'].iloc[0]
    # Fixed the truncated "significantly." message in the first branch.
    if p_value <= 0.05:
        print("Their mean portfolio returns are significantly different.")
    else:
        print("Their mean portfolio returns are not significantly different.")
print("Results: I find that no matter whether equally-weighted, maximum Sharpe, minimum volatility, or L2 regularization, the mean portfolio returns of the all-weather portfolio before and after the COVID-19 pandemic are not significantly different. It seems that the COVID-19 pandemic did not affect the performance of the all-weather portfolio I crafted.")