__author__ = 'Arun A. Kalyanaraman'
#Import Required Packages For Analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
import scipy.stats as stats
#Load data from excel file
source_data = pd.ExcelFile('./IndustryAbility.2016CaseData.xlsx')
industry_exposures = pd.read_excel(source_data, 'Industry Exposures', index_col=0)
industry_gmvs = pd.read_excel(source_data, 'Industry GMVs', index_col=0)
industry_factor_returns = pd.read_excel(source_data, 'Industry Factor Returns', index_col=0)
#Transform dataframe so columns are industries and rows are dates
industry_gmvs_date_indexed = pd.pivot_table(industry_gmvs, values='GMV', \
index=['Date'], columns=['Industry']).fillna(0)
industry_exposures_date_indexed = pd.pivot_table(industry_exposures, values='Exposure', \
index=['Date'], columns=['Industry']).fillna(0)
industry_factor_returns_date_indexed = pd.pivot_table(industry_factor_returns, values='DailyReturn', \
index=['Date'], columns=['Industry']).fillna(0)
industry_factor_returns_date_indexed = industry_factor_returns_date_indexed.loc[:\
, (industry_factor_returns_date_indexed != 0).any(axis=0)]
#Calculate current portfolio leverage by industry
industry_leverage_date_indexed = industry_gmvs_date_indexed/industry_exposures_date_indexed
#Calculate industry return contributions
industry_returns_date_indexed = (industry_exposures_date_indexed*industry_factor_returns_date_indexed)
#Compute covariance matrix using industry returns
industry_factor_returns_covariance_matrix = industry_factor_returns_date_indexed.cov()
industry_factor_returns_covariance_matrix_annualized = industry_factor_returns_covariance_matrix*252
#Compute annualized industry factor vol
industry_factor_vol_annualized = industry_factor_returns_date_indexed.std()*np.sqrt(252)
#Daily to compounded returns
compounded_returns = ((industry_returns_date_indexed).cumsum())
factor_compounded_returns = (industry_factor_returns_date_indexed+1).cumprod()-1
def calc_std(weight):
weight = pd.Series(weight)
std = np.sqrt(np.dot(weight.T, np.dot(industry_factor_returns_covariance_matrix_annualized, weight)))
return std
def marginal_risk(row):
marginal_risk_c = pd.DataFrame(row).T.dot(industry_factor_returns_covariance_matrix_annualized)\
/portfolio_risk.loc[row.name]
risk_contribution = row*marginal_risk_c
return pd.Series(risk_contribution.T[row.name])
portfolio_risk = industry_exposures_date_indexed.apply(lambda weight: calc_std(weight), axis=1)
risk_decomp = industry_exposures_date_indexed.apply(lambda row: marginal_risk(row), axis=1)
normalized_risk_decomposition = risk_decomp.div(risk_decomp.sum(axis=1), axis=0)
normalized_risk_decomposition_copy = normalized_risk_decomposition.copy()
top_n_risk_contribs = normalized_risk_decomposition_copy.stack().groupby(level=0).nlargest(5).unstack().\
reset_index(level=1, drop=True).reindex(columns=normalized_risk_decomposition_copy.columns)
top_n_risk_contribs = top_n_risk_contribs.fillna(0)
top_n_risk_contribs = top_n_risk_contribs.loc[:, (top_n_risk_contribs != 0).any(axis=0)]
top_n_risk_contribsC = top_n_risk_contribs.columns
top_n_risk_contribs.replace(0, np.nan, inplace=True)
top_n_risk_contribs.plot()
largest_return_contribs = (compounded_returns.iloc[-1]).sort_values()
#(compounded_returns.iloc[-1]-1).sort_values().sum()
largest_return_contribsL = largest_return_contribs.nlargest(n=15)
largest_return_contribsS = largest_return_contribs.nsmallest(n=15)
largest_return_contribs = pd.concat([largest_return_contribsL,largest_return_contribsS])
largestC = largest_return_contribs.index
positive_return_contribs = largest_return_contribs[largest_return_contribs>0].index
negative_return_contribs = largest_return_contribs[largest_return_contribs<0].index
factor_ret_times_risk = industry_factor_returns_date_indexed.multiply(industry_factor_vol_annualized, axis=1)
factor_ret_times_risk.corrwith(industry_exposures_date_indexed, axis=0)
factor_sharpe_ratio = (industry_factor_returns_date_indexed.mean()*252)/industry_factor_vol_annualized
factor_sharpe_ratio.sort_values()
portfolio_factor_sharpe_ratio = (industry_returns_date_indexed.mean()*252)/(industry_returns_date_indexed.std()*np.sqrt(252))
portfolio_factor_sharpe_ratio[largest_return_contribs.index].sort_values().plot.bar()
portfolio_sharpe_ratio = industry_returns_date_indexed.sum(axis=1).mean()/\
(np.std(industry_returns_date_indexed.sum(axis=1))*np.sqrt(252))
portfolio_sharpe_ratio
industry_returns_date_indexed.sum().sum()
top_5_industriesSR = list(portfolio_factor_sharpe_ratio.sort_values(ascending=False)[:5].index)
bottom_5_industriesSR = list(portfolio_factor_sharpe_ratio.sort_values(ascending=True)[:5].index)
top_5_industries_contribs = list(largest_return_contribs[:5].index)
bottom_5_industries_contribs = list(largest_return_contribs[-5:].index)
for i in top_5_industries_contribs:
pd.DataFrame({'Industry Exposure': industry_exposures_date_indexed[i],\
'Portfolio Return Contribs': compounded_returns[i],\
'Factor Return': factor_compounded_returns[i],\
'Normalized Risk Decomposition': normalized_risk_decomposition[i]}).plot(title=i, \
secondary_y='Portfolio Return Contribs')
for i in bottom_5_industries_contribs:
pd.DataFrame({'Industry Exposure': industry_exposures_date_indexed[i],\
'Portfolio Return Contribs': compounded_returns[i],\
'Factor Return': factor_compounded_returns[i],\
'Normalized Risk Decomposition': normalized_risk_decomposition[i]}).plot(title=i, \
secondary_y='Portfolio Return Contribs')
days_exposure_change_correct = {}
for n in industry_gmvs_date_indexed.columns:
threshold = .85
sector_bad_dates = list(industry_gmvs_date_indexed[n].sort_values()[int(252 * threshold):].index.values)
good_dates = industry_factor_returns_date_indexed[n].index.values
sector_dates = set(sector_bad_dates).intersection(good_dates)
exposure = industry_exposures_date_indexed.loc[sector_dates][n].sort_index()
change_in_exposure = exposure.diff(periods=1)
factor_return_on_dates = industry_factor_returns_date_indexed.loc[sector_dates][n]
combined = pd.DataFrame({'Change in Exposure':change_in_exposure, \
'Industry Factor Return': factor_return_on_dates})
combined['Correct'] = np.sign(combined['Change in Exposure']) == \
np.sign(combined['Industry Factor Return'])
days_exposure_change_correct[n] = combined['Correct'].sum()/int(252*(1-threshold))
correct_change_percentage = pd.DataFrame({'Correct':pd.Series(days_exposure_change_correct).loc[positive_return_contribs]})
correct_change_percentage['Over Threshold'] = (correct_change_percentage.where(correct_change_percentage>=0.5)-.5).fillna(0)
correct_change_percentage['Correct'] = correct_change_percentage['Correct'].mask(correct_change_percentage['Correct']>=.5, .5)
correct_change_percentage.plot.bar(stacked=True)
correct_change_percentage = pd.DataFrame({'Correct':pd.Series(days_exposure_change_correct).loc[negative_return_contribs]})
correct_change_percentage['Over Threshold'] = (correct_change_percentage.where(correct_change_percentage>=0.5)-.5).fillna(0)
correct_change_percentage['Correct'] = correct_change_percentage['Correct'].mask(correct_change_percentage['Correct']>=.5, .5)
correct_change_percentage.plot.bar(stacked=True)