import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Plot appearance: ggplot theme on a wide default canvas.
plt.style.use('ggplot')
plt.rcParams["figure.figsize"] = (14, 8)

# Load the return table and cut it in half by row position: the first half is
# used to select assets, the second half to evaluate the selections.
df = pd.read_parquet('algostrata_name.parquet')
split_row = int(len(df) / 2)
train = df.iloc[:split_row]
test = df.iloc[split_row:]
def calc_mean(returns):
    """Return the geometric mean of a sequence of per-period returns.

    The returns are compounded into a total growth factor, the n-th root of
    that factor is taken (n = number of observations), and 1 is subtracted to
    get back to a per-period rate.

    Args:
        returns: Array-like of simple returns (0.01 means +1 %); it must
            support ``1 + returns`` and ``len``.

    Returns:
        The constant per-period return that compounds to the same total.
    """
    total_growth = np.prod(1 + returns)
    n_periods = len(returns)
    return total_growth ** (1 / n_periods) - 1
# Geometric mean return of every column over the training rows, compounded
# over 52 periods (presumably weekly data being annualised -- TODO confirm).
ann_mean = (1+train.apply(calc_mean))**52-1
# Column standard deviation scaled by sqrt(52) to the same horizon.
std = (train.std())*(52)**(0.5)
# Mean-to-volatility ratio per column; no risk-free rate is subtracted.
sharpe = ann_mean / std
sharpe
# Strategy 1: the five columns with the largest training-period ratio.
strat_1_assets = sharpe.nlargest(5).index.tolist()
strat_1_assets
# Covariance matrix of the selected columns on the training rows
# (evaluated for display only; the result is not stored).
train[strat_1_assets].cov()
def test_portfolio(assets, inital_investment=1000000):
    """Simulate an equally weighted portfolio over the rows of ``test``.

    For every row of the module-level ``test`` frame the current balance is
    split equally across ``assets``, each slice grows by that row's return
    for its asset, and the slices are summed into the new balance. The
    portfolio is therefore back at equal weights at the start of every row.

    Args:
        assets: Column labels of ``test`` to hold.
        inital_investment: Starting balance.

    Returns:
        A one-column DataFrame (column label 0) indexed like ``test`` holding
        the balance after each row.
    """
    equal_share = 1 / len(assets)
    balance = inital_investment
    balance_history = []
    for _, period_returns in test[assets].iterrows():
        balance = (balance * (1 + period_returns) * equal_share).sum()
        balance_history.append(balance)
    return pd.DataFrame(balance_history, index=test.index)
def plot_investment(data, inital_investment=1000000):
    """Plot a balance series against a dashed line at the starting balance.

    Args:
        data: Values to plot; passed directly to ``plt.plot``.
        inital_investment: Height of the horizontal reference line, also
            quoted in the legend.
    """
    legend_entries = [
        'Investment',
        f'Initial investment = {inital_investment}',
    ]
    plt.plot(data)
    # Dashed blue baseline marks the break-even level.
    plt.axhline(y=inital_investment, color='b', linestyle='--')
    plt.legend(legend_entries)
    plt.title('Investment over time')
    plt.show()
# Simulate strategy 1 on the held-out half and plot its balance.
plot_investment(test_portfolio(strat_1_assets))
# Candidate universe for the later strategies: the 200 columns with the
# largest training-period ratio.
top_200_assets = sharpe.nlargest(200).index.tolist()
from MST import MinimumSpanningTree
# Strategy 2: start from the training returns of the top-200 candidates and
# apply MinimumSpanningTree five times, feeding the frame returned by each
# round into the next (presumably each round narrows the asset set -- see the
# MST module to confirm).
subset_df = train[top_200_assets]
for mst_round in range(5):
    subset, subset_df, corr_avg, PDI = MinimumSpanningTree(subset_df)
subset_df
subset_df.cov()

# Whatever columns remain after the last round form the strategy.
strat_2_assets = subset_df.columns.tolist()
plot_investment(test_portfolio(strat_2_assets))
# Test-period summary for strategy 1: each figure is the equal-weight average
# of the per-asset values (geometric mean return, standard deviation).
# NOTE(review): the "std" is the average of the individual asset standard
# deviations, not the standard deviation of the combined portfolio, so it
# does not reflect correlation between the assets -- confirm this is intended.
strat_1_mean = sum(test[strat_1_assets].apply(calc_mean) * 1 / len(strat_1_assets))
strat_1_std = sum(test[strat_1_assets].std() * 1 / len(strat_1_assets))
print('Strategy 1')
print('Mean: ', strat_1_mean, "|", "std: ", strat_1_std)
# Same summary for strategy 2.
strat_2_mean = sum(test[strat_2_assets].apply(calc_mean) * 1 / len(strat_2_assets))
strat_2_std = sum(test[strat_2_assets].std() * 1 / len(strat_2_assets))
print('Strategy 2')
print('Mean: ', strat_2_mean, "|", "std: ", strat_2_std)
# Ratio of the two averages above; unlike the training-period ratio these
# figures are per-period (no factor of 52 is applied).
print('Strategy 1 sharpe ratio: ', strat_1_mean / strat_1_std)
print('Strategy 2 sharpe ratio: ', strat_2_mean / strat_2_std)
def max_drawdown(data):
    """Return the maximum drawdown of a value series as a fraction <= 0.

    The drawdown at each point is its relative distance below the highest
    value seen so far, ``(value - running_peak) / running_peak``; the most
    negative of these is returned (e.g. ``-0.25`` for a 25 % fall).

    Every observation is considered, including the last one, so a decline
    that is still in progress at the end of the series (or a series that only
    ever falls) is measured correctly. The input is converted to a NumPy
    array first, so a pandas Series is read by position regardless of its
    index labels.

    Args:
        data: One-dimensional array-like of positive values (e.g. the balance
            of a portfolio over time).

    Returns:
        The largest peak-to-trough decline as a non-positive float; ``0.0``
        when the series never falls below an earlier value or is empty.
    """
    values = np.asarray(data, dtype=float)
    if values.size == 0:
        # No observations means nothing to fall from.
        return 0.0

    # Highest value observed up to and including each position.
    running_peak = np.maximum.accumulate(values)
    drawdowns = (values - running_peak) / running_peak
    return float(drawdowns.min())
# Re-run both simulations on the held-out half to get their balance paths.
strat_1_investment = test_portfolio(strat_1_assets)
strat_2_investment = test_portfolio(strat_2_assets)
# test_portfolio returns a single-column frame whose column label is 0.
strat_1_drawdown = max_drawdown(strat_1_investment[0])
strat_2_drawdown = max_drawdown(strat_2_investment[0])
# max_drawdown yields a fraction; scale to percent for display.
print('Strategy 1 drawdown:', strat_1_drawdown * 100, '%')
print('Strategy 2 drawdown:', strat_2_drawdown * 100, '%')
def _strategy_mean_std(assets):
    """Return the equal-weight average of per-asset mean and std on ``test``."""
    asset_count = len(assets)
    mean = sum(test[assets].apply(calc_mean) * 1 / asset_count)
    std = sum(test[assets].std() * 1 / asset_count)
    return mean, std


def compare_strats(strat_1_assets, strat_2_assets, labels=('Strategy 1', 'Strategy 2')):
    """Print test-period mean, std, mean/std ratio and drawdown for two strategies.

    For each strategy the mean is the equal-weight average of the assets'
    geometric mean returns and the std is the equal-weight average of the
    assets' standard deviations, both taken over the module-level ``test``
    frame. NOTE(review): that std is not the standard deviation of the
    combined portfolio, so it ignores correlation between assets.

    The drawdown is computed from the balance path produced by
    ``test_portfolio`` for each strategy.

    Args:
        strat_1_assets: Column labels of the first strategy.
        strat_2_assets: Column labels of the second strategy.
        labels: Two headings used in the printed report, in the same order as
            the asset lists. The default reproduces the historical output;
            pass real names when the arguments are not strategies 1 and 2.

    Raises:
        ValueError: If ``labels`` does not contain exactly two entries.
    """
    label_1, label_2 = labels  # raises ValueError unless exactly two labels
    strategies = [(label_1, strat_1_assets), (label_2, strat_2_assets)]

    # Mean / std section, one strategy at a time.
    summaries = []
    for label, assets in strategies:
        mean, std = _strategy_mean_std(assets)
        print(label)
        print('Mean: ', mean, "|", "std: ", std)
        summaries.append((label, mean, std))

    # Ratios are printed together after both summaries.
    for label, mean, std in summaries:
        print(f'{label} sharpe ratio: ', mean / std)

    # test_portfolio returns a single-column frame whose column label is 0.
    investments = [test_portfolio(assets) for _, assets in strategies]
    drawdowns = [max_drawdown(investment[0]) for investment in investments]
    for (label, _), drawdown in zip(strategies, drawdowns):
        print(f'{label} drawdown:', drawdown * 100, '%')
from Clustering import Cluster
# Strategy 3: cluster the top-200 candidates into three groups. The code below
# relies on Cluster returning a frame indexed by asset label with a 'Cluster'
# column holding values such as 'Cluster 1' / 'Cluster 2'.
df_cluster = Cluster(train[top_200_assets], nClusters=3, dendogram=False)
df_cluster.groupby("Cluster").count()

# From 'Cluster 2' keep only the single asset with the best training ratio.
in_cluster_2 = df_cluster['Cluster'] == 'Cluster 2'
strat_3_asset_small = sharpe[df_cluster.loc[in_cluster_2].index.tolist()].idxmax()
strat_3_asset_small

# From 'Cluster 1' run five MinimumSpanningTree rounds, each on the frame
# returned by the previous one.
in_cluster_1 = df_cluster['Cluster'] == 'Cluster 1'
subset_df = train[df_cluster.loc[in_cluster_1].index.tolist()]
for mst_round in range(5):
    subset, subset_df, corr_avg, PDI = MinimumSpanningTree(subset_df)
subset_df

# Final selection: the surviving 'Cluster 1' assets plus the 'Cluster 2' pick.
strat_3_assets = subset_df.columns.tolist() + [strat_3_asset_small]
strat_3_assets
train[strat_3_assets].cov()
plot_investment(test_portfolio(strat_3_assets))

# NOTE(review): compare_strats prints its two arguments under the headings
# 'Strategy 1' and 'Strategy 2', so in this report 'Strategy 1' is
# strat_2_assets and 'Strategy 2' is strat_3_assets.
compare_strats(strat_2_assets, strat_3_assets)
# Union of the strategy 2 and strategy 3 selections. dict.fromkeys removes
# duplicates while keeping first-seen order, so the column order (and with it
# the layout of every plot below) is the same on every run; a set would order
# string labels differently from one interpreter session to the next.
stat_assets = list(dict.fromkeys(strat_2_assets + strat_3_assets))  # Remove duplicates
stat_assets
# Returns of the selected assets over the full data set (train and test rows).
stats_assets = df[stat_assets]
pd.options.plotting.backend = "matplotlib"

# One histogram per asset.
stats_assets.hist(bins=100)
plt.show()

# Kernel density estimate per asset, zoomed to returns within +/- 2 %.
stats_assets.plot.density()
plt.xlim([-0.02, 0.02])
# plt.plot() with no arguments draws nothing; show the figure instead, as is
# done for the histograms above.
plt.show()
import pylab
import scipy.stats as stats
# Normal Q-Q plot for every selected asset, five panels per row. The number
# of rows follows the number of assets, so the grid neither runs out of
# labels (fewer than ten assets) nor drops any (more than ten). With exactly
# ten assets this is the same 2 x 5 figure of size (16, 10) as before.
n_cols = 5
n_rows = max(1, -(-len(stat_assets) // n_cols))  # ceiling division
fig, axs = plt.subplots(n_rows, n_cols, figsize=(16, 5 * n_rows),
                        facecolor='w', edgecolor='k', squeeze=False)
panels = axs.ravel()  # squeeze=False guarantees a 2-D array of axes
for ax, asset in zip(panels, stat_assets):
    stats.probplot(stats_assets[asset], dist="norm", plot=ax)
    ax.set_title(asset)
# Hide the panels of the last row that have no asset to show.
for ax in panels[len(stat_assets):]:
    ax.set_visible(False)
fig.tight_layout()
plt.show()
stat_assets
# Jarque-Bera normality test for each selected asset, collected column-wise.
# Each column receives the two values returned by scipy's jarque_bera
# (per the SciPy docs: the test statistic, then the p-value).
JB = pd.DataFrame(columns=stat_assets)
for asset in stat_assets:
    asset_returns = stats_assets[asset]
    JB[asset] = stats.jarque_bera(asset_returns)
JB
# Print the values themselves: a bare expression is only echoed by an
# interactive session (and then only the last one in a cell), so as plain
# statements the headings below were followed by no numbers.
print('skewness for the selected assets is:')
print(stats_assets.skew())
# pandas reports excess kurtosis (Fisher's definition: a normal
# distribution scores 0).
print('Kurtosis for the selected assets is:')
print(stats_assets.kurtosis())