import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn import metrics
from scipy.stats import multivariate_normal
import scipy.stats as st
from scipy.stats import norm
from scipy import integrate
df1 = pd.read_excel('/work/ps1_datasets.xlsx', sheet_name = 'Table1')
df2 = pd.read_excel('/work/ps1_datasets.xlsx', sheet_name = 'Table2')
df3 = pd.read_excel('/work/ps1_datasets.xlsx', sheet_name = 'Table3')
def add_color(b):
a = b
a['color'] = 'blue'
for x in range(len(a['Bankrupt'])):
if a['Bankrupt'][x] == 0: a['color'][x] = 'blue'
elif a['Bankrupt'][x] == 1: a['color'][x] = 'red'
return a
df1 = add_color(df1)
plt.scatter(df1['Current_Ratio'], df1['Debt_to_Asset'], c = df1['color'])
plt.xlabel('Current Ratio (%)')
plt.ylabel('Debt to Asset Ratio (%)')
plt.title('Question 1, Part A')
X = df1[['Debt_to_Asset', 'Current_Ratio']]
y = df1[['Bankrupt']]
clf = LinearDiscriminantAnalysis(priors = [0.5, 0.5])
clf.fit(X, y)
clf.coef_
df1['LDA_Predictions'] = clf.predict(X)
clf.coef_
def z_score(current, debt_to_asset):
return 0.1118435*debt_to_asset - 2.07439046*current
df1['LDA_Zscore'] = z_score(df1['Current_Ratio'], df1['Debt_to_Asset'])
df1.head()
std_zscore = np.std(df1['LDA_Zscore'])
bankrupt_zone = np.mean(df1['LDA_Zscore']) + (2*std_zscore)
safe_zone = np.mean(df1['LDA_Zscore']) - (2*std_zscore)
zones = {'Bankrupt Zone': bankrupt_zone, 'Safe Zone': safe_zone}
zones
misclassified_counter = 0
for i in range(0, len(df1), 1):
if df1.iloc[i,3] != df1.iloc[i,5]:
misclassified_counter += 1
print('Number of Misclassified Bankruptcy Predictions:', misclassified_counter)
model = linear_model.LinearRegression()
model.fit(X, y)
df1['LPM_PD'] = model.predict(X)
model2 = LogisticRegression(random_state=0).fit(X, y)
df1['Logistic_Prediction'] = model2.predict(X)
df1
def logistic_pd(current, debt_to_asset):
return np.exp(0.01874503*debt_to_asset - 0.3476689*current)/(1+np.exp(0.01874503*debt_to_asset - 0.3476689*current))
df1['Logistic_PD'] = logistic_pd(df1['Current_Ratio'], df1['Debt_to_Asset'])
fig, axes = plt.subplots(1,2)
df1.plot(x = 'Current_Ratio', y = 'LPM_PD', kind = 'scatter', ax = axes[0])
df1.plot(x = 'Current_Ratio', y = 'Logistic_PD', kind = 'scatter', ax = axes[1])
fig, axes = plt.subplots(1,2)
df1.plot(x = 'Debt_to_Asset', y = 'LPM_PD', kind = 'scatter', ax = axes[0])
df1.plot(x = 'Debt_to_Asset', y = 'Logistic_PD', kind = 'scatter', ax = axes[1])
plt.rcParams['figure.figsize'] = [15, 10]
fig, axes = plt.subplots(1,2)
sns.regplot(data = df1, x = 'Current_Ratio', y = 'Bankrupt', fit_reg=True, logistic = True, ax = axes[0], truncate = False)
sns.regplot(data = df1, x = 'Debt_to_Asset', y = 'Bankrupt', fit_reg=True, logistic = True, ax = axes[1], truncate = False)
plt.rcParams['figure.figsize'] = [15, 10]
fig, axes = plt.subplots(1,2)
sns.regplot(data = df1, x = 'Current_Ratio', y = 'Bankrupt', fit_reg=True, logistic = False, ax = axes[0], truncate = False)
sns.regplot(data = df1, x = 'Debt_to_Asset', y = 'Bankrupt', fit_reg=True, logistic = False, ax = axes[1], truncate = False)
plot_confusion_matrix(model2, X, df1['Bankrupt'])
print('False Positive Rate: ', (2/9)*100,'%',
'\nTrue Positive Rate: ', (8/10)*100,'%',
'\nFalse Negative Rate: ', (2/9)*100,'%',
'\nTrue Negative Rate: ', (7/9)*100,'%', sep = '')
def simple_roc(labels, scores):
labels = np.flip([x for _,x in sorted(zip(scores,labels), reverse = False)])
return pd.DataFrame({'TPR': np.cumsum(labels)/sum(labels),
'FPR': np.cumsum(~labels)/sum(~labels)})
glm_simple_roc = simple_roc(df1.Bankrupt == 1, df1.Logistic_Prediction)
plt.figure()
ax = sns.lineplot(data = glm_simple_roc, x = 'FPR', y = 'TPR')
plt.plot([0,1], [0,1], linewidth = 2)
misclassified_counter_logit = 0
for i in range(0, len(df1), 1):
if df1.iloc[i,3] != df1.iloc[i,8]:
misclassified_counter_logit += 1
dfq2 = pd.DataFrame({'Misclassified LDA Predictions': 3, 'Misclassified Logistic Predictions': 4}, index = [0])
dfq2
df2
df3
value = [0]*7
for i in range(0,7,1):
value[i] = 6 + 6*np.exp(-df3.iloc[i,2]) + 6*np.exp((-df3.iloc[i,3])*2) + 6*np.exp((-df3.iloc[i,4])*3) + 106*np.exp((-df3.iloc[i,5])*4)
value
value.append(100*0.5113)
value
Expected_Value_1yr = np.sum(df2.iloc[3,1:]*value)
print('Expected Bond Value in One Year:', Expected_Value_1yr,"in millions")
df2.iloc[3,1:]
variance = np.sum(df2.iloc[3,1:]*np.square(value)) - np.square(Expected_Value_1yr)
std = np.sqrt(variance)
print('Standard deviation of Bond Value in One Year:', std, "in millions")
CVaR = Expected_Value_1yr-51.13
print('CVaR:',CVaR, "in millions")
value1 = [0]*7
for i in range(0,7,1):
value1[i] = 6 + 6*np.exp(-df3.iloc[i,2]) + 106*np.exp((-df3.iloc[i,3])*2)
value1.append(100*0.5113)
value1
Expected_Value_1yr_2 = np.sum(df2.iloc[2,1:]*value1)
print('Expected Bond Value in One Year:', Expected_Value_1yr_2)
import math
def bivariate_normal(Ra,Rb,rho=0.2):
return 1/(2*math.pi*np.sqrt(1-rho**2))*np.exp(-1/(2*(1-rho**2))*(Ra**2-2*rho*Ra*Rb+Rb**2))
bivariate_normal(1,2,0.2)
test = integrate.dblquad(bivariate_normal, -1.53, 1.9,
lambda x: -1.29,
lambda x: 1.5)
print(test)
Bond_BBB=df2.loc[3,:].values.flatten().tolist()[1:][::-1]
Bond_A=df2.loc[2,:].values.flatten().tolist()[1:][::-1]
Bond_BBB=np.cumsum(Bond_BBB)[::-1]
Bond_A=np.cumsum(Bond_A)[::-1]
Bond_BBB_ppf=norm.ppf(Bond_BBB)
Bond_A_ppf=norm.ppf(Bond_A)
Bond_BBB_ppf=np.append(Bond_BBB_ppf,float('-inf'))
Bond_A_ppf=np.append(Bond_A_ppf,float('-inf'))
z_score_BBB = Bond_BBB_ppf
z_score_A = Bond_A_ppf
def bivariate_normal(Ra,Rb,rho=0.2):
return 1/(2*math.pi*np.sqrt(1-rho**2))*np.exp(-1/(2*(1-rho**2))*(Ra**2-2*rho*Ra*Rb+Rb**2))
df4 = pd.DataFrame(index=['AAA', 'AA', 'A', 'BBB', 'BB', 'B', 'CCC', 'Default'],columns=['AAA', 'AA', 'A', 'BBB', 'BB', 'B', 'CCC', 'Default'])
for i in range(0,8):
for j in range(0,8):
df4.iloc[i,j] = round(integrate.dblquad(bivariate_normal, z_score_BBB[i], z_score_BBB[i+1],
lambda x: z_score_A[j],
lambda x: z_score_A[j+1])[0],4)
df4
print(df2.columns.tolist()[1:])
Transition_Matrix = df2
Credit_Spreads = df3
Transition_Matrix=Transition_Matrix.rename(columns = {"Unnamed: 0":"Category"})
def getExpectedValue_list(bondrate="BBB",expected_year=1,year=5,coupon=0.06,
TransMatrix=Transition_Matrix,CreditSpread=Credit_Spreads,rf=0.00,recovery_rate=0.5113):
Translist=TransMatrix.loc[Transition_Matrix['Category'] == bondrate].values.flatten().tolist()
#print(Translist)
CreditSpreadlist=CreditSpread
#print(CreditSpreadlist)
p=np.zeros(8)
for i in range(0,7):
temp=0
# print("CreditSpreadNum",CreditSpreadNum)
for j in range(0,year):
CreditSpreadNum=Credit_Spreads.iloc[i][j]
# print("CreditSpreadNum",CreditSpreadNum)
if(j==0):
temp+=coupon*100
elif(j!=(year-1)):
temp+=coupon*100*np.exp(-(rf+CreditSpreadNum)*j)
else:
temp+=(coupon*100+100)*np.exp(-(rf+CreditSpreadNum)*j)
# print(temp)
p[i]=temp
p[7]=100*recovery_rate
return p
getExpectedValue_list(bondrate="A",expected_year=1,year=3,coupon=0.06,
TransMatrix=Transition_Matrix,CreditSpread=Credit_Spreads,rf=0.00,recovery_rate=0.5113)
Ratings = pd.DataFrame(index=df3.columns.tolist()[1:],columns=df3.columns.tolist()[1:])
BondBBBlist=getExpectedValue_list(bondrate="BBB",expected_year=1,year=5,coupon=0.06,
TransMatrix=Transition_Matrix,CreditSpread=Credit_Spreads,rf=0.00,recovery_rate=0.5113)
BondAlist=getExpectedValue_list(bondrate="CCC",expected_year=1,year=3,coupon=0.06,
TransMatrix=Transition_Matrix,CreditSpread=Credit_Spreads,rf=0.00,recovery_rate=0.5113)
#print(BondBBBlist)
#print(BondAlist)
combinedresult=np.zeros((8,8))
for i in range(0,8):
for j in range(0,8):
combinedresult[i,j]=(BondBBBlist[i]+BondAlist[j])*df4.iloc[i,j]
PortfolioValue_combined=np.sum(combinedresult)
print("the portfolio value is: $",round(PortfolioValue_combined,2),' in millions', sep = '')
combinedresult_1=np.zeros((8,8))
for i in range(0,8):
for j in range(0,8):
combinedresult_1[i,j]=(np.square(BondBBBlist[i]+BondAlist[j]))*df4.iloc[i,j]
variance = PortfolioValue_combined_1 - np.square(PortfolioValue_combined)
print('Standard Deviation of Portfolio: ', np.sqrt(variance),' in millions', sep = '')
Recovery_Rate = 0.5113
Portfolio_CVaR = PortfolioValue_combined - 2*100*Recovery_Rate
print("the portfolio CVaR is: $",round(Portfolio_CVaR,2), "in millions", sep = '')