import pandas as pd
import numpy as np
import statsmodels as sm
import statsmodels.api as smapi
import pylogit as pl
import seaborn as sns
import matplotlib.pyplot as plt
import math
# Part a)
# Simulate a binary outcome driven by the latent index x + epsilon, where
# epsilon is an equal-weight mixture of N(-2, 1) and N(2, 1) draws.
np.random.seed(999)  # fixed seed so the draws below are reproducible
# NOTE: the order of the four draws determines the simulated data; keep it.
x = np.random.normal(0, 4, 1000)
a = (np.random.uniform(size=1000) < 0.5).astype(int)  # 0/1 mixture indicator
z1 = np.random.normal(-2, 1, 1000)
z2 = np.random.normal(2, 1, 1000)
epsilon = a * z1 + (1 - a) * z2  # z1 where a == 1, z2 where a == 0
y = (x + epsilon > 0).astype(int)

# Calculate beta by hand (no intercept): b = (X'X)^{-1} X'y.
X = x.reshape(-1, 1)  # column vector of shape (1000, 1)
xtx_inv = np.linalg.inv(np.dot(X.T, X))
b = np.dot(np.dot(xtx_inv, X.T), y)
# Alternative via the least-squares solver:
# np.linalg.lstsq(X, y, rcond=None)[0]
print(b)
# Output: [0.08104732]
# Verify that statsmodels estimates the same beta (y regressed on x alone,
# matching the hand computation).
# The original analysis uses non-robust standard errors on the grounds that
# there is no heteroskedasticity.
# NOTE(review): y is binary, so the residual variance may depend on x --
# confirm whether robust standard errors are wanted.
ols_results = smapi.OLS(y, x).fit()
ols_results.summary()
# Fitted values implied by the hand-computed slope.
y_pred = b[0] * x
df = pd.DataFrame({"y": y, "y_pred": y_pred, "x": x})
df
# Plot y against x with seaborn's regression fit; y_pred is stored in df but
# is not drawn by the calls below.
plt.ylim([-0.05, 1.05])
sns.regplot(x="x", y="y", data=df, ci=None, label="E[y|x]")
plt.legend(loc="best")
# Part b)
# Multinomial logit of status on education, experience and race, estimated on
# the rows with year == 85.
df = pd.read_stata("KEANE.dta")
# Raw status codes:
# 1 - School
# 2 - Home
# 3 - Work
# Recode Work from 3.0 to 0.0 so it matches the keys of `mapping` below.
df["status"] = [0.0 if code == 3.0 else code for code in df["status"]]
mask = df["year"] == 85
covariates = ["educ", "exper", "expersq", "black"]
# Keep only rows where the outcome and every regressor are observed; the
# trailing copy() makes df_est an independent frame for the assignments below.
df_est = df[mask].dropna(how="any", subset=["status"] + covariates).copy()
mapping = {0.0: "At Work", 1.0: "School", 2.0: "Home"}
# Direct lookup on purpose: an unexpected status code raises KeyError.
df_est["status"] = [mapping[code] for code in df_est["status"]]
# We add an intercept.
df_est["constant"] = 1
MNL = smapi.MNLogit(df_est["status"], df_est[["constant"] + covariates]).fit()
MNL.summary()
# Output:
# Optimization terminated successfully.
# Current function value: 0.656778
# Iterations 7
# Marginal effects of the regressors from the fitted model, requested with
# at="mean" (evaluated at the regressor means).
margeff_at_means = MNL.get_margeff(at="mean")
margeff_at_means.summary()
# Part c)
# Predicted choice probabilities for black vs. non-black, with the remaining
# regressors held at their estimation-sample means.
params = MNL.params
# NOTE(review): assumes the two coefficient columns come back in the order
# Home, School -- confirm against MNL.summary().
params.columns = ["Home", "School"]
params.reset_index(inplace=True)
# After reset_index the regressor names sit in the "index" column and the rows
# are numbered 0..4 in the exog order used for the fit:
# constant, educ, exper, expersq, black.
params["Mean"] = [df_est[name].mean() for name in params["index"]]
# Means of the variables in the estimation sample.
params[["index", "Mean"]]
# Exponentiated coefficients for each of the two choices.
for choice in ["Home", "School"]:
    params[choice + " (Exp.)"] = params[choice].map(math.exp)


def _index_at_means(choice, black):
    """Return the linear index for ``choice`` evaluated at the means.

    Rows 0-3 (constant, educ, exper, expersq) enter at their sample means;
    the ``black`` coefficient (row 4) is added only when ``black`` is true.
    """
    total = params.loc[0, choice] * params.loc[0, "Mean"]
    for row in (1, 2, 3):
        total = total + params.loc[row, choice] * params.loc[row, "Mean"]
    if black:
        total = total + params.loc[4, choice] * 1
    return total


# Work is the reference outcome: its exponentiated index is fixed at 1.
results = {"Work_pred_black": 1, "Work_pred_nonblack": 1}
for choice in ["Home", "School"]:
    results[choice + "_pred_black"] = math.exp(_index_at_means(choice, True))
    results[choice + "_pred_nonblack"] = math.exp(_index_at_means(choice, False))
# Denominators: sum of the exponentiated indices within each group. The
# substring test relies on "_black" not occurring inside "..._nonblack" keys.
for suffix in ("_black", "_nonblack"):
    results["denom" + suffix] = sum(
        value for key, value in results.items() if suffix in key
    )

print("Probabilities and marginal effects of black race")
print("Choices: School and Home")
print("Evaluated at: Means")
print("\n*************************************\n")
for position, label in enumerate(["School", "Home"]):
    if position:
        print("\n------------------------------------\n")
    prob_black = results[label + "_pred_black"] / results["denom_black"]
    prob_nonblack = results[label + "_pred_nonblack"] / results["denom_nonblack"]
    print("CHOICE: " + label.upper())
    print("Probability of choice: " + label + " for black race: " + \
          str(prob_black))
    print("Probability of choice: " + label + " for non-black race: " + \
          str(prob_nonblack))
    # Reported "marginal effect" is the discrete change black minus non-black.
    print("Marginal effect: " + label + " for black race: " + \
          str(prob_black - prob_nonblack))
# Output:
# Probabilities and marginal effects of black race
# Choices: School and Home
# Evaluated at: Means
# *************************************
# CHOICE: SCHOOL
# Probability of choice: School for black race: 0.038500499160116726
# Probability of choice: School for non-black race: 0.0812204203631209
# Marginal effect: School for black race: -0.04271992120300417
# ------------------------------------
# CHOICE: HOME
# Probability of choice: Home for black race: 0.22367741586046824
# Probability of choice: Home for non-black race: 0.18614621458064204
# Marginal effect: Home for black race: 0.037531201279826204