import numpy as np
import pandas as pd
import statsmodels.api as sm
# Load the ctrip dataset from the working directory and preview its
# first five rows.
ctrip = pd.read_csv("ctrip.csv")
ctrip.head(n=5)
""" # BEGIN PROMPT
...
""" # END PROMPT
# BEGIN SOLUTION NO PROMPT
# Relative frequency (share of rows) of each distinct `volunteer` value.
ctrip['volunteer'].value_counts(normalize=True)
# END SOLUTION
""" # BEGIN PROMPT
...
""" # END PROMPT
# BEGIN SOLUTION NO PROMPT
# OLS of `commute` on an intercept and `volunteer`; the fit reports
# heteroskedasticity-robust (HC1) standard errors.
model_2b = sm.OLS(
    endog=ctrip['commute'],
    exog=sm.add_constant(ctrip[['volunteer']]),
)
results_2b = model_2b.fit(cov_type='HC1')
results_2b.summary()
# END SOLUTION
""" # BEGIN PROMPT
...
""" # END PROMPT
# BEGIN SOLUTION NO PROMPT
# OLS of `tenure` on an intercept and `volunteer`, again with
# heteroskedasticity-robust (HC1) standard errors.
model_2c = sm.OLS(
    endog=ctrip['tenure'],
    exog=sm.add_constant(ctrip[['volunteer']]),
)
results_2c = model_2c.fit(cov_type='HC1')
results_2c.summary()
# END SOLUTION
""" # BEGIN PROMPT
...
""" # END PROMPT
# BEGIN SOLUTION NO PROMPT
# Outcome variable: natural log of `calls`, stored as a new column.
# NOTE(review): np.log gives -inf for 0 and NaN for negative inputs;
# presumably `calls` is strictly positive -- confirm against the data.
ctrip['ln_calls'] = np.log(ctrip['calls'])

# OLS of ln_calls on an intercept and `WFHShare`; rows with missing
# values are dropped before fitting, and standard errors are HC1-robust.
model_2d = sm.OLS(
    endog=ctrip['ln_calls'],
    exog=sm.add_constant(ctrip[['WFHShare']]),
    missing='drop',
)
results_2d = model_2d.fit(cov_type='HC1')
results_2d.summary()
# END SOLUTION
""" # BEGIN PROMPT
ctrip['longcommute'] = (...).astype(int)
""" # END PROMPT
# BEGIN SOLUTION NO PROMPT
# Indicator column: 1 where `commute` is at least 120, otherwise 0.
ctrip['longcommute'] = ctrip['commute'].ge(120).astype(int)
# END SOLUTION
""" # BEGIN PROMPT
...
""" # END PROMPT
# BEGIN SOLUTION NO PROMPT
# Interaction term: element-wise product of `WFHShare` and the
# `longcommute` indicator.
ctrip['WFHShareXlongcommute'] = ctrip['WFHShare'].mul(ctrip['longcommute'])

# OLS of ln_calls on an intercept, both main effects and their
# interaction; rows with missing values are dropped, HC1-robust errors.
model_2h = sm.OLS(
    endog=ctrip['ln_calls'],
    exog=sm.add_constant(
        ctrip[['WFHShare', 'longcommute', 'WFHShareXlongcommute']]
    ),
    missing='drop',
)
results_2h = model_2h.fit(cov_type='HC1')
results_2h.summary()
# END SOLUTION
# Load the cigads dataset from the working directory and preview its
# first five rows.
cigads = pd.read_csv("cigads.csv")
cigads.head(n=5)
# Pre-treatment mean for Canada: average annual grams of tobacco sold
# per adult (15+), taken over the rows with YEAR <= 1970.
pre_period = cigads.loc[cigads['YEAR'].le(1970)]
pre_period.loc[pre_period['COUNTRY'].eq("CAN"), 'CIGSPC'].mean()
""" # BEGIN PROMPT
cigads['post'] = (...).astype(int)
cigads['treat'] = (...).astype(int)
cigads['treatpost'] = ...
model_3b = ...
results_3b = ...
results_3b.summary()
""" # END PROMPT
# BEGIN SOLUTION NO PROMPT
# Indicator columns for the regression below:
#   post      -> 1 for years after 1970, else 0
#   treat     -> 1 for rows whose COUNTRY is "US", else 0
#   treatpost -> product of the two (1 only for US rows after 1970)
cigads['post'] = cigads['YEAR'].gt(1970).astype(int)
cigads['treat'] = cigads['COUNTRY'].eq("US").astype(int)
cigads['treatpost'] = cigads['treat'].mul(cigads['post'])

# OLS of CIGSPC on an intercept, both indicators and their interaction,
# with heteroskedasticity-robust (HC1) standard errors.
model_3b = sm.OLS(
    endog=cigads['CIGSPC'],
    exog=sm.add_constant(cigads[['post', 'treat', 'treatpost']]),
)
results_3b = model_3b.fit(cov_type='HC1')
results_3b.summary()
# END SOLUTION
""" # BEGIN PROMPT
model_3c = ...
results_3c = ...
results_3c.summary()
""" # END PROMPT
# BEGIN SOLUTION NO PROMPT
# Same specification as the post/treat/treatpost regression, with
# `PRICE` added as an extra regressor; HC1-robust standard errors.
model_3c = sm.OLS(
    endog=cigads['CIGSPC'],
    exog=sm.add_constant(
        cigads[['post', 'treat', 'treatpost', 'PRICE']]
    ),
)
results_3c = model_3c.fit(cov_type='HC1')
results_3c.summary()
# END SOLUTION