import pandas as pd

# obs_wylong.csv: long-format daily visit observations for Wyoming places (one row per placekey and date)
wyday = pd.read_csv('obs_wylong.csv')
# statecoreplaces.csv: core places (points of interest) with NAICS codes and location attributes
statecoreplaces = pd.read_csv('statecoreplaces.csv')

# keep only places located in Wyoming; copy so that dropping columns below does not
# trigger a chained-assignment warning
coreplacesWY = statecoreplaces[statecoreplaces.region == 'WY'].copy()
print(coreplacesWY.shape)
# drop columns that are not needed for the analysis
coreplacesWY.drop(columns=['brands', 'latitude', 'longitude', 'street_address', 'iso_country_code',
                           'phone_number', 'open_hours', 'category_tags', 'opened_on', 'closed_on',
                           'tracking_closed_since', 'geometry_type'], inplace=True)

# number of places by NAICS code
coreplacesWY.groupby('naics_code').size()
import numpy as np

# coreWY: keep only full-service restaurants (NAICS 722511), limited-service restaurants (722513),
# and grocery stores (445110)
coreWY = coreplacesWY.loc[coreplacesWY['naics_code'].isin([722511, 722513, 445110])].copy()
# bizgroup = 1 for full-/limited-service restaurants, 0 for grocery stores
coreWY['bizgroup'] = np.where(coreWY['naics_code'].isin([722511, 722513]), 1, 0)
coreWY.groupby('bizgroup').size()
# count places by city and NAICS code, then keep the ten cities with the most core places
table = coreplacesWY.groupby(['city', 'naics_code']).size()
top10 = table.groupby('city').sum().nlargest(10)
# restrict the restaurant/grocer sample to those ten cities
top10biz = coreWY[coreWY.city.isin(top10.index)]
# places per city, split by business group (columns: 0 = grocers, 1 = restaurants)
top10biz.groupby(['city', 'bizgroup']).size().reset_index(name='count').pivot(index='city', columns='bizgroup', values='count')
# outer merge keeps unmatched rows on both sides (for diagnostics); inner merge keeps only matched rows
longcoreout = pd.merge(wyday, coreWY, on='placekey', how='outer', suffixes=('', '_y'))
longcorein = pd.merge(wyday, coreWY, on='placekey', how='inner', suffixes=('', '_y'))
longcorein.to_csv('longcorein.csv', index=False)
print(len(longcorein))

# restaurants/grocers in coreWY with no daily-visit observations
print((longcoreout.city.isna() & longcoreout.city_y.notna()).sum())
# daily-visit observations for places not in coreWY
print((longcoreout.city.notna() & longcoreout.city_y.isna()).sum())
# NAICS composition of each business group within the top-ten cities
longcorein[longcorein.city.isin(top10.index)].groupby(['bizgroup', 'naics_code']).size()
# analysis sample: matched observations in the top-ten cities; copy so that the column
# assignments below do not trigger chained-assignment warnings
longcoreintop10 = longcorein[longcorein.city.isin(top10.index)].copy()
# average daily visits by business group
longcoreintop10.groupby('bizgroup').agg({'dailyvisits': 'mean'})
# statistical test of the difference in average daily visits between the two business groups
from scipy.stats import ttest_ind

a = longcoreintop10.loc[longcoreintop10['bizgroup'] == 1, 'dailyvisits']
b = longcoreintop10.loc[longcoreintop10['bizgroup'] == 0, 'dailyvisits']
# Welch's t-test (unequal variances), two-sided alternative
t, p = ttest_ind(a, b, equal_var=False, alternative='two-sided')
print('The t-test results are: t-statistic = {:.2f}, p-value = {:.4f}'.format(t, p))
import statsmodels.formula.api as smf

longcoreintop10['date'] = pd.to_datetime(longcoreintop10['date'])
# post0313 = 1 if the date is on or after March 13, 2020
longcoreintop10['post0313'] = np.where(longcoreintop10['date'] >= pd.to_datetime('2020-03-13'), 1, 0)
# difference-in-differences interaction: restaurant x post-March 13
longcoreintop10['bizgroup_post'] = longcoreintop10['bizgroup'] * longcoreintop10['post0313']
# weekend = 1 for Saturday and Sunday
longcoreintop10['dayofweek'] = longcoreintop10['date'].dt.dayofweek
longcoreintop10['weekend'] = np.where(longcoreintop10['dayofweek'] >= 5, 1, 0)

# OLS: regress ln(dailyvisits + 1) on bizgroup, post0313, their interaction, and weekend
results = smf.ols('np.log(dailyvisits + 1) ~ bizgroup + post0313 + bizgroup_post + weekend',
                  data=longcoreintop10).fit()
print(results.summary())
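# A minimal interpretation sketch, not part of the original output: because the outcome is
# ln(dailyvisits + 1), the interaction coefficient can be read as an approximate percentage
# change in visits for restaurants relative to grocers after March 13, 2020.
did_coef = results.params['bizgroup_post']
print('Approximate DiD effect on daily visits: {:.1f}%'.format((np.exp(did_coef) - 1) * 100))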
!pip install linearmodels
from linearmodels import PanelOLS

# PanelOLS needs a unique (entity, time) MultiIndex, so index by placekey and date
# (indexing by city would create duplicate entity-time pairs, one per place)
finaldata = longcoreintop10.copy()
finaldata = finaldata.set_index(['placekey', 'date'])

# panel regression of ln(dailyvisits + 1) with place (entity) fixed effects; bizgroup is
# constant within a place, so it is absorbed and dropped
mod = PanelOLS(np.log(finaldata['dailyvisits'] + 1),
               finaldata[['bizgroup', 'post0313', 'bizgroup_post', 'weekend']],
               entity_effects=True, time_effects=False, drop_absorbed=True)
# standard errors clustered by place
results = mod.fit(cov_type='clustered', cluster_entity=True)
print(results)
# date (time) fixed effects instead: post0313 and weekend vary only with the date, so they are absorbed
mod2 = PanelOLS(np.log(finaldata['dailyvisits'] + 1),
                finaldata[['bizgroup', 'post0313', 'bizgroup_post', 'weekend']],
                entity_effects=False, time_effects=True, drop_absorbed=True)
results2 = mod2.fit(cov_type='clustered', cluster_entity=True)
print(results2)

# two-way fixed effects (place and date); only the interaction bizgroup_post survives absorption
mod3 = PanelOLS(np.log(finaldata['dailyvisits'] + 1),
                finaldata[['bizgroup', 'post0313', 'bizgroup_post', 'weekend']],
                entity_effects=True, time_effects=True, drop_absorbed=True)
results3 = mod3.fit(cov_type='clustered', cluster_entity=True)
print(results3)
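# A possible follow-up, not in the original: linearmodels ships a compare() helper that prints the
# three specifications side by side; the model labels below are arbitrary names chosen here.
from linearmodels.panel import compare
print(compare({'Entity FE': results, 'Time FE': results2, 'Two-way FE': results3}))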