!pip install statsmodels==0.13.0

Requirement already satisfied: statsmodels==0.13.0 in /root/venv/lib/python3.7/site-packages (0.13.0)
Requirement already satisfied: pandas>=0.25 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from statsmodels==0.13.0) (1.2.5)
Requirement already satisfied: patsy>=0.5.2 in /root/venv/lib/python3.7/site-packages (from statsmodels==0.13.0) (0.5.2)
Requirement already satisfied: scipy>=1.3 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from statsmodels==0.13.0) (1.7.1)
Requirement already satisfied: numpy>=1.17 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from statsmodels==0.13.0) (1.19.5)
Requirement already satisfied: pytz>=2017.3 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from pandas>=0.25->statsmodels==0.13.0) (2021.3)
Requirement already satisfied: python-dateutil>=2.7.3 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from pandas>=0.25->statsmodels==0.13.0) (2.8.2)
Requirement already satisfied: six in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from patsy>=0.5.2->statsmodels==0.13.0) (1.16.0)

# Standard-library modules
import datetime as dt
import io
import json
from datetime import datetime

# Third-party modules for API calls, data handling and regression
import numpy as np
import pandas as pd
import requests
import statsmodels.api as sm
from statsmodels import regression

# Modules for plotting
import matplotlib.pyplot as plt
import seaborn as sns

# Import COVID data (Our World in Data CSV).
url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'
response = requests.get(url, timeout=60)
# Fail loudly on an HTTP error instead of handing an error page to the CSV parser.
response.raise_for_status()
download = response.content  # raw bytes of the CSV payload
covid = pd.read_csv(io.StringIO(download.decode('utf-8')), parse_dates=['date'])

# Import stringency index data.
#
# NOTE(review): `url2` was defined here but never requested -- the original
# cell called requests.get(url), i.e. it downloaded the CSV at `url` a second
# time and took that file's `stringency_index` column. The endpoint below
# presumably serves JSON rather than CSV (confirm before switching to it), so
# it could not be fed to pd.read_csv as-is. The data source actually used is
# therefore made explicit and the redundant download is avoided by reusing the
# bytes already held in `download`.
url2 = 'https://covidtrackerapi.bsg.ox.ac.uk/api/v2/stringency/date-range/2020-01-01/2021-10-08'
download2 = download  # same payload as before; original variable name kept
stringency_index = pd.read_csv(
    io.StringIO(download2.decode('utf-8')),
    usecols=['date', 'stringency_index', 'iso_code'],  # parse only what is needed
    parse_dates=['date'],
)
# usecols keeps the file's column order; restore the order used by this notebook.
stringency_index = stringency_index[['date', 'stringency_index', 'iso_code']]
stringency_index.head()

# Keep only the columns of the covid dataframe that the analysis uses.
relevant_columns = ['iso_code', 'date', 'total_cases', 'new_cases', 'new_deaths']
covid = covid[relevant_columns]

# Join the two dataframes on (date, iso_code); only keys present in both are kept.
Merged_df = covid.merge(
    right=stringency_index,
    how='inner',
    on=['date', 'iso_code'],
)

# Cleaning the data

# Report the number of missing values per column. (isna().count() returns the
# total row count for every column, not the number of NaNs, and as a non-final
# expression its result was never displayed.)
print(Merged_df.isna().sum())

# dropna() returns a new frame, so the original bare call cleaned nothing.
# Persist the removal of rows without a stringency index: every regression
# below uses that column and already discards such rows via missing='drop',
# so the fitted models are unaffected. NaNs in the case/death columns are
# deliberately left in place because each model drops them independently.
Merged_df = Merged_df.dropna(subset=['stringency_index'])
Merged_df

1) Do government interventions impact the number of new COVID-19 cases? What does the relationship look like?

# Regression 1: New Cases (dependent) against Stringency (independent)
y = Merged_df['new_cases']
X = sm.add_constant(Merged_df['stringency_index'])  # adds the intercept column

# statsmodels takes the response (endog) first and the regressors (exog) second.
# Rows with a NaN in either are dropped before fitting.
exog = X.astype(float)
model = sm.OLS(endog=y.astype(float), exog=exog, missing='drop').fit()

# Fitted values for every row of the design matrix
predictions = model.predict(exog)

# Print out the statistics
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:              new_cases   R-squared:                       0.009
Model:                            OLS   Adj. R-squared:                  0.009
Method:                 Least Squares   F-statistic:                     926.8
Date:                Wed, 06 Oct 2021   Prob (F-statistic):          1.22e-202
Time:                        13:34:07   Log-Likelihood:            -1.0772e+06
No. Observations:               99771   AIC:                         2.154e+06
Df Residuals:                   99769   BIC:                         2.155e+06
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
====================================================================================
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const             -904.5578    112.064     -8.072      0.000   -1124.202    -684.914
stringency_index    55.9761      1.839     30.444      0.000      52.372      59.580
==============================================================================
Omnibus:                   198180.202   Durbin-Watson:                   0.111
Prob(Omnibus):                  0.000   Jarque-Bera (JB):        574757475.515
Skew:                          16.227   Prob(JB):                         0.00
Kurtosis:                     373.412   Cond. No.                         182.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

# Visualize the relationship between stringency (X) and new_cases (Y)

# np.polyfit cannot handle NaN, so work on the fully populated rows only.
Merged_df_dropna = Merged_df.dropna()
stringency = Merged_df_dropna['stringency_index']
new_cases = Merged_df_dropna['new_cases']

plt.figure(figsize=(20, 6))

# Raw observations as unconnected dots
plt.plot(stringency, new_cases, marker='.', linestyle='none', color='purple')

# Degree-1 least-squares fit: slope and intercept of the best-fit line
slope, intercept = np.polyfit(stringency, new_cases, 1)
plt.plot(stringency, slope * stringency + intercept)

plt.ylim(0, 200000)
plt.xlabel('Stringency Index')
plt.ylabel('New Cases')
plt.title('Stringency Index against New Cases \n')

# LOG Regression 1: log2(New Cases) against Stringency
#
# Only strictly positive counts have a logarithm. The original cell filtered
# with `!= 0`, which let negative counts through to np.log2 ("invalid value
# encountered in log2") and turned them into NaN that the fit then dropped.
# Filtering with `> 0` fits on the same rows without the warning.
#
# The earlier `stringency_index != 0` filter was overwritten on the very next
# line and never took effect; it is omitted because stringency is not
# log-transformed, so a zero index is a valid regressor value.
Merged_df_log = Merged_df[Merged_df['new_cases'] > 0]

X = Merged_df_log['stringency_index']
y = np.log2(Merged_df_log['new_cases'])

# Note the difference in argument order: response first, regressors second
X = sm.add_constant(X)
model = sm.OLS(y.astype(float), X.astype(float), missing='drop').fit()
predictions = model.predict(X.astype(float))  # make the predictions by the model

# Print out the statistics
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:              new_cases   R-squared:                       0.059
Model:                            OLS   Adj. R-squared:                  0.059
Method:                 Least Squares   F-statistic:                     5085.
Date:                Wed, 06 Oct 2021   Prob (F-statistic):               0.00
Time:                        13:34:08   Log-Likelihood:            -2.1909e+05
No. Observations:               81455   AIC:                         4.382e+05
Df Residuals:                   81453   BIC:                         4.382e+05
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
====================================================================================
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                4.8195      0.042    115.329      0.000       4.738       4.901
stringency_index     0.0474      0.001     71.309      0.000       0.046       0.049
==============================================================================
Omnibus:                     1572.520   Durbin-Watson:                   0.084
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              990.392
Skew:                          -0.130   Prob(JB):                    8.69e-216
Kurtosis:                       2.527   Cond. No.                         211.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/arraylike.py:358: RuntimeWarning: invalid value encountered in log2
  result = getattr(ufunc, method)(*inputs, **kwargs)

2) Do government interventions impact the number of new COVID-19 deaths? What does the relationship look like?

# Regression 2: New Deaths (dependent) against Stringency (independent)
y = Merged_df['new_deaths']
X = sm.add_constant(Merged_df['stringency_index'])  # adds the intercept column

# Response goes first, design matrix second; incomplete rows are dropped.
response = y.astype(float)
design = X.astype(float)
model = sm.OLS(response, design, missing='drop').fit()

# Fitted values for every row of the design matrix
predictions = model.predict(design)

# Print out the statistics
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:             new_deaths   R-squared:                       0.017
Model:                            OLS   Adj. R-squared:                  0.017
Method:                 Least Squares   F-statistic:                     1587.
Date:                Wed, 06 Oct 2021   Prob (F-statistic):               0.00
Time:                        13:34:08   Log-Likelihood:            -6.2546e+05
No. Observations:               92231   AIC:                         1.251e+06
Df Residuals:                   92229   BIC:                         1.251e+06
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
====================================================================================
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const              -33.3103      2.235    -14.902      0.000     -37.692     -28.929
stringency_index     1.4468      0.036     39.839      0.000       1.376       1.518
==============================================================================
Omnibus:                   144498.348   Durbin-Watson:                   0.181
Prob(Omnibus):                  0.000   Jarque-Bera (JB):         84199140.301
Skew:                          10.142   Prob(JB):                         0.00
Kurtosis:                     149.624   Cond. No.                         196.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.

# Visualize the relationship between stringency (X) and new_deaths (Y)
# Uses the NaN-free frame Merged_df_dropna.
stringency = Merged_df_dropna['stringency_index']
new_deaths = Merged_df_dropna['new_deaths']

plt.figure(figsize=(20, 6))

# Raw observations as unconnected dots
plt.plot(stringency, new_deaths, marker='.', linestyle='none', color='orange')

# Degree-1 least-squares fit: slope and intercept of the best-fit line
slope, intercept = np.polyfit(stringency, new_deaths, 1)
plt.plot(stringency, slope * stringency + intercept)

plt.ylim(0, 4200)
plt.xlabel('Stringency Index')
plt.ylabel('New Deaths')
plt.title('Stringency Index against New Deaths \n')

# LOG Regression 2: log2(New Deaths) against Stringency
#
# Only strictly positive counts have a logarithm. The original cell filtered
# with `!= 0`, which let negative counts through to np.log2 ("invalid value
# encountered in log2") and turned them into NaN that the fit then dropped.
# Filtering with `> 0` fits on the same rows without the warning.
#
# The earlier `stringency_index != 0` filter was overwritten on the very next
# line and never took effect; it is omitted because stringency is not
# log-transformed, so a zero index is a valid regressor value.
Merged_df_log = Merged_df[Merged_df['new_deaths'] > 0]

X = Merged_df_log['stringency_index']
y = np.log2(Merged_df_log['new_deaths'])

# Note the difference in argument order: response first, regressors second
X = sm.add_constant(X)
model = sm.OLS(y.astype(float), X.astype(float), missing='drop').fit()
predictions = model.predict(X.astype(float))  # make the predictions by the model

# Print out the statistics
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:             new_deaths   R-squared:                       0.048
Model:                            OLS   Adj. R-squared:                  0.048
Method:                 Least Squares   F-statistic:                     2794.
Date:                Wed, 06 Oct 2021   Prob (F-statistic):               0.00
Time:                        13:34:08   Log-Likelihood:            -1.3286e+05
No. Observations:               55818   AIC:                         2.657e+05
Df Residuals:                   55816   BIC:                         2.657e+05
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
====================================================================================
                       coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------
const                1.5885      0.042     38.219      0.000       1.507       1.670
stringency_index     0.0339      0.001     52.859      0.000       0.033       0.035
==============================================================================
Omnibus:                     2249.797   Durbin-Watson:                   0.144
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2210.522
Skew:                           0.447   Prob(JB):                         0.00
Kurtosis:                       2.611   Cond. No.                         244.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/core/arraylike.py:358: RuntimeWarning: invalid value encountered in log2
  result = getattr(ufunc, method)(*inputs, **kwargs)

# SWAPPED DEPENDENT AND INDEPENDENT VARIABLE (New Cases)
# Stringency is now the response and new cases the regressor.
y = Merged_df['stringency_index']
X = sm.add_constant(Merged_df['new_cases'])  # adds the intercept column

# Response goes first, design matrix second; incomplete rows are dropped.
exog = X.astype(float)
model = sm.OLS(endog=y.astype(float), exog=exog, missing='drop').fit()

# Fitted values for every row of the design matrix
predictions = model.predict(exog)

# Print out the statistics
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:       stringency_index   R-squared:                       0.009
Model:                            OLS   Adj. R-squared:                  0.009
Method:                 Least Squares   F-statistic:                     926.8
Date:                Wed, 06 Oct 2021   Prob (F-statistic):          1.22e-202
Time:                        13:34:08   Log-Likelihood:            -4.4181e+05
No. Observations:               99771   AIC:                         8.836e+05
Df Residuals:                   99769   BIC:                         8.836e+05
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         57.0653      0.065    872.756      0.000      56.937      57.193
new_cases      0.0002    5.4e-06     30.444      0.000       0.000       0.000
==============================================================================
Omnibus:                     2800.221   Durbin-Watson:                   0.022
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             2317.595
Skew:                          -0.302   Prob(JB):                         0.00
Kurtosis:                       2.561   Cond. No.                     1.23e+04
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.23e+04. This might indicate that there are
strong multicollinearity or other numerical problems.

# SWAPPED DEPENDENT AND INDEPENDENT VARIABLE (New Deaths)
# Stringency is now the response and new deaths the regressor.
y = Merged_df['stringency_index']
X = sm.add_constant(Merged_df['new_deaths'])  # adds the intercept column

# Response goes first, design matrix second; incomplete rows are dropped.
response = y.astype(float)
design = X.astype(float)
model = sm.OLS(response, design, missing='drop').fit()

# Fitted values for every row of the design matrix
predictions = model.predict(design)

# Print out the statistics
print(model.summary())

                            OLS Regression Results                            
==============================================================================
Dep. Variable:       stringency_index   R-squared:                       0.017
Model:                            OLS   Adj. R-squared:                  0.017
Method:                 Least Squares   F-statistic:                     1587.
Date:                Wed, 06 Oct 2021   Prob (F-statistic):               0.00
Time:                        13:34:09   Log-Likelihood:            -4.0328e+05
No. Observations:               92231   AIC:                         8.066e+05
Df Residuals:                   92229   BIC:                         8.066e+05
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         57.8382      0.065    891.177      0.000      57.711      57.965
new_deaths     0.0117      0.000     39.839      0.000       0.011       0.012
==============================================================================
Omnibus:                     2210.322   Durbin-Watson:                   0.022
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1760.169
Skew:                          -0.260   Prob(JB):                         0.00
Kurtosis:                       2.566   Cond. No.                         227.
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.