!pip install statsmodels==0.13.0
# Import modules for API calls
import numpy as np
import requests
import datetime as dt
import io
import pandas as pd
import requests
import json
from datetime import datetime
import statsmodels.api as sm
from statsmodels import regression
import matplotlib.pyplot as plt
#
# Import module for plotting
import seaborn as sns
## import COVID data
# Download the Our World in Data COVID-19 CSV and parse it into a DataFrame.
url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'
response = requests.get(url)
# Fail fast on an HTTP error instead of handing an error page to the CSV parser.
response.raise_for_status()
download = response.content
# The 'date' column is parsed into datetime64 so it can be used as a merge key.
covid = pd.read_csv(io.StringIO(download.decode('utf-8')), parse_dates=['date'])
#import stringency index data
url2 = 'https://covidtrackerapi.bsg.ox.ac.uk/api/v2/stringency/date-range/2020-01-01/2021-10-08'
# NOTE(review): this step has always read the OWID CSV (`url`), never `url2`;
# the stringency values therefore come from the OWID file's own
# 'stringency_index' column. `url2` looks like a JSON API endpoint, which
# pd.read_csv cannot parse -- switching to it would need JSON handling and
# should be confirmed separately.
# Reuse the bytes that were already downloaded instead of fetching the same
# large file a second time.
download2 = download
# Parse only the three columns that are needed; this is much cheaper than
# re-parsing every column of the CSV.
stringency_index = pd.read_csv(
    io.StringIO(download2.decode('utf-8')),
    usecols=['date', 'stringency_index', 'iso_code'],
    parse_dates=['date'],
)
# Fix the column order explicitly (usecols keeps the file's own order).
stringency_index = stringency_index[['date','stringency_index','iso_code']]
stringency_index.head()
# Keep only the country code, date and case/death columns of the COVID frame
relevant_columns = ['iso_code', 'date', 'total_cases', 'new_cases', 'new_deaths']
covid = covid.loc[:, relevant_columns]
# Join the COVID and stringency frames on date and country code
# (pandas' default join type, i.e. only keys present in both frames survive)
Merged_df = pd.merge(covid, stringency_index, on=['date', 'iso_code'])
#Cleaning the data
# Number of missing values per column. isna().sum() adds up the True flags;
# isna().count() would only report the total row count for every column.
Merged_df.isna().sum()
# dropna() returns a new DataFrame and the result is not assigned here, so
# Merged_df itself still contains rows with NaN after this line. The
# regressions below drop missing rows per model (missing='drop') and the
# plots use their own Merged_df_dropna copy.
Merged_df.dropna()
1) Do government interventions impact the number of new COVID-19 cases? What does the relationship look like?
# Regression 1: New Cases (dependent) against Stringency (independent)
y = Merged_df['new_cases']
# Prepend a constant column so the fitted model includes an intercept
X = sm.add_constant(Merged_df['stringency_index'])
# OLS takes the response first and the regressors second;
# rows containing NaN are dropped by statsmodels (missing='drop')
ols_spec = sm.OLS(y.astype(float), X.astype(float), missing='drop')
model = ols_spec.fit()
# Fitted values of the model for every row of X
predictions = model.predict(X.astype(float))
# Report the regression statistics
print(model.summary())
# Visualize the relationship between stringency (X) and new_cases (Y)
# Rows with any NaN are removed so a best-fit line can be computed and drawn
Merged_df_dropna = Merged_df.dropna()
stringency_vals = Merged_df_dropna['stringency_index']
case_vals = Merged_df_dropna['new_cases']
plt.figure(figsize=(20, 6))
# Raw observations as unconnected purple dots
plt.plot(stringency_vals, case_vals, marker='.', linestyle='none', color='purple')
# Degree-1 polynomial fit: m is the slope, b the intercept
m, b = np.polyfit(stringency_vals, case_vals, 1)
plt.plot(stringency_vals, m * stringency_vals + b)
# Clip the y-axis to the range 0..200000
plt.ylim(0, 200000)
plt.xlabel('Stringency Index')
plt.ylabel('New Cases')
plt.title('Stringency Index against New Cases \n')
# LOG Regression 1: log2(New Cases) against Stringency
# Both conditions are combined in a single mask so that neither filter is
# lost (two consecutive assignments from Merged_df would keep only the last).
# new_cases must be strictly positive because log2 is undefined for zero and
# for negative values.
log_mask = (Merged_df['stringency_index'] != 0) & (Merged_df['new_cases'] > 0)
Merged_df_log = Merged_df[log_mask]
X = Merged_df_log['stringency_index']
y = np.log2(Merged_df_log['new_cases'])
# Note the difference in argument order: add the intercept column to X,
# then pass the response (y) first and the regressors (X) second.
X = sm.add_constant(X)
# Rows that still contain NaN are dropped by statsmodels (missing='drop').
model = sm.OLS(y.astype(float), X.astype(float), missing='drop').fit()
predictions = model.predict(X.astype(float)) # make the predictions by the model
# Print out the statistics
print(model.summary())
2) Do government interventions impact the number of new COVID-19 deaths? What does the relationship look like?
# Regression 2: New Deaths (dependent) against Stringency (independent)
y = Merged_df['new_deaths']
# Regressor matrix: stringency plus a constant column for the intercept
X = sm.add_constant(Merged_df['stringency_index'])
# Response goes first, regressors second; NaN rows are dropped by statsmodels
response_values = y.astype(float)
regressor_values = X.astype(float)
model = sm.OLS(response_values, regressor_values, missing='drop').fit()
# Fitted values of the model for every row of X
predictions = model.predict(X.astype(float))
# Report the regression statistics
print(model.summary())
# Visualize the relationship between stringency (X) and new_deaths (Y)
# Uses the NaN-free frame Merged_df_dropna built earlier in the script
stringency_vals = Merged_df_dropna['stringency_index']
death_vals = Merged_df_dropna['new_deaths']
plt.figure(figsize=(20, 6))
# Raw observations as unconnected orange dots
plt.plot(stringency_vals, death_vals, marker='.', linestyle='none', color='orange')
# Degree-1 polynomial fit: m is the slope, b the intercept
m, b = np.polyfit(stringency_vals, death_vals, 1)
plt.plot(stringency_vals, m * stringency_vals + b)
# Clip the y-axis to the range 0..4200
plt.ylim(0, 4200)
plt.xlabel('Stringency Index')
plt.ylabel('New Deaths')
plt.title('Stringency Index against New Deaths \n')
# LOG Regression 2: log2(New Deaths) against Stringency
# Both conditions are combined in a single mask so that neither filter is
# lost (two consecutive assignments from Merged_df would keep only the last).
# new_deaths must be strictly positive because log2 is undefined for zero and
# for negative values.
log_mask = (Merged_df['stringency_index'] != 0) & (Merged_df['new_deaths'] > 0)
Merged_df_log = Merged_df[log_mask]
X = Merged_df_log['stringency_index']
y = np.log2(Merged_df_log['new_deaths'])
# Note the difference in argument order: add the intercept column to X,
# then pass the response (y) first and the regressors (X) second.
X = sm.add_constant(X)
# Rows that still contain NaN are dropped by statsmodels (missing='drop').
model = sm.OLS(y.astype(float), X.astype(float), missing='drop').fit()
predictions = model.predict(X.astype(float)) # make the predictions by the model
# Print out the statistics
print(model.summary())
# SWAPPED DEPENDENT AND INDEPENDENT VARIABLE (New Cases)
# Here stringency is the response and new_cases is the regressor
y = Merged_df['stringency_index']
# Regressor matrix: new_cases plus a constant column for the intercept
X = sm.add_constant(Merged_df['new_cases'])
# Response goes first, regressors second; NaN rows are dropped by statsmodels
ols_spec = sm.OLS(y.astype(float), X.astype(float), missing='drop')
model = ols_spec.fit()
# Fitted values of the model for every row of X
predictions = model.predict(X.astype(float))
# Report the regression statistics
print(model.summary())
# SWAPPED DEPENDENT AND INDEPENDENT VARIABLE (New Deaths)
# Here stringency is the response and new_deaths is the regressor
y = Merged_df['stringency_index']
# Regressor matrix: new_deaths plus a constant column for the intercept
X = sm.add_constant(Merged_df['new_deaths'])
# Response goes first, regressors second; NaN rows are dropped by statsmodels
response_values = y.astype(float)
regressor_values = X.astype(float)
model = sm.OLS(response_values, regressor_values, missing='drop').fit()
# Fitted values of the model for every row of X
predictions = model.predict(X.astype(float))
# Report the regression statistics
print(model.summary())