!pip install statsmodels
# Import modules for API calls
import requests
import io
import pandas as pd
import requests
import json
from datetime import datetime
# Import module for plotting
import seaborn as sns
# Import module for regressions
import statsmodels.api as sm
from statsmodels import regression
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col
## Our World in Data COVID-19 dataset (Taken From: https://github.com/owid/covid-19-data/tree/master/public/data)
url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'
# Bound the wait so a stalled connection cannot hang the run, and raise on an
# HTTP error status so an error page is never decoded and parsed as CSV.
response = requests.get(url, timeout=60)
response.raise_for_status()
download = response.content
# Decode the raw bytes explicitly as UTF-8 and parse the 'date' column as
# datetimes so it can be compared against a date threshold later.
covid = pd.read_csv(io.StringIO(download.decode('utf-8')), parse_dates=['date'])
covid.tail()
covid.info()
# Filtering by date. A shorter time period was chosen for recency in the analysis of government stringency vs covid severity
covid1 = covid[covid['date'] >= '2021-07-01T00:00:00.000000']
covid1.info()
# Grouping the rows by country and taking the mean of all values so that each row represents one country.
# numeric_only=True restricts the mean to numeric columns; without it, recent
# pandas versions raise a TypeError when the frame contains string columns.
covid1 = covid1.groupby('location').mean(numeric_only=True)
covid1.info()
covid1.head()
# Scatter plot with a fitted regression line of each severity measure against
# the stringency index. x and y are passed by keyword: recent seaborn versions
# take `data` as the first positional parameter and reject positional x/y.
# NOTE(review): regplot draws onto the current Axes when no `ax` is given, so
# running these back-to-back in one script/cell overlays the plots — confirm
# whether each is meant to live in its own notebook cell.
sns.regplot(x='stringency_index', y='reproduction_rate', data=covid1)
sns.regplot(x='stringency_index', y='new_cases_per_million', data=covid1)
sns.regplot(x='stringency_index', y='new_deaths_per_million', data=covid1)
sns.regplot(x='stringency_index', y='weekly_icu_admissions_per_million', data=covid1)
sns.regplot(x='stringency_index', y='weekly_hosp_admissions_per_million', data=covid1)
# Ordinary least squares: regress each severity measure on the stringency
# index, using the per-country means in covid1.

# 1) Specify the five single-regressor models.
mod = smf.ols(formula='reproduction_rate ~ stringency_index', data=covid1)
mod1 = smf.ols(formula='new_cases_per_million ~ stringency_index', data=covid1)
mod2 = smf.ols(formula='new_deaths_per_million ~ stringency_index', data=covid1)
mod3 = smf.ols(formula='weekly_icu_admissions_per_million ~ stringency_index', data=covid1)
mod4 = smf.ols(formula='weekly_hosp_admissions_per_million ~ stringency_index', data=covid1)

# 2) Fit them; the fitted results keep the names used by the comparison table.
res, res1, res2, res3, res4 = [
    model.fit() for model in (mod, mod1, mod2, mod3, mod4)
]

# 3) Print each full summary, in the same order as the models above.
print(*(fitted.summary() for fitted in (res, res1, res2, res3, res4)), sep='\n')
# Creating the regression output table for better model comparison.
# One column per fitted model; stars=True adds significance stars to the
# coefficients.
dfoutput = summary_col([res, res1, res2, res3, res4], stars=True)
print(dfoutput)