# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
!pip install statsmodels==0.13.0
!pip install statsmodels==0.13.0
# Data Management
from dateutil import relativedelta as rd
import pandas as pd
import numpy as np
# Visualization
import matplotlib.pyplot as plt
import plotly as py
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
# Initialise plotly.offline's notebook mode once, before any figure is shown
# (presumably so that fig.show() renders inline in the notebook — confirm).
pyo.init_notebook_mode()
import seaborn as sns
# Regression
import statsmodels.api as sm
from statsmodels.formula.api import ols
import statsmodels.graphics.api as smg
# Import modules for API calls
import requests
import io
import pandas as pd
import requests
import json
from datetime import datetime
# Import module for plotting
import seaborn as sns
## OWID COVID-19 data (Taken From: https://github.com/owid/covid-19-data/tree/master/public/data)
url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'
# Fail fast on a stalled connection or an HTTP error status instead of
# handing an error page to the CSV parser.
response = requests.get(url, timeout=60)
response.raise_for_status()
download = response.content
# Parse the CSV from memory; the 'date' column is converted to datetimes.
covid = pd.read_csv(io.StringIO(download.decode('utf-8')), parse_dates=['date'])
covid.tail()
covid.info()
# Visual overview of missing data: one cell per value, coloured by isnull().
sns.heatmap(covid.isnull(), cbar=False)
def plot_gov_action(covid_outcome, gov_action, full_grouped_df):
    """
    Scatter-plot a COVID outcome against a government action, coloured by ``iso_code``.

    Rows whose ``gov_action`` value is missing are excluded before plotting.
    The y-axis is fixed to run from 0 to the largest ``covid_outcome`` value
    found in the whole of ``full_grouped_df``.

    Parameters:
        covid_outcome (str): Name of the column plotted on the y-axis.
        gov_action (str): Name of the column plotted on the x-axis; also used
            as the figure title.
        full_grouped_df (pandas.DataFrame): The data source. Must contain the
            ``covid_outcome``, ``gov_action`` and ``iso_code`` columns.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    # `series != None` is evaluated element-wise and is True for every row
    # (NaN included), so it never filtered anything. notna() is the real test.
    has_action = full_grouped_df[gov_action].notna()
    fig = px.scatter(
        full_grouped_df[has_action],
        x=gov_action,
        y=covid_outcome,
        color="iso_code",
        title=gov_action,
        height=600,
    )
    # The axis limit uses the unfiltered frame, as before.
    fig.update_layout(yaxis=dict(range=[0, full_grouped_df[covid_outcome].max()]))
    fig.show()
# Plot each outcome against the stringency index, one figure per outcome.
for outcome_column in (
    'new_cases_per_million',
    'new_deaths_per_million',
    'positive_rate',
):
    plot_gov_action(outcome_column, 'stringency_index', covid)
import numpy as np
# Add a log(x + 1) version of each per-million column, stored as 'log_<column>'.
for per_million_column in ('new_cases_per_million', 'new_deaths_per_million'):
    covid['log_' + per_million_column] = np.log(covid[per_million_column] + 1)
# Pairwise relationships between the variables below, coloured by continent.
pairplot_columns = [
    'log_new_cases_per_million',
    'stringency_index',
    'handwashing_facilities',
    'people_fully_vaccinated_per_hundred',
    'population_density',
    'median_age',
    'continent',
]
g = sns.pairplot(covid[pairplot_columns], hue='continent')
#plt.matshow(full_grouped.corr())
#plt.show()
f, ax = plt.subplots(figsize=(10, 8))
# Correlate numeric (and boolean) columns only. The frame also holds text and
# date columns, which DataFrame.corr() in pandas >= 2.0 raises on instead of
# silently dropping; select_dtypes works the same on old and new pandas.
corr = covid.select_dtypes(include=['number', 'bool']).corr()
# Generate a mask for the upper triangle (it mirrors the lower one)
mask = np.triu(np.ones_like(corr, dtype=bool))
# Generate a custom diverging colormap
cmap = sns.diverging_palette(230, 20, as_cmap=True)
# Plot heatmap
sns.heatmap(corr, mask=mask, cmap=cmap, square=True, ax=ax)
# OLS regression: log new cases per million on the stringency index and the
# other covariates listed below, with an intercept term.
baseline_regressors = [
    'stringency_index',
    'handwashing_facilities',
    'people_fully_vaccinated_per_hundred',
    'median_age',
    'population_density',
]
y = covid['log_new_cases_per_million']
X = sm.add_constant(covid[baseline_regressors])
# missing='drop' tells statsmodels to discard observations containing NaNs.
ols_model = sm.OLS(endog=y, exog=X.astype(float), missing='drop')
result = ols_model.fit()
print(result.summary2())
# OLS regression with a squared stringency term added to the regressors,
# allowing a non-linear relationship with the stringency index.
covid['sqr_stringency_index'] = covid['stringency_index'] ** 2
quadratic_regressors = [
    'sqr_stringency_index',
    'stringency_index',
    'handwashing_facilities',
    'people_fully_vaccinated_per_hundred',
    'median_age',
    'population_density',
]
y = covid['log_new_cases_per_million']
X = sm.add_constant(covid[quadratic_regressors])
# missing='drop' tells statsmodels to discard observations containing NaNs.
ols_model = sm.OLS(endog=y, exog=X.astype(float), missing='drop')
result = ols_model.fit()
print(result.summary2())