Untitled Python Project

# Kaggle notebook boilerplate.
# This Python 3 environment is defined by the kaggle/python Docker image
# (https://github.com/kaggle/docker-python) and comes with many helpful
# analytics libraries installed. A couple of them are loaded here.
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory.
# Running this cell lists every file found under the input directory.
import os

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Up to 20GB can be written to the current directory (/kaggle/working/); it is
# preserved as output when a version is created with "Save & Run All".
# Temporary files can go to /kaggle/temp/, but they are not saved outside of
# the current session.

!pip install statsmodels==0.13.0

# Data management
from dateutil import relativedelta as rd
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import plotly as py
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo
import seaborn as sns

# Regression
import statsmodels.api as sm
import statsmodels.graphics.api as smg
from statsmodels.formula.api import ols

# Switch Plotly's offline module into notebook mode.
pyo.init_notebook_mode()

# Import modules for API calls
import io
import json
from datetime import datetime

import pandas as pd
import requests

# Import module for plotting
import seaborn as sns

## COVID-19 dataset published by Our World in Data
## (Taken From: https://github.com/owid/covid-19-data/tree/master/public/data)
url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'

# Use a timeout so a stalled connection cannot hang the notebook, and raise on
# HTTP errors so an error page is never parsed as if it were the CSV.
response = requests.get(url, timeout=60)
response.raise_for_status()
download = response.content

# Parse the CSV from memory; 'date' is converted to datetime64 on load.
covid = pd.read_csv(io.StringIO(download.decode('utf-8')), parse_dates=['date'])
covid.tail()

# Print pandas' summary of the loaded frame (its columns and their dtypes).
covid.info()

# Heatmap of the boolean "is missing" mask of the frame, drawn without a
# colour bar, to show where the dataset has gaps.
sns.heatmap(covid.isna(), cbar=False)

def plot_gov_action(covid_outcome, gov_action, full_grouped_df):
    """Scatter-plot a COVID outcome against a government-action measure.

    Rows where ``gov_action`` is missing are dropped before plotting, points
    are coloured by ``iso_code``, and the y-axis is fixed to run from 0 to the
    largest value of ``covid_outcome`` in the full data frame.

    Parameters:
        covid_outcome (str): Name of the column plotted on the y-axis.
        gov_action (str): Name of the column plotted on the x-axis; it is
            also used as the figure title.
        full_grouped_df (pandas.DataFrame): The data source. It must contain
            the ``covid_outcome``, ``gov_action`` and ``iso_code`` columns.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # ``series != None`` is evaluated element-wise and is True for NaN too,
    # so it never filtered anything; ``notna()`` is the real missing-value test.
    has_action = full_grouped_df[gov_action].notna()

    fig = px.scatter(
        full_grouped_df[has_action],
        x=gov_action,
        y=covid_outcome,
        color="iso_code",
        title=gov_action,
        height=600,
    )
    # The upper y-limit comes from the unfiltered frame, as in the original.
    fig.update_layout(yaxis=dict(range=[0, full_grouped_df[covid_outcome].max()]))
    fig.show()

# Plot each outcome of interest against the stringency index, one figure per
# outcome, in the same order as before.
for outcome in ('new_cases_per_million', 'new_deaths_per_million', 'positive_rate'):
    plot_gov_action(outcome, 'stringency_index', covid)

import numpy as np

# Log-transform the per-million counts. np.log1p(x) computes log(1 + x)
# directly: it still maps 0 to 0 but stays accurate for values close to zero,
# where forming x + 1 first loses precision.
covid['log_new_cases_per_million'] = np.log1p(covid['new_cases_per_million'])
covid['log_new_deaths_per_million'] = np.log1p(covid['new_deaths_per_million'])

# Pairwise plots of the log case rate and the candidate explanatory
# variables, with points coloured by continent.
pairplot_columns = [
    'log_new_cases_per_million',
    'stringency_index',
    'handwashing_facilities',
    'people_fully_vaccinated_per_hundred',
    'population_density',
    'median_age',
    'continent',
]
g = sns.pairplot(covid[pairplot_columns], hue='continent')

f, ax = plt.subplots(figsize=(10, 8))

# Correlate numeric columns only. The frame also holds string columns such as
# 'iso_code' and 'continent', and recent pandas versions raise when corr() is
# asked to handle them; select_dtypes works on old and new pandas alike.
corr = covid.select_dtypes(include='number').corr()

# Generate a mask for the upper triangle (the matrix is symmetric, so only
# the lower triangle is drawn)
mask = np.triu(np.ones_like(corr, dtype=bool))

# Generate a custom diverging colormap
cmap = sns.diverging_palette(230, 20, as_cmap=True)

# Plot heatmap
sns.heatmap(corr, mask=mask, cmap=cmap, square=True, ax=ax)

# OLS regression: log new cases per million on the stringency index and
# country-level covariates, with an intercept.
predictors = [
    'stringency_index',
    'handwashing_facilities',
    'people_fully_vaccinated_per_hundred',
    'median_age',
    'population_density',
]
y = covid['log_new_cases_per_million']
X = sm.add_constant(covid[predictors])

# missing='drop' tells statsmodels to discard observations containing NaN.
ols_model = sm.OLS(y, X.astype(float), missing='drop')
result = ols_model.fit()
print(result.summary2())

# OLS regression with a quadratic stringency term: the squared index is added
# as its own column so it can enter the model next to the linear term.
covid['sqr_stringency_index'] = covid['stringency_index'] ** 2

predictors = [
    'sqr_stringency_index',
    'stringency_index',
    'handwashing_facilities',
    'people_fully_vaccinated_per_hundred',
    'median_age',
    'population_density',
]
y = covid['log_new_cases_per_million']
X = sm.add_constant(covid[predictors])

# missing='drop' tells statsmodels to discard observations containing NaN.
ols_model = sm.OLS(y, X.astype(float), missing='drop')
result = ols_model.fit()
print(result.summary2())