Exploring the Relationship Between State Vaccination Rates & Political Affiliations

Jame Zou, 6/6/2021

Data preparation

import pandas as pd

# Load the three input tables; paths are relative to the working directory.
# The generator is consumed left to right, so the files are read in the
# order listed.
df_abbr, df_vacc, df_elec = (
    pd.read_csv(csv_path)
    for csv_path in (
        "US_Abbreviations.csv",
        "US_Vaccinations.csv",
        "2016_Elections.csv",
    )
)

# Convert the 'date' column to pandas datetimes, then display the resulting
# column dtypes (notebook cell output).
parsed_dates = pd.to_datetime(df_vacc['date'])
df_vacc['date'] = parsed_dates
df_vacc.dtypes

State and column example

state = 'California'
column = 'people_vaccinated_per_hundred'


def state_data(state, column):
    """Return one column of the vaccination table for one state, indexed by date.

    Reads the module-level ``df_vacc`` DataFrame.

    Args:
        state: Value matched exactly against ``df_vacc['location']``.
        column: Name of the ``df_vacc`` column to extract.

    Returns:
        A Series indexed by 'date'. Missing values are forward-filled from
        the preceding row; values before the first valid observation stay
        NaN because there is nothing to carry forward.
    """
    state_rows = df_vacc[df_vacc['location'] == state]
    # Series.ffill() is the supported spelling; fillna(method='ffill') has
    # been deprecated since pandas 2.1 and emits a FutureWarning.
    return state_rows.set_index('date')[column].ffill()


state_sr = state_data(state, column)
state_sr

import matplotlib.pyplot as plt


def state_plot(state, series, startDate):
    """Draw a line plot of a state's vaccination series and show it.

    Args:
        state: State name, used only in the plot title.
        series: Series whose index goes on the x axis and whose values go
            on the y axis.
        startDate: Accepted for the caller's convenience; the body does not
            read it.
    """
    plt.xlabel("Date")
    plt.ylabel("% of population vaccinated")
    plt.title(f"{state} Vaccinations")
    plt.xticks(rotation=45, ha='right')
    plt.plot(series.index, series.values)
    plt.show()


startDate = state_sr.index[0]
state_plot(state, state_sr, startDate)

Fitting a logistic model

import numpy as np
from scipy.optimize import curve_fit


def logistic_curve(x, β0, β1, β2):
    """Evaluate the three-parameter logistic function at ``x``.

    Computes ``β0 / (1 + exp(β1 * (β2 - x)))``. At ``x == β2`` the value is
    ``β0 / 2``.

    Args:
        x: Scalar or NumPy array of evaluation points.
        β0: Numerator of the curve (its limit as x grows when β1 > 0).
        β1: Multiplier inside the exponential (steepness).
        β2: Horizontal shift (midpoint of the curve).

    Returns:
        The curve value(s), same shape as ``x``.
    """
    return β0 / (1 + np.exp(β1 * (-x + β2)))


def logistic_betas(series):
    """Fit ``logistic_curve`` to a series by non-linear least squares.

    The x values are the positions 0..len(series)-1. The initial guess is
    the series maximum for β0, 1 for β1 and half the series length for β2.

    Args:
        series: pandas Series of observations.

    Returns:
        A ``(β0, β1, β2)`` tuple of fitted parameters, or three NaNs when
        ``curve_fit`` cannot produce a fit.
    """
    xs = np.arange(len(series))
    ys = series.values
    guessed_betas = [series.max(), 1, len(series) / 2]
    try:
        found_betas, _covariance = curve_fit(
            logistic_curve, xs, ys, p0=guessed_betas
        )
    except (RuntimeError, ValueError, TypeError):
        # RuntimeError: the optimizer did not converge.
        # ValueError: empty or non-finite input data.
        # TypeError: fewer observations than parameters.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        return np.nan, np.nan, np.nan
    β0, β1, β2 = found_betas
    return β0, β1, β2


β0, β1, β2 = logistic_betas(state_sr)
β0, β1, β2

def logistic_model(series, β0, β1, β2, startDate, state):
    """Plot the observed series together with the fitted logistic curve.

    Args:
        series: Observed values; plotted against positions 0..len-1.
        β0, β1, β2: Parameters passed to ``logistic_curve``.
        startDate: Accepted but not read; the x label uses the minimum of
            ``series.index`` instead.
        state: State name, used only in the plot title.
    """
    day_index = np.arange(len(series))
    plt.plot(day_index, series.values, label='data')
    plt.plot(day_index, logistic_curve(day_index, β0, β1, β2), label='model')
    plt.legend()
    plt.xlabel(f"Days since {series.index.min()}")
    plt.ylabel("% of population vaccinated")
    plt.title(f"{state} Vaccinations and Logistic Model")
    plt.show()


logistic_model(state_sr, β0, β1, β2, startDate, state)

Finding βs for all states

def us_betas(state):
    """Return the fitted logistic parameters for one state.

    Pulls the state's 'people_vaccinated_per_hundred' series via
    ``state_data`` and passes it to ``logistic_betas``.

    Args:
        state: State name as it appears in the vaccination table.

    Returns:
        The ``(β0, β1, β2)`` tuple produced by ``logistic_betas``.
    """
    vaccinated_pct = state_data(state, 'people_vaccinated_per_hundred')
    β0, β1, β2 = logistic_betas(vaccinated_pct)
    return β0, β1, β2


# One parameter tuple per row of the abbreviation table.
state_names = df_abbr['US STATE']
all_betas = state_names.apply(us_betas)
all_betas.head()

# Start from the election columns, then attach the state name and the three
# fitted logistic parameters. Both joins align on the row index.
# NOTE(review): this presumes df_elec and df_abbr list states in the same
# row order; confirm against the CSVs.
all_states = df_elec[['State', 'Clinton_Percent', 'Trump_Percent']].copy()
all_states.insert(0, 'State Name', df_abbr['US STATE'])

beta_labels = (
    'Maximum Vaccinations',
    'Rate of Vaccination',
    'Time of Maximum Increase',
)
for position, label in enumerate(beta_labels):
    # Bind `position` as a default so each lambda picks its own tuple slot.
    all_states[label] = all_betas.apply(lambda betas, i=position: betas[i])

all_states.head()

Visualization

import seaborn as sns

# Pairwise scatter/histogram grid over the columns of all_states.
pair_grid = sns.pairplot(all_states)
plt.tight_layout()
plt.show()

# Columns entering the correlation matrix; rows with any NaN are dropped.
model_columns = [
    'Clinton_Percent',
    'Trump_Percent',
    'Maximum Vaccinations',
    'Rate of Vaccination',
    'Time of Maximum Increase',
]
numeric_only = all_states[model_columns].dropna()

# rowvar=False: each column is a variable, each row an observation.
corr_coeff = np.corrcoef(numeric_only, rowvar=False)
sns.heatmap(corr_coeff, annot=True)

# Heatmap cells are one unit wide, so labels sit at the cell centres (k + 0.5).
tick_centres = np.arange(len(model_columns)) + 0.5
plt.xticks(tick_centres, numeric_only.columns, rotation=20, ha='right')
plt.yticks(tick_centres, numeric_only.columns, rotation=0)
plt.show()

Hypothesis test

import scipy.stats as stats

# Significance level for the two-sample test.
alpha = 0.05

clinton_share = all_states['Clinton_Percent']
trump_share = all_states['Trump_Percent']

# Split states by which candidate had the larger share; ties land in
# neither group. Rows without a fitted midpoint are dropped.
blue = all_states.loc[clinton_share > trump_share, 'Time of Maximum Increase'].dropna()
red = all_states.loc[trump_share > clinton_share, 'Time of Maximum Increase'].dropna()

# equal_var=False selects Welch's t-test (no equal-variance assumption).
t_stat, p_value = stats.ttest_ind(blue, red, equal_var=False)
reject_H0 = p_value < alpha
alpha, p_value, reject_H0