Part 2: Creating a COVID Time Series Dataset
# Installing covidcast
!pip install covidcast
from datetime import date
import covidcast
import pandas as pd
Getting the FIPS codes and names of each CA county
# Look up every FIPS code matching the regex "^06.*" (codes beginning with "06")
# and keep all matching names for each code rather than only the first.
# NOTE(review): the pattern matches any code starting with "06", so a
# non-county entry such as "06000" would be included if the lookup table
# contains one -- confirm this is intended before using the list as counties.
ca_counties = covidcast.fips_to_name("^06.*", ties_method="all")
ca_counties
# The first element of the result is a mapping keyed by FIPS code; iterating
# a mapping yields its keys, so this collects the codes in a list.
ca_fips_codes = list(ca_counties[0])
Data Frame of Daily Cases per County
# Daily new confirmed case counts ("confirmed_incidence_num") from the
# "indicator-combination" source, for 2021-03-02 through 2021-10-02,
# restricted to the geo values in ca_fips_codes.
new_cases_daily = covidcast.signal(
    "indicator-combination",
    "confirmed_incidence_num",
    start_day=date(2021, 3, 2),
    end_day=date(2021, 10, 2),
    geo_type="county",
    geo_values=ca_fips_codes,
)
# Keep only the join keys and the measurement, and name the measurement
# after its signal so it stays identifiable after merging.
new_cases_daily_final = new_cases_daily[["geo_value", "time_value", "value"]].rename(
    columns={"value": "confirmed_incidence_num"}
)
new_cases_daily_final
Estimated Percentage of Doctor Visits with Confirmed COVID
# Source: Change Healthcare ("chng"), signal "smoothed_adj_outpatient_covid",
# for 2021-03-02 through 2021-10-02, restricted to the geo values in
# ca_fips_codes.
smoothed_adj_outpatient_covid = covidcast.signal(
    "chng",
    "smoothed_adj_outpatient_covid",
    start_day=date(2021, 3, 2),
    end_day=date(2021, 10, 2),
    geo_type="county",
    geo_values=ca_fips_codes,
)
# Keep only the join keys and the measurement, and name the measurement
# after its signal so it stays identifiable after merging.
smoothed_adj_outpatient_covid_final = smoothed_adj_outpatient_covid[
    ["geo_value", "time_value", "value"]
].rename(columns={"value": "smoothed_adj_outpatient_covid"})
smoothed_adj_outpatient_covid_final
Percent of Doctor Visits Related to COVID-19 Symptoms
# Signal "smoothed_adj_outpatient_cli" from the "chng" source, for
# 2021-03-02 through 2021-10-02, restricted to the geo values in
# ca_fips_codes.
smoothed_adj_outpatient_cli = covidcast.signal(
    "chng",
    "smoothed_adj_outpatient_cli",
    start_day=date(2021, 3, 2),
    end_day=date(2021, 10, 2),
    geo_type="county",
    geo_values=ca_fips_codes,
)
# Keep only the join keys and the measurement, and name the measurement
# after its signal so it stays identifiable after merging.
smoothed_adj_outpatient_cli_final = smoothed_adj_outpatient_cli[
    ["geo_value", "time_value", "value"]
].rename(columns={"value": "smoothed_adj_outpatient_cli"})
smoothed_adj_outpatient_cli_final
Outpatient Doctor Visits Primarily about COVID Symptoms
# Signal "smoothed_cli" from the "doctor-visits" source, for 2021-03-02
# through 2021-10-02, restricted to the geo values in ca_fips_codes.
smoothed_cli = covidcast.signal(
    "doctor-visits",
    "smoothed_cli",
    start_day=date(2021, 3, 2),
    end_day=date(2021, 10, 2),
    geo_type="county",
    geo_values=ca_fips_codes,
)
# Keep only the join keys and the measurement, and name the measurement
# after its signal so it stays identifiable after merging.
smoothed_cli_final = smoothed_cli[["geo_value", "time_value", "value"]].rename(
    columns={"value": "smoothed_cli"}
)
smoothed_cli_final
Proportion of PCR Specimens Tested that have a Positive Result
# Signal "pcr_specimen_positivity_rate" from the "covid-act-now" source, for
# 2021-03-02 through 2021-10-02, restricted to the geo values in
# ca_fips_codes.
pcr_specimen_positivity_rate = covidcast.signal(
    "covid-act-now",
    "pcr_specimen_positivity_rate",
    start_day=date(2021, 3, 2),
    end_day=date(2021, 10, 2),
    geo_type="county",
    geo_values=ca_fips_codes,
)
# Keep only the join keys and the measurement, and name the measurement
# after its signal so it stays identifiable after merging.
pcr_specimen_positivity_rate_final = pcr_specimen_positivity_rate[
    ["geo_value", "time_value", "value"]
].rename(columns={"value": "pcr_specimen_positivity_rate"})
pcr_specimen_positivity_rate_final
Estimated Percentage of Hospital Admissions with COVID Associated Diagnosis
# Signal "smoothed_adj_covid19_from_claims" from the "hospital-admissions"
# source, for 2021-03-02 through 2021-10-02, restricted to the geo values in
# ca_fips_codes.
smoothed_adj_covid19_from_claims = covidcast.signal(
    "hospital-admissions",
    "smoothed_adj_covid19_from_claims",
    start_day=date(2021, 3, 2),
    end_day=date(2021, 10, 2),
    geo_type="county",
    geo_values=ca_fips_codes,
)
# Keep only the join keys and the measurement, and name the measurement
# after its signal so it stays identifiable after merging.
smoothed_adj_covid19_from_claims_final = smoothed_adj_covid19_from_claims[
    ["geo_value", "time_value", "value"]
].rename(columns={"value": "smoothed_adj_covid19_from_claims"})
smoothed_adj_covid19_from_claims_final
Merging the Dataframes
# Build one wide table keyed by (geo_value, time_value). The daily-cases frame
# is the left side of every join, so its rows define the result: indicator
# values with no matching case row are dropped, and case rows with no
# matching indicator get NaN for that indicator.
df3 = (
    new_cases_daily_final
    .merge(smoothed_adj_covid19_from_claims_final, on=["geo_value", "time_value"], how="left")
    .merge(pcr_specimen_positivity_rate_final, on=["geo_value", "time_value"], how="left")
    .merge(smoothed_cli_final, on=["geo_value", "time_value"], how="left")
    .merge(smoothed_adj_outpatient_cli_final, on=["geo_value", "time_value"], how="left")
    .merge(smoothed_adj_outpatient_covid_final, on=["geo_value", "time_value"], how="left")
)
# Reorder the columns: keys first, then the indicators, with the case count
# last.
df3 = df3[
    [
        "time_value",
        "geo_value",
        "smoothed_adj_covid19_from_claims",
        "pcr_specimen_positivity_rate",
        "smoothed_cli",
        "smoothed_adj_outpatient_cli",
        "smoothed_adj_outpatient_covid",
        "confirmed_incidence_num",
    ]
]
df3
# Fill missing values with the column average.
#
# Only numeric columns are imputed. The key columns (time_value, geo_value)
# are passed through untouched: taking the mean of a string column raises
# TypeError on current pandas, and an "average" date or FIPS code would be
# meaningless anyway. Each mean is taken over the whole column, i.e. across
# every county and date in df3. A column that is entirely NaN stays NaN.
Final_COVID_Dataset = df3.apply(
    lambda column: column.fillna(column.mean())
    if pd.api.types.is_numeric_dtype(column)
    else column,
    axis=0,
)
Final_COVID_Dataset
# The output path is kept exactly as before (no ".csv" extension) so existing
# readers of this file keep working. to_csv also writes the row index as an
# unnamed first column by default.
Final_COVID_Dataset.to_csv('Final_COVID_Dataset_Cool_Group')