#Importing libraries and data files
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the full chronic-disease-indicator export; it is only displayed here
# and is not referenced again in the visible part of the script.
data = pd.read_csv(filepath_or_buffer="U.S._Chronic_Disease_Indicators__CDI_.csv")
data
# Load the asthma table that the mortality analysis below is built from,
# and preview its first rows.
asthma = pd.read_csv(filepath_or_buffer="asthma.csv")
asthma.head()
# Keep only the rows whose Question is the asthma mortality indicator.
is_mortality_row = asthma["Question"] == "Asthma mortality rate"
asthma_mortality = asthma.loc[is_mortality_row]

# Pivot to one row per (location, start year) and one column per
# DataValueType; cells hold the mean of DataValue (pivot_table's default
# aggregation).
cleaned_asthma = asthma_mortality.pivot_table(
    values="DataValue",
    index=["LocationDesc", "YearStart"],
    columns="DataValueType",
)
cleaned_asthma

# Drop every row for the 'United States' location (presumably the national
# aggregate -- confirm) so only the remaining locations are analysed.
cleaned_asthma = cleaned_asthma.drop(labels='United States')

# Turn both index levels back into ordinary columns, one level at a time.
# LocationDesc is moved out first and YearStart second, so YearStart ends up
# as the left-most column.
cleaned_asthma = cleaned_asthma.reset_index(level="LocationDesc")
cleaned_asthma = cleaned_asthma.reset_index(level="YearStart")
cleaned_asthma
# Bar chart of the per-location mean of the 'Number' column, averaged over
# every year present in cleaned_asthma.
# NOTE(review): the plotted column is 'Number' while the axis label and title
# say "Mortality Rate" -- confirm which DataValueType is actually intended.
average = cleaned_asthma.groupby('LocationDesc')['Number'].mean().reset_index()

plt.figure(figsize=(15, 8))
sns.barplot(data=average, x='LocationDesc', y='Number')
plt.title('Average Mortality Rate of Asthma 2010-2017 Across States')
plt.xlabel('States')
plt.ylabel('Average Mortality Rate 2010-2017')
# Location names are long; rotate them so they do not overlap.
plt.xticks(rotation=90)
# Lookup table used later to translate state codes into state names.
state_fips = pd.read_csv(filepath_or_buffer='us-state-ansi-fips.csv')
# Keep only the state name and the ' st' code column. The leading space in
# ' st' is part of the column name as loaded, so it must be spelled exactly
# like this wherever the column is referenced.
cleaned_state_fips = state_fips.loc[:, ['stname', ' st']]
cleaned_state_fips.head()
# Load the PM2.5 table and preview it.
pm2 = pd.read_csv(filepath_or_buffer="pm2.csv")
pm2.head()
# Load the ozone table and preview it.
oz = pd.read_csv(filepath_or_buffer="oz.csv")
oz.head()
# Reduce each table to the state code plus the single prediction column
# that the later per-state averages are computed from.
oz_cleaned = oz.loc[:, ['statefips', 'ds_o3_pred']]
pm2_cleaned = pm2.loc[:, ['statefips', 'ds_pm_pred']]
pm2_cleaned.head()
# Mean of ds_pm_pred for each state code; the result is indexed by statefips.
average_pm_by_statefips = pm2_cleaned.groupby(by='statefips').mean()
average_pm_by_statefips.head()

# Histogram of the per-state means. A new figure is opened explicitly because
# plt.hist draws on the *current* figure: without this, running the script
# outside per-cell notebook execution would paint the histogram on top of the
# previously created bar chart and overwrite its title and axis labels.
plt.figure()
plt.hist(average_pm_by_statefips['ds_pm_pred'], bins=20)
plt.title("average PM2.5 concentration in μg/m3 2011-2014")
plt.xlabel('PM2.5 concentration in μg/m3')
plt.ylabel('number of states')

# Expose the state code (currently the index) as a regular 'state_fips'
# column so it can serve as a merge key, then order rows from the highest
# to the lowest mean.
average_pm_by_statefips['state_fips'] = average_pm_by_statefips.index
average_pm_by_statefips = average_pm_by_statefips.sort_values(by='ds_pm_pred', ascending=False)
average_pm_by_statefips.head()

# Attach state names. merge defaults to an inner join, so any state code
# that has no match in the lookup table is dropped from the result.
average_pm_by_statename = average_pm_by_statefips.merge(cleaned_state_fips, left_on='state_fips', right_on=' st')
average_pm_by_statename

# Unweighted mean of the per-state means; drawn as a reference line below.
average_pm_for_all = average_pm_by_statename['ds_pm_pred'].mean()
average_pm_for_all

# Bar chart of the per-state means with the overall mean as a horizontal line.
plt.figure(figsize=(15, 8))
graph = sns.barplot(x='stname', y='ds_pm_pred', data=average_pm_by_statename)
graph.axhline(average_pm_for_all)
# State names are long; rotate them so they do not overlap.
plt.xticks(rotation=90)
plt.xlabel('States')
plt.ylabel('Average PM2.5 2011-2014')
plt.title('Average PM2.5 concentration in μg/m3 2011-2014 Across States')
# Mean of ds_o3_pred for each state code; the result is indexed by statefips.
average_oz_by_statefips = oz_cleaned.groupby(by='statefips').mean()
average_oz_by_statefips.head()

# Histogram of the per-state means. A new figure is opened explicitly because
# plt.hist draws on the *current* figure: without this, running the script
# outside per-cell notebook execution would paint the histogram on top of the
# previously created bar chart and overwrite its title and axis labels.
plt.figure()
plt.hist(average_oz_by_statefips['ds_o3_pred'], bins=20)
plt.title("average OZ in ppb 2011-2014")
plt.xlabel('OZ concentration in ppb')
plt.ylabel('number of states')

# Expose the state code (currently the index) as a regular 'state_fips'
# column so it can serve as a merge key, then order rows from the highest
# to the lowest mean.
average_oz_by_statefips['state_fips'] = average_oz_by_statefips.index
average_oz_by_statefips = average_oz_by_statefips.sort_values(by='ds_o3_pred', ascending=False)
average_oz_by_statefips.head()

# Attach state names. merge defaults to an inner join, so any state code
# that has no match in the lookup table is dropped from the result.
average_oz_by_statename = average_oz_by_statefips.merge(cleaned_state_fips, left_on='state_fips', right_on=' st')
average_oz_by_statename

# Unweighted mean of the per-state means; drawn as a reference line below.
average_oz_for_all = average_oz_by_statename['ds_o3_pred'].mean()
average_oz_for_all

# Bar chart of the per-state means with the overall mean as a horizontal line.
plt.figure(figsize=(15, 8))
graph = sns.barplot(x='stname', y='ds_o3_pred', data=average_oz_by_statename)
graph.axhline(average_oz_for_all)
# State names are long; rotate them so they do not overlap.
plt.xticks(rotation=90)
plt.xlabel('States')
plt.ylabel('Average OZ Concentration 2011-2014')
plt.title('Average OZ concentration in ppb (parts per billion) 2011-2014 Across States')