import pandas as pd
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
# Load the OWID "Air pollution by city - Fouquet and DPCC (2011)" CSV
# directly from the owid-datasets GitHub repository.
pollution_csv_url = 'https://raw.githubusercontent.com/owid/owid-datasets/master/datasets/Air%20pollution%20by%20city%20-%20Fouquet%20and%20DPCC%20(2011)/Air%20pollution%20by%20city%20-%20Fouquet%20and%20DPCC%20(2011).csv'
pollution = pd.read_csv(pollution_csv_url)
# Quick look at the raw frame: row count and column labels.
# (Bare expressions only display a value in an interactive/notebook session.)
len(pollution)
pollution.columns
# Rename the verbose source column names to short labels used from here on:
# 'SPM', 'Smoke' and 'City' (originally 'Entity').
pollution = pollution.rename(
    columns={
        'Suspended Particulate Matter (SPM) (Fouquet and DPCC (2011))': 'SPM',
        'Smoke (Fouquet and DPCC (2011))': 'Smoke',
        'Entity': 'City',
    }
)
# Inspect the renamed frame. These bare expressions only display a value in an
# interactive/notebook session; in a plain script they are evaluated and
# discarded.
pollution.dtypes
pollution.City.unique()
pollution.Year.min(), pollution.Year.max()
# Parse the 'Year' column into datetimes using the '%Y' format. The values are
# cast to str first (as the previous per-row strptime call did) and parsed in a
# single vectorised pd.to_datetime call instead of a Python-level apply/lambda.
pollution['Year'] = pd.to_datetime(pollution['Year'].astype(str), format='%Y')
# Re-check dtypes after the conversion, then summarise missing values
# (fraction and count per column) and basic descriptive statistics.
pollution.dtypes
pollution.isnull().mean()
pollution.isnull().sum()
pollution.describe()
# Histogram of the 'SPM' column on a 15x8 figure, with automatic bin selection.
hist_options = {'bins': 'auto', 'alpha': 0.7, 'rwidth': 0.85}
plt.figure(figsize=(15, 8))
n, bins, patches = plt.hist(x=pollution['SPM'], **hist_options)
# Axis decoration: x label, horizontal grid lines only, then y label.
plt.xlabel('SPM')
plt.grid(axis='y', alpha=0.75)
# Trailing semicolon kept from the original (presumably to suppress the echoed
# return value in a notebook cell).
plt.ylabel('Frequency');