import numpy as np
import urllib.request
import pandas as pd
from urllib.request import urlopen
import json
# Lower-case state / union-territory name -> two-letter code.  The codes are
# used further down as column names of the downloaded DataFrame.
# Entries tagged "# ut" are union territories.
state_code_temp = {
    'andaman and nicobar islands': 'an',  # ut
    'andhra pradesh': 'ap',
    'arunachal pradesh': 'ar',
    'assam': 'as',
    'bihar': 'br',
    'chandigarh': 'ch',  # ut
    'chhattisgarh': 'ct',
    'delhi': 'dl',  # ut
    'dadra & nagar haveli & daman & diu': 'dn',  # ut
    'goa': 'ga',
    'gujarat': 'gj',
    'himachal pradesh': 'hp',
    'haryana': 'hr',
    'jharkhand': 'jh',
    'jammu and kashmir': 'jk',
    'karnataka': 'ka',
    'kerala': 'kl',
    'ladakh': 'la',  # ut
    'lakshadweep': 'ld',  # ut
    'maharashtra': 'mh',
    'meghalaya': 'ml',  # was missing, so Meghalaya never appeared in any report
    'manipur': 'mn',
    'madhya pradesh': 'mp',
    'mizoram': 'mz',
    'nagaland': 'nl',
    'odisha': 'or',
    'punjab': 'pb',
    'puducherry': 'py',  # ut
    'rajasthan': 'rj',
    'sikkim': 'sk',
    'telangana': 'tg',
    'tamil nadu': 'tn',
    'tripura': 'tr',
    'uttar pradesh': 'up',
    'uttarakhand': 'ut',
    'west bengal': 'wb',
    'states unassigned': 'un',
}

# Names that the state-only rankings iterate over.  Every entry must also be
# a key of `state_code_temp`.
# NOTE(review): 'jammu and kashmir' has been a union territory since 2019 but
# is kept here exactly as the original list had it - confirm intent.
statesonly = [
    'andhra pradesh', 'arunachal pradesh', 'assam', 'bihar', 'chhattisgarh',
    'goa', 'gujarat', 'himachal pradesh', 'haryana', 'jharkhand',
    'jammu and kashmir', 'karnataka', 'kerala', 'maharashtra', 'meghalaya',
    'manipur', 'madhya pradesh', 'mizoram', 'nagaland', 'odisha', 'punjab',
    'rajasthan', 'sikkim', 'telangana', 'tamil nadu', 'tripura',
    'uttar pradesh', 'uttarakhand', 'west bengal',
]

# Two-way lookup: code -> name and name -> code live in the same dict.
state_code = {}
for name, code in state_code_temp.items():
    state_code[code] = name
    state_code[name] = code
# Download the feed and flatten the records found under its top-level
# "states_daily" key into a DataFrame (one row per record).
url = "https://data.covid19india.org/states_daily.json"
# The context manager closes the HTTP response even when reading or JSON
# decoding fails; the original left the connection open.
with urlopen(url) as urlfile:
    data = json.loads(urlfile.read())
df = pd.json_normalize(data, record_path=['states_daily'])
# print((df.columns))
#code for Q1 A
def get_total_sum(status):
    """Return the sum of the ``tt`` column over rows with the given status.

    Reads the module-level ``df``: keeps the rows whose ``status`` column
    equals *status*, converts their ``tt`` values to integers and adds them
    up.  ``tt`` is presumably the all-states total - confirm against the feed.
    """
    matching_rows = df[df['status'] == status]
    totals = matching_rows['tt'].astype(int)
    return totals.sum()
# Q1 A: one overall total per case status.
print("Q1 A")
q1a_lines = (
    ("Total Number of Confirmed Cases:", 'Confirmed'),
    ("Total Number of Recovered Cases:", 'Recovered'),
    ("Total Number of Deceased Cases:", 'Deceased'),
)
for caption, case_type in q1a_lines:
    print(caption, get_total_sum(case_type))
#Code for Q1 B
def get_statewise_total(stateid, status):
    """Return the sum of column *stateid* over rows with the given status.

    Reads the module-level ``df``: keeps the rows whose ``status`` column
    equals *status*, converts the *stateid* column of those rows to integers
    and adds them up.
    """
    matching_rows = df[df['status'] == status]
    per_row = matching_rows[stateid].astype(int)
    return per_row.sum()
# Q1 B: totals per status for a fixed handful of states, one group of lines
# per state followed by a blank line.
states = ['delhi', 'maharashtra', 'west bengal', 'tamil nadu']
status = ['Confirmed', 'Recovered', 'Deceased']
print("Q1 B")
for state_name in states:
    column = state_code[state_name]
    heading = state_name.upper()
    for case_type in status:
        count = get_statewise_total(column, case_type)
        print("Total Number of %s Cases in %s: %d" % (case_type, heading, count))
    print()
#code for Q1 C
# `states` is rebound to every known name; a later section iterates over it.
states = list(state_code_temp.keys())

# Q1 C: recovery rate (recovered / confirmed, in percent) for each entry of
# `statesonly`, keyed by upper-cased name.  Entries without a positive
# recovered AND confirmed total are skipped.
recovery_rates = {}
print("Q1 C")
for state_name in statesonly:
    column = state_code[state_name]
    recovered = get_statewise_total(column, 'Recovered')
    confirmed = get_statewise_total(column, 'Confirmed')
    if recovered <= 0 or confirmed <= 0:
        continue
    recovery_rates[state_name.upper()] = recovered / confirmed * 100

# Ascending by rate: the tail holds the highest rates, the head the lowest.
sort_states = sorted(recovery_rates.items(), key=lambda entry: entry[1])

print("Top 10 States with highest recovery rates\n")
for label, rate in reversed(sort_states[-10:]):
    print(label, "%.2f%%" % rate)

print("\nTop 10 States with lowest recovery rates\n")
for label, rate in sort_states[:10]:
    print(label, "%.2f%%" % rate)
#code for Q1 D,E
# Q1 D / E: per-status totals for every entry of `statesonly`, keyed by
# upper-cased name.  Totals that are not positive are left out.
confirmed_count = {}
recovered_count = {}
deceased_count = {}
tallies = (
    ('Recovered', recovered_count),
    ('Confirmed', confirmed_count),
    ('Deceased', deceased_count),
)
for state_name in statesonly:
    column = state_code[state_name]
    label = state_name.upper()
    for case_type, tally in tallies:
        total = get_statewise_total(column, case_type)
        if total > 0:
            tally[label] = total

# Each ranking is ascending by total: highest at the tail, lowest at the head.
sort_states_r = sorted(recovered_count.items(), key=lambda entry: entry[1])
sort_states_c = sorted(confirmed_count.items(), key=lambda entry: entry[1])
sort_states_d = sorted(deceased_count.items(), key=lambda entry: entry[1])

print("Q1 D")
highest_sections = (
    ("\nTop 3 highest affected states in terms of confirmed cases", sort_states_c),
    ("\nTop 3 highest affected states in terms of recovered cases", sort_states_r),
    ("\nTop 3 highest affected states in terms of deceased cases", sort_states_d),
)
for caption, ranking in highest_sections:
    print(caption)
    for label, total in reversed(ranking[-3:]):
        print(label, total)

print("\n\nQ1 E")
lowest_sections = (
    ("\nTop 3 lowest affected states in terms of confirmed cases", sort_states_c),
    ("\nTop 3 lowest affected states in terms of recovered cases", sort_states_r),
    ("\nTop 3 lowest affected states in terms of deceased cases", sort_states_d),
)
for caption, ranking in lowest_sections:
    print(caption)
    for label, total in ranking[:3]:
        print(label, total)
#code for Q1 F
# Q1 F: for every name in `states` and every status, report the date with the
# largest single-row value and that value.
# Dates of the "Confirmed" rows are reused as the index of every status
# series so that the peak can be reported as a date string.
dates = df.loc[df['status'] == "Confirmed"]['date'].tolist()
status = ['Confirmed', 'Recovered', 'Deceased']
# The row filter does not depend on the state, so it is done once per status
# instead of once per (state, status) pair.
rows_by_status = {s: df.loc[df['status'] == s] for s in status}
for i in states:
    code = state_code[i]
    i = i.upper()
    print(i)
    for s in status:
        cases = rows_by_status[s][code].astype(int)
        cases.index = dates
        d = cases.idxmax()
        # max() always yields a scalar; the former label lookup `cases[d]`
        # returns a Series when a date label repeats, which breaks "%d".
        amount = cases.max()
        print("Highest spike in %s cases on %s by %d" % (s, d, amount))
    print()
#code for Q1 G
# Q1 G: active cases per state on 15 Aug 2021.
# active = confirmed - recovered - deceased, each accumulated over every row
# dated on or before the reporting date.  The rows are selected by date; the
# previous code read the second-to-last row of the running totals, which is
# 15 Aug 2021 only if the feed happens to end on 16 Aug 2021.
report_date = pd.Timestamp(2021, 8, 15)
# Same '%d-%b-%y' layout that the plotting sections of this script parse.
row_dates = pd.to_datetime(df['date'], format='%d-%b-%y')
if not (row_dates == report_date).any():
    # Fail loudly rather than print numbers for some other day.
    raise ValueError("states_daily feed has no rows dated 15 Aug 2021")
upto_report = df.loc[row_dates <= report_date]

print("Active cases on 15 Aug 2021")
for i in state_code_temp:
    code = state_code[i]
    i = i.upper()
    totals = {}
    for s in ('Confirmed', 'Recovered', 'Deceased'):
        totals[s] = upto_report.loc[upto_report['status'] == s, code].astype(int).sum()
    active = totals['Confirmed'] - (totals['Recovered'] + totals['Deceased'])
    print("%s : %d" % (i, active))
#Code for Q2 A
import matplotlib.pyplot as plt
import datetime as datetime
import matplotlib.dates as mdates
from matplotlib.pyplot import figure

# Q2 A: per-row `tt` values over time - confirmed and recovered on the left
# panel, deceased on the right panel.
dateob = datetime.datetime
cases = df[['date', 'status', 'tt']]
dates_temp = cases.loc[cases['status'] == 'Confirmed']['date'].tolist()
# The x axis uses the dates of the "Confirmed" rows for all three series.
dates = [dateob.strptime(strdate, '%d-%b-%y') for strdate in dates_temp]
confirmed_cases = cases.loc[cases['status'] == 'Confirmed']['tt'].astype(int)
recovered_cases = cases.loc[cases['status'] == 'Recovered']['tt'].astype(int)
deceased_cases = cases.loc[cases['status'] == 'Deceased']['tt'].astype(int)
start, end = dates[0], dates[-1]

figure(figsize=(14, 4), dpi=150)

trend_axes = plt.subplot(1, 2, 1)
trend_axes.xaxis.set_major_formatter(mdates.DateFormatter('%b%y'))
trend_axes.xaxis.set_major_locator(mdates.DayLocator(interval=60))
trend_axes.set_xbound(start, end)
trend_axes.plot(dates, confirmed_cases, label='Confirmed Cases')
trend_axes.plot(dates, recovered_cases, label='Recovered Cases')
trend_axes.legend(loc="upper left")

deceased_axes = plt.subplot(1, 2, 2)
deceased_axes.xaxis.set_major_formatter(mdates.DateFormatter('%b%y'))
deceased_axes.xaxis.set_major_locator(mdates.DayLocator(interval=60))
deceased_axes.plot(dates, deceased_cases, label='Deceased Cases', color='green')
deceased_axes.legend(loc="upper left")

plt.show()
#Code for Q2 B
# Q2 B: same two-panel layout as the previous figure, drawn from the `dl`
# column (announced as "DELHI" on stdout).
dateob = datetime.datetime
cases = df[['date', 'status', 'dl']]
dates_temp = cases.loc[cases['status'] == 'Confirmed']['date'].tolist()
print("DELHI")
dates = [dateob.strptime(strdate, '%d-%b-%y') for strdate in dates_temp]
confirmed_cases, recovered_cases, deceased_cases = (
    cases.loc[cases['status'] == case_type]['dl'].astype(int)
    for case_type in ('Confirmed', 'Recovered', 'Deceased')
)
start, end = dates[0], dates[-1]

figure(figsize=(14, 4), dpi=150)

trend_axes = plt.subplot(1, 2, 1)
trend_axes.xaxis.set_major_formatter(mdates.DateFormatter('%b%y'))
trend_axes.xaxis.set_major_locator(mdates.DayLocator(interval=60))
trend_axes.set_xbound(start, end)
trend_axes.plot(dates, confirmed_cases, label='Confirmed Cases')
trend_axes.plot(dates, recovered_cases, label='Recovered Cases')
trend_axes.legend(loc="upper left")

deceased_axes = plt.subplot(1, 2, 2)
deceased_axes.xaxis.set_major_formatter(mdates.DateFormatter('%b%y'))
deceased_axes.xaxis.set_major_locator(mdates.DayLocator(interval=60))
deceased_axes.plot(dates, deceased_cases, label='Deceased Cases', color='green')
deceased_axes.legend(loc="upper left")

plt.show()
import matplotlib.ticker as ticker

# Active-cases curve from the `tt` column:
# running confirmed total minus (running recovered + running deceased).
dateob = datetime.datetime
cases = df[['date', 'status', 'tt']]
dates_temp = cases.loc[cases['status'] == 'Confirmed']['date'].tolist()
dates = [dateob.strptime(strdate, '%d-%b-%y') for strdate in dates_temp]

confirmed_cases = cases.loc[cases['status'] == 'Confirmed']['tt'].astype(int)
recovered_cases = cases.loc[cases['status'] == 'Recovered']['tt'].astype(int)
deceased_cases = cases.loc[cases['status'] == 'Deceased']['tt'].astype(int)

# `.values` drops the original row labels so the three frames line up by
# position (0, 1, 2, ...) instead of by their differing source indices.
df_cc = pd.DataFrame({'tt': confirmed_cases.values})
df_rc = pd.DataFrame({'tt': recovered_cases.values})
df_dc = pd.DataFrame({'tt': deceased_cases.values})
for frame in (df_cc, df_rc, df_dc):
    frame['ctt'] = frame['tt'].cumsum()
df_cc['sctt'] = df_rc['ctt'] + df_dc['ctt']
df_cc['att'] = df_cc['ctt'] - df_cc['sctt']
active_cases = df_cc['att']

start, end = dates[0], dates[-1]
figure(figsize=(8, 4), dpi=100)
active_axes = plt.gca()
active_axes.xaxis.set_major_formatter(mdates.DateFormatter('%b%y'))
active_axes.xaxis.set_major_locator(mdates.DayLocator(interval=60))
active_axes.set_xbound(start, end)
active_axes.yaxis.set_major_formatter(ticker.EngFormatter())
active_axes.plot(dates, df_cc['att'], label='Active Cases')
active_axes.legend(loc="upper left")
plt.show()
#Code for Q2 D
# Q2 D: bar chart of active cases for three states over a fixed window.
# The window is defined by the dates of the "Confirmed" records found among
# rows 50-199 of the feed.
window_rows = df.iloc[50:200]
window_dates = window_rows.loc[window_rows['status'] == 'Confirmed', 'date']
# Select the window by date rather than by row position.  A positional slice
# that does not begin on a date boundary hands each status a different set
# of days, so the per-status series would be subtracted out of step.
df_new = df.loc[df['date'].isin(window_dates)]

states = ['delhi', 'tamil nadu', 'gujarat']
dateob = datetime.datetime
dates = [dateob.strptime(strdate, '%d-%b-%y') for strdate in window_dates.tolist()]
print("Start Date:", dates[0])
print("End Date:", dates[-1])
start, end = dates[0], dates[-1]

for i in states:
    code = state_code[i]
    i = i.upper()
    confirmed_cases = df_new.loc[df_new['status'] == 'Confirmed'][code].astype(int)
    recovered_cases = df_new.loc[df_new['status'] == 'Recovered'][code].astype(int)
    deceased_cases = df_new.loc[df_new['status'] == 'Deceased'][code].astype(int)

    # `.values` drops the source row labels so the frames align by position.
    df_cc = pd.DataFrame({'tt': confirmed_cases.values})
    df_rc = pd.DataFrame({'tt': recovered_cases.values})
    df_dc = pd.DataFrame({'tt': deceased_cases.values})
    # NOTE(review): the running totals start from zero on the first day of
    # the window, so the plotted value is the net change in active cases
    # since the window start, not the absolute count - confirm intent.
    for frame in (df_cc, df_rc, df_dc):
        frame['ctt'] = frame['tt'].cumsum()
    df_cc['sctt'] = df_rc['ctt'] + df_dc['ctt']
    df_cc['att'] = df_cc['ctt'] - df_cc['sctt']
    active_cases = df_cc['att'].tolist()

    # One figure per state.
    figure(figsize=(8, 4), dpi=100)
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%d %b %y'))
    plt.gca().xaxis.set_major_locator(mdates.DayLocator(interval=10))
    plt.gca().set_xbound(start, end)
    plt.gca().yaxis.set_major_formatter(ticker.EngFormatter())
    plt.xticks(rotation=-30)
    plt.bar(dates, active_cases, label='Active Cases : %s' % (i))
    plt.legend(loc="upper left")
    plt.show()