期末報告
Covid-19 疫苗接種數據的視覺化
1. 獲取資料
!pip install plotly_express #引入plotly圖形庫
import plotly.express as px
import pandas as pd
import os
print("資料擷取,AI 運算中.....")
import time

# Download the WHO vaccination table and save a local copy, timing both steps.
tStart = time.time()  # start of the timed section
data = pd.read_csv('https://covid19.who.int/who-data/vaccination-data.csv')
data.to_csv('WHO-vaccine.csv')
print("完成", end='')
tEnd = time.time()  # end of the timed section

# Row count (no decimals) and elapsed seconds (two decimals), printed on the
# same line as the "完成" marker above.
print('{:.0f} 筆資料,{:.2f}秒'.format(len(data), tEnd - tStart))

# Number of distinct values in the COUNTRY and WHO_REGION columns.
print(f"共有多少國家納入WHO統計:{data['COUNTRY'].nunique()}")
print(f"共有多少地區納入WHO統計:{data['WHO_REGION'].nunique()}")
data.head()
WHO的資料已經非常乾淨,因此我們可以不用再進行數據清洗,可以開始進行視覺化和分析。
2. 資料視覺化
2-1. 疫苗施打總劑數的地區分佈
import plotly.express as px
# Pie chart of TOTAL_VACCINATIONS with one slice per WHO_REGION value.
fig_1 = px.pie(
    data,
    title="疫苗施打總劑數的地區分佈",
    values='TOTAL_VACCINATIONS',
    names='WHO_REGION',
)
fig_1.show()
2-2. 全球疫苗施打總劑數排名
# Keep the 15 rows with the highest TOTAL_VACCINATIONS, largest first.
data_sort = data.sort_values('TOTAL_VACCINATIONS', ascending=False).iloc[:15]

# One bar per country, coloured by WHO region and labelled with the dose count.
fig = px.bar(
    data_sort,
    x='COUNTRY',
    y='TOTAL_VACCINATIONS',
    color='WHO_REGION',
    text='TOTAL_VACCINATIONS',
    title='全球疫苗施打總劑數排名',
)
# '.2s' is the d3 SI-prefix number format with two significant digits.
fig.update_traces(textposition='outside', texttemplate='%{text:.2s}')
fig.update_layout(
    xaxis=dict(categoryorder='total descending'),  # tallest bar first
    uniformtext_mode='hide',
    uniformtext_minsize=8,
)
fig.show()
2-3. 全球疫苗施打總劑數排名,和其相對應所施打的疫苗種類數量
# Same rows as data_sort (built earlier in the file), but the bars are
# coloured and labelled by NUMBER_VACCINES_TYPES_USED instead of by region.
fig = px.bar(
    data_sort,
    x='COUNTRY',
    y='TOTAL_VACCINATIONS',
    color='NUMBER_VACCINES_TYPES_USED',
    text='NUMBER_VACCINES_TYPES_USED',
    title='全球疫苗施打總劑數排名,和其相對應所施打的疫苗種類數量',
)
fig.update_traces(textposition='outside', texttemplate='%{text:s}')
fig.update_layout(
    xaxis=dict(categoryorder='total descending'),  # tallest bar first
    uniformtext_mode='hide',
    uniformtext_minsize=8,
)
fig.show()
從圖2-2, 2-3中可以看到,中國大陸作為總疫苗施打數最多的國家,其施打的疫苗種類(品牌)也最多,多達8種。
相對地,美國的總施打劑數為全球第三名,但施打的疫苗種類較少,僅有三種。
2-4. 各國完全接種疫苗的累計人數排名
# Keep the 15 rows with the highest PERSONS_FULLY_VACCINATED, largest first.
data_fully_vac = data.sort_values('PERSONS_FULLY_VACCINATED', ascending=False).iloc[:15]

# One bar per country, coloured by WHO region and labelled with the head count.
fig = px.bar(
    data_fully_vac,
    x='COUNTRY',
    y='PERSONS_FULLY_VACCINATED',
    color='WHO_REGION',
    text='PERSONS_FULLY_VACCINATED',
    title='各國完全接種疫苗的累計人數',
)
# '.2s' is the d3 SI-prefix number format with two significant digits.
fig.update_traces(textposition='outside', texttemplate='%{text:.2s}')
fig.update_layout(
    xaxis=dict(categoryorder='total descending'),  # tallest bar first
    uniformtext_mode='hide',
    uniformtext_minsize=8,
)
fig.show()
2-5. 判斷全球平均接種的疫苗種類數量,和對應的地區
# Scatter with one point per row of `data`: vaccine-type count on the x axis,
# country on the y axis, point colour by WHO region.
fig = px.scatter(
    data,
    y='COUNTRY',
    x='NUMBER_VACCINES_TYPES_USED',
    color='WHO_REGION',
)
fig.show()
從圖2-5可以觀察到,多數國家接種的疫苗種類數量為4種,較少國家施打8種以上的疫苗。
此外,接種疫苗種類數量較多的國家,多分佈在AMRO(美洲)和 WPRO(西太平洋)區域,而AFRO(非洲)和 EURO(歐洲)區域國家的疫苗種類數量則較少。
Covid-19 確診和死亡數據的視覺化
1. 獲取資料
import plotly.express as px
import pandas as pd
import os
print("資料擷取,AI 運算中.....")
import time

# Download the WHO global case/death table, repair its column labels, save a
# local copy, and report the row count together with the elapsed time.
tStart = time.time()  # start of the timed section
data_death = pd.read_csv('https://covid19.who.int/WHO-COVID-19-global-table-data.csv')

# The raw CSV is parsed with its first column as the index, so the columns
# cannot be used as-is and need the preprocessing below.
# reset_index() moves that index back into a regular column named "index".
data_death.reset_index(inplace=True)

# Every label is shifted by one position: each column is given the label that
# the header had assigned to the column on its right.
# The final target name ends with "." — presumably so it stays distinct from
# the original column of (almost) the same name, which is dropped right after.
# NOTE(review): rename() silently skips keys that are not existing column
# labels, so confirm these spellings against the live file header.
data_death.rename(columns={"index":"Country",
"Name":'WHO Region',
"WHO Region":"Cases - cumulative total",
"Cases - cumulative total":"Cases - cumulative total per 1000000 population",
"Cases - cumulative total per 1000000 population":"Cases - newly reported in last 7 days",
"Cases - newly reported in last 7 days": "Cases - newly reported in last 7 days per 1000000 population",
"Cases - newly reported in last 7 days per 1000000 population":"Cases - newly reported in last 24 hours",
"Cases - newly reported in last 24 hours": "Deaths - cumulative total",
"Deaths - cumulative total": "Deaths - cumulative total per 100000 population",
"Deaths - cumulative total per 100000 population": "Deaths - newly reported in last 7 days",
"Deaths - newly reported in last 7 days":"Deaths - newly reported in last 7 days per 100000 population",
"Deaths - newly reported in last 7 days per 100000 population": 'Deaths - newly reported in last 24 hours.'
},inplace=True)

# Discard the last column. The `columns=` keyword is used because passing the
# axis positionally (`drop(label, 1)`) raises TypeError on pandas >= 2.0.
data_death = data_death.drop(columns='Deaths - newly reported in last 24 hours')
data_death.to_csv("WHO-death.csv")
print("完成", end='')
tEnd = time.time()  # end of the timed section

# Row count (no decimals) and elapsed seconds (two decimals).
print(f'{len(data_death):.0f} 筆資料,{(tEnd - tStart):.2f}秒')
data_death.head()
# Swap the long country name for the short one; every cell equal to the long
# form is replaced. Presumably this matches the spelling used by the
# country-code table loaded later — confirm against that file.
data_death = data_death.replace({'United States of America': 'United States'})
data_death.head()
# Load the country-name / ISO alpha-3 lookup table from a local file and save
# a copy. Only the two columns listed in `usecols` are read.
# The load is timed on its own so the seconds printed below describe this
# step rather than an earlier download.
tStart = time.time()
country_code = pd.read_csv('countries_codes_and_coordinates.csv',usecols=['Country','Alpha-3 code'])
country_code.to_csv("country code.csv")
tEnd = time.time()

# Row count (no decimals) and elapsed seconds (two decimals).
print(f'{len(country_code):.0f} 筆資料,{(tEnd - tStart):.2f}秒')
country_code.head()
2. 將數據資料和地圖代碼資料合併
將 data_death 和 country_code 兩張表依據 Country 欄位合併
# Join the case/death table with the country-code table on the Country column.
# pd.merge defaults to an inner join, so only countries present in both tables
# are kept.
# The merge is timed on its own so the seconds printed below describe this
# step rather than an earlier download.
tStart = time.time()
merge_file = pd.merge(data_death,country_code,on='Country')
tEnd = time.time()

# Row count (no decimals) and elapsed seconds (two decimals).
print(f'{len(merge_file):.0f} 筆資料,{(tEnd - tStart):.2f}秒')
merge_file.head()
3. 數據視覺化
3-1. 各國新冠死亡總人數地圖
# World map coloured by cumulative deaths; countries are matched by name.
fig = px.choropleth(
    merge_file,
    locations='Country',
    locationmode='country names',
    color='Deaths - cumulative total',
    # animation_frame="Cases - cumulative total" is left disabled.
    color_continuous_scale=px.colors.sequential.solar_r,  # alternative: colors.diverging.RdBu
    projection="orthographic",  # how the globe is rendered
    title="各國新冠疫情死亡人數總計",
)
# Fixed height of 600 with all four margins set to zero.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0), height=600)
fig.show()
3-2. 各國新冠疫情感染人數——每百萬人口
# Keep the 15 rows with the highest value in the per-population cumulative
# case column, largest first.
data_death_sort_100 = data_death.sort_values(
    'Cases - cumulative total per 1000000 population', ascending=False
).iloc[:15]

# One bar per country, coloured by WHO region and labelled with the rate.
fig = px.bar(
    data_death_sort_100,
    x='Country',
    y="Cases - cumulative total per 1000000 population",
    text="Cases - cumulative total per 1000000 population",
    color='WHO Region',
    title="各國新冠疫情感染人數——每百萬人口",
)
# '.2s' is the d3 SI-prefix number format with two significant digits.
fig.update_traces(textposition='outside', texttemplate='%{text:.2s}')
fig.update_layout(
    xaxis=dict(categoryorder='total descending'),  # tallest bar first
    uniformtext_mode='hide',
    uniformtext_minsize=8,
)
fig.show()