library(LalRUtils)
libreq(data.table, ggplot2, janitor, anytime, stringr, dplyr, lubridate, zoo, patchwork, ggrepel, hrbrthemes)
# Global plotting defaults: theme returned by lal_plot_theme(), the repr plot
# size, and the default palettes ggplot2 picks for discrete/continuous scales.
theme_set(lal_plot_theme())
options(
  repr.plot.width = 20,
  repr.plot.height = 12,
  # Discrete fill/colour scales default to the 9-colour Brewer "Set1" palette.
  ggplot2.discrete.fill = RColorBrewer::brewer.pal(9, "Set1"),
  ggplot2.discrete.colour = RColorBrewer::brewer.pal(9, "Set1"),
  # Continuous fill/colour scales default to viridis.
  ggplot2.continuous.fill = "viridis",
  ggplot2.continuous.colour = "viridis"
)
# Coerce the arguments to character and pass the result to display_html().
chr = function(...) {
  display_html(as.character(...))
}
# JHU CSSE global time series (confirmed cases and deaths), one column per date.
jhu_cases_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
jhu_deaths_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
# Fetch fresh copies into the working directory. download.file() overwrites any
# stale local file, so no separate delete step is needed, and it raises an
# error if the download fails instead of leaving fread() with a missing file.
download.file(jhu_cases_url,
              destfile = "time_series_covid19_confirmed_global.csv",
              quiet = TRUE)
download.file(jhu_deaths_url,
              destfile = "time_series_covid19_deaths_global.csv",
              quiet = TRUE)
cases = fread("time_series_covid19_confirmed_global.csv")
deaths = fread("time_series_covid19_deaths_global.csv")
# Rename the columns of `df` in place, replacing the first "/" in each column
# name with "_". Returns whatever setnames() returns.
name_clean = function(df) {
  cleaned = str_replace(colnames(df), "/", "_")
  setnames(df, cleaned)
}
# Reshape both wide tables (one column per date) to long format, then clean
# the "/" out of the identifier column names.
cases_long = cases %>%
  melt(id.vars = c("Province/State", "Country/Region", "Lat", "Long"),
       variable.name = "date", value.name = "cases") %>%
  name_clean()
deaths_long = deaths %>%
  melt(id.vars = c("Province/State", "Country/Region", "Lat", "Long"),
       variable.name = "date", value.name = "deaths") %>%
  name_clean()
# cases_long already carries the coordinates; drop them from deaths_long so the
# merge does not produce duplicated Lat/Long columns.
deaths_long[, c("Lat", "Long") := NULL]
# Left join: keep every case row and attach the death count where one exists.
df = merge(cases_long, deaths_long,
           by = c("Province_State", "Country_Region", "date"), all.x = TRUE)
# Parse the melted column headers (month/day/year) into dates.
df[, date := mdy(date)]
df[, day := weekdays(date)]
# Treat missing counts as zero. Only the two count columns are filled, so
# missing values in other columns (e.g. Lat/Long) are not overwritten with 0.
df[is.na(cases), cases := 0L]
df[is.na(deaths), deaths := 0L]
df %>% glimpse
# Collapse provinces/states: one row per country and date.
country_ts = df[, .(cases = sum(cases), deaths = sum(deaths)),
                by = .(Country_Region, date)]
# Ten countries with the most cumulative cases on the latest date (printed).
(t10countries = country_ts[date == max(date)][order(-cases)][1:10])
# Everything outside the top ten is pooled into a single " Other" series.
other_series = country_ts[!(Country_Region %in% t10countries$Country_Region)]
other_series[, Country_Region := " Other"]
other_series = other_series[, .(cases = sum(cases), deaths = sum(deaths)),
                            by = .(Country_Region, date)]
t10_subset = rbind(
  country_ts[Country_Region %in% t10countries$Country_Region],
  other_series
)
setorder(t10_subset, Country_Region, date)
# Daily increments: difference from the previous row within each country.
# The first row of every country has no predecessor, so it stays NA.
t10_subset[, new_cases := cases - shift(cases), by = .(Country_Region)]
t10_subset[, new_deaths := deaths - shift(deaths), by = .(Country_Region)]
# Case fatality rate: cumulative deaths over cumulative cases
# (NaN while a country still has 0 cases).
t10_subset[, cfr := deaths / cases]
smoothvars = c('cases', 'new_cases', 'deaths', 'new_deaths', 'cfr')
# Centred 7-row rolling means per country, stored as rm7_<variable>.
# data.table::frollmean() is used because zoo::rollmean() does not handle NA
# inputs, and new_cases / new_deaths always begin with an NA. With frollmean()
# only the windows that actually contain a missing value come out missing; the
# first and last three rows of each country are NA because the window is
# incomplete there.
t10_subset[, paste0("rm7_", smoothvars) := frollmean(.SD, n = 7, align = "center"),
           by = .(Country_Region), .SDcols = smoothvars]
# Text label only on the most recent date, so each series is labelled once.
t10_subset[, label := ifelse(date == max(date), Country_Region, NA)]
# Globally silence warnings from this point on.
options(warn=-1)
# Cumulative cases per country on a log y axis: raw points plus the 7-day
# rolling-mean line. The x axis uses `date`, the time column of t10_subset.
p1 = ggplot(t10_subset,
            aes(x = date, y = cases, group = Country_Region, colour = Country_Region)) +
  geom_point(size = 0.5) + geom_line(aes(y = rm7_cases)) +
  scale_y_log10(limits = c(10, NA)) +
  scale_colour_brewer(palette = "Spectral") +
  theme(legend.position = "None") +
  # `label` is NA except on the latest date, so each series gets one label.
  geom_text_repel(aes(label = label),
                  nudge_x = 1,
                  na.rm = TRUE) +
  ggtitle('Cumulative Case Counts in Worst-Hit Countries')
# Daily new cases per country on a log y axis: raw points plus a smoother.
p2 =
  t10_subset %>%
  ggplot(aes(x = date, y = new_cases, group = Country_Region, colour = Country_Region)) +
  geom_point(size = 0.5) +
  # geom_line(aes(y = rm7_new_cases)) +
  geom_smooth(se = FALSE) +
  scale_y_log10() +
  scale_colour_brewer(palette = "Spectral") +
  theme(legend.position = "None") +
  geom_text_repel(aes(label = label),
                  nudge_x = 1,
                  na.rm = TRUE) +
  ggtitle('Growth in Cases in Worst-Hit Countries')
# Side-by-side layout (patchwork).
(p1 | p2 )
# Cumulative deaths per country on a log y axis: raw points plus the 7-day
# rolling-mean line. The x axis uses `date`, the time column of t10_subset.
p1 = ggplot(t10_subset,
            aes(x = date, y = deaths, group = Country_Region, colour = Country_Region)) +
  geom_point(size = 0.5) + geom_line(aes(y = rm7_deaths)) +
  scale_y_log10(limits = c(10, NA)) +
  scale_colour_brewer(palette = "Spectral") +
  theme(legend.position = "None") +
  # `label` is NA except on the latest date, so each series gets one label.
  geom_text_repel(aes(label = label),
                  nudge_x = 1,
                  na.rm = TRUE) +
  ggtitle('Cumulative Death Counts in Worst-Hit Countries')
# Daily new deaths per country on a log y axis: raw points plus a smoother.
p2 = t10_subset %>%
  ggplot(aes(x = date, y = new_deaths, group = Country_Region, colour = Country_Region)) +
  geom_point(size = 0.5) +
  geom_smooth(se = FALSE) +
  scale_y_log10() +
  scale_colour_brewer(palette = "Spectral") +
  theme(legend.position = "None") +
  geom_text_repel(aes(label = label),
                  nudge_x = 1,
                  na.rm = TRUE) +
  ggtitle('Growth in Deaths in Worst-Hit Countries')
# Side-by-side layout (patchwork).
(p1 | p2 )
# Case fatality rate (deaths / cases) over time per country, with a smoother.
# The x axis uses `date`, the time column of t10_subset.
p3 = t10_subset %>%
  ggplot(aes(x = date, y = cfr, group = Country_Region, colour = Country_Region)) +
  geom_point(size = 0.5) +
  # geom_line(aes(y = rm7_cfr)) +
  geom_smooth(se = FALSE) +
  # Restrict the y axis to 0-30%.
  ylim(c(0, 0.3)) +
  scale_colour_brewer(palette = "Spectral") +
  theme(legend.position = "None") +
  geom_text_repel(aes(label = label),
                  na.rm = TRUE) +
  labs(title = 'Case Fatality Rate in Worst-Hit Countries')
p3
# Per-date totals of the smoothed daily series across all plotted groups
# (top ten countries plus " Other"), and each group's share of that total.
t10_subset[, denom_cases := sum(rm7_new_cases), by = date][,
             denom_deaths := sum(rm7_new_deaths), by = date]
t10_subset[, newcase_share := rm7_new_cases / denom_cases][,
             newdeath_share := rm7_new_deaths / denom_deaths]
# Normalised stacked area of new-case shares. Breaks and limits are set on a
# single y scale: adding ylim() after scale_y_continuous() would replace that
# scale and discard the 0.1-step breaks.
p1 = ggplot(t10_subset[date >= "2020-02-15"],
            aes(x = date, y = newcase_share, fill = Country_Region, colour = Country_Region)) +
  geom_area(position = "fill") +
  scale_y_continuous(breaks = seq(0, 1, .1), limits = c(0, 1)) +
  scale_fill_brewer(palette = "Spectral") +
  scale_colour_brewer(palette = "Spectral") +
  ggtitle("New Cases (normalised)") + theme(legend.position = "None")
# Stacked area of the smoothed daily new cases on a log y axis. Only the log
# scale is kept: an ylim() placed before scale_y_log10() is replaced by it.
p11 = ggplot(t10_subset[date >= "2020-02-15"],
             aes(x = date, y = rm7_new_cases, fill = Country_Region, colour = Country_Region)) +
  geom_area() + scale_y_log10() +
  scale_fill_brewer(palette = "Spectral") +
  scale_colour_brewer(palette = "Spectral") +
  ggtitle("New Cases")
options(repr.plot.width = 20, repr.plot.height = 16)
# Stack the two panels vertically and print the result.
(p = (p1 / p11 ) + plot_annotation(title = "Tracking the epidemic's hotspots over time",
   subtitle = "each country's share of global 7 day rolling mean in deaths and cases"))
# Fetch the Our World in Data CSV into the working directory.
# download.file() replaces a shell command that was assembled with paste0()
# without separators, which glued the URL and the output file name together.
download.file("https://covid.ourworldindata.org/data/owid-covid-data.csv",
              destfile = "owid-covid-data.csv",
              quiet = TRUE)
owid = fread("owid-covid-data.csv")
# Parsed (year-month-day) copy of the date column, used for ordering/plotting.
owid[, d := ymd(date)]
owid %>% glimpse
# Most recent row per iso_code, excluding the "World" aggregate.
# NOTE(review): other aggregate locations, if present in the file, are not
# filtered out here — confirm against the current data.
all_countries_xs = owid[location != "World"][
  order(-d)][
  , .SD[1], by = .(iso_code)]
# Ten locations with the highest total_cases in their latest row (printed).
(t10 = all_countries_xs[order(-total_cases)][1:10])
owid_t10_subset = owid[location %in% t10$location]
vars = c('new_cases', 'new_deaths', 'new_tests')
# Per-date totals across the ten locations. na.rm = TRUE so that one location
# with a missing value does not turn the whole date's denominator into NA.
owid_t10_subset[, paste0('denom_', vars) := lapply(.SD, sum, na.rm = TRUE),
                by = date, .SDcols = vars]
# Each location's share of the per-date total; negative case shares
# are clamped to 0.
owid_t10_subset[, `:=`(
  newcase_share = new_cases / denom_new_cases,
  newdeath_share = new_deaths / denom_new_deaths,
  newtest_share = new_tests / denom_new_tests
)][newcase_share < 0, newcase_share := 0]
# Normalised stacked area of each location's share of daily new cases.
p1 = ggplot(owid_t10_subset[d >= "2020-02-15"],
            aes(x = d, y = newcase_share, fill = location, colour = location)) +
  geom_area(position = "fill") +
  scale_y_continuous(breaks = seq(0, 1, .1)) +
  scale_fill_brewer(palette = "Spectral") +
  scale_colour_brewer(palette = "Spectral") +
  # Full argument name: theme() should not be called with an abbreviated one.
  ggtitle("New Cases") + theme(legend.position = "None")
# Same for daily new deaths. Breaks and limits live on one y scale: a separate
# ylim() would replace scale_y_continuous() and drop the breaks.
p2 = ggplot(owid_t10_subset[d >= "2020-02-15"],
            aes(x = d, y = newdeath_share, fill = location, colour = location)) +
  geom_area(position = "fill") +
  scale_y_continuous(breaks = seq(0, 1, .1), limits = c(0, 1)) +
  scale_fill_brewer(palette = "Spectral") +
  scale_colour_brewer(palette = "Spectral") +
  ggtitle("New Deaths")
options(repr.plot.width = 20, repr.plot.height = 12)
# Stack the two panels vertically.
(p1 / p2 ) + plot_annotation(title = "Shares of Cases and Deaths over time")
# Print a 4 x 2 grid of per-capita series for one country, read from the
# global `owid` table: tests, vaccinations, cases and deaths (total and new).
#
# country: value matched against owid$location.
#
# The combined patchwork is printed with messages and warnings suppressed.
country_plot = function(country){
  # Order by the parsed date column `d` so this works whatever type the raw
  # `date` column was read as.
  npl = owid[location == country][order(-d)]

  # One panel: points plus a smoother for column `yvar`, from `panel_start`
  # onwards, with the y axis starting at 0. The y scale is linear: a log scale
  # combined with ylim() would simply be replaced by the ylim() scale.
  panel = function(yvar, title, panel_start = "2020-03-15") {
    panel_data = npl[d >= panel_start, .(d, value = get(yvar))]
    ggplot(panel_data, aes(x = d, y = value)) +
      geom_point(size = 0.5) + geom_smooth(se = FALSE) +
      ylim(c(0, NA)) +
      # Keep the column name as the axis label.
      labs(y = yvar) +
      ggtitle(title)
  }

  p1 = panel("total_cases_per_million", 'Total Cases per Million')
  p2 = panel("new_cases_per_million", 'New Cases per Million')
  p3 = panel("total_deaths_per_million", 'Total Deaths per Million')
  p4 = panel("new_deaths_per_million", 'New Deaths per Million')
  p5 = panel("total_tests_per_thousand", 'Total Tests per thousand')
  p6 = panel("new_tests_per_thousand", 'New Tests per thousand')
  # Vaccination panels start later than the others.
  p7 = panel("total_vaccinations_per_hundred", 'Total Vaccines per 100',
             panel_start = "2020-12-01")
  p8 = panel("people_vaccinated_per_hundred", 'Share of population vaccinated',
             panel_start = "2020-12-01")

  suppressMessages(suppressWarnings(print(
    (p5 | p6) / (p7 | p8) / (p1 | p2) / (p3 | p4) +
      plot_annotation(title = paste0("Covid Profile : ", country))
  )))
}
# Vaccination series for five South Asian countries.
sa_data = owid[location %in% c("Nepal", "India", "Bangladesh", "Sri Lanka", "Pakistan")][
  , .(date, location, total_vaccinations_per_hundred)]
sa_data %>% head
options(repr.plot.width = 10, repr.plot.height=10)
# Points plus a smoother per country from 2021 onwards. The x value is
# converted with as.Date() so the axis is a continuous date axis even if the
# raw `date` column was read as character; the axis label is kept as "date".
suppressMessages(suppressWarnings(print(
  sa_data[date >= "2021-01-01"] %>%
    ggplot(aes(x = as.Date(date), y = total_vaccinations_per_hundred,
               colour = as.factor(location))) +
    geom_point() + geom_smooth(alpha = 0.5, se = FALSE) +
    labs(title = "total vaccinations per 100 in South Asia", colour = "", x = "date")
)))
# Larger canvas for the 4 x 2 country profile grids.
options(repr.plot.width = 20, repr.plot.height=20)
# Draw one profile per country, in this order. The result list is discarded;
# country_plot() is called for its printing side effect.
invisible(lapply(
  c(
    "Nepal", "India", "Bangladesh", "Pakistan",
    "Ghana", "Nigeria", "Kenya", "Uganda",
    "Russia", "Poland", "Belarus",
    "United States", "Canada", "Mexico",
    "United Kingdom", "France", "Germany", "Spain", "Italy"
  ),
  country_plot
))