#' Install any missing packages, then attach every requested package.
#'
#' @param package Character vector of package names.
#' @return Named logical vector with one element per entry of `package`:
#'   `TRUE` if the package could be attached, `FALSE` otherwise.
mypack <- function(package) {
  # Only packages that are not already in the library need installing.
  new.package <- setdiff(package, installed.packages()[, "Package"])
  if (length(new.package) > 0) {
    install.packages(new.package, dependencies = TRUE)
  }
  # require() is kept on purpose: its TRUE/FALSE result is the return value.
  # vapply() guarantees a logical vector even when `package` is empty,
  # where sapply() would hand back an empty list.
  vapply(package, require, logical(1), character.only = TRUE)
}
# Create a vector of the R packages this script attaches with library().
# "tidyverse" is listed too, so the installer helper covers every dependency.
packages <- c(
  "tidyverse", "scales", "RColorBrewer", "ggthemes", "lubridate", "ggrepel",
  "reshape", "gridExtra", "maps", "stringr", "ggcorrplot", "viridis"
)
# mypack(packages) # Uncomment this to install any libraries that are missing
library(tidyverse)
library(scales)
library(RColorBrewer)
library(ggthemes)
library(lubridate)
library(ggrepel)
library(reshape)
library(gridExtra)
library(maps)
library(stringr)
library(ggcorrplot)
library(viridis)
# Session-wide settings: silence warnings, penalise scientific notation so
# numbers print in fixed notation, and set the repr.plot.* plot dimensions.
options(
  warn = -1,
  scipen = 10000,
  repr.plot.width = 13.8,
  repr.plot.height = 9.2
)

# Bind annotate() explicitly to the ggplot2 implementation.
annotate <- ggplot2::annotate

# Shared theme layer added on top of theme_classic() by the plots below.
theme_abz <- theme(
  # Titles
  plot.title = element_text(size = 19.5),
  plot.subtitle = element_text(size = 15.9),
  plot.caption = element_text(color = "gray65", face = "bold", size = 10),
  # Axes
  axis.title = element_text(size = 15.9, face = "bold", color = "gray25"),
  axis.text = element_text(size = 18),
  axis.line = element_line(size = 0.4),
  # Legend
  legend.position = "top",
  legend.direction = "horizontal",
  legend.title = element_text(size = 15.4),
  legend.text = element_text(size = 15.4),
  # Facet strips
  strip.text = element_text(size = 14.4, face = "bold")
)
# Load the country-level vaccination data and keep only the columns used
# by the rest of the script.
data <- read.csv("covid-world-vaccination-progress/country_vaccinations.csv")
data <- data[, c("country", "total_vaccinations", "date", "people_vaccinated",
                 "daily_vaccinations_raw", "people_vaccinated_per_hundred",
                 "daily_vaccinations_per_million", "vaccines")]
data$date <- as.Date(data$date)

# Missing values in the numeric measures are replaced by zero.
measure_cols <- c("total_vaccinations", "people_vaccinated", "daily_vaccinations_raw",
                  "people_vaccinated_per_hundred", "daily_vaccinations_per_million")
data[measure_cols][is.na(data[measure_cols])] <- 0

# Print five random rows, ordered by date, as a quick look at the data.
# NOTE(review): this object is called `head`, the same name as utils::head().
head <- data[sample(seq_len(nrow(data)), 5), ]
head[order(head$date), ]

data$month <- month(data$date)
# weekdays() returns names in the session locale, but the weekday plot later
# in this script orders its axis by the English names. Indexing a fixed
# English vector by the POSIXlt day-of-week (0 = Sunday) is locale-independent.
weekday_names <- c("Sunday", "Monday", "Tuesday", "Wednesday",
                   "Thursday", "Friday", "Saturday")
data$weekday <- weekday_names[as.POSIXlt(data$date)$wday + 1]
data$percent_people <- data$people_vaccinated_per_hundred / 100
# Daily vaccinations per country since 19-12-2020 (every row after
# 2020-12-18), with Pakistan highlighted. Counts are rescaled to millions.
# No grouping is needed here: the rows are only filtered, never summarised.
full1 <- data %>%
  filter(date > as.Date("2020-12-18"))
pkr1 <- full1 %>%
  filter(country == "Pakistan")
full1$daily_vaccinations_raw <- full1$daily_vaccinations_raw / 1000000
pkr1$daily_vaccinations_raw <- pkr1$daily_vaccinations_raw / 1000000

# The country label is anchored at Pakistan's most recent observation.
# Only that row is passed to geom_text(); passing the whole series would
# draw the same label once per row on top of itself.
pkr1_last <- pkr1[pkr1$date == max(pkr1$date), ]

ggplot() +
  geom_line(data = full1, aes(date, daily_vaccinations_raw, group = country),
            size = 0.8, colour = "gray80") +
  geom_line(data = pkr1, aes(date, daily_vaccinations_raw),
            size = 0.9, colour = "green2") +
  geom_text(data = pkr1_last, aes(date, daily_vaccinations_raw, label = country),
            hjust = 0.8, vjust = -0.9, size = 5.9, color = "green2") +
  scale_x_date(date_labels = "%d %b %y", date_breaks = "28 days") +
  labs(x = "Date", y = "Vaccinations (in millions)", title = "Daily vaccinations",
       subtitle = "per country, since 19-12-2020",
       caption = "© Made by deepnote.com/@abid") +
  theme_classic() +
  theme_abz
# Cumulative number of people vaccinated at least once, per country, since
# 19-12-2020, with Pakistan highlighted. Rows without a positive count are
# dropped. `full2` and `pkr2` are reused by the share-of-population plot.
full2 <- data %>%
  filter(people_vaccinated > 0, date > as.Date("2020-12-18"))
pkr2 <- full2 %>%
  filter(country == "Pakistan")
full2$people_vaccinated <- full2$people_vaccinated / 1000000
pkr2$people_vaccinated <- pkr2$people_vaccinated / 1000000

# Label Pakistan once, at its most recent observation, instead of drawing
# the same text once for every row of the series.
pkr2_last <- pkr2[pkr2$date == max(pkr2$date), ]

ggplot() +
  geom_line(data = full2, aes(date, people_vaccinated, group = country),
            size = 0.8, colour = "gray80") +
  geom_line(data = pkr2, aes(date, people_vaccinated),
            size = 0.9, colour = "green2") +
  geom_text(data = pkr2_last, aes(date, people_vaccinated, label = country),
            hjust = 0.9, vjust = -0.7, size = 5.9, color = "green2") +
  scale_x_date(date_labels = "%d %b %y", date_breaks = "28 days") +
  labs(x = "Date", y = "Vaccinations (cumulated, in millions)",
       title = "Number of people vaccinated at least one time",
       subtitle = "per country, since 19-12-2020", caption = "© Made by Abid") +
  theme_classic() +
  theme_abz
# Share of each country's population vaccinated at least once, since
# 19-12-2020, with Pakistan highlighted. Uses `full2` / `pkr2` built above.

# Label Pakistan once, at its most recent observation, instead of drawing
# the same text once for every row of the series.
pkr2_latest <- pkr2[pkr2$date == max(pkr2$date), ]

ggplot() +
  geom_line(data = full2, aes(date, percent_people, group = country),
            size = 0.8, colour = "gray80") +
  geom_line(data = pkr2, aes(date, percent_people),
            size = 0.9, colour = "green2") +
  geom_text(data = pkr2_latest, aes(date, percent_people, label = country),
            hjust = 0.9, vjust = -0.7, size = 5.9, color = "green2") +
  scale_x_date(date_labels = "%d %b %y", date_breaks = "28 days") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(x = "Date", y = "Share of vaccinated people nationwide (%)",
       title = "Number of vaccinations in proportion to the number of citizens",
       subtitle = "per country, since 19-12-2020",
       caption = "© Made by deepnote.com/@abid") +
  theme_classic() +
  theme_abz
# Map of the countries around Pakistan with the share of vaccinated
# inhabitants on 2021-06-01 drawn as a bubble per country.

# Keep only map vertices inside the latitude/longitude window of interest.
world_map <- map_data("world") %>%
  filter(between(lat, 20, 47), between(long, 40, 100))

# One row per country for the reference date. `country` (columns `region`,
# `people_vaccinated_per_hundred`) is reused later when joining the
# country-level indicators.
country <- data %>%
  filter(date == as.Date("2021-06-01")) %>%
  select(country, people_vaccinated_per_hundred)
names(country)[1] <- "region"

world_map <- world_map %>%
  left_join(country, by = "region")

# Bubble position = mean of the polygon vertices kept for each region.
world_map2 <- world_map %>%
  select(long, lat, region, people_vaccinated_per_hundred) %>%
  group_by(region) %>%
  summarise(long = mean(long), lat = mean(lat),
            people_vaccinated_per_hundred = mean(people_vaccinated_per_hundred),
            .groups = "drop")
# Literal fill colour per region; consumed through scale_fill_identity().
world_map2$pakistan <- ifelse(world_map2$region == "Pakistan", "green2", "gray80")

ggplot() +
  geom_polygon(data = world_map, aes(x = long, y = lat, group = group),
               fill = "white", colour = "gray70") +
  geom_point(data = world_map2,
             aes(x = long, y = lat, size = people_vaccinated_per_hundred,
                 fill = pakistan),
             shape = 21) +
  geom_text_repel(data = world_map2[!is.na(world_map2$people_vaccinated_per_hundred), ],
                  aes(x = long, y = lat,
                      label = paste0(region, ",", round(people_vaccinated_per_hundred, 0), "%")),
                  size = 6, hjust = 0.3, vjust = 0.1) +
  scale_size(range = c(0, 18)) +
  # The fill column already holds colour names, so use them as-is. The
  # previous manual fill scale never applied because fill was not mapped.
  scale_fill_identity() +
  # x carries longitude and y carries latitude; the subtitle date matches
  # the 2021-06-01 filter used to build `country`.
  labs(x = "Longitude", y = "Latitude",
       title = "Share of vaccinated inhabitants among countries near Pakistan",
       size = "Share of\nvaccinated (in %)", subtitle = "per country, in 01-06-2021",
       caption = "© Made by deepnote.com/@abid") +
  theme_classic() +
  theme(legend.position = "right", legend.direction = "vertical")
# How daily vaccinations per million are distributed over the days of the
# week: Pakistan versus the average of all other countries.

# Mean daily vaccinations per million for every (weekday, country) pair.
full3 <- data %>%
  filter(date > as.Date("2020-12-18")) %>%
  group_by(weekday, country) %>%
  summarise(sr = mean(daily_vaccinations_per_million), .groups = "drop")

# Pakistan keeps its own per-weekday means ...
full3pkr <- full3 %>%
  filter(country == "Pakistan") %>%
  select(weekday, country, sr) %>%
  as.data.frame()

# ... while every other country is averaged into one "Rest of World" series.
full3row <- full3 %>%
  filter(country != "Pakistan") %>%
  group_by(weekday) %>%
  summarise(sr = mean(sr), .groups = "drop") %>%
  mutate(country = "Rest of World") %>%
  select(weekday, country, sr) %>%
  as.data.frame()

# Turn each series into shares of its own weekly total.
full3pkr$sr <- full3pkr$sr / sum(full3pkr$sr)
full3row$sr <- full3row$sr / sum(full3row$sr)
full3 <- rbind(full3pkr, full3row)

ggplot(full3, aes(weekday, sr, fill = country)) +
  geom_bar(stat = "identity", width = 0.75, position = position_dodge(),
           colour = "gray30") +
  # `limits` is spelled out in full rather than relying on partial matching.
  scale_x_discrete(limits = c("Monday", "Tuesday", "Wednesday", "Thursday",
                              "Friday", "Saturday", "Sunday")) +
  # Named values keep the colours tied to the right series regardless of
  # which series are present.
  scale_fill_manual(values = c("Pakistan" = "green2", "Rest of World" = "gray80")) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(x = "Weekday", y = "Average share of a day in weekly vaccinations (in %)",
       title = "Daily vaccinations per million inhabitants by weekday",
       fill = "Country", subtitle = "per country, since 19-12-2020",
       caption = "© Made by deepnote.com/@abid") +
  theme_classic() +
  theme_abz +
  theme(legend.position = "right", legend.direction = "vertical",
        axis.title.y = element_text(size = 15))
# Number of vaccine suppliers per country: the `vaccines` field is a
# comma-separated list, so the count is the number of commas plus one.
data$number_of_vac <- str_count(data$vaccines, ",") + 1

# One row per distinct (country, supplier count) pair.
# NOTE(review): a country whose supplier list changed over time would have
# several rows here and its bars would stack — confirm that is intended.
vacx <- data %>%
  filter(date > as.Date("2020-12-18")) %>%
  group_by(country, number_of_vac) %>%
  summarise(avg = mean(number_of_vac), .groups = "drop") %>%
  filter(avg > 0)
vacx$pakistan <- ifelse(vacx$country == "Pakistan", "Pakistan", "Rest of World")

# Named colour values keep Pakistan green even if a group is missing.
highlight_cols <- c("Pakistan" = "green2", "Rest of World" = "gray80")

ggplot(vacx, aes(reorder(country, avg), avg, fill = pakistan, colour = pakistan)) +
  geom_bar(stat = "identity", width = 0.9) +
  scale_fill_manual(values = highlight_cols) +
  # guide = "none" replaces the deprecated guide = FALSE.
  scale_colour_manual(values = highlight_cols, guide = "none") +
  coord_flip() +
  labs(y = "Number of suppliers", x = "Country", title = "Number of vaccine suppliers",
       fill = "Country", subtitle = "per country, since 19-12-2020",
       caption = "© Made by deepnote.com/@abid") +
  theme_classic() +
  theme_abz +
  theme(legend.position = "right", legend.direction = "vertical",
        axis.text.y = element_text(size = 0),
        panel.grid.major.y = element_blank())
# Load country-level indicators and attach the vaccination share per country.
other <- read.csv("countries-of-the-world/countries of the world.csv",
                  stringsAsFactors = FALSE)
other <- other[, c("Country", "Population", "Area..sq..mi..", "Pop..Density..per.sq..mi..",
                   "GDP....per.capita.", "Phones..per.1000.", "Literacy....", "Birthrate",
                   "Deathrate", "Agriculture", "Industry", "Service")]
names(other) <- c("region", "Population", "Area", "PopDensity", "GDP", "Phones", "Literacy",
                  "Birthrate", "Deathrate", "Agriculture", "Industry", "Service")

# These columns use a decimal comma; convert them to numeric.
decimal_comma_cols <- c("PopDensity", "Phones", "Literacy", "Birthrate", "Deathrate",
                        "Agriculture", "Industry", "Service")
other[decimal_comma_cols] <- lapply(
  other[decimal_comma_cols],
  function(x) as.numeric(gsub(",", ".", x))
)

country <- as.data.frame(country)
country$region <- as.character(country$region)

# Join on names with only the surrounding whitespace trimmed. Removing every
# space before the join would stop multi-word country names from matching
# `country$region`, which presumably keeps its internal spaces — verify
# against the vaccination data.
other$region <- trimws(other$region)
other <- merge(other, country, by = "region", all.x = TRUE)
names(other)[names(other) == "people_vaccinated_per_hundred"] <- "Vaccinate_Percentage"

# Later code refers to countries by their space-free names (for example
# "UnitedKingdom"), so strip the remaining spaces only after the join and
# keep the rows in a deterministic order.
other$region <- gsub(" ", "", other$region)
other <- other[order(other$region), ]

# Print five random rows as a quick look at the merged table.
head2 <- other[sample(seq_len(nrow(other)), 5), ]
head2
# Spearman rank correlations between all columns of `other` except the first
# (the region name), computed on rows with no missing values.
corr_input <- other[, 2:ncol(other)]
core <- cor(corr_input, use = "complete.obs", method = "spearman")

# Plot size for the correlation matrix.
options(repr.plot.height = 11.18, repr.plot.width = 13)

corr_palette <- c("#6D9EC1", "white", "#E46726")
ggcorrplot(
  core,
  type = "upper",
  outline.col = "gray30",
  lab = TRUE,
  lab_size = 5.5,
  colors = corr_palette,
  legend.title = "Strength of \ncorrelation"
) +
  labs(
    x = "", y = "",
    title = "Spearman's correlation matrix for continuous variables",
    subtitle = "by countries, vaccinations from 01-06-2021",
    caption = "© Made by deepnote.com/@abid"
  ) +
  # Rectangle framing one row of the matrix.
  annotate("rect", xmin = 0.5, xmax = 11.5, ymin = 10.5, ymax = 11.5,
           alpha = 0.1, color = "green2", size = 0.85) +
  guides(fill = guide_colorbar(title.position = "top", barheight = 10, barwidth = 1.7)) +
  theme_classic() +
  theme_abz +
  theme(
    legend.position = "right",
    legend.direction = "vertical",
    axis.text.x = element_text(angle = 40, hjust = 1)
  )
# Flag Pakistan so it can be highlighted in the country-level plots.
other$pakistan <- ifelse(other$region == "Pakistan", "Pakistan", "Rest of World")

# Restore the default plot size after the larger correlation matrix.
options(repr.plot.width = 13.8, repr.plot.height = 9.2)

# Population against area (both on log scales); bubble size is density.
ggplot(other, aes(Area, Population / 1000000, fill = pakistan, size = PopDensity)) +
  geom_point(shape = 21) +
  # Named values tie each colour to its group; guide = "none" replaces the
  # deprecated guide = FALSE.
  scale_fill_manual(values = c("Pakistan" = "green2", "Rest of World" = "gray80"),
                    guide = "none") +
  scale_size(range = c(0.2, 14)) +
  scale_y_log10() +
  scale_x_log10() +
  # Hand-placed arrow and label pointing at Pakistan.
  annotate(geom = "curve", xend = 700000, y = 500, x = 100000, yend = 200,
           curvature = -0.25, arrow = arrow(length = unit(3.5, "mm"))) +
  annotate("text", x = 30000, y = 600, label = "Pakistan", size = 5.5,
           colour = "green2", fontface = 2) +
  # The area column comes from the source field "Area (sq. mi.)", so the
  # axis is labelled in square miles.
  labs(y = "Population (in millions, logarithmic scale)",
       x = "Area (in sq. mi., logarithmic scale)",
       title = "Population, area and density of population", fill = "Country",
       subtitle = "by countries", size = "Population Density\n(per square mile)",
       caption = "© Made by deepnote.com/@abid") +
  theme_classic() +
  theme_abz +
  theme(legend.position = "right", legend.direction = "vertical")
# Death rate against birth rate per country, with Pakistan highlighted.
ggplot(other, aes(Birthrate, Deathrate, fill = pakistan)) +
  geom_point(shape = 21, size = 3.5, alpha = 0.85) +
  # Named values tie each colour to its group; guide = "none" replaces the
  # deprecated guide = FALSE.
  scale_fill_manual(values = c("Pakistan" = "green2", "Rest of World" = "gray80"),
                    guide = "none") +
  # Hand-placed arrow and label pointing at Pakistan.
  annotate(geom = "curve", xend = 29, y = 11.7, x = 24, yend = 9,
           curvature = -0.25, arrow = arrow(length = unit(3.5, "mm"))) +
  annotate("text", x = 23, y = 12.5, label = "Pakistan", size = 5.5,
           colour = "green2", fontface = 2) +
  labs(y = "Deathrate", x = "Birthrate", title = "Birthrate and deathrate",
       fill = "Country", subtitle = "by countries",
       caption = "© Made by deepnote.com/@abid") +
  theme_classic() +
  theme_abz +
  theme(legend.position = "right", legend.direction = "vertical")
# Literacy (as a fraction) against phones per 1000 inhabitants (log scale),
# with Pakistan highlighted.
ggplot(other, aes(Phones, Literacy / 100, fill = pakistan)) +
  geom_point(shape = 21, size = 3.5, alpha = 0.85) +
  # Named values tie each colour to its group; guide = "none" replaces the
  # deprecated guide = FALSE.
  scale_fill_manual(values = c("Pakistan" = "green2", "Rest of World" = "gray80"),
                    guide = "none") +
  scale_x_log10() +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  # Hand-placed arrow and label pointing at Pakistan.
  annotate(geom = "curve", xend = 40, y = 0.76, x = 800, yend = 0.46,
           curvature = -0.35, arrow = arrow(length = unit(3.5, "mm"))) +
  annotate("text", x = 800, y = 0.8, label = "Pakistan", size = 5.5,
           colour = "green2", fontface = 2) +
  labs(y = "Literacy (in %)", x = "Phones (per 1000 inhabitants, logarithmic scale)",
       title = "Literacy and number of phones", fill = "Country",
       subtitle = "by countries", caption = "© Made by deepnote.com/@abid") +
  theme_classic() +
  theme_abz +
  theme(legend.position = "right", legend.direction = "vertical")
# GDP per capita for every country, ordered by value, Pakistan highlighted.
# The bars have no outline colour, so the former `size = pakistan` mapping
# and the colour/size scales had nothing to act on; they are dropped, which
# also removes the deprecated `guide = F`.
ggplot(other, aes(reorder(region, GDP), GDP, fill = pakistan)) +
  geom_bar(stat = "identity", width = 0.9) +
  # Named values tie each colour to its group.
  scale_fill_manual(values = c("Pakistan" = "green2", "Rest of World" = "gray80")) +
  coord_flip() +
  labs(y = "GDP per capita (in $)", x = "Country", title = "GDP per capita",
       fill = "Country", subtitle = "by countries",
       caption = "© Made by deepnote.com/@abid") +
  theme_classic() +
  theme_abz +
  # Country names are hidden (zero-size text): there are too many to read.
  theme(legend.position = "right", legend.direction = "vertical",
        axis.text.y = element_text(size = 0),
        panel.grid.major.y = element_blank())
# Agriculture / industry / service shares for a hand-picked set of
# countries, reshaped to long form (one row per country and sector).
gosp <- other %>%
  select(region, Agriculture, Industry, Service) %>%
  filter(!is.na(Agriculture), !is.na(Industry), !is.na(Service)) %>%
  melt(id.vars = "region")

# Country names are written without spaces, matching `other$region`.
gosp <- gosp[gosp$region %in% c("Pakistan", "Bangladesh", "Nepal", "Iran", "Afghanistan",
                                "UnitedArabEmirates", "Spain", "NewZealand", "Qatar",
                                "Greece", "Cyprus", "Kuwait", "Slovenia", "Portugal",
                                "Korea,South", "Oman", "Bahrain", "Finland",
                                "CzechRepublic", "Hungary", "UnitedKingdom", "India"), ]

# Fix the axis order explicitly so the label colours below can be built
# in exactly that order.
gosp$region <- factor(gosp$region)
gosp$pakistan <- ifelse(gosp$region == "Pakistan", "Pakistan", "Rest of World")

# One colour per axis label, in factor-level order. Building this vector per
# data row (three rows per country, in table order) could colour the wrong
# country's label.
a <- ifelse(levels(gosp$region) == "Pakistan", "green2", "gray25")

ggplot(gosp, aes(region, color = pakistan, weight = value)) +
  geom_bar(aes(fill = variable), width = 0.75) +
  scale_fill_brewer(palette = "Spectral", direction = -1) +
  # Named values tie each outline colour to its group; guide = "none"
  # replaces the deprecated guide = FALSE.
  scale_colour_manual(values = c("Pakistan" = "green2", "Rest of World" = "gray45"),
                      guide = "none") +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  coord_flip() +
  labs(x = "Country", y = "Share of the sector in the employment structure",
       title = "Agriculture, industry and service", colour = "Country",
       fill = "Employment\nstructure", subtitle = "by countries",
       caption = "© Made by deepnote.com/@abid") +
  theme_classic() +
  theme_abz +
  theme(legend.position = "right", legend.direction = "vertical",
        axis.text.y = element_text(colour = a))