install.packages("ggthemes")
Preparation: Activate R packages
# Activating packages
suppressWarnings(suppressMessages({
library(tidyverse) # Modern data science workflow
library(stringr) # Functions designed to work with strings
library(sf) # Encodes spatial vector data
library(ggplot2) # Modern visualization tools
library(ggpubr) # Provides some easy-to-use functions for creating and customizing ggplot2
library(ggthemes) # For extra themes, scales and geoms for 'ggplot2'
library(patchwork) # make it simple to combine separate ggplots
library(plotly) # Create interactive web graphics from 'ggplot2'
library(ggrepel) ## For displaying label on ggplot2 object
}))
# Adjustments
knitr::opts_chunk$set(fig.align = 'center')
options(warn = -1)
options(scipen = 10000)
options(repr.plot.width = 6, repr.plot.height = 4)
Load non-spatial data
df <- read.csv("https://raw.githubusercontent.com/haginta/regional-convergence-indonesia-satellite-gdp/main/java118_.csv")
head(df)
summary(df[c("sat2019", "sat2020", "gdp2019", "gdp2020")])
Log transform "sat" and "gdp" columns
df[,4:7] <- log(df[4:7])
head(df)
Create new columns of the proximate growth rate of nighttime light and GDP as log difference
df <- df %>%
mutate(sat_g = sat2020 - sat2019,
gdp_g = gdp2020 - gdp2019)
df
Explore the data
#Plot the growth rate of GDP in 2020
ggbarplot(df, x = "province", y = "gdp_g",
add = c("mean_se", "point"),
color = "province", alpha = 0.5) +
xlab("Province") + ylab("GDP growth 2020 (%, yoy)") +
ggrepel::geom_text_repel(aes(label = district), size = 2) +
theme(axis.text.y = element_text(size = 8), axis.text.x = element_text(size = 8, angle = 90, vjust = 0.5, hjust=1),legend.position = "none")
#Interactive plot of GDP growth in 2020, without boxplot
b <- ggplot(df, aes(province, gdp_g, label=district, color=province)) + geom_point() + xlab("Province") + ylab("GDP growth 2020 (%, yoy)") + theme_bw()
ggplotly(b)
#Interactive plot of GDP growth in 2020, with boxplot
c <- ggbarplot(df, x = "province", y = "gdp_g", color="province",
add = c("mean_se", "point")) +
geom_point(aes(label=district, color=province)) +
xlab("Province") + ylab("GDP growth 2020 (%, yoy)") +
theme(axis.text.y = element_text(size = 8), axis.text.x = element_text(size = 8, angle = 90, vjust = 0.5, hjust=1),legend.position = "none")
ggplotly(c)
Load spatial data stored in geo package file
mymap <- read_sf("https://github.com/haginta/regional-convergence-indonesia-satellite-gdp/raw/main/java_118.gpkg")
str(mymap)
Merge non-spatial and spatial data
df_map <- merge(mymap, df, by=c("districtID", "district", "province"))
df_map <- st_make_valid(df_map)
glimpse(df_map)
sapply(df_map, function(x) sum(is.na(x)))
Display the district boundaries of Java map
ggplot(df_map) +
geom_sf() +
ggtitle("Districts level boundaries map of Indonesia", subtitle = paste0("(", length(unique(df_map$districtID)), " districts)")) +
labs(x = "Longitude", y="Latitude")
#Create dataframe of highlighted popular cities
city <- c("Kota Jakarta Pusat", "Kota Yogyakarta", "Kota Surabaya")
h_city <- df_map %>% filter(district %in% city)
#Add coordinates
coordinates <- as.data.frame(st_coordinates(st_centroid(h_city)))
h_city$X <- coordinates$X
h_city$Y <- coordinates$Y
#Visualize the highlighted popular cities
ggplot() +
geom_sf(data = h_city, aes(fill = district), lwd = 0) +
geom_sf(data = df_map, aes(fill = NA, alpha=0), show.legend = F) +
geom_sf(data = h_city, aes(fill = district, alpha=0), show.legend = F, color="red") +
labs(x = "Longitude", y="Latitude") +
labs(title="Three popular cities in Java") +
geom_label_repel(data = h_city, aes(x = X, y = Y, label = district), size=2, color="blue", point.padding = NA) +
theme(legend.position="None")
Quantile map: spatial distribution of nighttime light intensity in 2019
ggplot(df_map) +
geom_sf(aes(fill=sat2019), color = "white", lwd = 0.05) +
scale_fill_viridis_c(option = "magma", name = "Radiance (log)") +
theme_map() +
theme(legend.direction="horizontal") +
labs(title = "Districts of Java seen from space at night",
subtitle = "Nighttime light (NTL) intensity across 118 districts in Java, Indonesia, 2019",
caption = "Geometries: GADM; Data: Earth Observation Group, https://eogdata.mines.edu/products/vnl/")
Quantile map: spatial distribution of nighttime light intensity in 2019
ggplot(df_map) +
geom_sf(aes(fill=gdp2019), color = "white", lwd = 0.05) +
scale_fill_viridis_c(option = "magma", name = "GDP (log)") +
theme_map() +
theme(legend.direction="horizontal") +
labs(title = "GDP at district level",
subtitle = "GDP across 118 districts in Java, Indonesia, 2019 (constant price 2010)",
caption = "Geometries: GADM; Data: Indonesia Central Statistics")
Quantile map: spatial distribution of nighttime light intensity in 2020
p1 <- ggplot(df_map) +
geom_sf(aes(fill=sat2020), color = "white", lwd = 0.05) +
scale_fill_viridis_c(option = "magma", name = "Radiance (log)") +
theme_map() +
theme(legend.direction="horizontal")
p2 <- ggplot(data = df_map) +
geom_sf(aes(fill=gdp2020), color = "white", lwd = 0.05) +
scale_fill_viridis_c(option = "magma", name = "GDP (log)") +
theme_map() +
theme(legend.direction="horizontal")
p1 / p2
Interactive map
p <- ggplot(df_map) + geom_sf(aes(fill=gdp2019, label=district), lwd = 0.05) +
scale_fill_viridis_c(option = "magma", name = "GDP (log)") + theme_bw() +
labs(x = "Longitude", y="Latitude", title = "GDP across 118 districts in Java, Indonesia, 2019 (constant price 2010)")
p_inter <- ggplotly(p)
p_inter
Run cross-section linear regression of nighttime light and GDP in 2019
ggplot(data = df, aes(x = sat2019 , y = gdp2019)) +
geom_point(size=0.3, color = "black") +
geom_smooth(method = "lm", se = FALSE, linetype = 5,color="blue") + xlim(4, NA) +
stat_regline_equation(label.x=5, label.y=12) +
stat_cor(aes(label=..rr.label..), label.x=5, label.y=11.5) +
labs(title = "Year 2019", x ="Nighttime lights intensity (log)", y = "GDP (log)") + theme_classic()
Interactive plot
p2019 <- ggplot(data = df, aes(x = sat2019 , y = gdp2019, label = district)) +
geom_point(size=0.3, color = "black") +
geom_smooth(method = "lm", se = FALSE, linetype = 5,color="blue") +
annotate("text", x=5, y=12, label= "y = 3.1 + 0.83x") + annotate("text", x=5, y=11.5, label= paste0("R<sup>2</sup> = 0.65")) +
labs(title = "Year 2019", x ="Nighttime lights intensity (log)", y = "GDP (log)") + theme_classic()
ggplotly(p2019)
Run cross-section linear regression of nighttime light and GDP in 2020
ggplot(data = df, aes(x = sat2020 , y = gdp2020)) +
geom_point(size=0.3, color = "black") +
geom_smooth(method = "lm", se = FALSE, linetype = 5,color="blue") + xlim(4, NA) +
stat_regline_equation(label.x=5, label.y=12) +
stat_cor(aes(label=..rr.label..), label.x=5, label.y=11.5) +
labs(title = "Year 2020", x ="Nighttime lights intensity (log)", y = "GDP (log)") + theme_classic()
Interactive plot
p2020 <- ggplot(data = df, aes(x = sat2020 , y = gdp2020, label = district)) +
geom_point(size=0.3, color = "black") +
geom_smooth(method = "lm", se = FALSE, linetype = 5,color="blue") +
annotate("text", x=5, y=12, label= "y = 3.1 + 0.82x") + annotate("text", x=5, y=11.5, label= paste0("R<sup>2</sup> = 0.64")) +
labs(title = "Year 2020", x ="Nighttime lights intensity (log)", y = "GDP (log)") + theme_classic()
ggplotly(p2020)