A guide to compute unconditional and conditional sigma and beta convergence plus half-life with REAT package and MGWR
Install and load packages
#devtools::install_version("REAT", version = "2.1.1", repos = "http://cran.us.r-project.org")
#devtools::install_version("ConvergenceClubs", version = "2.2.1", repos = "http://cran.us.r-project.org")
library(sf)
library(tidyverse)
library(ggplot2)
library(RColorBrewer)
library(ggrepel)
library(skimr)
library(ConvergenceClubs)
library(REAT)
library(imager)
library(spdep)
library(ggrepel)
library(dplyr)
Load dataset
vnm1 <- st_read("VNM_adm1.gpkg") #province data for plotting boundaries
vnm <- st_read("vnm.gpkg") #district data for analysis
vnm_df <- read_csv("vnm.csv")
skim(vnm_df)
Part 1: identifying (un)conditional sigma and beta convergence on average using REAT package
Compute sigma convergence for 2 time periods: 2012 and 2018
sigma_anova_sd_log <- sigmaconv(
vnm$sat12,
2012,
vnm$sat18,
2018,
sigma.measure = "sd",
sigma.log = TRUE, # Fist take log of Y, and then do the analysis
output.results = TRUE
)
Compute beta convergence for 2 time periods: 2012 and 2018
conditions <- data.frame(vnm$elevation, vnm$pop20)
betaconv_ols <- betaconv.ols (vnm$sat12,
2012,
vnm$sat18,
2018,
conditions = conditions,
beta.plot = TRUE,
beta.plotLine = TRUE,
beta.plotLineCol = "red",
beta.plotX = "Log of mean night-time lights intensity in 2012",
beta.plotY = "Log of mean night-time lights intensity growth (2012-2018)",
beta.plotTitle = "Beta convergence",
beta.bgrid = TRUE,
beta.bgridType = "solid",
output.results = TRUE)
Part 2: identifying conditional beta-convergence with MGWR application
Compute log(growth rate) and log(initial value) for MGWR
vnm <- vnm %>%
mutate(sat_log_gr = log(sat18 / sat12), .after = sat18) %>%
mutate(sat_log_12 = log(sat12), .after = sat_log_gr)
Cross-check with REAT package
lm <- lm(vnm$sat_log_gr ~ vnm$sat_log_12)
summary(lm)
Save and load in MGWR application
Download link: https://sgsup.asu.edu/sparc/multiscale-gwr
vnm_df <- st_drop_geometry(vnm)
write.csv(vnm_df, "vnm.csv")
im<-load.image("MGWR_specifications.png")
plot(im)
Load MGWR session results
vnm_mgwr <- read_csv("MGWR_session_results.csv")
Compute speed of convergence and half life
vnm_mgwr <- vnm_mgwr %>%
mutate(sat_speed_1218 = -log(1+beta_sat_log_12)/7) %>%
mutate(sat_hlife_1218 = log(2)/sat_speed_1218) %>%
select(id, beta_sat_log_12, p_sat_log_12, sat_speed_1218, sat_hlife_1218, beta_pop20, p_pop20, beta_elevation, p_elevation)
- When beta-coefficient is < -1, half-life became NA, - When beta-convergence is near -1 i.e., -0.9, half-life is near 0.5 (meaning regions are already converged or is converging at a really fast speed) - Hence, replacing NA with 0 half-life
vnm_mgwr$sat_hlife_1218 <- vnm_mgwr$sat_hlife_1218 %>% replace_na("0")
Compute significantly converging or diverging districts
#GROWTH TREND
##Negative half-life means diverging, positive half-life means convergng
vnm_mgwr <- vnm_mgwr %>% mutate(sat_hlife_label = ifelse(sat_hlife_1218 < 0, "Diverging", "Converging"))
##0.01 significance threshold
vnm_mgwr <- vnm_mgwr %>% mutate(sat_p_label = ifelse(p_sat_log_12 <= 0.01, "Significant", "Not Significant"))
##significant converging/diverging regions
vnm_mgwr <- vnm_mgwr %>% mutate(sigconv = ifelse(sat_p_label == "Significant" & sat_hlife_label == "Converging", "Converging",
ifelse(sat_p_label == "Significant" & sat_hlife_label == "Diverging", "Diverging", "Not Significant")))
#PHYSICAL ELEVATION
##Negative coefficient means negative correlation
vnm_mgwr <- vnm_mgwr %>% mutate(elevation_label = ifelse(beta_elevation < 0, "Negative", "Positive"))
##0.01 significance threshold
vnm_mgwr <- vnm_mgwr %>% mutate(elevation_p_label = ifelse(p_elevation <= 0.01, "Significant", "Not Significant"))
##significant converging/diverging regions
vnm_mgwr <- vnm_mgwr %>% mutate(sigele = ifelse(elevation_p_label == "Significant" & elevation_label == "Positive", "Positive",
ifelse(elevation_p_label == "Significant" & elevation_label == "Negative", "Negative", "Not Significant")))
- Save NA as "not significant"
vnm_mgwr$sigconv <- vnm_mgwr$sigconv %>% replace_na("Not Significant")
vnm_mgwr$sigele <- vnm_mgwr$sigele %>% replace_na("Not Significant")
Merge to shapefile and save
vnm_mgwr <- merge(vnm, vnm_mgwr, by.x=c("id"), by.y=c("id"), all.x = TRUE)
vnm_mgwr_df <- st_drop_geometry(vnm_mgwr)
write.csv(vnm_mgwr_df, "vnm_mgwr_df.csv")
vnm_mgwr_df
Part 3: Visualisation
Create a significantly converging subsets
vnm_mgwr_sigconv <- vnm_mgwr %>% filter(sigconv == "Converging")
Create borders (North, Central South Industrial zones)
North <- c("Hà Nội", "Hải Phòng", "Bắc Ninh", "Hải Dương","Hưng Yên","Quảng Ninh", "Vĩnh Phúc")
Central <- c("Đà Nẵng", "Bình Định", "Quảng Nam", "Quảng Ngãi", "Thừa Thiên - Huế")
South <- c("Đồng Nai", "Bà Rịa - Vũng Tàu", "Bình Dương", "Bình Phước", "Hồ Chí Minh city", "Tiền Giang", "Long An", "Tây Ninh")
Industrial <- c(North, Central, South)
vnmi <- vnm1 %>% filter(NAME_1 %in% Industrial)
vnmi <- vnmi %>% mutate(key = ifelse(NAME_1 %in% North, "Northern",
ifelse(NAME_1 %in% Central, "Central", "Southern")))
Create labels (Top 5 Provincial Competitiveness Index, 2018)
topfive <- c("Quảng Ninh", "Đà Nẵng", "Đồng Tháp", "Long An", "Bến Tre")
vnmt <- vnm1 %>% filter(NAME_1 %in% topfive)
#add coords
coord <- st_coordinates(st_centroid(vnmt))
coordinates <- as.data.frame(coord)
vnmt$X <- coordinates$X
vnmt$Y <- coordinates$Y
Visualise North, Central and South Industrial zones and Top 5 PCI provinces
ggplot() +
geom_sf(data = vnmi, aes(fill = key), lwd = 0) +
geom_sf(data = vnm1, aes(fill = NA, alpha=0), show.legend = FALSE, lwd=0.5) +
geom_sf(data = vnmi, aes(fill = NA, alpha=0), show.legend = FALSE, lwd = 0.8, color="black") +
theme_set(theme_bw()) +
theme(panel.grid = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.background = element_blank()) +
labs(title="Key Economic Zones")+
theme(plot.title=element_text(hjust=0.5, lineheight=1.2)) +
scale_fill_manual(name = " ",
values = c('#ffeda0','#feb24c','#e31a1c'))
ggsave("keyeconomiczones.png")
ggplot() +
geom_sf(data = vnmt, aes(fill = NAME_1), lwd = 0) +
geom_sf(data = vnm1, aes(fill = NA, alpha=0), show.legend = FALSE) +
geom_sf(data = vnmi, aes(fill = NA, alpha=0), show.legend = FALSE, lwd = 1, color="black") +
theme_set(theme_bw()) +
theme(panel.grid = element_blank(),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.background = element_blank()) +
labs(title="Top 5 performing provinces")+
theme(plot.title=element_text(hjust=0.5, lineheight=1.2)) +
scale_fill_manual(name = "Ranking",
values = c('#800026','#bd0026','#e31a1c', '#fc4e2a','#fd8d3c','#feb24c','#fed976','#ffeda0','#ffffcc','#ffffd9')) +
geom_label_repel(data = vnmt, aes(x = X, y = Y, label = NAME_1), size=3, point.padding = NA, nudge_x = c(-2), nudge_y = c(-1,1))
ggsave("top5pci.png")
betaconv_ols[["regdata"]] %>%
ggplot(aes(x = ln_initial, y = growth, col = substr(vnm_mgwr$sat_hlife_1218, 1,1))) +
geom_point() +
geom_smooth() +
theme_minimal() +
labs(x = "Log of mean night-time lights intensity in 2012",
y = "Log of mean night-time lights intensity growth (2012-2018)")+
scale_x_continuous(limits = c(-5, 5)) +
scale_y_continuous(limits = c(0, 90)) +
labs(title="Conditional beta-convergence to national average")+
theme(plot.title=element_text(hjust=0.5, lineheight=1.2))+
theme(legend.position = "none") +
scale_color_manual(name = "Years",
values = c('#ffffd9','#800026','#bd0026','#e31a1c', '#fc4e2a','#fd8d3c','#feb24c','#fed976','#ffeda0','#ffffcc','#ffffd9','#ffffd9'))
ggsave("betaconv2.png")
Plot conditional beta-convergence graph and map
betaconv_ols[["regdata"]] %>%
ggplot(aes(x = ln_initial, y = growth, col = substr(vnm_mgwr$sat_hlife_1218, 1,1))) +
geom_point() +
geom_smooth() +
theme_minimal() +
labs(x = "Log of mean night-time light intensity (2012)",
y = "Log of mean night-time light intensity growth (2012-2018)")+
scale_x_continuous(limits = c(-5, 5)) +
scale_y_continuous(limits = c(0, 90)) +
labs(title="Conditional beta-convergence to national average")+
theme(plot.title=element_text(hjust=0.5, lineheight=1.2))+
theme(legend.position = "none") +
scale_color_manual(name = "Years",
values = c('#ffffd9','#800026','#bd0026','#e31a1c', '#fc4e2a','#fd8d3c','#feb24c','#fed976','#ffeda0','#ffffcc','#ffffd9','#ffffd9')) +
geom_label_repel(aes(label = vnm_df$province))
ggsave("betaconv.png")
ggplot() +
geom_sf(data = vnmi, aes(fill = key), lwd = 0) +
geom_sf(data = vnm1, aes(fill = NA, alpha=0), show.legend = FALSE, lwd=0.05) +
geom_sf(data = vnmi, aes(fill = NA, alpha=0), show.legend = FALSE, lwd = 0.8, color="black") +
theme_set(theme_bw()) +
labs(title="Key Economic Zones")+
theme(plot.title=element_text(hjust=0.5, lineheight=1.2)) +
scale_fill_manual(name = " ",
values = c('#ffeda0','#feb24c','#e31a1c'))
ggsave("keyeconomiczones2.png")
ggplot() +
geom_sf(data = vnm_mgwr_sigconv, aes(fill = substr(sat_hlife_1218, 1,1)), lwd = 0) +
geom_sf(data = vnm1, aes(fill = NA, alpha=0), show.legend = FALSE, lwd = 0.2) +
geom_sf(data = vnmi, aes(fill = NA, alpha=0), show.legend = FALSE, lwd = 0.8, color="black") +
theme_set(theme_bw()) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank())+
labs(title="Half-life")+
theme(plot.title=element_text(hjust=0.5, lineheight=1.2)) +
scale_fill_manual(name = "Years",
values = c('#800026','#bd0026','#e31a1c', '#fc4e2a','#fd8d3c','#feb24c','#fed976','#ffeda0','#ffffcc','#ffffd9'))
ggsave("halflife.png")
Physical elevation as a control
ggplot() +
geom_sf(data = vnm_mgwr, aes(fill = elevation), lwd = 0) +
scale_fill_gradientn(name = "Metres
", colours = terrain.colors(7))+
labs(title="Physical elevation")+
theme(plot.title=element_text(hjust=0.5, lineheight=1.2))
ggsave("elevation.png")
Population density as a control
ggplot() +
geom_sf(data = vnm_mgwr, aes(fill = pop20), lwd = 0) +
scale_fill_gradient(name = "People/km²", low = "lightblue", high = "darkblue")+
labs(title="Population density")+
theme(plot.title=element_text(hjust=0.5, lineheight=1.2))
ggsave("Pop.png")
MGWR session summary
================================================================================
MGWR Version: 2.2.1
Released on: 03/20/2020
Source code is available at: https://github.com/pysal/mgwr
Development Team: Ziqi Li, Taylor Oshan, Stewart Fotheringham, Wei Kang,
Levi Wolf, Hanchen Yu, Mehak Sachdeva, and Sarah Bardin
Spatial Analysis Research Center (SPARC)
Arizona State University, Tempe, USA
================================================================================
Model type: Gaussian
Number of observations: 671
Number of covariates: 4
Dependent variable: sat_log_gr
Variable standardization: On
Total runtime: 0:01:43
Global Regression Results
--------------------------------------------------------------------------------
Residual sum of squares: 391.988
Log-likelihood: -771.764
AIC: 1551.528
AICc: 1553.618
R2: 0.416
Adj. R2: 0.413
Variable Est. SE t(Est/SE) p-value
------------------------------------ ---------- ---------- ---------- ----------
Intercept 0.000 0.030 0.000 1.000
sat_log_12 -0.662 0.037 -17.682 0.000 #higher convergence rate after adding controlled variables
pop20 0.081 0.036 2.243 0.025 #small positive but not high significant correlation
elevation -0.474 0.031 -15.125 0.000 #negative correlation
Multiscale Geographically Weighted Regression (MGWR) Results
--------------------------------------------------------------------------------
Coordinates type: Projected
Spatial kernel: Adaptive bisquare
Criterion for optimal bandwidth: AICc
Score of change (SOC) type: Smoothing f
Termination criterion for MGWR: 1.0e-05
Number of iterations used: 200
MGWR bandwidths
--------------------------------------------------------------------------------
Variable Bandwidth ENP_j Adj t-val(95%) DoD_j
Intercept 43.000 30.882 3.165 0.473
sat_log_12 43.000 32.771 3.183 0.464
pop20 670.000 1.369 2.095 0.952
elevation 43.000 28.588 3.142 0.485
Diagnostic Information
--------------------------------------------------------------------------------
Residual sum of squares: 163.195
Effective number of parameters (trace(S)): 93.610
Degree of freedom (n - trace(S)): 577.390
Sigma estimate: 0.532
Log-likelihood: -477.770
Degree of Dependency (DoD): 0.516
AIC: 1144.760
AICc: 1176.201
BIC: 1571.334
R2: 0.757
Adj. R2: 0.717
Summary Statistics For MGWR Parameter Estimates
--------------------------------------------------------------------------------
Variable Mean STD Min Median Max
-------------------- ---------- ---------- ---------- ---------- ----------
Intercept -2.054 2.722 -7.467 -0.627 0.255
sat_log_12 -0.523 0.454 -2.106 -0.457 0.370
pop20 0.026 0.008 0.017 0.023 0.037
elevation -3.259 4.647 -11.985 -0.659 0.015
================================================================================
Acknowledgement:
We acknowledge the support of the National Science Foundation under Award 1758786
from the Geography and Spatial Sciences Program to A. S. Fotheringham which
enabled this software to be written and made freely available.
================================================================================