Setup
#devtools::install_github("amvallone/estdaR")
#install.packages("ggrepel")
suppressWarnings(suppressMessages({
library(estdaR)
library(sf)
library(spdep)
library(rgeos)
library(tidyverse)
library(ggrepel)
library(ExPanDaR)
library(plotly)
}))
options(repr.plot.width = 6, repr.plot.height = 4)
Import data
gdppc <- st_read("https://raw.githubusercontent.com/quarcs-lab/data-open/master/mexico/mexico.geojson")
Reading layer `mexico' from data source
`https://raw.githubusercontent.com/quarcs-lab/data-open/master/mexico/mexico.geojson'
using driver `GeoJSON'
Simple feature collection with 32 features and 17 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -117.1224 ymin: 14.55055 xmax: -86.735 ymax: 32.72081
Geodetic CRS: WGS 84
glimpse(gdppc)
Rows: 32
Columns: 18
$ POLY_ID <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
$ AREA <dbl> 72527513755, 72259877689, 27319568586, 79610082850, 5467029…
$ CODE <chr> "MX02", "MX03", "MX18", "MX14", "MX01", "MX11", "MX22", "MX…
$ NAME <chr> "Baja California Norte", "Baja California Sur", "Nayarit", …
$ PCGDP1940 <dbl> 22361, 9573, 4836, 5309, 10384, 4359, 11016, 4414, 3327, 34…
$ PCGDP1950 <dbl> 20977, 16013, 7515, 8232, 6234, 5686, 5560, 5194, 5272, 497…
$ PCGDP1960 <dbl> 17865, 16707, 7621, 9953, 8714, 8209, 7110, 6399, 5244, 905…
$ PCGDP1970 <dbl> 25321, 24384, 10880, 16288, 16078, 11635, 14073, 7767, 8109…
$ PCGDP1980 <dbl> 29283, 29038, 13354, 20659, 21022, 13864, 20088, 12391, 112…
$ PCGDP1990 <dbl> 26839, 25842, 12757, 20133, 20787, 13607, 22441, 13091, 109…
$ PCGDP2000 <dbl> 29855, 26103, 11478, 21610, 27782, 15585, 26149, 12348, 118…
$ HANSON03 <dbl> 1, 2, 2, 3, 2, 3, 3, 3, 3, 4, 4, 3, 3, 6, 6, 3, 6, 3, 5, 5,…
$ HANSON98 <dbl> 1, 2, 2, 3, 2, 3, 3, 3, 3, 4, 4, 3, 3, 5, 5, 3, 5, 3, 5, 5,…
$ ESQUIVEL99 <dbl> 5, 6, 6, 6, 3, 3, 3, 2, 7, 1, 1, 6, 2, 4, 4, 2, 4, 2, 7, 7,…
$ INEGI <dbl> 1, 1, 4, 4, 4, 4, 3, 3, 4, 3, 3, 4, 3, 5, 5, 3, 5, 3, 5, 5,…
$ INEGI2 <dbl> 1, 1, 4, 4, 4, 4, 4, 3, 4, 3, 3, 4, 3, 5, 5, 5, 5, 3, 5, 5,…
$ MAXP <dbl> 2, 2, 1, 4, 4, 5, 5, 3, 3, 5, 5, 4, 5, 1, 1, 3, 1, 3, 3, 3,…
$ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((-113.1397 2..., MULTIPOLYGON (…
y1940 <- gdppc$PCGDP1940/mean(gdppc$PCGDP1940)
y1950 <- gdppc$PCGDP1950/mean(gdppc$PCGDP1950)
y1960 <- gdppc$PCGDP1960/mean(gdppc$PCGDP1960)
y1970 <- gdppc$PCGDP1970/mean(gdppc$PCGDP1970)
y1980 <- gdppc$PCGDP1980/mean(gdppc$PCGDP1980)
y1990 <- gdppc$PCGDP1990/mean(gdppc$PCGDP1990)
regionName <- gdppc$NAME
Weights matrix
Wqueen <- nb2listw(poly2nb(gdppc))
Spatial lags
Wy1940 <- lag.listw(Wqueen, y1940)
Wy1950 <- lag.listw(Wqueen, y1950)
Wy1960 <- lag.listw(Wqueen, y1960)
Wy1970 <- lag.listw(Wqueen, y1970)
Wy1980 <- lag.listw(Wqueen, y1980)
Wy1990 <- lag.listw(Wqueen, y1990)
RegimeMaxP <- gdppc$MAXP
Moran scatter plot
dy <- y1950 - y1940
dW <- Wy1950 - Wy1940
NofRows <- nrow(gdppc)
y0 <- rep(0, NofRows)
W0 <- rep(0, NofRows)
df <- as.data.frame(cbind(y1940, y1950, dy, Wy1940, Wy1950, dW, y0, W0))
df <- cbind(regionName, df)
head(df)
ggplot(df, aes(x = y1940, y = Wy1940)) +
geom_point(color = "dodgerblue4") +
geom_text_repel(aes(x = y1940, y = Wy1940, label = regionName), size = 2.5) +
geom_hline(yintercept=mean(Wy1940), lty=2, color = "dodgerblue4") +
geom_vline(xintercept=mean(y1940), lty=2, color = "dodgerblue4") +
labs(title="Moran scatter plot in 1940", y="Relative income of neighboring regions", x= "Relative income of focal region")
ggplot(df, aes(x = y1950, y = Wy1950)) +
geom_point(color = "dodgerblue4") +
geom_text_repel(aes(x = y1950, y = Wy1950, label = regionName), size = 2.5) +
geom_hline(yintercept=mean(Wy1950), lty=2, color = "dodgerblue4") +
geom_vline(xintercept=mean(y1950), lty=2, color = "dodgerblue4") +
labs(title="Moran scatter plot in 1950", y="Relative income of neighboring regions", x= "Relative income of focal region")
Warning message:
“ggrepel: 4 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
Directional Moran
ggplot(df) +
geom_segment(aes(x = y1940, y = Wy1940, xend = y1950, yend = Wy1950, color = y1940), arrow = arrow(length = unit(0.1, "cm"))) +
scale_color_gradient(low='#2c7bb6', high='#d7191c', name='Initial \nIncome') +
geom_text_repel(aes(x = y1950, y = Wy1950, label = regionName), size = 2.5) +
geom_hline(yintercept=1, lty=2, color = "dodgerblue4") +
geom_vline(xintercept=1, lty=2, color = "dodgerblue4") +
labs(title="Directional Moran scatter plot 1940-1950", y="Relative income of neighboring regions", x= "Relative income of focal region")
Warning message:
“ggrepel: 10 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
Standardized directional Moran
ggplot(df) +
geom_segment(aes(x = y0, y = W0, xend = dy, yend = dW, color = y1940), arrow = arrow(length = unit(0.1, "cm"))) +
scale_color_gradient(low='#2c7bb6', high='#d7191c', name='Initial \nIncome') +
geom_text_repel(aes(x = dy, y = dW, label = regionName), size = 2.5) +
geom_hline(yintercept=0, lty=2, size = 0.3) +
geom_vline(xintercept=0, lty=2, size = 0.3) +
geom_abline(intercept = 0, slope = 1, lty=2, size = 0.3) +
geom_abline(intercept = 0, slope = -1, lty=2, size = 0.3) +
labs(title="Standardized directional Moran scatter plot 1940-1950", y="Change in rel. income of neighboring regions", x= "Change in rel. income of focal region")
Using the estdaR package
directionalLISA1940_1950 <- d.LISA(y1940, y1950, Wqueen, k=4, nsim=999)
directionalLISA1940_1950$lisamap
directionalLISA1940_1950$rose
directionalLISA1940_1950$counts
directionalLISA1940_1950$p.value