A guide to perform Bayesian Model Averaging and Geographically Weighted Regression using BMS and MGWR application
Environment: quarcslab/deepnote-r-spatial
Load libraries
#devtools::install_version("BMS", version = "0.3.4", repos = "http://cran.us.r-project.org")
library(sf)
library(tidyverse)
library(ggplot2)
library(RColorBrewer)
library(ggrepel)
library(skimr)
library(imager)
library(spdep)
library(ggrepel)
library(dplyr)
library(BMS)
Load shapefiles and datasets
phl_adm1 <- st_read("phl_adm1.gpkg") #for correlation between ecnomic output vs energy consumption
phl_adm2 <- st_read("phl_adm2.gpkg") #for borders
phl_adm3 <- st_read("phl_adm3.gpkg") #for energy consumption vs environmental and climatic determinants
phl_bma = read_csv("phl_bma.csv") #for running bma
phl_adm3_df <- st_drop_geometry(phl_adm3)
skim(phl_adm3_df)
1. Correlate GRP with Lights using OLS
lm <- lm(phl_adm1$sat2012 ~ phl_adm1$gdp2012)
summary(lm)
theme_update(plot.title = element_text(hjust = 0.5))
ggplot(phl_adm1, aes(x=gdp2012, y=sat2012)) +
geom_point() +
geom_smooth(method=lm, se=FALSE) +
geom_label(aes(label = region)) +
labs(title = "Year 2012 (R2: 0.79)", x ="Night-time lights intensity (radiance)", y = "GRP in current prices (PHP)") +
theme_set(theme_bw())
ggsave("sat-grp12.png")
lm20 <- lm(phl_adm1$sat2020 ~ phl_adm1$gdp2020)
summary(lm20)
theme_update(plot.title = element_text(hjust = 0.5))
ggplot(phl_adm1, aes(x=gdp2020, y=sat2020)) +
geom_point() +
geom_smooth(method=lm, se=FALSE) +
geom_label(aes(label = region)) +
labs(title = "Year 2020 (R2: 0.83)", x ="Night-time lights intensity (radiance)", y = "GRP in current prices (PHP)") +
theme_set(theme_bw())
ggsave("sat-grp20.png")
2. Explore economic activities hotspots using ESDA using GeoDa
3. Feature selection using BMA
phl_bms = bms(phl_bma, burn =100000, iter=200000, g="BRIC", mprior="uniform", mcmc="bd", nmodel=2000)
image(phl_bms)
summary(phl_bms)
4. Run GWR for the highest probability covariates/variables
im<-load.image("gwr_spec.png")
plot(im)
#Load MGWR session results
phl_gwr <- read_csv("phl_gwr.csv")
#select relevant variables
var <- c("gqid", "x_coor", "y_coor", "beta_land", "beta_pop", "beta_ele", "beta_solar", "beta_ozone", "beta_coast", "p_land", "p_pop", "p_ele", "p_solar", "p_ozone", "p_coast")
phl_gwr <- phl_gwr%>% select(var)
##coefficient is negative, meaning negative correlation
phl_gwr <- phl_gwr %>% mutate(solar_label = ifelse(beta_solar < 0, "Negative", "Positive"))
phl_gwr <- phl_gwr %>% mutate(elevation_label = ifelse(beta_ele < 0, "Negative", "Positive"))
phl_gwr <- phl_gwr %>% mutate(ozone_label = ifelse(beta_ozone < 0, "Negative", "Positive"))
phl_gwr <- phl_gwr %>% mutate(landt_label = ifelse(beta_land < 0, "Negative", "Positive"))
phl_gwr <- phl_gwr %>% mutate(coast_label = ifelse(beta_coast < 0, "Negative", "Positive"))
phl_gwr <- phl_gwr %>% mutate(pop_label = ifelse(beta_pop < 0, "Negative", "Positive"))
##0.01 significance threshold
phl_gwr <- phl_gwr %>% mutate(solar_p_label = ifelse(p_solar <= 0.01, "Significant", "Not Significant"))
phl_gwr <- phl_gwr %>% mutate(elevation_p_label = ifelse(p_ele <= 0.01, "Significant", "Not Significant"))
phl_gwr <- phl_gwr %>% mutate(ozone_p_label = ifelse(p_ozone <= 0.01, "Significant", "Not Significant"))
phl_gwr <- phl_gwr %>% mutate(landt_p_label = ifelse(p_land <= 0.01, "Significant", "Not Significant"))
phl_gwr <- phl_gwr %>% mutate(coast_p_label = ifelse(p_coast <= 0.01, "Significant", "Not Significant"))
phl_gwr <- phl_gwr %>% mutate(pop_p_label = ifelse(p_pop <= 0.01, "Significant", "Not Significant"))
##significant positive/negative regions
phl_gwr <- phl_gwr %>% mutate(sig_solar = ifelse(solar_p_label == "Significant" & solar_label == "Positive", "Positive",
ifelse(solar_p_label == "Significant" & solar_label == "Negative", "Negative", "Not Significant")))
phl_gwr <- phl_gwr %>% mutate(sig_elevation = ifelse(elevation_p_label == "Significant" & elevation_label == "Positive", "Positive",
ifelse(elevation_p_label == "Significant" & elevation_label == "Negative", "Negative", "Not Significant")))
phl_gwr <- phl_gwr %>% mutate(sig_ozone= ifelse(ozone_p_label == "Significant" & ozone_label == "Positive", "Positive",
ifelse(ozone_p_label == "Significant" & ozone_label == "Negative", "Negative", "Not Significant")))
phl_gwr <- phl_gwr %>% mutate(sig_landt = ifelse(landt_p_label == "Significant" & landt_label == "Positive", "Positive",
ifelse(landt_p_label == "Significant" & landt_label == "Negative", "Negative", "Not Significant")))
phl_gwr <- phl_gwr %>% mutate(sig_coast = ifelse(coast_p_label == "Significant" & coast_label == "Positive", "Positive",
ifelse(coast_p_label == "Significant" & coast_label == "Negative", "Negative", "Not Significant")))
phl_gwr <- phl_gwr %>% mutate(sig_pop = ifelse(pop_p_label == "Significant" & pop_label == "Positive", "Positive",
ifelse(pop_p_label == "Significant" & pop_label == "Negative", "Negative", "Not Significant")))
#Merge, save csv and display
phl_gwr <- merge(phl_adm3, phl_gwr <- phl_gwr , by.x=c("gqid"), by.y=c("gqid"), all.x = TRUE)
phl_gwr
ggplot() +
geom_sf(data = phl_gwr, aes(fill = sig_elevation), lwd = 0) +
geom_sf(data = phl_adm1, aes(fill = NA, alpha=0), show.legend = FALSE) +
scale_fill_manual(name = "Correlation",
values = c('#CE1126', '#FFFFFF', '#0038A8')) +
theme_set(theme_bw()) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank()) +
labs(title="Physical Elevation (metres)") +
theme(plot.title=element_text(hjust=0.5, lineheight=1.2))
ggsave("elevation.png")
ggplot() +
geom_sf(data = phl_gwr, aes(fill = sig_ozone), lwd = 0) +
geom_sf(data = phl_adm1, aes(fill = NA, alpha=0), show.legend = FALSE) +
scale_fill_manual(name = "Correlation",
values = c('#CE1126', '#FFFFFF', '#0038A8')) +
theme_set(theme_bw()) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank()) +
labs(title="Ozone concentration (ug/m3)") +
theme(plot.title=element_text(hjust=0.5, lineheight=1.2))
ggsave("ozone.png")
ggplot() +
geom_sf(data = phl_gwr, aes(fill = sig_solar), lwd = 0) +
geom_sf(data = phl_adm1, aes(fill = NA, alpha=0), show.legend = FALSE) +
scale_fill_manual(name = "Correlation",
values = c('#CE1126', '#FFFFFF', '#0038A8')) +
theme_set(theme_bw()) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank()) +
labs(title="Photovoltaic power potential (kWh/kWp)") +
theme(plot.title=element_text(hjust=0.5, lineheight=1.2))
ggsave("solar.png")
ggplot() +
geom_sf(data = phl_gwr, aes(fill = sig_pop), lwd = 0) +
geom_sf(data = phl_adm1, aes(fill = NA, alpha=0), show.legend = FALSE) +
scale_fill_manual(name = "Correlation",
values = c('#FFFFFF', '#0038A8')) +
theme_set(theme_bw()) +
theme(axis.title.x = element_blank(),
axis.title.y = element_blank()) +
labs(title="Population density (person/km2)") +
theme(plot.title=element_text(hjust=0.5, lineheight=1.2))
ggsave("pop.png")
================================================================================
MGWR Version: 2.2.1
Released on: 03/20/2020
Source code is available at: https://github.com/pysal/mgwr
Development Team: Ziqi Li, Taylor Oshan, Stewart Fotheringham, Wei Kang,
Levi Wolf, Hanchen Yu, Mehak Sachdeva, and Sarah Bardin
Spatial Analysis Research Center (SPARC)
Arizona State University, Tempe, USA
================================================================================
Model type: Gaussian
Number of observations: 1647
Number of covariates: 7
Dependent variable: Night-time lights
Variable standardization: On
Total runtime: 1:27:39
Global Regression Results
--------------------------------------------------------------------------------
Residual sum of squares: 346.844
Log-likelihood: -1054.113
AIC: 2122.226
AICc: 2124.314
R2: 0.789
Adj. R2: 0.789
Variable Est. SE t(Est/SE) p-value
------------------------------------ ---------- ---------- ---------- ----------
Intercept 0.000 0.011 0.000 1.000
Land surface temperature 0.025 0.013 1.924 0.054
Population density 0.885 0.012 75.865 0.000
Physical elevation -0.007 0.013 -0.540 0.589
Photovoltaic power potential -0.003 0.014 -0.231 0.817
Ozone concentration -0.029 0.012 -2.461 0.014
Distance to coast 0.003 0.013 0.203 0.839
Geographically Weighted Regression (GWR) Results
--------------------------------------------------------------------------------
Coordinates type: Projected
Spatial kernel: Adaptive bisquare
Criterion for optimal bandwidth: AICc
Bandwidth used: 57.000
Bandwidth confidence interval (95%): (57.0, 57.0)
Diagnostic Information
--------------------------------------------------------------------------------
Residual sum of squares: 123.996
Effective number of parameters (trace(S)): 362.849
Degree of freedom (n - trace(S)): 1284.151
Sigma estimate: 0.311
Log-likelihood: -207.039
Degree of Dependency (DoD): 0.467
AIC: 1141.775
AICc: 1348.849
BIC: 3109.001
R2: 0.925
Adj. R2: 0.903
Adj. alpha (95%): 0.001
Adj. critical t value (95%): 3.307
Monte Carlo Test for Spatial Variability
--------------------------------------------------------------------------------
Variable p-value
Intercept 0.000
Land surface temperature 0.779
Population density 0.725
Physical elevation 0.000
Photovoltaic power potential 0.000
Ozone concentration 0.000
Distance to coast 0.000
Summary Statistics For GWR Parameter Estimates
--------------------------------------------------------------------------------
Variable Mean STD Min Median Max
-------------------- ---------- ---------- ---------- ---------- ----------
Intercept 0.188 1.614 -3.337 -0.049 26.031
Land surface temperature 0.074 0.464 -3.555 0.015 4.176
Population density 0.736 0.418 -0.191 0.737 2.555
Physical elevation -0.040 0.585 -11.458 0.009 1.354
Photovoltaic power potential -0.072 0.421 -4.368 -0.004 0.886
Ozone concentration -0.274 1.856 -31.683 0.004 3.379
Distance to coast -0.081 0.572 -4.825 -0.001 1.078
================================================================================
Acknowledgement:
We acknowledge the support of the National Science Foundation under Award 1758786
from the Geography and Spatial Sciences Program to A. S. Fotheringham which
enabled this software to be written and made freely available.
================================================================================