Introduction to GWR and MGWR
Setup
# Load libraries
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
from pylab import rcParams
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 72
import seaborn as sns
sns.set_style("darkgrid")
sns.set_context(context="paper", font_scale=1.5, rc=None)
sns.set(font="serif")
import plotly.express as px
import plotly.graph_objects as go
import geopandas as gpd
import libpysal as ps
from libpysal import weights
from libpysal.weights import Queen
import esda
from esda.moran import Moran, Moran_Local
import splot
from splot.esda import moran_scatterplot, plot_moran, lisa_cluster, plot_local_autocorrelation
from splot.libpysal import plot_spatial_weights
from giddy.directional import Rose
import statsmodels.api as sm
import statsmodels.formula.api as smf
from stargazer.stargazer import Stargazer, LineLocation
from spreg import OLS
from spreg import MoranRes
from spreg import ML_Lag
from spreg import ML_Error
from mgwr.gwr import GWR, MGWR
from mgwr.sel_bw import Sel_BW
from mgwr.utils import shift_colormap, truncate_colormap
import warnings
warnings.filterwarnings('ignore')
import time
Import data
# Load Georgia dataset
gdf = gpd.read_file(ps.examples.get_path('G_utm.shp'))
gdf.columns
fig, ax = plt.subplots(figsize=(6, 6))
gdf.plot(color = 'white', edgecolor = 'black', ax = ax)
gdf.centroid.plot(ax=ax)
ax.set_title('Map of Georgia and centroids', fontsize=12)
ax.axis("off")
#plt.savefig('myMap.png',dpi=150, bbox_inches='tight')
plt.show()
fig, ax = plt.subplots(figsize=(6, 6))
gdf.plot(column='PctBach', cmap = 'coolwarm', linewidth=0.01, scheme = 'FisherJenks', k=5, legend=True, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=ax)
#gdf.plot(column='PctBach', cmap = 'coolwarm', linewidth=0.01, scheme = 'box_plot', k=5, legend=True, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=ax)
ax.set_title('Population with BA degree (%)', fontsize=12)
ax.axis("off")
#plt.savefig('myMap.png',dpi=150, bbox_inches='tight')
plt.show()
Select variables (y, X)
y = gdf['PctBach'].values.reshape((-1,1)) # reshape is needed to have column array
y.shape
X = gdf[['PctFB', 'PctBlack', 'PctRural']].values
X.shape
Define coordinates (u,v)
u = gdf['X']
v = gdf['Y']
coords = list(zip(u,v))
Estimate GWR model
Select GWR bandwidth
%%time
gwr_selector = Sel_BW(coords, y, X)
gwr_bw = gwr_selector.search()
print('GWR bandwidth =', gwr_bw)
Fit GWR model
gwr_results = GWR(coords, y, X, gwr_bw).fit()
gwr_results.summary()
# As reference, here is the (average) R2, AIC, and AICc
print('Mean R2 =', gwr_results.R2)
print('AIC =', gwr_results.aic)
print('AICc =', gwr_results.aicc)
# Add R2 to GeoDataframe
gdf['gwr_R2'] = gwr_results.localR2
fig, ax = plt.subplots(figsize=(6, 6))
gdf.plot(column='gwr_R2', cmap = 'coolwarm', linewidth=0.01, scheme = 'FisherJenks', k=5, legend=True, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=ax)
ax.set_title('Local R2', fontsize=12)
ax.axis("off")
#plt.savefig('myMap.png',dpi=150, bbox_inches='tight')
plt.show()
Add coefficients to data frame
gdf['gwr_intercept'] = gwr_results.params[:,0]
gdf['gwr_fb'] = gwr_results.params[:,1]
gdf['gwr_aa'] = gwr_results.params[:,2]
gdf['gwr_rural'] = gwr_results.params[:,3]
Filter/correct t-stats
# Filter t-values: standard alpha = 0.05
gwr_filtered_t = gwr_results.filter_tvals(alpha = 0.05)
pd.DataFrame(gwr_filtered_t)
# Filter t-values: corrected alpha due to multiple testing
gwr_filtered_tc = gwr_results.filter_tvals()
pd.DataFrame(gwr_filtered_tc)
Map coefficients
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18,6))
gdf.plot(column='gwr_fb', cmap = 'coolwarm', linewidth=0.01, scheme = 'FisherJenks', k=5, legend=True, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=axes[0])
gdf.plot(column='gwr_fb', cmap = 'coolwarm', linewidth=0.05, scheme = 'FisherJenks', k=5, legend=False, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=axes[1])
gdf[gwr_filtered_t[:,1] == 0].plot(color='white', linewidth=0.05, edgecolor='black', ax=axes[1])
gdf.plot(column='gwr_fb', cmap = 'coolwarm', linewidth=0.05, scheme = 'FisherJenks', k=5, legend=False, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=axes[2])
gdf[gwr_filtered_tc[:,1] == 0].plot(color='white', linewidth=0.05, edgecolor='black', ax=axes[2])
plt.tight_layout()
axes[0].axis("off")
axes[1].axis("off")
axes[2].axis("off")
axes[0].set_title('(a) GWR: Foreign born (BW: ' + str(gwr_bw) +'), all coeffs', fontsize=12)
axes[1].set_title('(b) GWR: Foreign born (BW: ' + str(gwr_bw) +'), significant coeffs', fontsize=12)
axes[2].set_title('(c) GWR: Foreign born (BW: ' + str(gwr_bw) +'), significant coeffs and corr. p-values', fontsize=12)
plt.show()
Test spatial stationarity
%%time
# Monte Carlo test of spatial variability: 500 iterations
gwr_p_values_stationarity = gwr_results.spatial_variability(gwr_selector, 500)
gwr_p_values_stationarity
# Note: The first p-value is for the intercept
Test local multicollinearity
LCC, VIF, CN, VDP = gwr_results.local_collinearity()
pd.DataFrame(VIF)
pd.DataFrame(VIF).describe().round(2)
As the max VIF for each variable is less than 10, there are no signs of local multicollinearity
pd.DataFrame(CN)
gdf['gwr_CN'] = CN
fig, ax = plt.subplots(figsize=(6, 6))
gdf.plot(column='gwr_CN', cmap = 'coolwarm', linewidth=0.01, scheme = 'FisherJenks', k=5, legend=True, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=ax)
ax.set_title('Local multicollinearity (CN > 30)?', fontsize=12)
ax.axis("off")
#plt.savefig('myMap.png',dpi=150, bbox_inches='tight')
plt.show()
Estimate MGWR model
Standardize variables
Zy = (y - y.mean(axis=0)) / y.std(axis=0)
ZX = (X - X.mean(axis=0)) / X.std(axis=0)
Select MGWR bandwidths
mgwr_selector = Sel_BW(coords, Zy, ZX, multi=True)
mgwr_bw = mgwr_selector.search()
mgwr_bw
Fit MGWR model
mgwr_results = MGWR(coords, Zy, ZX, mgwr_selector).fit()
mgwr_results.summary()
Show bandwidth intervals
mgwr_bw_ci = mgwr_results.get_bws_intervals(mgwr_selector)
print(mgwr_bw_ci)
# Local R2 are NOT yet implemented in MGWR
#mgwr_results.localR2[0:5]
Add coefficients to data frame
#Add MGWR parameters to GeoDataframe
gdf['mgwr_intercept'] = mgwr_results.params[:,0]
gdf['mgwr_fb'] = mgwr_results.params[:,1]
gdf['mgwr_aa'] = mgwr_results.params[:,2]
gdf['mgwr_rural'] = mgwr_results.params[:,3]
Filter/correct t-stats
# Filter t-values: standard alpha = 0.05
mgwr_filtered_t = mgwr_results.filter_tvals(alpha = 0.05)
# Filter t-values: corrected alpha due to multiple testing
mgwr_filtered_tc = mgwr_results.filter_tvals()
Map coefficients
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18,6))
gdf.plot(column='mgwr_fb', cmap = 'coolwarm', linewidth=0.01, scheme = 'FisherJenks', k=5, legend=True, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=axes[0])
gdf.plot(column='mgwr_fb', cmap = 'coolwarm', linewidth=0.05, scheme = 'FisherJenks', k=5, legend=False, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=axes[1])
gdf[mgwr_filtered_t[:,1] == 0].plot(color='white', linewidth=0.05, edgecolor='black', ax=axes[1])
gdf.plot(column='mgwr_fb', cmap = 'coolwarm', linewidth=0.05, scheme = 'FisherJenks', k=5, legend=False, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=axes[2])
gdf[mgwr_filtered_tc[:,1] == 0].plot(color='white', linewidth=0.05, edgecolor='black', ax=axes[2])
plt.tight_layout()
axes[0].axis("off")
axes[1].axis("off")
axes[2].axis("off")
axes[0].set_title('(a) MGWR: Foreign born (BW: ' + str(mgwr_bw[1]) +'), all coeffs', fontsize=12)
axes[1].set_title('(b) MGWR: Foreign born (BW: ' + str(mgwr_bw[1]) +'), significant coeffs', fontsize=12)
axes[2].set_title('(c) MGWR: Foreign born (BW: ' + str(mgwr_bw[1]) +'), significant coeffs and corr. p-values', fontsize=12)
plt.show()
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18,6))
gdf.plot(column='mgwr_intercept', cmap = 'coolwarm', linewidth=0.01, scheme = 'FisherJenks', k=5, legend=True, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=axes[0])
gdf.plot(column='mgwr_intercept', cmap = 'coolwarm', linewidth=0.05, scheme = 'FisherJenks', k=5, legend=False, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=axes[1])
gdf[mgwr_filtered_t[:,0] == 0].plot(color='white', linewidth=0.05, edgecolor='black', ax=axes[1])
gdf.plot(column='mgwr_intercept', cmap = 'coolwarm', linewidth=0.05, scheme = 'FisherJenks', k=5, legend=False, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=axes[2])
gdf[mgwr_filtered_tc[:,0] == 0].plot(color='white', linewidth=0.05, edgecolor='black', ax=axes[2])
plt.tight_layout()
axes[0].axis("off")
axes[1].axis("off")
axes[2].axis("off")
axes[0].set_title('(a) MGWR: Intercept (BW: ' + str(mgwr_bw[0]) +'), all coeffs', fontsize=12)
axes[1].set_title('(b) MGWR: Intercept (BW: ' + str(mgwr_bw[0]) +'), significant coeffs', fontsize=12)
axes[2].set_title('(c) MGWR: Intercept (BW: ' + str(mgwr_bw[0]) +'), significant coeffs and corr. p-values', fontsize=12)
plt.show()
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18,6))
gdf.plot(column='mgwr_aa', cmap = 'coolwarm', linewidth=0.01, scheme = 'FisherJenks', k=5, legend=True, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=axes[0])
gdf.plot(column='mgwr_aa', cmap = 'coolwarm', linewidth=0.05, scheme = 'FisherJenks', k=5, legend=False, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=axes[1])
gdf[mgwr_filtered_t[:,2] == 0].plot(color='white', linewidth=0.05, edgecolor='black', ax=axes[1])
gdf.plot(column='mgwr_aa', cmap = 'coolwarm', linewidth=0.05, scheme = 'FisherJenks', k=5, legend=False, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=axes[2])
gdf[mgwr_filtered_tc[:,2] == 0].plot(color='white', linewidth=0.05, edgecolor='black', ax=axes[2])
plt.tight_layout()
axes[0].axis("off")
axes[1].axis("off")
axes[2].axis("off")
axes[0].set_title('(a) MGWR: African Ame. (BW: ' + str(mgwr_bw[2]) +'), all coeffs', fontsize=12)
axes[1].set_title('(b) MGWR: African Ame. (BW: ' + str(mgwr_bw[2]) +'), significant coeffs', fontsize=12)
axes[2].set_title('(c) MGWR: African Ame. (BW: ' + str(mgwr_bw[2]) +'), significant coeffs and corr. p-values', fontsize=12)
plt.show()
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18,6))
gdf.plot(column='mgwr_rural', cmap = 'coolwarm', linewidth=0.01, scheme = 'FisherJenks', k=5, legend=True, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=axes[0])
gdf.plot(column='mgwr_rural', cmap = 'coolwarm', linewidth=0.05, scheme = 'FisherJenks', k=5, legend=False, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=axes[1])
gdf[mgwr_filtered_t[:,3] == 0].plot(color='white', linewidth=0.05, edgecolor='black', ax=axes[1])
gdf.plot(column='mgwr_rural', cmap = 'coolwarm', linewidth=0.05, scheme = 'FisherJenks', k=5, legend=False, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=axes[2])
gdf[mgwr_filtered_tc[:,3] == 0].plot(color='white', linewidth=0.05, edgecolor='black', ax=axes[2])
plt.tight_layout()
axes[0].axis("off")
axes[1].axis("off")
axes[2].axis("off")
axes[0].set_title('(a) MGWR: Rural (BW: ' + str(mgwr_bw[3]) +'), all coeffs', fontsize=12)
axes[1].set_title('(b) MGWR: Rural (BW: ' + str(mgwr_bw[3]) +'), significant coeffs', fontsize=12)
axes[2].set_title('(c) MGWR: Rural (BW: ' + str(mgwr_bw[3]) +'), significant coeffs and corr. p-values', fontsize=12)
plt.show()
Test spatial stationarity
%%time
# Monte Carlo test of spatial variability: 10 iterations
mgwr_p_values_stationarity = mgwr_results.spatial_variability(mgwr_selector, 10)
mgwr_p_values_stationarity
# Note: The first p-value is for the intercept
Test local multicollinearity
mgwrCN, mgwrVDP = mgwr_results.local_collinearity()
gdf['mgwr_CN'] = mgwrCN
fig, ax = plt.subplots(figsize=(6, 6))
gdf.plot(column='mgwr_CN', cmap = 'coolwarm', linewidth=0.01, scheme = 'FisherJenks', k=5, legend=True, legend_kwds={'bbox_to_anchor':(1.10, 0.96)}, ax=ax)
ax.set_title('Local multicollinearity (CN > 30)?', fontsize=12)
ax.axis("off")
#plt.savefig('myMap.png',dpi=150, bbox_inches='tight')
plt.show()