Spatial convergence across subnational regions of South America
Slides
from IPython.display import IFrame
IFrame(src='https://project2020e-slides.netlify.app/#1', width=600, height=400)
Setup
# Load libraries
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
plt.style.use('ggplot')
from matplotlib.ticker import FormatStrFormatter
from pylab import rcParams
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 72
import seaborn as sns
#sns.set_style("darkgrid")
#sns.set_context(context="paper", font_scale=1.5, rc=None)
#sns.set(font="serif")
import plotly.express as px
import plotly.graph_objects as go
import geopandas as gpd
import libpysal
from libpysal import weights
from libpysal.weights import Queen
import esda
from esda.moran import Moran, Moran_Local
import splot
from splot.esda import moran_scatterplot, plot_moran, lisa_cluster, plot_local_autocorrelation
from splot.libpysal import plot_spatial_weights
from giddy.directional import Rose
import statsmodels.api as sm
import statsmodels.formula.api as smf
from spreg import OLS
from spreg import MoranRes
from spreg import ML_Lag
from spreg import ML_Error
from mgwr.gwr import GWR, MGWR
from mgwr.sel_bw import Sel_BW
from mgwr.utils import shift_colormap, truncate_colormap
import warnings
warnings.filterwarnings('ignore')
Import data
gdf = gpd.read_file("https://github.com/quarcs-lab/data-quarcs/raw/master/southAmerica151/balanced_hdi_beta_south_merica_withCoors.geojson")
gdf
Explore data
gdf[['ln_gnic90', 'ln_gnic00', 'g_gnic9000', 'ln_hdi90', 'ln_hdi00', 'gr_hdi9000']].describe().round(2)
Beta convergence
px.scatter(
gdf,
x="ln_gnic90",
y="g_gnic9000",
hover_name="region",
#color="country",
trendline="ols",
marginal_x="rug",
marginal_y="rug",
labels={"ln_gnic90": "Log GNIpc in 1990",
"g_gnic9000": "Growth GNIpc 1990-2000"}
)
px.strip(gdf, x = 'ln_gnic90', hover_name= 'region', color= 'country')
Heterogenous beta convergence
px.scatter(gdf,
x="ln_gnic90",
y="g_gnic9000",
hover_name="region",
color="country",
trendline="ols",
marginal_x="rug",
marginal_y="rug",
labels={"ln_gnic90": "Log GNIpc in 1990",
"g_gnic9000": "Growth GNIpc 1990-2000"})
Basic maps
Static
gdf.plot('ln_gnic90', cmap = 'coolwarm', legend = True, scheme = 'BoxPlot', legend_kwds={'bbox_to_anchor':(0.42, 0.50)})
gdf.plot('g_gnic9000', cmap = 'coolwarm', legend = True, scheme = 'BoxPlot', legend_kwds={'bbox_to_anchor':(0.42, 0.50)});
Interactive
#help(gdf.explore)
gdf.explore(
column='ln_gnic90',
tooltip=['country', 'region', 'ln_gnic90', 'g_gnic9000'],
scheme='BoxPlot',
#k=5,
cmap='coolwarm',
legend=True,
tiles='CartoDB positron', # OpenStreetMap, Stamen Terrain, Stamen Toner, Stamen Watercolor, CartoDB positron, CartoDB dark_matter
style_kwds =dict(color="gray", weight=0.5),
legend_kwds=dict(colorbar=False)
)
Spatial weights
W = weights.Queen.from_dataframe(gdf)
#W = weights.Rook.from_dataframe(gdf)
#W = weights.KNN.from_dataframe(gdf, k=4)
#W = weights.KNN.from_dataframe(gdf, k=6)
#W = weights.KNN.from_dataframe(gdf, k=8)
#>>>> Compute inverse distance squared based on maximum nearest neighbor distance between the n observations
#maxMin = weights.min_threshold_dist_from_shapefile("map.shp")
#W = weights.DistanceBand.from_dataframe(gdf, threshold = maxMin, binary=False, alpha=-2)
# >>> Row-standardize W
W.transform = 'r'
# Ref: https://splot.readthedocs.io/en/stable/users/tutorials/weights.html#a-closer-look-at-w
plot_spatial_weights(W, gdf);
W.mean_neighbors
W.min_neighbors
W.max_neighbors
Spatial dependence
gdf["Wln_gnic90"] = weights.lag_spatial(W, gdf['ln_gnic90'])
px.scatter(gdf,
x="ln_gnic90",
y="Wln_gnic90",
hover_name="region",
labels = dict(ln_gnic90 = "Ln GNPpc in focal region",
Wln_gnic90 = "Neighbors' average Ln GNPpc"
),
trendline="ols",
marginal_x="box",
marginal_y="box")
Global dependence
globalMoran = Moran(gdf['ln_gnic90'], W)
print(globalMoran.I)
print(globalMoran.p_sim)
moran_scatterplot(globalMoran, aspect_equal=False, zstandard=False);
Local dependence
localMoran = Moran_Local(gdf['ln_gnic90'], W, permutations = 999, seed=12345)
moran_scatterplot(localMoran, p=0.10, zstandard =False);
lisa_cluster(localMoran, gdf, p=0.10);
Spatial convergence
y = gdf['g_gnic9000'].values
y_name = 'Growth GNIpc'
x = np.array([gdf.ln_gnic90]).T
x_name = 'Log GNIpc in 1990'
OLS
ols = OLS(y = y, x = x, w = W,
name_y=y_name, name_x = [x_name], name_w="Wqueen", name_ds='gdf',
white_test=True, spat_diag=True, moran=True)
print(ols.summary)
SAR
sar = ML_Lag(y=y, x=x, w=W,
name_y=y_name, name_x = [x_name], name_w="Wqueen", name_ds='gdf')
print(sar.summary)
SEM
sem = ML_Error(y=y, x=x, w=W,
name_y=y_name, name_x = [x_name], name_w="Wqueen", name_ds='gdf')
print(sem.summary)
Heterogeneous convergence
GWR data preparation
y = gdf['g_gnic9000'].values.reshape((-1,1))
X = gdf['ln_gnic90'].values.reshape((-1,1))
print(y.shape, X.shape)
u = gdf['COORD_X']
v = gdf['COORD_Y']
coords = list(zip(u,v))
GWR results
gwr_selector = Sel_BW(coords, y, X)
gwr_bw = gwr_selector.search(bw_min=2)
gwr_bw
gwr_results = GWR(coords, y, X, gwr_bw).fit()
gwr_results.summary()
GWR maps
# Add GWR parameters to GeoDataframe
gdf['GWRintercept'] = gwr_results.params[:,0]
gdf['GWRln_gnic90'] = gwr_results.params[:,1]
# Obtain t-values filtered based on multiple testing correction
gwr_filtered_t = gwr_results.filter_tvals()