Measuring the evolution of spatial dependence and spatial inequality: A tutorial using Python
Motivation
Set up your deepnote
Load libraries
# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import libpysal
from libpysal import weights
from libpysal.weights.util import min_threshold_distance
import esda
from esda.moran import Moran, Moran_Local
import splot
from splot.esda import moran_scatterplot, plot_moran
from splot.libpysal import plot_spatial_weights
sns.set_style('darkgrid')
#sns.set_style('whitegrid')
Load and merge data
df = pd.read_csv("/work/tutorial2021-evolution-of-spatial-dependence-and-inequality/data.csv")
df
# Load non-spatial data from uploaded file
#data = pd.read_csv("data.csv")
# OR Load non-spatial data from online repository
data = pd.read_csv("https://raw.githubusercontent.com/quarcs-lab/data-open/master/us_income/data.csv")
data
gpd.read_file("/work/tutorial2021-evolution-of-spatial-dependence-and-inequality/map.shp")
# Load spatial data from uploaded files
#map = gpd.read_file("map.shp")
# OR Load spatial data from online repository
map = gpd.read_file("https://github.com/quarcs-lab/data-open/raw/master/us_income/map.zip")
map
map.crs
help(pd.merge)
# Merge data and map
gdf = map.merge(data, on='id', how='left')
gdf
Basic maps (y)
gdf.plot();
gdf.explore()
gdf.plot('ln_y_1994');
gdf.explore('ln_y_1994')
gdf.plot(column='ln_y_1994', cmap='coolwarm', figsize=(15, 10), legend=True);
gdf.explore(
column='ln_y_1994',
tooltip=['state', 'region', 'ln_y_1994'],
scheme='naturalbreaks',
k=5,
cmap='coolwarm',
legend=True,
tiles='Stamen Toner'
)
gdf.explore(
column='ln_y_1994',
tooltip=['state', 'region', 'ln_y_1994'],
scheme='naturalbreaks',
k=5,
cmap='coolwarm',
legend=True,
tiles='CartoDB dark_matter'
)
gdf.explore(
column='ln_y_1994',
tooltip=['state', 'region', 'ln_y_1994'],
scheme='naturalbreaks',
k=5,
cmap='coolwarm',
legend=True,
tiles='Stamen Terrain',
style_kwds =dict(color="gray", weight=0.5),
legend_kwds=dict(colorbar=False)
)
fig, ax = plt.subplots(figsize=(15,10))
gdf.plot(column="ln_y_1994", scheme='NaturalBreaks', k=5, cmap='coolwarm', legend=True, ax=ax, legend_kwds={'bbox_to_anchor':(0.82, 0.92)})
plt.title('Spatial distribution of log GDP per capita in 1994: Five natural breaks')
plt.tight_layout()
ax.axis("off")
#plt.savefig('myMap.png',dpi=150, bbox_inches='tight')
plt.show()
gdf.ln_y_1994.describe()
sns.boxplot(data=gdf, x="ln_y_1994");
fig, ax = plt.subplots(figsize=(15,10))
gdf.plot(column="ln_y_1994", scheme='BoxPlot', cmap='coolwarm', legend=True, ax=ax, legend_kwds={'bbox_to_anchor':(0.82, 0.92)})
plt.title('Spatial distribution of log GDP per capita in 1994: Which regions are below/above the median?')
plt.tight_layout()
ax.axis("off")
#plt.savefig('myMap.png',dpi=150, bbox_inches='tight')
plt.show()
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15,10))
gdf.plot(column="ln_y_1929", scheme='BoxPlot', cmap='coolwarm', legend=True, ax=axes[0], legend_kwds={'bbox_to_anchor':(0.99, 0.40)})
gdf.plot(column="ln_y_1994", scheme='BoxPlot', cmap='coolwarm', legend=True, ax=axes[1], legend_kwds={'bbox_to_anchor':(0.99, 0.40)})
plt.tight_layout()
axes[0].axis("off")
axes[1].axis("off")
axes[0].set_title('Log GDP per capita in 1929')
axes[1].set_title('Log GDP per capita in 1994')
plt.show()
Spatial weights (W)
W = weights.Queen.from_dataframe(gdf)
#W = weights.Rook.from_dataframe(gdf)
#W = weights.KNN.from_dataframe(gdf, k=4)
#W = weights.KNN.from_dataframe(gdf, k=6)
#W = weights.KNN.from_dataframe(gdf, k=8)
# Compute inverse distance squared based on maximum nearest neighbor distance between the n observations
#maxMin = weights.min_threshold_dist_from_shapefile("map.shp")
#W = weights.DistanceBand.from_dataframe(gdf, threshold = maxMin, binary=False, alpha=-2)
# Row-standardize W
W.transform = 'r'
plot_spatial_weights(W, gdf);
W.neighbors
W.min_neighbors
W.mean_neighbors
W.max_neighbors
W.histogram
#Note: (numbers of neighbors, frequency)
Spatial lag (Wy)
# Create spatial lag variables
gdf["Wln_y_1929"] = weights.lag_spatial(W, gdf["ln_y_1929"])
gdf[["ln_y_1929", "Wln_y_1929"]].round(2)
f,ax = plt.subplots(1,2, figsize=(16, 8))
gdf.plot(column='ln_y_1929', ax=ax[0], edgecolor='k',
scheme='NaturalBreaks', k=5, cmap='coolwarm', legend=True, legend_kwds={'bbox_to_anchor':(1.05, 0.40)})
ax[0].set_title("Log GDP pc in 1929")
ax[0].axis("off")
gdf.plot(column='Wln_y_1929', ax=ax[1], edgecolor='k',
scheme='NaturalBreaks', k=5, cmap='coolwarm', legend=True, legend_kwds={'bbox_to_anchor':(1.05, 0.40)})
ax[1].set_title("Spatial lag of log GDP pc in 1929")
ax[1].axis("off");
Spatial dependence (Wy, y)
g = sns.jointplot(data=gdf, x="ln_y_1929", y="Wln_y_1929", kind="reg")
g.set_axis_labels('Log GDPpc in 1929',
'Log GDPpc in neighbouring states in 1929',);
moran1929 = Moran(gdf["ln_y_1929"], W)
moran1994 = Moran(gdf["ln_y_1994"], W)
print(moran1929.I, moran1994.I)
print(moran1929.p_sim, moran1994.p_sim)
moran_scatterplot(moran1929);
moran_scatterplot(moran1994);
Evolution of spatial dependence
# Create multidimentional array
dfa = gdf.loc[:,'ln_y_1929':'ln_y_1994'].values
dfa
dfa.shape
# Create array of years (Be careful with the index. Add +1)
years = np.arange(1929,1995)
years
mits = [Moran(cs, W) for cs in dfa.T]
res = np.array([(m.I, m.EI, m.p_sim, m.z_sim) for m in mits])
plt.plot(years, res[:,0])
plt.title("Evolution of spatial dependence")
plt.ylabel("Moran's I of log GDP pc");
Evolution of spatial inequality
for row in dfa:
plt.plot(years, row)
plt.ylabel('Log GDP per capita');
rel_dfa = dfa / dfa.mean(axis=0)
for row in rel_dfa:
plt.plot(years, row)
plt.ylabel('Relative log GDP per capita');
sigma = dfa.std(axis=0)
plt.plot(years, sigma)
plt.title("Evolution of spatial inequality")
plt.ylabel('Stand. Dev.of log GDP pc');
Summary graphs
plt.plot(years, res[:,0], label="Sp. Dependence")
plt.plot(years, sigma, label="Sp. Inequality")
plt.legend(loc="upper right")
plt.savefig('evolution.png',dpi=150, bbox_inches='tight');
# create figure and axis objects with subplots()
fig,ax = plt.subplots()
# make a plot
ax.plot(years, res[:,0], color="royalblue")
# set y-axis label
ax.set_ylabel("Spatial Dependence", color="royalblue")
# twin object for two different y-axis on the sample plot
ax2=ax.twinx()
# make a plot with different y-axis using second axis object
ax2.plot(years, sigma, color="orange")
ax2.set_ylabel("Spatial Inequality", color="darkorange")
sns.set_style("dark")
plt.show()