Measuring the evolution of spatial dependence and spatial inequality: A tutorial using Python
Motivation
Set up your Deepnote project
Load libraries
# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import libpysal
from libpysal import weights
from libpysal.weights.util import min_threshold_distance
import esda
from esda.moran import Moran, Moran_Local
import splot
from splot.esda import moran_scatterplot, plot_moran
from splot.libpysal import plot_spatial_weights
# Global seaborn theme applied to every figure created below
sns.set_style('darkgrid')
#sns.set_style('whitegrid')
Load and merge data
# Load the attribute table from the Deepnote project folder.
# NOTE(review): `df` is not referenced again below — the analysis uses
# `data`, loaded from the online repository in the next cell.
df = pd.read_csv("/work/tutorial2021-evolution-of-spatial-dependence-and-inequality/data.csv")
df
# Load the non-spatial attribute data.
# Option A: from a file uploaded to this project
#data = pd.read_csv("data.csv")
# Option B: from the online QuaRCS data repository
DATA_URL = "https://raw.githubusercontent.com/quarcs-lab/data-open/master/us_income/data.csv"
data = pd.read_csv(DATA_URL)
data
# Preview the uploaded shapefile (notebook display only; result is not stored)
gpd.read_file("/work/tutorial2021-evolution-of-spatial-dependence-and-inequality/map.shp")
# Load the spatial data.
# Option A: from files uploaded to this project
#map = gpd.read_file("map.shp")
# Option B: from the online QuaRCS data repository
MAP_URL = "https://github.com/quarcs-lab/data-open/raw/master/us_income/map.zip"
# NOTE(review): `map` shadows the Python builtin; kept as-is because later
# cells refer to this name.
map = gpd.read_file(MAP_URL)
map
# Inspect the coordinate reference system of the geometries
map.crs
# Show the documentation for pandas' merge (output reproduced below)
help(pd.merge)
Help on function merge in module pandas.core.reshape.merge:
merge(left: 'DataFrame | Series', right: 'DataFrame | Series', how: 'str' = 'inner', on: 'IndexLabel | None' = None, left_on: 'IndexLabel | None' = None, right_on: 'IndexLabel | None' = None, left_index: 'bool' = False, right_index: 'bool' = False, sort: 'bool' = False, suffixes: 'Suffixes' = ('_x', '_y'), copy: 'bool' = True, indicator: 'bool' = False, validate: 'str | None' = None) -> 'DataFrame'
Merge DataFrame or named Series objects with a database-style join.
A named Series object is treated as a DataFrame with a single named column.
The join is done on columns or indexes. If joining columns on
columns, the DataFrame indexes *will be ignored*. Otherwise if joining indexes
on indexes or indexes on a column or columns, the index will be passed on.
When performing a cross merge, no column specifications to merge on are
allowed.
Parameters
----------
left : DataFrame
right : DataFrame or named Series
Object to merge with.
how : {'left', 'right', 'outer', 'inner', 'cross'}, default 'inner'
Type of merge to be performed.
* left: use only keys from left frame, similar to a SQL left outer join;
preserve key order.
* right: use only keys from right frame, similar to a SQL right outer join;
preserve key order.
* outer: use union of keys from both frames, similar to a SQL full outer
join; sort keys lexicographically.
* inner: use intersection of keys from both frames, similar to a SQL inner
join; preserve the order of the left keys.
* cross: creates the cartesian product from both frames, preserves the order
of the left keys.
.. versionadded:: 1.2.0
on : label or list
Column or index level names to join on. These must be found in both
DataFrames. If `on` is None and not merging on indexes then this defaults
to the intersection of the columns in both DataFrames.
left_on : label or list, or array-like
Column or index level names to join on in the left DataFrame. Can also
be an array or list of arrays of the length of the left DataFrame.
These arrays are treated as if they are columns.
right_on : label or list, or array-like
Column or index level names to join on in the right DataFrame. Can also
be an array or list of arrays of the length of the right DataFrame.
These arrays are treated as if they are columns.
left_index : bool, default False
Use the index from the left DataFrame as the join key(s). If it is a
MultiIndex, the number of keys in the other DataFrame (either the index
or a number of columns) must match the number of levels.
right_index : bool, default False
Use the index from the right DataFrame as the join key. Same caveats as
left_index.
sort : bool, default False
Sort the join keys lexicographically in the result DataFrame. If False,
the order of the join keys depends on the join type (how keyword).
suffixes : list-like, default is ("_x", "_y")
A length-2 sequence where each element is optionally a string
indicating the suffix to add to overlapping column names in
`left` and `right` respectively. Pass a value of `None` instead
of a string to indicate that the column name from `left` or
`right` should be left as-is, with no suffix. At least one of the
values must not be None.
copy : bool, default True
If False, avoid copy if possible.
indicator : bool or str, default False
If True, adds a column to the output DataFrame called "_merge" with
information on the source of each row. The column can be given a different
name by providing a string argument. The column will have a Categorical
type with the value of "left_only" for observations whose merge key only
appears in the left DataFrame, "right_only" for observations
whose merge key only appears in the right DataFrame, and "both"
if the observation's merge key is found in both DataFrames.
validate : str, optional
If specified, checks if merge is of specified type.
* "one_to_one" or "1:1": check if merge keys are unique in both
left and right datasets.
* "one_to_many" or "1:m": check if merge keys are unique in left
dataset.
* "many_to_one" or "m:1": check if merge keys are unique in right
dataset.
* "many_to_many" or "m:m": allowed, but does not result in checks.
Returns
-------
DataFrame
A DataFrame of the two merged objects.
See Also
--------
merge_ordered : Merge with optional filling/interpolation.
merge_asof : Merge on nearest keys.
DataFrame.join : Similar method using indices.
Notes
-----
Support for specifying index levels as the `on`, `left_on`, and
`right_on` parameters was added in version 0.23.0
Support for merging named Series objects was added in version 0.24.0
Examples
--------
>>> df1 = pd.DataFrame({'lkey': ['foo', 'bar', 'baz', 'foo'],
... 'value': [1, 2, 3, 5]})
>>> df2 = pd.DataFrame({'rkey': ['foo', 'bar', 'baz', 'foo'],
... 'value': [5, 6, 7, 8]})
>>> df1
lkey value
0 foo 1
1 bar 2
2 baz 3
3 foo 5
>>> df2
rkey value
0 foo 5
1 bar 6
2 baz 7
3 foo 8
Merge df1 and df2 on the lkey and rkey columns. The value columns have
the default suffixes, _x and _y, appended.
>>> df1.merge(df2, left_on='lkey', right_on='rkey')
lkey value_x rkey value_y
0 foo 1 foo 5
1 foo 1 foo 8
2 foo 5 foo 5
3 foo 5 foo 8
4 bar 2 bar 6
5 baz 3 baz 7
Merge DataFrames df1 and df2 with specified left and right suffixes
appended to any overlapping columns.
>>> df1.merge(df2, left_on='lkey', right_on='rkey',
... suffixes=('_left', '_right'))
lkey value_left rkey value_right
0 foo 1 foo 5
1 foo 1 foo 8
2 foo 5 foo 5
3 foo 5 foo 8
4 bar 2 bar 6
5 baz 3 baz 7
Merge DataFrames df1 and df2, but raise an exception if the DataFrames have
any overlapping columns.
>>> df1.merge(df2, left_on='lkey', right_on='rkey', suffixes=(False, False))
Traceback (most recent call last):
...
ValueError: columns overlap but no suffix specified:
Index(['value'], dtype='object')
>>> df1 = pd.DataFrame({'a': ['foo', 'bar'], 'b': [1, 2]})
>>> df2 = pd.DataFrame({'a': ['foo', 'baz'], 'c': [3, 4]})
>>> df1
a b
0 foo 1
1 bar 2
>>> df2
a c
0 foo 3
1 baz 4
>>> df1.merge(df2, how='inner', on='a')
a b c
0 foo 1 3
>>> df1.merge(df2, how='left', on='a')
a b c
0 foo 1 3.0
1 bar 2 NaN
>>> df1 = pd.DataFrame({'left': ['foo', 'bar']})
>>> df2 = pd.DataFrame({'right': [7, 8]})
>>> df1
left
0 foo
1 bar
>>> df2
right
0 7
1 8
>>> df1.merge(df2, how='cross')
left right
0 foo 7
1 foo 8
2 bar 7
3 bar 8
# Attach the attribute table to the geometries. A 'left' join on the shared
# 'id' key keeps every polygon, even if a row has no attribute match.
gdf = pd.merge(map, data, on='id', how='left')
gdf
Basic maps (y)
# Quick-look maps: geometry only (static and interactive)
gdf.plot();
gdf.explore()
# Choropleth of a single variable (static and interactive versions)
gdf.plot(column='ln_y_1994');
gdf.explore(column='ln_y_1994')
# Larger static choropleth with a diverging palette and a legend
gdf.plot(cmap='coolwarm', column='ln_y_1994', legend=True, figsize=(15, 10));
# Interactive choropleth with five natural breaks.
# FIX: the free Stamen tile sets were retired (they moved to Stadia Maps and
# now require an API key), and recent folium versions reject 'Stamen Toner'.
# Use the freely available CartoDB Positron basemap instead.
gdf.explore(
    column='ln_y_1994',
    tooltip=['state', 'region', 'ln_y_1994'],
    scheme='naturalbreaks',
    k=5,
    cmap='coolwarm',
    legend=True,
    tiles='CartoDB positron'
)
# Same interactive five-class choropleth, rendered on a dark basemap.
dark_map_opts = dict(
    column='ln_y_1994',
    tooltip=['state', 'region', 'ln_y_1994'],
    scheme='naturalbreaks',
    k=5,
    cmap='coolwarm',
    legend=True,
)
gdf.explore(tiles='CartoDB dark_matter', **dark_map_opts)
# Interactive choropleth with thin gray polygon borders and no colorbar.
# FIX: 'Stamen Terrain' tiles were retired (now served by Stadia Maps behind
# an API key) and recent folium versions reject them; substitute the freely
# available OpenStreetMap basemap.
gdf.explore(
    column='ln_y_1994',
    tooltip=['state', 'region', 'ln_y_1994'],
    scheme='naturalbreaks',
    k=5,
    cmap='coolwarm',
    legend=True,
    tiles='OpenStreetMap',
    style_kwds=dict(color="gray", weight=0.5),
    legend_kwds=dict(colorbar=False)
)
# Static five-class natural-breaks choropleth of 1994 log GDP per capita.
fig, ax = plt.subplots(figsize=(15, 10))
gdf.plot(
    ax=ax,
    column="ln_y_1994",
    scheme='NaturalBreaks',
    k=5,
    cmap='coolwarm',
    legend=True,
    legend_kwds={'bbox_to_anchor': (0.82, 0.92)},
)
plt.title('Spatial distribution of log GDP per capita in 1994: Five natural breaks')
plt.tight_layout()
ax.axis("off")
# Uncomment to export the figure:
#plt.savefig('myMap.png',dpi=150, bbox_inches='tight')
plt.show()
# Summary statistics and box plot of 1994 log GDP per capita
gdf["ln_y_1994"].describe()
sns.boxplot(data=gdf, x="ln_y_1994");
# Box-map: classify regions by the box-plot fences of 1994 log GDP pc.
fig, ax = plt.subplots(figsize=(15, 10))
gdf.plot(
    ax=ax,
    column="ln_y_1994",
    scheme='BoxPlot',
    cmap='coolwarm',
    legend=True,
    legend_kwds={'bbox_to_anchor': (0.82, 0.92)},
)
plt.title('Spatial distribution of log GDP per capita in 1994: Which regions are below/above the median?')
plt.tight_layout()
ax.axis("off")
# Uncomment to export the figure:
#plt.savefig('myMap.png',dpi=150, bbox_inches='tight')
plt.show()
# Side-by-side box-maps: log GDP per capita in 1929 vs. 1994.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 10))
panels = [
    ("ln_y_1929", 'Log GDP per capita in 1929'),
    ("ln_y_1994", 'Log GDP per capita in 1994'),
]
for panel_ax, (col, _) in zip(axes, panels):
    gdf.plot(column=col, scheme='BoxPlot', cmap='coolwarm', legend=True,
             ax=panel_ax, legend_kwds={'bbox_to_anchor': (0.99, 0.40)})
plt.tight_layout()
for panel_ax, (_, title) in zip(axes, panels):
    panel_ax.axis("off")
    panel_ax.set_title(title)
plt.show()
Spatial weights (W)
# Build the spatial weights matrix: Queen contiguity treats polygons that
# share at least one vertex as neighbors.
W = weights.Queen.from_dataframe(gdf)
# Alternative neighborhood definitions:
#W = weights.Rook.from_dataframe(gdf)
#W = weights.KNN.from_dataframe(gdf, k=4)
#W = weights.KNN.from_dataframe(gdf, k=6)
#W = weights.KNN.from_dataframe(gdf, k=8)
# Compute inverse distance squared based on maximum nearest neighbor distance between the n observations
#maxMin = weights.min_threshold_dist_from_shapefile("map.shp")
#W = weights.DistanceBand.from_dataframe(gdf, threshold = maxMin, binary=False, alpha=-2)
# Row-standardize W so each observation's weights sum to 1
W.transform = 'r'
# FIX: the geometries are in a geographic CRS, so computing centroids for
# this plot raised "Results from 'centroid' are likely incorrect" warnings
# (see the output below). Re-project for display only — W itself is a
# contiguity structure and is unaffected. EPSG:5070 is US Albers; the data
# are US states, but TODO confirm this projection suits the study area.
plot_spatial_weights(W, gdf.to_crs(epsg=5070));
/opt/conda/lib/python3.9/site-packages/splot/_viz_libpysal_mpl.py:115: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
centroids_shp = gdf.centroid.values
/opt/conda/lib/python3.9/site-packages/splot/_viz_libpysal_mpl.py:154: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
gdf.centroid.plot(ax=ax, **node_kws)
# Adjacency list: the ids of each observation's neighbors
W.neighbors
# Fewest neighbors of any observation
W.min_neighbors
# Average number of neighbors per observation
W.mean_neighbors
# Most neighbors of any observation
W.max_neighbors
# Distribution of neighbor counts
W.histogram
#Note: (numbers of neighbors, frequency)
Spatial lag (Wy)
# Spatial lag: with a row-standardized W this is the average value of each
# state's neighbors.
gdf["Wln_y_1929"] = weights.lag_spatial(W, gdf["ln_y_1929"])
gdf[["ln_y_1929", "Wln_y_1929"]].round(2)
# Map the variable next to its spatial lag.
f, ax = plt.subplots(1, 2, figsize=(16, 8))
lag_panels = [("ln_y_1929", "Log GDP pc in 1929"),
              ("Wln_y_1929", "Spatial lag of log GDP pc in 1929")]
for panel_ax, (col, title) in zip(ax, lag_panels):
    gdf.plot(column=col, ax=panel_ax, edgecolor='k',
             scheme='NaturalBreaks', k=5, cmap='coolwarm', legend=True,
             legend_kwds={'bbox_to_anchor': (1.05, 0.40)})
    panel_ax.set_title(title)
    panel_ax.axis("off")
Spatial dependence (Wy, y)
# Scatter of the variable against its spatial lag with a fitted regression
# line — the visual counterpart of Moran's I.
g = sns.jointplot(data=gdf, x="ln_y_1929", y="Wln_y_1929", kind="reg")
g.set_axis_labels('Log GDPpc in 1929', 'Log GDPpc in neighbouring states in 1929',);
# Global Moran's I for the first and last years in the sample
moran1929, moran1994 = (Moran(gdf[col], W) for col in ("ln_y_1929", "ln_y_1994"))
print(moran1929.I, moran1994.I)
0.6529030905375451 0.3570856202115702
# Pseudo p-values from random permutations; both years are significant
print(moran1929.p_sim, moran1994.p_sim)
0.001 0.001
# Moran scatterplots for 1929 and 1994 (the fitted slope equals Moran's I)
moran_scatterplot(moran1929);
moran_scatterplot(moran1994);
Evolution of spatial dependence
# Stack the yearly columns into a 2-D array: one row per state, one column
# per year (1929-1994).
dfa = gdf.loc[:, 'ln_y_1929':'ln_y_1994'].to_numpy()
dfa
dfa.shape
# Matching array of years; np.arange excludes the stop value, hence 1995
years = np.arange(1929, 1995)
years
# Moran's I for every yearly cross-section (the columns of dfa)
mits = [Moran(cross_section, W) for cross_section in dfa.T]
# Collect statistic, expectation, pseudo p-value, and z-score per year
res = np.array([(m.I, m.EI, m.p_sim, m.z_sim) for m in mits])
plt.plot(years, res[:, 0])
plt.title("Evolution of spatial dependence")
plt.ylabel("Moran's I of log GDP pc");
Evolution of spatial inequality
# One line per state: raw log GDP per capita trajectories over time
for state_series in dfa:
    plt.plot(years, state_series)
plt.ylabel('Log GDP per capita');
# Normalize each year by its cross-sectional mean so trajectories are read
# relative to the national average of that year.
rel_dfa = dfa / dfa.mean(axis=0)
for state_series in rel_dfa:
    plt.plot(years, state_series)
plt.ylabel('Relative log GDP per capita');
# Sigma convergence: cross-sectional standard deviation for each year
sigma = dfa.std(axis=0)
plt.plot(years, sigma)
plt.title("Evolution of spatial inequality")
plt.ylabel('Stand. Dev.of log GDP pc');
Summary graphs
# Overlay both summary series on one axis and export the figure.
plt.plot(years, res[:, 0], label="Sp. Dependence")
plt.plot(years, sigma, label="Sp. Inequality")
plt.legend(loc="upper right")
plt.savefig('evolution.png', dpi=150, bbox_inches='tight');
# Dual-axis summary: spatial dependence (left axis) against spatial
# inequality (right axis) on a shared x-axis of years.
fig, ax = plt.subplots()
ax.plot(years, res[:, 0], color="royalblue")
ax.set_ylabel("Spatial Dependence", color="royalblue")
# Twin axis shares x but carries its own y scale
ax2 = ax.twinx()
ax2.plot(years, sigma, color="orange")
ax2.set_ylabel("Spatial Inequality", color="darkorange")
# NOTE(review): set_style here only affects figures created after this cell
sns.set_style("dark")
plt.show()