Spatial inequality dynamics across US counties
Setup
# Load libraries
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
plt.style.use('ggplot')
from matplotlib.ticker import FormatStrFormatter
from matplotlib_scalebar.scalebar import ScaleBar
from pylab import rcParams
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 72
import seaborn as sns
#sns.set_style("darkgrid")
#sns.set_context(context="paper", font_scale=1.5, rc=None)
#sns.set(font="serif")
import plotly.express as px
import plotly.graph_objects as go
import geopandas as gpd
import libpysal
from libpysal import weights
from libpysal.weights import Queen
import inequality
from inequality.gini import Gini_Spatial
import esda
from esda.moran import Moran, Moran_Local
import splot
from splot.esda import moran_scatterplot, plot_moran, lisa_cluster, plot_local_autocorrelation
from splot.libpysal import plot_spatial_weights
from giddy.directional import Rose
import statsmodels.api as sm
import statsmodels.formula.api as smf
from spreg import OLS
from spreg import MoranRes
from spreg import ML_Lag
from spreg import ML_Error
from mgwr.gwr import GWR, MGWR
from mgwr.sel_bw import Sel_BW
from mgwr.utils import shift_colormap, truncate_colormap
import warnings
warnings.filterwarnings('ignore')
Import data
gdf = gpd.read_file("https://github.com/gdsbook/book/raw/master/data/us_county_income/uscountypcincome.gpkg")
#gdf = gpd.read_file("/work/uscountypcincome.gpkg")
gdf.head()
gdf.columns
gdf.shape
gdf.query('NAME == "Jackson" & STATEFP == "28"').loc[:, '1969':'1979']
years = np.arange(1969, 2018).astype(str)
Regional inequality
sns.histplot(x=gdf['1969'], kde=True);
px.histogram(gdf, x = '1969', hover_name= 'NAME', marginal='rug')
Gini index
g69 = inequality.gini.Gini(gdf['1969'].values)
g69.g
def gini_by_col(column):
return inequality.gini.Gini(column.values).g
inequalities = gdf[years].apply(gini_by_col, axis=0).to_frame('gini')
inequalities.plot();
px.line(inequalities, x=inequalities.index, y='gini')
Theil index
def theil(column):
return inequality.theil.Theil(column.values).T
inequalities['theil'] = gdf[years].apply(theil, axis=0)
inequalities['theil'].plot();
px.line(inequalities, x=inequalities.index, y='theil')
Spatial dependence
ax = gdf.plot(
column='1969',
scheme='NaturalBreaks',
legend=True,
edgecolor='none',
legend_kwds={'loc': 'lower left'},
figsize=(12, 12)
)
ax.set_axis_off()
plt.show()
W = weights.Queen.from_dataframe(gdf)
def moran_by_col(y, w=W):
mo = esda.Moran(y, w=w)
mo_s = pd.Series(
{"I": mo.I, "I-P value": mo.p_sim},
)
return mo_s
moran_stats = gdf[years].apply(moran_by_col, axis=0).T
moran_stats.head()
moran_stats['I'].plot();
px.line(moran_stats, x=moran_stats.index, y="I")
moran_stats['I-P value'].plot();
inequalities = inequalities.join(moran_stats)
Spatial inequality across regions
Theil decomposition
region_names = {
1: "New England",
2: "Mideast",
3: "Great Lakes",
4: "Plains",
5: "Southeast",
6: "Southwest",
7: "Rocky Mountain",
8: "Far West"
}
ax = gdf.assign(Region_Name = gdf.Region.map(region_names))\
.plot(
"Region_Name",
linewidth=0,
legend=True,
categorical=True,
legend_kwds=dict(bbox_to_anchor=(1.2,.5))
)
ax.set_axis_off();
rmeans = gdf.assign(
# Create column with region name for each county
Region_Name = gdf.Region.map(region_names)
).groupby(
# Group counties by region name
by='Region_Name'
# Calculate mean by region and save only year columns
).mean()[years]
rmeans.T.plot.line(figsize=(10, 5));
theil_dr = inequality.theil.TheilD(gdf[years].values, gdf.Region)
inequalities['theil_between'] = theil_dr.bg
inequalities['theil_within'] = theil_dr.wg
inequalities['theil_between_share'] = (
inequalities['theil_between'] / inequalities['theil']
)
inequalities[
['theil_between', 'theil_within', 'theil_between_share']
].plot(subplots=True, figsize=(10,8));
Gini decomposition
W.transform = 'B'
gs69 = Gini_Spatial(gdf['1969'], W)
gs69.g
gs69.wcg_share
gs69.p_sim
def gini_spatial_by_col(incomes, weights):
gs = Gini_Spatial(incomes, weights)
denom = 2 * incomes.mean() * weights.n**2
near_diffs = gs.wg / denom
far_diffs = gs.wcg / denom
out = pd.Series(
{
"gini": gs.g,
"near_diffs": near_diffs,
"far_diffs": far_diffs,
"p_sim": gs.p_sim
}
)
return out
%%time
spatial_gini_results = gdf[years].apply(gini_spatial_by_col, weights=W).T
spatial_gini_results.head()
W.pct_nonzero
inequalities['near_diffs'] = spatial_gini_results.near_diffs
inequalities['near_diffs'].plot.line();
px.line(inequalities, x=inequalities.index, y="near_diffs")
inequalities[['near_diffs', 'I']].plot.line(
subplots=True, figsize=(15,6)
);