Spatial inequality dynamics across US counties
Setup
# Load libraries
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
plt.style.use('ggplot')
from matplotlib.ticker import FormatStrFormatter
from matplotlib_scalebar.scalebar import ScaleBar
from pylab import rcParams
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 72
import seaborn as sns
#sns.set_style("darkgrid")
#sns.set_context(context="paper", font_scale=1.5, rc=None)
#sns.set(font="serif")
import plotly.express as px
import plotly.graph_objects as go
import geopandas as gpd
import libpysal
from libpysal  import weights
from libpysal.weights import Queen
import inequality
from inequality.gini import Gini_Spatial
import esda
from esda.moran import Moran, Moran_Local
import splot
from splot.esda import moran_scatterplot, plot_moran, lisa_cluster, plot_local_autocorrelation
from splot.libpysal import plot_spatial_weights
from giddy.directional import Rose
import statsmodels.api as sm
import statsmodels.formula.api as smf
from spreg import OLS
from spreg import MoranRes
from spreg import ML_Lag
from spreg import ML_Error 
from mgwr.gwr import GWR, MGWR
from mgwr.sel_bw import Sel_BW
from mgwr.utils import shift_colormap, truncate_colormap
import warnings
warnings.filterwarnings('ignore') 
Import data
gdf = gpd.read_file("https://github.com/gdsbook/book/raw/master/data/us_county_income/uscountypcincome.gpkg")
#gdf = gpd.read_file("/work/uscountypcincome.gpkg")
gdf.head()
gdf.columns
gdf.shape
gdf.query('NAME == "Jackson" & STATEFP == "28"').loc[:, '1969':'1979']
years = np.arange(1969, 2018).astype(str)
Regional inequality
sns.histplot(x=gdf['1969'], kde=True);
px.histogram(gdf, x = '1969', hover_name= 'NAME',  marginal='rug')
Gini index
g69 = inequality.gini.Gini(gdf['1969'].values)
g69.g
def gini_by_col(column):
    return inequality.gini.Gini(column.values).g
inequalities = gdf[years].apply(gini_by_col, axis=0).to_frame('gini')
inequalities.plot();
px.line(inequalities, x=inequalities.index, y='gini')
Theil index
def theil(column):
    return inequality.theil.Theil(column.values).T
inequalities['theil'] = gdf[years].apply(theil, axis=0)
inequalities['theil'].plot();
px.line(inequalities, x=inequalities.index, y='theil')
Spatial dependence
ax = gdf.plot(
    column='1969', 
    scheme='NaturalBreaks', 
    legend=True, 
    edgecolor='none',
    legend_kwds={'loc': 'lower left'}, 
    figsize=(12, 12)
)
ax.set_axis_off()
plt.show()
W = weights.Queen.from_dataframe(gdf)
def moran_by_col(y, w=W):
    mo = esda.Moran(y, w=w)
    mo_s = pd.Series(
        {"I": mo.I, "I-P value": mo.p_sim},
    )
    return mo_s
moran_stats = gdf[years].apply(moran_by_col, axis=0).T
moran_stats.head()
moran_stats['I'].plot();
px.line(moran_stats, x=moran_stats.index, y="I")
moran_stats['I-P value'].plot();
inequalities = inequalities.join(moran_stats)
Spatial inequality across regions
Theil decomposition
region_names = {
    1: "New England",
    2: "Mideast",
    3: "Great Lakes",
    4: "Plains",
    5: "Southeast",
    6: "Southwest",
    7: "Rocky Mountain",
    8: "Far West"
}
ax = gdf.assign(Region_Name = gdf.Region.map(region_names))\
      .plot(
    "Region_Name",
    linewidth=0,
    legend=True,
    categorical=True,
    legend_kwds=dict(bbox_to_anchor=(1.2,.5))
)
ax.set_axis_off();
rmeans = gdf.assign(
    # Create column with region name for each county
    Region_Name = gdf.Region.map(region_names)
).groupby(
    # Group counties by region name
    by='Region_Name'
# Calculate mean by region and save only year columns
).mean()[years] 
rmeans.T.plot.line(figsize=(10, 5));
theil_dr = inequality.theil.TheilD(gdf[years].values, gdf.Region)
inequalities['theil_between'] = theil_dr.bg
inequalities['theil_within']  = theil_dr.wg
inequalities['theil_between_share'] = (
    inequalities['theil_between'] / inequalities['theil']
)
inequalities[
    ['theil_between', 'theil_within', 'theil_between_share']
].plot(subplots=True, figsize=(10,8));
Gini decomposition
W.transform = 'B'
gs69 = Gini_Spatial(gdf['1969'], W)
gs69.g
gs69.wcg_share
gs69.p_sim
def gini_spatial_by_col(incomes, weights):
    gs = Gini_Spatial(incomes, weights)
    denom = 2 * incomes.mean() * weights.n**2
    near_diffs = gs.wg / denom
    far_diffs = gs.wcg / denom
    out = pd.Series(
        {
            "gini": gs.g,
            "near_diffs": near_diffs,
            "far_diffs": far_diffs,
            "p_sim": gs.p_sim
        }
    )
    return out
%%time
spatial_gini_results = gdf[years].apply(gini_spatial_by_col, weights=W).T
spatial_gini_results.head()
W.pct_nonzero
inequalities['near_diffs'] = spatial_gini_results.near_diffs
inequalities['near_diffs'].plot.line();
px.line(inequalities, x=inequalities.index, y="near_diffs")
inequalities[['near_diffs', 'I']].plot.line(
    subplots=True, figsize=(15,6)
);