Introduction to exploratory space-time data analysis (ESTDA)

Setup

# Load libraries import numpy as np import pandas as pd import matplotlib.pyplot as plt from matplotlib.ticker import FormatStrFormatter from pylab import rcParams import matplotlib as mpl mpl.rcParams['figure.dpi'] = 72 import seaborn as sns sns.set_style("darkgrid") sns.set_context(context="paper", font_scale=1.5, rc=None) sns.set(font="serif") import plotly.express as px import plotly.graph_objects as go import geopandas as gpd import libpysal from libpysal import weights from libpysal.weights import Queen import esda from esda.moran import Moran, Moran_Local import splot from splot.esda import moran_scatterplot, plot_moran, lisa_cluster, plot_local_autocorrelation from splot.libpysal import plot_spatial_weights from giddy.directional import Rose import statsmodels.api as sm import statsmodels.formula.api as smf from spreg import OLS from spreg import MoranRes from spreg import ML_Lag from spreg import ML_Error import warnings warnings.filterwarnings('ignore')

Import data

# Import a .zip file containing spatial files and data #! wget https://github.com/quarcs-lab/data-open/raw/master/us_income/widePanel.zip #! unzip widePanel.zip

gdf = gpd.read_file("https://raw.githubusercontent.com/quarcs-lab/data-open/master/us_income/widePanel.geojson") gdf

gdf['gdp1969'] = np.exp(gdf.ln_gp_1969) gdf['gdp2008'] = np.exp(gdf.ln_gp_2008)

gdf.columns

gdfSimple = gdf.loc[:, ['id', 'state', 'gdp1969', 'gdp2008', 'geometry']]

type(gdfSimple)

gdfSimple['rel1969'] = gdfSimple['gdp1969'] / gdfSimple['gdp1969'].mean() gdfSimple['rel2008'] = gdfSimple['gdp2008'] / gdfSimple['gdp2008'].mean()

gdfSimple

gdfSimple.columns

Describe data

gdfSimple[['gdp1969', 'gdp2008', 'rel1969', 'rel2008']].describe().round(2)

sns.histplot(x=gdfSimple['rel1969'], kde=True);

sns.histplot(x=gdfSimple['rel2008'], kde=True);

px.histogram(gdfSimple, x = 'rel1969', hover_name= 'state', marginal='rug')

px.histogram(gdfSimple, x = 'rel2008', hover_name= 'state', marginal='rug')

px.scatter( gdfSimple, x="rel1969", y="rel2008", hover_name="state", trendline="ols", marginal_x="box", marginal_y="box")

gdfSimple.crs

Basic maps

fig, ax = plt.subplots(figsize=(6,4)) gdfSimple.plot(column="rel1969", scheme='NaturalBreaks', k=5, cmap='coolwarm', legend=True, legend_kwds={'bbox_to_anchor':(1.12, 0.55)}, ax=ax) plt.title('Relative GDP per capita in 1969') plt.tight_layout() ax.axis("off") plt.savefig('map1969',dpi=300, bbox_inches='tight') plt.show()

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12,4)) gdfSimple.plot(column="rel1969", scheme='Quantiles', k=5, cmap='coolwarm', legend=True, legend_kwds={'bbox_to_anchor':(1.12, 0.55)}, ax=axes[0]) gdfSimple.plot(column="rel2008", scheme='user_defined', classification_kwds={'bins':[0.85, 0.95, 1.03, 1.16]}, cmap='coolwarm', legend=True, legend_kwds={'bbox_to_anchor':(1.12, 0.55)},ax=axes[1]) plt.tight_layout() axes[0].axis("off") axes[1].axis("off") axes[0].set_title('(a) Relative GDP per capita in 1969') axes[1].set_title('(b) Relative GDP per capita in 2008') plt.savefig('map1969map2008',dpi=300, bbox_inches='tight') plt.show()

gdfSimple.explore( column='rel1969', tooltip=['state', 'rel1969', 'rel2008'], scheme='naturalbreaks', k=5, cmap='coolwarm', legend=True, tiles='Stamen Terrain', style_kwds =dict(color="gray", weight=0.5), legend_kwds=dict(colorbar=False) )

Spatial weights matrix

Import W

# Load weights matrix from Geoda .gal file W = libpysal.io.open("Wqueen_fromGEODA.gal").read()

W.n

W.neighbors

W.min_neighbors

W.mean_neighbors

W.max_neighbors

# Row-standardize weights matrix W.transform = "R"

Create W

W = weights.Queen.from_dataframe(gdf) #W = weights.Rook.from_dataframe(gdf) #W = weights.KNN.from_dataframe(gdf, k=4) #W = weights.KNN.from_dataframe(gdf, k=6) #W = weights.KNN.from_dataframe(gdf, k=8) #>>>> Compute inverse distance squared based on maximum nearest neighbor distance between the n observations #maxMin = weights.min_threshold_dist_from_shapefile("map.shp") #W = weights.DistanceBand.from_dataframe(gdf, threshold = maxMin, binary=False, alpha=-2) # >>> Row-standardize W W.transform = 'r' # Ref: https://splot.readthedocs.io/en/stable/users/tutorials/weights.html#a-closer-look-at-w

W.neighbors

W.min_neighbors

W.mean_neighbors

W.max_neighbors

Plot W

plot_spatial_weights(W, gdf);

Spatial lag

# Create spatial lag variables gdfSimple["Wrel1969"] = weights.lag_spatial(W, gdfSimple["rel1969"]) gdfSimple["Wrel2008"] = weights.lag_spatial(W, gdfSimple["rel2008"])

gdfSimple[["rel1969", "Wrel1969", "rel2008", "Wrel2008"]].round(2)

gdfSimple[["rel1969", "Wrel1969", "rel2008", "Wrel2008"]].describe().round(2)

px.scatter( gdfSimple, x="rel1969", y="Wrel1969", hover_name="state", labels = dict(rel1969 = "Relative GDPpc in 1969", Wrel1969 = "Neighbors' relative GDPpc in 1969", rel2008 = "Relative GDPpc in 2008", Wrel2008 = "Neighbors' relative GDPpc in 2008"), trendline="ols", marginal_x="box", marginal_y="box")

f,ax = plt.subplots(1,2, figsize=(12, 4)) gdfSimple.plot(column='rel1969', ax=ax[0], edgecolor='k', scheme='NaturalBreaks', k=5, cmap='coolwarm', legend=False) ax[0].set_title("Relative GDP pc in 1969") gdfSimple.plot(column='Wrel1969', ax=ax[1], edgecolor='k', scheme='NaturalBreaks', k=5, cmap='coolwarm', legend=False) ax[1].set_title("Spatial lag of relative GDP pc in 1969");

Spatial autocorrelation

x1969 = gdfSimple["rel1969"] x2008 = gdfSimple["rel2008"]

moran1969 = Moran(x1969, W) moran2008 = Moran(x2008, W)

print(moran1969.I, moran2008.I)

print(moran1969.p_sim, moran2008.p_sim)

print(moran1969.p_norm, moran2008.p_norm)

sns.kdeplot(moran1969.sim, shade=True) plt.vlines(moran1969.I, 0, 1, color='r') plt.vlines(moran1969.EI, 0, 1) plt.xlabel("Moran's I in 1969");

fig, axes = plt.subplots(nrows=1, ncols=2,figsize=(12,4)) moran_scatterplot(moran1969, aspect_equal=False, zstandard=False, ax=axes[0]) moran_scatterplot(moran2008, aspect_equal=False, zstandard=False, ax=axes[1]) axes[0].set_xlim([0.7, 1.4]) axes[0].set_xlabel("Relative GDPpc in 1969") axes[0].set_ylim([0.7, 1.4]) axes[0].set_ylabel("Neighbors' relative GDPpc in 1969") axes[1].set_xlim([0.7, 1.4]) axes[1].set_xlabel("Relative GDPpc in 2008") axes[1].set_ylim([0.7, 1.4]) axes[1].set_ylabel("Neighbors' relative GDPpc in 2008") plt.savefig('moran1969moran2008',dpi=300, bbox_inches='tight') plt.show()

px.scatter( gdfSimple, x="rel2008", y="Wrel2008", hover_name="state", labels = dict(rel1969 = "Relative GDPpc in 1969", Wrel1969 = "Neighbors' relative GDPpc in 1969", rel2008 = "Relative GDPpc in 2008", Wrel2008 = "Neighbors' relative GDPpc in 2008"), trendline="ols", marginal_x="box", marginal_y="box")

Evolution of spatial dependence

# Create multidimentional array dfa = gdf.loc[:,'ln_gp_1929':'ln_gp_1994'].values dfa

dfa.shape

# Create array of years years = np.arange(1929,1995) years

years.shape

mits = [Moran(cs, W) for cs in dfa.T] res = np.array([(m.I, m.EI, m.p_sim, m.z_sim) for m in mits])

plt.plot(years, res[:,0]) plt.title("Evolution of spatial dependence") plt.ylabel("Moran's I of log GDP pc");

Evolution of regional disparities

sigma = dfa.std(axis=0) plt.plot(years, sigma) plt.title("Sigma Convergence") plt.ylabel('Stand. Dev.of log GDP pc');

plt.plot(years, res[:,0], label="Spatial Dep.") plt.plot(years, sigma, label="Disparities") plt.legend(bbox_to_anchor=(1.31,1), loc="upper right"); plt.style.use('seaborn-white')

# create figure and axis objects with subplots() fig,ax = plt.subplots() # make a plot ax.plot(years, res[:,0], color="royalblue") # set y-axis label ax.set_ylabel("Spatial Dependence", color="royalblue") # twin object for two different y-axis on the sample plot ax2=ax.twinx() # make a plot with different y-axis using second axis object ax2.plot(years, sigma, color="orange") ax2.set_ylabel("Regional Disparities", color="darkorange") plt.show()

sns.set_style("darkgrid") for row in dfa: plt.plot(years, row)

rel_dfa = dfa / dfa.mean(axis=0)

rel_dfa

rel_dfa.shape

for row in rel_dfa: plt.plot(years, row)

sns.set_palette(sns.color_palette("coolwarm", 2010-1929)) for row in range(1995-1929): sns.kdeplot(rel_dfa[:,row])

LISA scatterplot and map

Lmoran1969 = Moran_Local(x1969, W, permutations = 999, seed=12345) Lmoran2008 = Moran_Local(x2008, W, permutations = 999, seed=12345)

Lmoran1969.p_sim

# Plot local Moran fig, ax = moran_scatterplot(Lmoran1969, p=0.01, zstandard =False) ax.set_xlabel('Relative GDPpc in 1969') ax.set_ylabel('Spatial Lag of relative GDPpc in 1969') plt.show()

# Trying to fix the colors # It seems that at least one region each quadrant needs to be significant for the proper coloring of points moran_scatterplot(Lmoran1969, p=0.15, zstandard = False);

# Plot LISA map fig, ax = plt.subplots(figsize=(14,12)) lisa_cluster(Lmoran1969, gdf, p=0.15, figsize = (16,12), ax=ax) plt.show()

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12,4)) lisa_cluster(Lmoran1969, gdf, p=0.01, ax=axes[0]) lisa_cluster(Lmoran2008, gdf, p=0.01, ax=axes[1]) plt.tight_layout() axes[0].axis("off") axes[1].axis("off") axes[0].set_title('Year 1969') axes[1].set_title('Year 2008') plt.show()

Directional LISA

Y = gdfSimple[['rel1969', 'rel2008']].values Y.shape

np.random.seed(100) r4 = Rose(Y, W, k=4) r8 = Rose(Y, W, k=8)

Four segments

r4.plot_vectors();

r4.plot_origin();

r4.plot(Y[:,0]); # condition on starting relative income

#Number of vectors contained in each sector. r4.counts

#Vector lengths r4.r

# Psuedo p-values for the observed sector counts under the two-sided alternative r4.permute() r4.p

# Hypotheisis: The focal unit and its lag move in the same directions. r4.permute(alternative='positive') r4.p

# Hypotheisis: The focal unit and its lag move in oposite directions. r4.permute(alternative='negative') r4.p

Eight segments

# condition on starting relative income r8.plot(Y[:,0]) ;

# Number of vectors contained in each sector. r8.counts

# Vector lengths r8.r

r8.permute() r8.p

# origin standardized r8.plot_origin();

Preparation for regression models

y = gdf['g1929_1994'].values y_name = 'Growth 1929-1994'

x = np.array([gdf.ln_gp_1929]).T x_name = 'Log GPDpc in 1929'

OLS

ols = OLS(y = y, x = x, w = W, name_y=y_name, name_x = [x_name], name_w="Wqueen", name_ds='gdf', white_test=True, spat_diag=True, moran=True) print(ols.summary)

SAR

sar = ML_Lag(y=y, x=x, w=W, name_y=y_name, name_x = [x_name], name_w="Wqueen", name_ds='gdf') print(sar.summary)

SEM

sem = ML_Error(y=y, x=x, w=W, name_y=y_name, name_x = [x_name], name_w="Wqueen", name_ds='gdf') print(sem.summary)

.css-15w88e5{color:var(--chakra-colors-fg-neutral-primary);font-weight:inherit;letter-spacing:-0.09px;}Introduction to exploratory space-time data analysis (ESTDA)