Space-time data analysis: The Case of Industrial Agglomeration across Provinces in Indonesia

Setup

# Load libraries import numpy as np import pandas as pd import matplotlib.pyplot as plt from matplotlib.ticker import FormatStrFormatter import matplotlib as mpl mpl.rcParams['figure.dpi'] = 150 import seaborn as sns sns.set_style("darkgrid") sns.set_context(context="paper", font_scale=1.5, rc=None) sns.set(font="serif") import plotly.express as px import plotly.graph_objects as go import geopandas as gpd import libpysal from libpysal import weights from libpysal.weights import Queen import esda from esda.moran import Moran, Moran_Local import splot from splot.esda import moran_scatterplot, plot_moran, lisa_cluster, plot_local_autocorrelation from splot.libpysal import plot_spatial_weights from giddy.directional import Rose import statsmodels.api as sm import statsmodels.formula.api as smf from spreg import OLS from spreg import MoranRes from spreg import ML_Lag from spreg import ML_Error import warnings warnings.filterwarnings('ignore')

Import data

# Load Sumatra dataset and map file df = pd.read_csv("eksplor.csv") gdf = gpd.read_file("datafinal.gpkg")

gdf.columns

gdfSimple = gdf.loc[:, ['ProvinceID', 'Provinsi', 'growth2010', 'growth2021', 'LQ2010', 'LQ2021', 'geometry']]

type(gdfSimple)

gdfSimple.columns

Describe data

gdfSimple[['LQ2010', 'LQ2021', 'growth2010', 'growth2021']].describe().round(2)

sns.histplot(x=gdfSimple['LQ2010'], kde=True);

sns.histplot(x=gdfSimple['LQ2021'], kde=True);

px.histogram(gdfSimple, x = 'LQ2010', hover_name= 'Provinsi', marginal='rug')

px.histogram(gdfSimple, x = 'LQ2021', hover_name= 'Provinsi', marginal='rug')

gdfSimple.crs

Basic maps

fig, ax = plt.subplots(figsize=(6,4)) gdfSimple.plot(column="LQ2010", scheme='NaturalBreaks', k=5, cmap='coolwarm', legend=True, legend_kwds={'bbox_to_anchor':(1.12, 0.55)}, ax=ax) plt.title('Industrial Agglomeration in 2010') plt.tight_layout() ax.axis("off") plt.savefig('LQ2010',dpi=300, bbox_inches='tight') plt.show()

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12,4)) gdfSimple.plot(column="LQ2010", scheme='Quantiles', k=5, cmap='coolwarm', legend=True, legend_kwds={'bbox_to_anchor':(1.12, 0.55)}, ax=axes[0]) gdfSimple.plot(column="LQ2021", scheme='user_defined', classification_kwds={'bins':[0.85, 0.95, 1.03, 1.16]}, cmap='coolwarm', legend=True, legend_kwds={'bbox_to_anchor':(1.12, 0.55)},ax=axes[1]) plt.tight_layout() axes[0].axis("off") axes[1].axis("off") axes[0].set_title('(a) Industrial Agglomeration in 2010') axes[1].set_title('(b) Industrial Agglomeration in 2021') plt.savefig('lq2010-2021',dpi=300, bbox_inches='tight') plt.show()

Spatial weights matrix

Create W

#W = weights.Queen.from_dataframe(gdf) #W = weights.Rook.from_dataframe(gdf) #W = weights.KNN.from_dataframe(gdf, k=4) #W = weights.KNN.from_dataframe(gdf, k=6) #W = weights.KNN.from_dataframe(gdf, k=8) #>>>> Compute inverse distance squared based on maximum nearest neighbor distance between the n observations maxMin = weights.min_threshold_dist_from_shapefile("dataset.shp") W = weights.DistanceBand.from_dataframe(gdf, threshold = maxMin, binary=False, alpha=-2) # >>> Row-standardize W W.transform = 'r' # Ref: https://splot.readthedocs.io/en/stable/users/tutorials/weights.html#a-closer-look-at-w

W.neighbors

W.min_neighbors

W.mean_neighbors

W.max_neighbors

Plot W

plot_spatial_weights(W, gdf);

Spatial lag

# Create spatial lag variables gdfSimple["WLQ2010"] = weights.lag_spatial(W, gdfSimple["LQ2010"]) gdfSimple["WLQ2021"] = weights.lag_spatial(W, gdfSimple["LQ2021"]) gdfSimple["Wgrowth2010"] = weights.lag_spatial(W, gdfSimple["growth2010"]) gdfSimple["Wgrowth2021"] = weights.lag_spatial(W, gdfSimple["growth2021"])

gdfSimple[["LQ2010", "WLQ2010", "LQ2021", "WLQ2021"]].round(2)

gdfSimple[["LQ2010", "WLQ2010", "LQ2021", "WLQ2021","growth2010", "Wgrowth2010", "growth2021", "Wgrowth2021" ]].describe().round(2)

Spatial autocorrelation

LQ2010= gdfSimple["LQ2010"] LQ2021 = gdfSimple["LQ2021"] growth2010= gdfSimple["growth2010"] growth2021 = gdfSimple["growth2021"]

moranLQ2010 = Moran(LQ2010, W) moranLQ2021 = Moran(LQ2021, W) morangrowth2010 = Moran(growth2010, W) morangrowth2021 = Moran(growth2021, W)

print(moranLQ2010.I, moranLQ2021.I,morangrowth2010.I, morangrowth2021.I )

print(moranLQ2010.p_sim, moranLQ2021.p_sim, morangrowth2021.p_sim, morangrowth2021.p_sim)

print(moranLQ2010.p_norm, moranLQ2021.p_norm, morangrowth2021.p_sim, morangrowth2021.p_sim)

sns.kdeplot(moranLQ2010.sim, shade=True) plt.vlines(moranLQ2010.I, 0, 1, color='r') plt.vlines(moranLQ2010.EI, 0, 1) plt.xlabel("Moran's I Industrial Agglomeration in 2010");

fig, axes = plt.subplots(nrows=1, ncols=2,figsize=(12,4)) moran_scatterplot(moranLQ2010, aspect_equal=False, zstandard=False, ax=axes[0]) moran_scatterplot(moranLQ2021, aspect_equal=False, zstandard=False, ax=axes[1]) axes[0].set_xlim([0.2, 1.2]) axes[0].set_xlabel("Industrial Agglomeration in 2010") axes[0].set_ylim([0.2, 1.2]) axes[0].set_ylabel("Neighbors' Industrial Agglomeration in 2010") axes[1].set_xlim([0.2, 1.2]) axes[1].set_xlabel("Industrial Agglomeration in 2021") axes[1].set_ylim([0.2, 1.2]) axes[1].set_ylabel("Neighbors' Industrial Agglomeration in 2021") plt.savefig('moran2010moran2021',dpi=300, bbox_inches='tight') plt.show()

Evolution of spatial dependence

# Create multidimentional array dfa = gdf.loc[:,'LQ2010':'LQ2021'].values dfa

dfa.shape

# Create array of years years = np.arange(2010,2022) years

years.shape

mits = [Moran(cs, W) for cs in dfa.T] res = np.array([(m.I, m.EI, m.p_sim, m.z_sim) for m in mits])

Evolution of regional disparities

sigma = dfa.std(axis=0) plt.plot(years, sigma) plt.title("Sigma Convergence") plt.ylabel('Stand. Dev.of Industrial Agglomeration');

Industrial Agglomeration : Spatial Dependences vs Disparities

plt.plot(years, res[:,0], label="Spatial Dependences.") plt.plot(years, sigma, label="Disparities") plt.legend(bbox_to_anchor=(1.31,1), loc="upper right"); plt.style.use('seaborn-white')

# create figure and axis objects with subplots() fig,ax = plt.subplots() # make a plot ax.plot(years, res[:,0], color="royalblue") # set y-axis label ax.set_ylabel("Spatial Dependence", color="royalblue") # twin object for two different y-axis on the sample plot ax2=ax.twinx() # make a plot with different y-axis using second axis object ax2.plot(years, sigma, color="orange") ax2.set_ylabel("Regional Disparities", color="darkorange") plt.show()

sns.set_style("darkgrid") for row in dfa: plt.plot(years, row)

# Create multidimentional array dfb = gdf.loc[:,'growth2010':'growth2021'].values dfb

dfb.shape

# Create array of years years = np.arange(2010,2022) years

years.shape

mits = [Moran(cs, W) for cs in dfb.T] res = np.array([(m.I, m.EI, m.p_sim, m.z_sim) for m in mits])

plt.plot(years, res[:,0], label="Spatial Dependences.") plt.plot(years, sigma, label="Disparities") plt.legend(bbox_to_anchor=(1.31,1), loc="upper right"); plt.style.use('seaborn-white')

sigma = dfb.std(axis=0) plt.plot(years, sigma) plt.title("Sigma Convergence") plt.ylabel('Stand. Dev.of Growth');

Regional Growth: Spatial Dependence vs Disparities

plt.plot(years, res[:,0], label="Spatial Dependences.") plt.plot(years, sigma, label="Disparities") plt.legend(bbox_to_anchor=(1.31,1), loc="upper right"); plt.style.use('seaborn-white')

sns.set_style("darkgrid") for row in dfb: plt.plot(years, row)

rel_dfa = dfa / dfa.mean(axis=0)

rel_dfa

rel_dfa.shape

for row in rel_dfa: plt.plot(years, row)

for row in range(2010-2021): sns.kdeplot(rel_dfa[:,row])

LISA scatterplot and map

moranLQ2010 = Moran_Local(LQ2010, W, permutations = 999, seed=12345) moranLQ2021 = Moran_Local(LQ2021, W, permutations = 999, seed=12345)

moranLQ2010.p_sim

# Plot local Moran fig, ax = moran_scatterplot(moranLQ2010, p=0.01) ax.set_xlabel('Industrial Agglomeration in 2010') ax.set_ylabel('Spatial Lag of Industrial Agglomeration in 2010') plt.show()

# Trying to fix the colors # It seems that at least one region each quadrant needs to be significant #sns.set_style("darkgrid") sns.set_style("whitegrid") moran_scatterplot(moranLQ2010, p=0.15);

# Plot LISA map fig, ax = plt.subplots(figsize=(14,12)) lisa_cluster(moranLQ2010, gdf, p=0.15, figsize = (16,12), ax=ax) plt.show()

# Plot local Moran fig, ax = moran_scatterplot(moranLQ2021, p=0.15) ax.set_xlabel('Industrial Agglomeration in 2021') ax.set_ylabel('Spatial Lag of Industrial Agglomeration in 2021') plt.show()

# Plot LISA map fig, ax = plt.subplots(figsize=(6,4)) lisa_cluster(moranLQ2021, gdf, p=0.15, ax=ax) plt.show()

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12,4)) lisa_cluster(moranLQ2010, gdf, p=0.01, ax=axes[0]) lisa_cluster(moranLQ2021, gdf, p=0.01, ax=axes[1]) plt.tight_layout() axes[0].axis("off") axes[1].axis("off") axes[0].set_title('Tahun 2010') axes[1].set_title('Tahun 2021') plt.show()

plot_local_autocorrelation(moranLQ2010, gdfSimple, 'LQ2010');

plot_local_autocorrelation(moranLQ2021, gdfSimple, 'LQ2021');

.css-15w88e5{color:var(--chakra-colors-fg-neutral-primary);font-weight:inherit;letter-spacing:-0.09px;}Space-time data analysis: The Case of Industrial Agglomeration across Provinces in Indonesia

Setup

Import data

Describe data

Basic maps

Spatial weights matrix

Create W

Plot W

Spatial lag

Spatial autocorrelation

Evolution of spatial dependence

Evolution of regional disparities

Industrial Agglomeration : Spatial Dependences vs Disparities

Regional Growth: Spatial Dependence vs Disparities

LISA scatterplot and map

References

Space-time data analysis: The Case of Industrial Agglomeration across Provinces in Indonesia