Space-time data analysis: The Case of Industrial Agglomeration across Provinces in Indonesia
Setup
# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 150
import seaborn as sns
sns.set_style("darkgrid")
sns.set_context(context="paper", font_scale=1.5, rc=None)
sns.set(font="serif")
import plotly.express as px
import plotly.graph_objects as go
import geopandas as gpd
import libpysal
from libpysal import weights
from libpysal.weights import Queen
import esda
from esda.moran import Moran, Moran_Local
import splot
from splot.esda import moran_scatterplot, plot_moran, lisa_cluster, plot_local_autocorrelation
from splot.libpysal import plot_spatial_weights
from giddy.directional import Rose
import statsmodels.api as sm
import statsmodels.formula.api as smf
from spreg import OLS
from spreg import MoranRes
from spreg import ML_Lag
from spreg import ML_Error
import warnings
warnings.filterwarnings('ignore')
Import data
# Load Sumatra dataset and map file
df = pd.read_csv("eksplor.csv")
gdf = gpd.read_file("datafinal.gpkg")
gdf.columns
gdfSimple = gdf.loc[:, ['ProvinceID', 'Provinsi', 'growth2010', 'growth2021', 'LQ2010', 'LQ2021', 'geometry']]
type(gdfSimple)
gdfSimple.columns
Describe data
gdfSimple[['LQ2010', 'LQ2021', 'growth2010', 'growth2021']].describe().round(2)
sns.histplot(x=gdfSimple['LQ2010'], kde=True);
sns.histplot(x=gdfSimple['LQ2021'], kde=True);
px.histogram(gdfSimple, x = 'LQ2010', hover_name= 'Provinsi', marginal='rug')
px.histogram(gdfSimple, x = 'LQ2021', hover_name= 'Provinsi', marginal='rug')
gdfSimple.crs
Basic maps
fig, ax = plt.subplots(figsize=(6,4))
gdfSimple.plot(column="LQ2010", scheme='NaturalBreaks', k=5, cmap='coolwarm', legend=True, legend_kwds={'bbox_to_anchor':(1.12, 0.55)}, ax=ax)
plt.title('Industrial Agglomeration in 2010')
plt.tight_layout()
ax.axis("off")
plt.savefig('LQ2010',dpi=300, bbox_inches='tight')
plt.show()
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12,4))
gdfSimple.plot(column="LQ2010", scheme='Quantiles', k=5, cmap='coolwarm', legend=True, legend_kwds={'bbox_to_anchor':(1.12, 0.55)}, ax=axes[0])
gdfSimple.plot(column="LQ2021", scheme='user_defined', classification_kwds={'bins':[0.85, 0.95, 1.03, 1.16]}, cmap='coolwarm', legend=True, legend_kwds={'bbox_to_anchor':(1.12, 0.55)},ax=axes[1])
plt.tight_layout()
axes[0].axis("off")
axes[1].axis("off")
axes[0].set_title('(a) Industrial Agglomeration in 2010')
axes[1].set_title('(b) Industrial Agglomeration in 2021')
plt.savefig('lq2010-2021',dpi=300, bbox_inches='tight')
plt.show()
Spatial weights matrix
Create W
#W = weights.Queen.from_dataframe(gdf)
#W = weights.Rook.from_dataframe(gdf)
#W = weights.KNN.from_dataframe(gdf, k=4)
#W = weights.KNN.from_dataframe(gdf, k=6)
#W = weights.KNN.from_dataframe(gdf, k=8)
#>>>> Compute inverse distance squared based on maximum nearest neighbor distance between the n observations
maxMin = weights.min_threshold_dist_from_shapefile("dataset.shp")
W = weights.DistanceBand.from_dataframe(gdf, threshold = maxMin, binary=False, alpha=-2)
# >>> Row-standardize W
W.transform = 'r'
# Ref: https://splot.readthedocs.io/en/stable/users/tutorials/weights.html#a-closer-look-at-w
W.neighbors
W.min_neighbors
W.mean_neighbors
W.max_neighbors
Plot W
plot_spatial_weights(W, gdf);
Spatial lag
# Create spatial lag variables
gdfSimple["WLQ2010"] = weights.lag_spatial(W, gdfSimple["LQ2010"])
gdfSimple["WLQ2021"] = weights.lag_spatial(W, gdfSimple["LQ2021"])
gdfSimple["Wgrowth2010"] = weights.lag_spatial(W, gdfSimple["growth2010"])
gdfSimple["Wgrowth2021"] = weights.lag_spatial(W, gdfSimple["growth2021"])
gdfSimple[["LQ2010", "WLQ2010", "LQ2021", "WLQ2021"]].round(2)
gdfSimple[["LQ2010", "WLQ2010", "LQ2021", "WLQ2021","growth2010", "Wgrowth2010", "growth2021", "Wgrowth2021" ]].describe().round(2)
Spatial autocorrelation
LQ2010= gdfSimple["LQ2010"]
LQ2021 = gdfSimple["LQ2021"]
growth2010= gdfSimple["growth2010"]
growth2021 = gdfSimple["growth2021"]
moranLQ2010 = Moran(LQ2010, W)
moranLQ2021 = Moran(LQ2021, W)
morangrowth2010 = Moran(growth2010, W)
morangrowth2021 = Moran(growth2021, W)
print(moranLQ2010.I, moranLQ2021.I,morangrowth2010.I, morangrowth2021.I )
print(moranLQ2010.p_sim, moranLQ2021.p_sim, morangrowth2021.p_sim, morangrowth2021.p_sim)
print(moranLQ2010.p_norm, moranLQ2021.p_norm, morangrowth2021.p_sim, morangrowth2021.p_sim)
sns.kdeplot(moranLQ2010.sim, shade=True)
plt.vlines(moranLQ2010.I, 0, 1, color='r')
plt.vlines(moranLQ2010.EI, 0, 1)
plt.xlabel("Moran's I Industrial Agglomeration in 2010");
fig, axes = plt.subplots(nrows=1, ncols=2,figsize=(12,4))
moran_scatterplot(moranLQ2010, aspect_equal=False, zstandard=False, ax=axes[0])
moran_scatterplot(moranLQ2021, aspect_equal=False, zstandard=False, ax=axes[1])
axes[0].set_xlim([0.2, 1.2])
axes[0].set_xlabel("Industrial Agglomeration in 2010")
axes[0].set_ylim([0.2, 1.2])
axes[0].set_ylabel("Neighbors' Industrial Agglomeration in 2010")
axes[1].set_xlim([0.2, 1.2])
axes[1].set_xlabel("Industrial Agglomeration in 2021")
axes[1].set_ylim([0.2, 1.2])
axes[1].set_ylabel("Neighbors' Industrial Agglomeration in 2021")
plt.savefig('moran2010moran2021',dpi=300, bbox_inches='tight')
plt.show()
Evolution of spatial dependence
# Create multidimentional array
dfa = gdf.loc[:,'LQ2010':'LQ2021'].values
dfa
dfa.shape
# Create array of years
years = np.arange(2010,2022)
years
years.shape
mits = [Moran(cs, W) for cs in dfa.T]
res = np.array([(m.I, m.EI, m.p_sim, m.z_sim) for m in mits])
Evolution of regional disparities
sigma = dfa.std(axis=0)
plt.plot(years, sigma)
plt.title("Sigma Convergence")
plt.ylabel('Stand. Dev.of Industrial Agglomeration');
Industrial Agglomeration : Spatial Dependences vs Disparities
plt.plot(years, res[:,0], label="Spatial Dependences.")
plt.plot(years, sigma, label="Disparities")
plt.legend(bbox_to_anchor=(1.31,1), loc="upper right");
plt.style.use('seaborn-white')
# create figure and axis objects with subplots()
fig,ax = plt.subplots()
# make a plot
ax.plot(years, res[:,0], color="royalblue")
# set y-axis label
ax.set_ylabel("Spatial Dependence", color="royalblue")
# twin object for two different y-axis on the sample plot
ax2=ax.twinx()
# make a plot with different y-axis using second axis object
ax2.plot(years, sigma, color="orange")
ax2.set_ylabel("Regional Disparities", color="darkorange")
plt.show()
sns.set_style("darkgrid")
for row in dfa:
plt.plot(years, row)
# Create multidimentional array
dfb = gdf.loc[:,'growth2010':'growth2021'].values
dfb
dfb.shape
# Create array of years
years = np.arange(2010,2022)
years
years.shape
mits = [Moran(cs, W) for cs in dfb.T]
res = np.array([(m.I, m.EI, m.p_sim, m.z_sim) for m in mits])
plt.plot(years, res[:,0], label="Spatial Dependences.")
plt.plot(years, sigma, label="Disparities")
plt.legend(bbox_to_anchor=(1.31,1), loc="upper right");
plt.style.use('seaborn-white')
sigma = dfb.std(axis=0)
plt.plot(years, sigma)
plt.title("Sigma Convergence")
plt.ylabel('Stand. Dev.of Growth');
Regional Growth: Spatial Dependence vs Disparities
plt.plot(years, res[:,0], label="Spatial Dependences.")
plt.plot(years, sigma, label="Disparities")
plt.legend(bbox_to_anchor=(1.31,1), loc="upper right");
plt.style.use('seaborn-white')
sns.set_style("darkgrid")
for row in dfb:
plt.plot(years, row)
rel_dfa = dfa / dfa.mean(axis=0)
rel_dfa
rel_dfa.shape
for row in rel_dfa:
plt.plot(years, row)
for row in range(2010-2021):
sns.kdeplot(rel_dfa[:,row])
LISA scatterplot and map
moranLQ2010 = Moran_Local(LQ2010, W, permutations = 999, seed=12345)
moranLQ2021 = Moran_Local(LQ2021, W, permutations = 999, seed=12345)
moranLQ2010.p_sim
# Plot local Moran
fig, ax = moran_scatterplot(moranLQ2010, p=0.01)
ax.set_xlabel('Industrial Agglomeration in 2010')
ax.set_ylabel('Spatial Lag of Industrial Agglomeration in 2010')
plt.show()
# Trying to fix the colors
# It seems that at least one region each quadrant needs to be significant
#sns.set_style("darkgrid")
sns.set_style("whitegrid")
moran_scatterplot(moranLQ2010, p=0.15);
# Plot LISA map
fig, ax = plt.subplots(figsize=(14,12))
lisa_cluster(moranLQ2010, gdf, p=0.15, figsize = (16,12), ax=ax)
plt.show()
# Plot local Moran
fig, ax = moran_scatterplot(moranLQ2021, p=0.15)
ax.set_xlabel('Industrial Agglomeration in 2021')
ax.set_ylabel('Spatial Lag of Industrial Agglomeration in 2021')
plt.show()
# Plot LISA map
fig, ax = plt.subplots(figsize=(6,4))
lisa_cluster(moranLQ2021, gdf, p=0.15, ax=ax)
plt.show()
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12,4))
lisa_cluster(moranLQ2010, gdf, p=0.01, ax=axes[0])
lisa_cluster(moranLQ2021, gdf, p=0.01, ax=axes[1])
plt.tight_layout()
axes[0].axis("off")
axes[1].axis("off")
axes[0].set_title('Tahun 2010')
axes[1].set_title('Tahun 2021')
plt.show()
plot_local_autocorrelation(moranLQ2010, gdfSimple, 'LQ2010');
plot_local_autocorrelation(moranLQ2021, gdfSimple, 'LQ2021');