W and econometrics
How can we estimate the six parameters of A if we only have three observations (n=3)????
How do we "construct" W?
QUIZ: What are spatial weights? How are they related to spatial interactions?
Setup
# Load libraries
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as cx
import plotly.express as px
import libpysal
from libpysal import weights
from libpysal.weights import Queen
from splot.libpysal import plot_spatial_weights
import warnings
warnings.filterwarnings('ignore')
Import a digital map
gdf = gpd.read_file('/work/geoBoundaries-BOL-ADM1_simplified.geojson')
gdf
gdf.explore()
Construct the W matrix
Contiguity weights
W = weights.Queen.from_dataframe(gdf, idVariable = 'shapeID')
#W = weights.Rook.from_dataframe(gdf, idVariable = 'shapeID')
# Row-standardize W
# W.transform = 'r'
Distance weights
# WORK IN PROGRESS
#W = weights.KNN.from_dataframe(gdf, k=4, idVariable = 'shapeID')
#W = weights.KNN.from_dataframe(gdf, k=6, idVariable = 'shapeID')
#W = weights.KNN.from_dataframe(gdf, k=8, idVariable = 'shapeID')
# Row-standardize W
#W.transform = 'r'
# WORK IN PROGRESS
# Compute inverse distance squared based on maximum nearest neighbor distance between the n observations
# Reference: https://pysal.org/libpysal/generated/libpysal.weights.min_threshold_dist_from_shapefile.html#libpysal.weights.min_threshold_dist_from_shapefile
#maxMin = weights.min_threshold_dist_from_shapefile('/work/geoBoundaries-BOL-ADM1_simplified.shp')
#W = weights.DistanceBand.from_dataframe(gdf, threshold = maxMin, binary=False, alpha=-2)
# Row-standardize W
#W.transform = 'r'
Show full-matrix form
W_matrix, ids = W.full()
W_matrix
px.imshow(W_matrix, origin='lower')
ids
df_W_matrix = pd.DataFrame(W_matrix)
df_W_matrix
df_ids = pd.DataFrame({'polyID': ids})
df_ids
W statistics
# How many neighbors does each region have?
W_matrix.sum(axis=1)
count_W_matrix = pd.DataFrame(W_matrix.sum(axis=1))
count_W_matrix.describe().round(2)
# How many regions are included in the connectivity structure?
W.n
# How dense is the connectivity structure? (in percentage)
W.pct_nonzero
# What is the distribution of the number of neighbors?
W.histogram # (number of neighbors, count)
# What is the mean number of neighbors?
W.mean_neighbors
# What is the minimum number of neighbors?
W.min_neighbors
# What is the maximum number of neighbors?
W.max_neighbors
# What is the list of neighbors?
print(W.neighbors)
# What are the neighbors of the region with the id = 48357)?
W['BOL-ADM1-26024454B34957622']
Standardize W
W_stand = W
# >>> Row-standardize W
W_stand.transform = 'r'
W_stand_matrix, ids = W_stand.full()
df_W_stand_matrix = pd.DataFrame(W_stand_matrix)
df_W_stand_matrix
Plot W
px.imshow(W_stand_matrix, origin='lower')
W_plot = weights.Queen.from_dataframe(gdf)
#Simple plot of W
plot_spatial_weights(W_plot, gdf);
gdf['coords'] = gdf['geometry'].apply(lambda x: x.representative_point().coords[:])
gdf['coords'] = [coords[0] for coords in gdf['coords']]
fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(7,7))
plot_spatial_weights(W_plot, gdf, ax=ax)
texts =[ax.text(row.coords[0], row.coords[1], s=row['shapeName'], horizontalalignment='center', bbox={'facecolor': 'white', 'alpha':0.8, 'pad': 2, 'edgecolor':'none'}) for idx, row in gdf.iterrows()]
cx.add_basemap(ax, crs=gdf.crs.to_string(), source = cx.providers.CartoDB.Voyager, attribution=False)
plt.show()
Identify specific neighbors
# What are the neighbors of Beni (shapeID = BOL-ADM1-26024454B34957622)?
W['BOL-ADM1-26024454B34957622']
self_and_neighbors = ['BOL-ADM1-26024454B34957622']
self_and_neighbors.extend(W.neighbors['BOL-ADM1-26024454B34957622'])
neighborsOnly = W.neighbors['BOL-ADM1-26024454B34957622']
gdf.query('shapeID in @neighborsOnly')
gdf.query('shapeID in @self_and_neighbors')
Export W
to Stata
# to be read by the spmatrix (from Stata 15)
ids_AND_W_matrix = pd.merge(df_ids, df_W_matrix, left_index=True, right_index=True)
ids_AND_W_matrix.to_csv('ids_AND_W_matrix.csv')
ids_AND_W_matrix.to_stata('ids_AND_W_matrix.dta')
# Display exported stata file
pd.read_stata('/work/ids_AND_W_matrix.dta')
df_W_matrix.to_csv('W_matrix.csv', index = False)
df_W_matrix.to_stata('W_matrix.dta', write_index = False)
# Display exported stata file
pd.read_stata('/work/W_matrix.dta')