Exploring datasets from Geoda
Setup
# Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import geopandas as gpd
import contextily as cx
import libpysal
from libpysal import weights
from libpysal.weights import Queen
import esda
from esda.moran import Moran, Moran_Local
import splot
from splot.esda import moran_scatterplot, plot_moran, lisa_cluster, plot_local_autocorrelation
from splot.libpysal import plot_spatial_weights
import warnings
warnings.filterwarnings('ignore')
Datasets
from IPython.display import IFrame
IFrame(src='https://geodacenter.github.io/data-and-lab/', width=800, height=1200)
Import data
IFrame(src='https://geodacenter.github.io/data-and-lab/nepal/', width=800, height=1200)
# Info website: https://geodacenter.github.io/data-and-lab/nepal/
gdf = gpd.read_file("https://geodacenter.github.io/data-and-lab/data/nepal.zip")
gdf.head(5)
gdf.describe().round(2)
Basic maps
gdf.plot('povindex', legend = True);
fig, ax = plt.subplots(figsize=(6,4))
gdf.plot(column="povindex",
scheme='FisherJenks',
k = 3,
cmap='coolwarm',
edgecolor='k',
linewidth=0.5,
alpha=0.9,
legend=True,
ax=ax,
legend_kwds={'bbox_to_anchor':(1.00, 0.92)})
plt.title('Spatial distribution of the poverty index: Three natural breaks')
plt.tight_layout()
#ax.axis("off")
#plt.savefig('myMap.png',dpi=150, bbox_inches='tight')
plt.show()
fig, ax = plt.subplots(figsize=(6,4))
gdf.plot(column="povindex",
scheme='BoxPlot',
cmap='coolwarm',
edgecolor='k',
linewidth=0.5,
alpha=0.9,
legend=True,
ax=ax,
legend_kwds={'bbox_to_anchor':(1.00, 0.92)})
plt.title('Spatial distribution of the poverty index: \nWhich regions are below/above the median?')
plt.tight_layout()
ax.axis("off")
#plt.savefig('myMap.png',dpi=150, bbox_inches='tight')
plt.show()
Contextual maps
fig, ax = plt.subplots(figsize=(6,4))
gdf.plot(facecolor='none', edgecolor='grey', ax=ax)
cx.add_basemap(ax, crs=gdf.crs.to_string(), source = cx.providers.NASAGIBS.ViirsEarthAtNight2012, attribution=False)
ax.axis("off")
plt.show()
fig, ax = plt.subplots(figsize=(6,4))
gdf.plot(column="povindex",
scheme='FisherJenks',
k = 3,
cmap='coolwarm',
edgecolor='k',
linewidth=0.5,
alpha=0.7,
legend=True,
ax=ax,
legend_kwds={'bbox_to_anchor':(1.00, 0.92)})
cx.add_basemap(ax, crs=gdf.crs.to_string(), source = cx.providers.CartoDB.Positron, attribution=False)
plt.title('Spatial distribution of the poverty index: Three natural breaks')
plt.tight_layout()
ax.axis("off")
#plt.savefig('myMap.png',dpi=150, bbox_inches='tight')
plt.show()
fig, ax = plt.subplots(figsize=(6,4))
gdf.plot(column="povindex",
scheme='FisherJenks',
k = 3,
cmap='coolwarm',
edgecolor='k',
linewidth=0.5,
alpha=0.7,
legend=True,
ax=ax,
legend_kwds={'bbox_to_anchor':(1.00, 0.92)})
cx.add_basemap(ax, crs=gdf.crs.to_string(), source = cx.providers.Stamen.TonerHybrid, attribution=False)
plt.title('Spatial distribution of the poverty index: Three natural breaks')
plt.tight_layout()
ax.axis("off")
#plt.savefig('myMap.png',dpi=150, bbox_inches='tight')
plt.show()
Interactive maps
#gdf.explore()
gdf.explore(
column='povindex',
tooltip=['district', 'povindex', 'pcinc', 'schoolcnt'],
scheme='fisherjenks',
k = 3,
cmap='coolwarm',
legend=True,
tiles='Stamen Terrain',
style_kwds =dict(color="gray", weight=0.5),
legend_kwds=dict(colorbar=False)
)
gdf.explore(
column='povindex',
tooltip=['district', 'povindex', 'pcinc', 'schoolcnt'],
scheme='fisherjenks',
k = 3,
cmap='coolwarm',
legend=True,
tiles= cx.providers.NASAGIBS.ViirsEarthAtNight2012,
style_kwds =dict(color="gray", weight=0.5),
legend_kwds=dict(colorbar=False)
)
Interactive data exploration
px.strip(gdf, x = 'povindex', hover_name= 'district', color='name_2')
px.histogram(gdf, x = 'povindex', hover_name= 'district', marginal='rug')
px.histogram(gdf, x = 'povindex', hover_name= 'district', marginal='box')
px.scatter(gdf,
x="schoolcnt",
y="povindex",
#color="name_2",
hover_name="district",
trendline="ols",
marginal_x="box",
marginal_y="box",
)
Spatial neighbors and weights
# Spatial weights based on contiguity
#W = weights.Queen.from_dataframe(gdf)
#W = weights.Rook.from_dataframe(gdf)
# Spatial weights based on distance
#W = weights.KNN.from_dataframe(gdf, k=4)
W = weights.KNN.from_dataframe(gdf, k=5)
#W = weights.KNN.from_dataframe(gdf, k=6)
#W = weights.KNN.from_dataframe(gdf, k=8)
#>>>> Compute inverse distance squared based on maximum nearest neighbor distance between the n observations
#maxMin = weights.min_threshold_dist_from_shapefile("/work/data/nepal.shp")
#W = weights.DistanceBand.from_dataframe(gdf, threshold = maxMin, binary=False, alpha=-2)
# >>> Row-standardize W
W.transform = 'r'
# Ref: https://splot.readthedocs.io/en/stable/users/tutorials/weights.html#a-closer-look-at-w
plot_spatial_weights(W, gdf);
W.neighbors
W.min_neighbors
W.mean_neighbors
W.max_neighbors
Spatial lag
gdf["Wpovindex"] = weights.lag_spatial(W, gdf["povindex"])
gdf[["povindex", "Wpovindex"]].round(2)
Global spatial dependence
px.scatter(
gdf,
x="povindex",
y="Wpovindex",
hover_name="district",
labels = dict(povindex = "Poverty index in focal region",
Wpovindex = "Neighbors' average poverty index"),
trendline="ols",
marginal_x="box",
marginal_y="box")
globalMoran = Moran(gdf['povindex'], W)
print(globalMoran.I)
print(globalMoran.p_sim)
moran_scatterplot(globalMoran, aspect_equal=False, zstandard=True);
Local spatial dependence
localMoran = Moran_Local(gdf['povindex'], W, permutations = 999, seed=123456789)
localMoran.p_sim
moran_scatterplot(localMoran, p=0.10, zstandard =False);
#Note: Just for color-illustration purposes, we use a p-value of 0.20.
lisa_cluster(localMoran, gdf, p=0.10);