Exploring datasets from Geoda

Setup

# Load libraries import numpy as np import pandas as pd import matplotlib.pyplot as plt plt.style.use('ggplot') import seaborn as sns import plotly.express as px import plotly.graph_objects as go import geopandas as gpd import contextily as cx import libpysal from libpysal import weights from libpysal.weights import Queen import esda from esda.moran import Moran, Moran_Local import splot from splot.esda import moran_scatterplot, plot_moran, lisa_cluster, plot_local_autocorrelation from splot.libpysal import plot_spatial_weights import warnings warnings.filterwarnings('ignore')

Datasets

from IPython.display import IFrame IFrame(src='https://geodacenter.github.io/data-and-lab/', width=800, height=1200)

Import data

IFrame(src='https://geodacenter.github.io/data-and-lab/nepal/', width=800, height=1200)

# Info website: https://geodacenter.github.io/data-and-lab/nepal/ gdf = gpd.read_file("https://geodacenter.github.io/data-and-lab/data/nepal.zip")

gdf.head(5)

gdf.describe().round(2)

Basic maps

gdf.plot('povindex', legend = True);

fig, ax = plt.subplots(figsize=(6,4)) gdf.plot(column="povindex", scheme='FisherJenks', k = 3, cmap='coolwarm', edgecolor='k', linewidth=0.5, alpha=0.9, legend=True, ax=ax, legend_kwds={'bbox_to_anchor':(1.00, 0.92)}) plt.title('Spatial distribution of the poverty index: Three natural breaks') plt.tight_layout() #ax.axis("off") #plt.savefig('myMap.png',dpi=150, bbox_inches='tight') plt.show()

fig, ax = plt.subplots(figsize=(6,4)) gdf.plot(column="povindex", scheme='BoxPlot', cmap='coolwarm', edgecolor='k', linewidth=0.5, alpha=0.9, legend=True, ax=ax, legend_kwds={'bbox_to_anchor':(1.00, 0.92)}) plt.title('Spatial distribution of the poverty index: \nWhich regions are below/above the median?') plt.tight_layout() ax.axis("off") #plt.savefig('myMap.png',dpi=150, bbox_inches='tight') plt.show()

Contextual maps

fig, ax = plt.subplots(figsize=(6,4)) gdf.plot(facecolor='none', edgecolor='grey', ax=ax) cx.add_basemap(ax, crs=gdf.crs.to_string(), source = cx.providers.NASAGIBS.ViirsEarthAtNight2012, attribution=False) ax.axis("off") plt.show()

fig, ax = plt.subplots(figsize=(6,4)) gdf.plot(column="povindex", scheme='FisherJenks', k = 3, cmap='coolwarm', edgecolor='k', linewidth=0.5, alpha=0.7, legend=True, ax=ax, legend_kwds={'bbox_to_anchor':(1.00, 0.92)}) cx.add_basemap(ax, crs=gdf.crs.to_string(), source = cx.providers.CartoDB.Positron, attribution=False) plt.title('Spatial distribution of the poverty index: Three natural breaks') plt.tight_layout() ax.axis("off") #plt.savefig('myMap.png',dpi=150, bbox_inches='tight') plt.show()

fig, ax = plt.subplots(figsize=(6,4)) gdf.plot(column="povindex", scheme='FisherJenks', k = 3, cmap='coolwarm', edgecolor='k', linewidth=0.5, alpha=0.7, legend=True, ax=ax, legend_kwds={'bbox_to_anchor':(1.00, 0.92)}) cx.add_basemap(ax, crs=gdf.crs.to_string(), source = cx.providers.Stamen.TonerHybrid, attribution=False) plt.title('Spatial distribution of the poverty index: Three natural breaks') plt.tight_layout() ax.axis("off") #plt.savefig('myMap.png',dpi=150, bbox_inches='tight') plt.show()

Interactive maps

#gdf.explore()

gdf.explore( column='povindex', tooltip=['district', 'povindex', 'pcinc', 'schoolcnt'], scheme='fisherjenks', k = 3, cmap='coolwarm', legend=True, tiles='Stamen Terrain', style_kwds =dict(color="gray", weight=0.5), legend_kwds=dict(colorbar=False) )

gdf.explore( column='povindex', tooltip=['district', 'povindex', 'pcinc', 'schoolcnt'], scheme='fisherjenks', k = 3, cmap='coolwarm', legend=True, tiles= cx.providers.NASAGIBS.ViirsEarthAtNight2012, style_kwds =dict(color="gray", weight=0.5), legend_kwds=dict(colorbar=False) )

Interactive data exploration

px.strip(gdf, x = 'povindex', hover_name= 'district', color='name_2')

px.histogram(gdf, x = 'povindex', hover_name= 'district', marginal='rug')

px.histogram(gdf, x = 'povindex', hover_name= 'district', marginal='box')

px.scatter(gdf, x="schoolcnt", y="povindex", #color="name_2", hover_name="district", trendline="ols", marginal_x="box", marginal_y="box", )

Spatial neighbors and weights

# Spatial weights based on contiguity #W = weights.Queen.from_dataframe(gdf) #W = weights.Rook.from_dataframe(gdf) # Spatial weights based on distance #W = weights.KNN.from_dataframe(gdf, k=4) W = weights.KNN.from_dataframe(gdf, k=5) #W = weights.KNN.from_dataframe(gdf, k=6) #W = weights.KNN.from_dataframe(gdf, k=8) #>>>> Compute inverse distance squared based on maximum nearest neighbor distance between the n observations #maxMin = weights.min_threshold_dist_from_shapefile("/work/data/nepal.shp") #W = weights.DistanceBand.from_dataframe(gdf, threshold = maxMin, binary=False, alpha=-2) # >>> Row-standardize W W.transform = 'r' # Ref: https://splot.readthedocs.io/en/stable/users/tutorials/weights.html#a-closer-look-at-w

plot_spatial_weights(W, gdf);

W.neighbors

W.min_neighbors

W.mean_neighbors

W.max_neighbors

Spatial lag

gdf["Wpovindex"] = weights.lag_spatial(W, gdf["povindex"])

gdf[["povindex", "Wpovindex"]].round(2)

Global spatial dependence

px.scatter( gdf, x="povindex", y="Wpovindex", hover_name="district", labels = dict(povindex = "Poverty index in focal region", Wpovindex = "Neighbors' average poverty index"), trendline="ols", marginal_x="box", marginal_y="box")

globalMoran = Moran(gdf['povindex'], W) print(globalMoran.I)

print(globalMoran.p_sim)

moran_scatterplot(globalMoran, aspect_equal=False, zstandard=True);

Local spatial dependence

localMoran = Moran_Local(gdf['povindex'], W, permutations = 999, seed=123456789) localMoran.p_sim

moran_scatterplot(localMoran, p=0.10, zstandard =False); #Note: Just for color-illustration purposes, we use a p-value of 0.20.

lisa_cluster(localMoran, gdf, p=0.10);

.css-15w88e5{color:var(--chakra-colors-fg-neutral-primary);font-weight:inherit;letter-spacing:-0.09px;}Exploring datasets from Geoda