Exploring subnational human development in India

Setup

# Load libraries import numpy as np import pandas as pd pd.set_option('display.max_columns', None) import matplotlib.pyplot as plt plt.style.use('ggplot') from matplotlib.ticker import FormatStrFormatter from matplotlib_scalebar.scalebar import ScaleBar from pylab import rcParams import matplotlib as mpl mpl.rcParams['figure.dpi'] = 72 import seaborn as sns #sns.set_style("darkgrid") #sns.set_context(context="paper", font_scale=1.5, rc=None) #sns.set(font="serif") import plotly.express as px import plotly.graph_objects as go import geopandas as gpd import libpysal from libpysal import weights from libpysal.weights import Queen import esda from esda.moran import Moran, Moran_Local import splot from splot.esda import moran_scatterplot, plot_moran, lisa_cluster, plot_local_autocorrelation from splot.libpysal import plot_spatial_weights from giddy.directional import Rose import statsmodels.api as sm import statsmodels.formula.api as smf from spreg import OLS from spreg import MoranRes from spreg import ML_Lag from spreg import ML_Error from mgwr.gwr import GWR, MGWR from mgwr.sel_bw import Sel_BW from mgwr.utils import shift_colormap, truncate_colormap import warnings warnings.filterwarnings('ignore')

Import data

df = pd.read_csv("https://github.com/quarcs-lab/data-open/raw/master/shdi/SHDIold.csv").sort_values(by=['year', 'SubContinent'])

df.info()

df

map = gpd.read_file("https://gist.github.com/cmg777/81b6e2b1ea5865c09daf3b2bbf35bbf8/raw/5e0a0370f453421bc503594cfae1e466c89e449f/GDL%2520Shapefiles%2520V4.geojson")

map.plot('shdi');

map.info()

map.head(9)

fig, ax = plt.subplots(figsize=(12,8)) map.plot(column="shdi", scheme='BoxPlot', cmap='coolwarm', legend=True, ax=ax, legend_kwds={'bbox_to_anchor':(1.00, 0.70)}) plt.title('Spatial distribution of the subnational HDI: \nWhich regions are below/above the median?') plt.tight_layout() ax.axis("off") #plt.savefig('myMap.png',dpi=150, bbox_inches='tight') plt.show()

map['shdi'].describe().round(2)

Prepare data

# Remove national observations and focus on India df1 = df[(df.level != 'National') & (df.country == 'India')] df1

df1.columns

df1["year"]= df["year"].astype(str)

df1990 = df1[df1['year'] == '1990']

df2019 = df1[df1['year'] == '2019']

df2019['Continent'].unique()

df2019['SubContinent'].unique()

df2019['country'].unique()

Descriptive statistics

df1.loc[:, df1.columns != 'year'].describe().round(2)

df1990.describe().round(2)

df2019.describe().round(2)

Regional differences

Strip plot

px.strip(df1, x = ['shdi'], y = 'country', range_x= [0.35, 0.80], color = 'continent2', hover_name= 'region', hover_data = ['country'], animation_frame= 'year', labels=dict(continent2 = "Continent", shdi ="Subnational human development index") )

Box plot

px.box(df1, x = 'shdi', y = 'country', range_x= [0.35, 0.80], color = 'continent2', hover_name= 'region', hover_data = ['country'], animation_frame= 'year', labels=dict(continent2 = "Continent", shdi ="Subnational human development index") )

Histogram

px.histogram(df1, x = 'shdi', range_x= [0.35, 0.80], nbins= 15, color = 'country', hover_name= 'region', hover_data= ['country'], marginal='rug', # box is also a good option animation_frame = 'year', labels=dict(continent2 = "Continent", shdi ="Subnational human development index") )

ECDF

px.ecdf(df1, x="shdi", range_x= [0.30, 0.80], color="country", hover_name= 'region', hover_data= ['country'], #markers=True, #lines=False, marginal="rug", # histogram animation_frame = 'year', labels=dict(continent2 = "Continent", shdi ="Subnational human development index") )

Time series

Continents

fig = px.line( df1, x="year", y="shdi", log_y= True, color="region", hover_name="country", hover_data= ['country'], labels=dict(shdi="HDI", continent2 ="Continent", year = ""), facet_col="country", facet_col_wrap = 2, facet_row_spacing = 0.01, height= 500 ) fig.update_layout(showlegend=False)

Correlations

px.scatter(df1, y = "shdi", x = "incindex", range_y= [0.2, 0.9], range_x= [0.2, 0.9], hover_name = "region", hover_data= ['country'], color = "country", #size = "pop", size_max = 60, trendline= 'ols', #trendline_scope= "overall", #marginal_x= 'box', #marginal_y= 'box', animation_frame= 'year', labels=dict(country = "Country", continent2 = "Continent", lgnic ="Log of gross national income per capita", lifexp="Life Expectancy (years)") )

Maps

gdf1990 = map.merge(df1990, on = 'GDLcode', how = 'left') gdf2019 = map.merge(df2019, on = 'GDLcode', how = 'left')

gdf2019.columns

fig, ax = plt.subplots(figsize=(12,8)) gdf1990.plot(column="shdi_y", scheme='BoxPlot', cmap='coolwarm', legend=True, ax=ax, edgecolor='black', legend_kwds={'bbox_to_anchor':(0.80, 0.95)}) plt.title('Spatial distribution of human development in 1990: \nWhich regions are below/above the median?') plt.tight_layout() ax.axis("off") #plt.savefig('myMap.png',dpi=300, bbox_inches='tight') plt.show()

fig, ax = plt.subplots(figsize=(12,8)) gdf2019.plot(column="shdi_y", scheme='BoxPlot', cmap='coolwarm', legend=True, ax=ax, edgecolor='black', legend_kwds={'bbox_to_anchor':(0.80, 0.95)}) plt.title('Spatial distribution of human development in 1990: \nWhich regions are below/above the median?') plt.tight_layout() ax.axis("off") #plt.savefig('myMap.png',dpi=300, bbox_inches='tight') plt.show()

Spatial dependence

gdf2019India = gdf2019.query("country_x == 'India'").reset_index(drop=True)

#W = weights.KNN.from_dataframe(gdf2019India, k=4) W = weights.KNN.from_dataframe(gdf2019India, k=6) #W = weights.KNN.from_dataframe(gdf2019India, k=8) #W = weights.Queen.from_dataframe(gdf2019India) #W = weights.Rook.from_dataframe(gdf2019India) #>>>> Compute inverse distance squared based on maximum nearest neighbor distance between the n observations #maxMin = weights.min_threshold_dist_from_shapefile("nepal.shp") #W = weights.DistanceBand.from_dataframe(nepal, threshold = maxMin, binary=False, alpha=-2) # >>> Row-standardize W W.transform = 'r'

plot_spatial_weights(W, gdf2019India);

gdf2019India['Wshdi_y'] = weights.lag_spatial(W, gdf2019India['shdi_y'])

globalMoran = Moran(gdf2019India['shdi_y'], W) MoranI = globalMoran.I MoranI = "{:.2f}".format(MoranI) localMoran = Moran_Local(gdf2019India['shdi_y'], W, permutations = 999, seed=12345)

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12,5)) moran_scatterplot(localMoran, p=0.10, aspect_equal=False, zstandard = False, ax=axes[0]) lisa_cluster(localMoran, gdf2019India, p=0.10, legend_kwds={'bbox_to_anchor':(0.02, 0.90)}, ax=axes[1]) #cx.add_basemap(axes[1], crs=gdf.crs.to_string(), source=cx.providers.Stamen.TonerLite, attribution=False) #cx.add_basemap(axes[1], crs=gdf.crs.to_string(), source=cx.providers.Stamen.TonerLabels, attribution=False) axes[0].set_xlabel('Indicator') axes[0].set_ylabel('Spatial lag of indicator') axes[0].set_title(f"(a) Moran scatterplot (Moran's I = {MoranI})") axes[1].set_title("(b) Spatial clusters and outliers (p<0.10)") plt.show()

gdf2019India.explore( column= 'shdi_y', tooltip=['shdi_y','country_y', 'region_y'], k = 3, scheme='FisherJenks', cmap='coolwarm', legend=True, tiles='CartoDB dark_matter', style_kwds =dict(color="gray", weight=0.4), legend_kwds=dict(colorbar=False) )

px.scatter( gdf2019India, x='shdi_y', y='Wshdi_y', hover_name="region_y", hover_data = ['shdi_y', 'Wshdi_y', 'country_y'], trendline="ols", marginal_x="box", marginal_y="box")

.css-15w88e5{color:var(--chakra-colors-fg-neutral-primary);font-weight:inherit;letter-spacing:-0.09px;}Exploring subnational human development in India