# Start writing code here..import numpy as np
import pandas as pd
import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import libpysal as ps
from mgwr.gwr import GWR, MGWR
from mgwr.sel_bw import Sel_BW
import scipy as sp
from matplotlib_scalebar.scalebar import ScaleBar
from mgwr.utils import shift_colormap, truncate_colormap
import numpy as np
import itertools
import time
import statsmodels.api as sm
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
import statsmodels.api as statm
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
/opt/conda/lib/python3.9/site-packages/spglm/utils.py:367: SyntaxWarning: "is not" with a literal. Did you mean "!="?
if resetlist is not ():
df = pd.read_csv("poverty_2010.csv")
gdf = gpd.read_file("/vsicurl/https://raw.githubusercontent.com/mehak-sachdeva/Nagoya_University_workshop_2022/main/data/indonesia_shapes/indonesia_shapes.shp")
df['ln_pov'] = np.log(df['pov'])
gdf['pov'] = df['pov']
gdf['ln_pov'] = df['ln_pov']
X = df[['mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr' ]].copy()
X_std = (X-X.mean(axis=0))/X.std(axis=0)
X_std=statm.add_constant(X_std)
y = df['pov']
y_std = (y-y.mean(axis=0))/y.std(axis=0)
model = statm.OLS(y_std,X_std).fit()
predictions=model.predict(X_std)
model.summary()
#Data preparation for GWR and MGWR model run
pov_y = df['pov'].values.reshape((-1,1))
X = df[['mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']].values
u = df['coord_x']
v = df['coord_y']
coords = list(zip(u,v))
pov_y = (pov_y - pov_y.mean(axis=0)) / pov_y.std(axis=0)
X = (X - X.mean(axis=0)) / X.std(axis=0)
#Calibrating the GWR model
gwr_selector = Sel_BW(coords, pov_y, X)
gwr_bw = gwr_selector.search()
gwr_bw
gwr_results = GWR(coords, pov_y, X, gwr_bw).fit()
gwr_results.summary()
===========================================================================
Model type Gaussian
Number of observations: 514
Number of covariates: 7
Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares: 336.400
Log-likelihood: -620.387
AIC: 1254.773
AICc: 1257.059
BIC: -2828.407
R2: 0.346
Adj. R2: 0.338
Variable Est. SE t(Est/SE) p-value
------------------------------- ---------- ---------- ---------- ----------
X0 0.000 0.036 0.000 1.000
X1 -0.511 0.046 -11.126 0.000
X2 0.154 0.037 4.116 0.000
X3 -0.024 0.044 -0.537 0.592
X4 -0.148 0.039 -3.787 0.000
X5 0.022 0.041 0.527 0.598
X6 -0.069 0.036 -1.907 0.057
Geographically Weighted Regression (GWR) Results
---------------------------------------------------------------------------
Spatial kernel: Adaptive bisquare
Bandwidth used: 70.000
Diagnostic information
---------------------------------------------------------------------------
Residual sum of squares: 100.020
Effective number of parameters (trace(S)): 103.559
Degree of freedom (n - trace(S)): 410.441
Sigma estimate: 0.494
Log-likelihood: -308.664
AIC: 826.446
AICc: 880.491
BIC: 1270.009
R2: 0.805
Adjusted R2: 0.756
Adj. alpha (95%): 0.003
Adj. critical t value (95%): 2.945
Summary Statistics For GWR Parameter Estimates
---------------------------------------------------------------------------
Variable Mean STD Min Median Max
-------------------- ---------- ---------- ---------- ---------- ----------
X0 -0.038 0.554 -0.785 -0.152 1.670
X1 -0.336 0.263 -0.999 -0.349 0.304
X2 -0.058 0.180 -0.516 -0.036 0.478
X3 -0.040 0.213 -0.429 -0.066 0.771
X4 -0.118 0.186 -0.589 -0.056 0.188
X5 -0.010 0.088 -0.345 -0.020 0.206
X6 -0.016 0.128 -0.481 -0.020 0.646
===========================================================================
# Calibrating the MGWR model
#Column order: 'constant','mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
mgwr_selector = Sel_BW(coords, pov_y, X, multi=True)
mgwr_bw = mgwr_selector.search(verbose=True)
IPyWidgets are not supported
Current iteration: 1 ,SOC: 0.009342
Bandwidths: 44.0, 60.0, 81.0, 78.0, 79.0, 196.0, 325.0
Current iteration: 2 ,SOC: 0.0048063
Bandwidths: 44.0, 70.0, 105.0, 117.0, 79.0, 360.0, 348.0
Current iteration: 3 ,SOC: 0.0033994
Bandwidths: 44.0, 70.0, 253.0, 127.0, 79.0, 498.0, 439.0
Current iteration: 4 ,SOC: 0.0023173
Bandwidths: 44.0, 70.0, 437.0, 131.0, 79.0, 498.0, 512.0
Current iteration: 5 ,SOC: 0.0011798
Bandwidths: 44.0, 70.0, 512.0, 131.0, 79.0, 498.0, 512.0
Current iteration: 6 ,SOC: 0.000514
Bandwidths: 44.0, 70.0, 512.0, 131.0, 79.0, 498.0, 512.0
Current iteration: 7 ,SOC: 0.0002491
Bandwidths: 44.0, 70.0, 512.0, 131.0, 79.0, 498.0, 512.0
Current iteration: 8 ,SOC: 0.0001322
Bandwidths: 44.0, 70.0, 512.0, 131.0, 79.0, 498.0, 512.0
Current iteration: 9 ,SOC: 7.05e-05
Bandwidths: 44.0, 70.0, 512.0, 131.0, 79.0, 498.0, 512.0
Current iteration: 10 ,SOC: 3.76e-05
Bandwidths: 44.0, 70.0, 512.0, 131.0, 79.0, 498.0, 512.0
Current iteration: 11 ,SOC: 2.04e-05
Bandwidths: 44.0, 70.0, 512.0, 131.0, 79.0, 498.0, 512.0
Current iteration: 12 ,SOC: 1.13e-05
Bandwidths: 44.0, 70.0, 512.0, 131.0, 79.0, 498.0, 512.0
Current iteration: 13 ,SOC: 6.5e-06
Bandwidths: 44.0, 70.0, 512.0, 131.0, 79.0, 498.0, 512.0
mgwr_results = MGWR(coords, pov_y, X, mgwr_selector).fit()
mgwr_results.summary()
IPyWidgets are not supported
===========================================================================
Model type Gaussian
Number of observations: 514
Number of covariates: 7
Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares: 336.400
Log-likelihood: -620.387
AIC: 1254.773
AICc: 1257.059
BIC: -2828.407
R2: 0.346
Adj. R2: 0.338
Variable Est. SE t(Est/SE) p-value
------------------------------- ---------- ---------- ---------- ----------
X0 0.000 0.036 0.000 1.000
X1 -0.511 0.046 -11.126 0.000
X2 0.154 0.037 4.116 0.000
X3 -0.024 0.044 -0.537 0.592
X4 -0.148 0.039 -3.787 0.000
X5 0.022 0.041 0.527 0.598
X6 -0.069 0.036 -1.907 0.057
Multi-Scale Geographically Weighted Regression (MGWR) Results
---------------------------------------------------------------------------
Spatial kernel: Adaptive bisquare
Criterion for optimal bandwidth: AICc
Score of Change (SOC) type: Smoothing f
Termination criterion for MGWR: 1e-05
MGWR bandwidths
---------------------------------------------------------------------------
Variable Bandwidth ENP_j Adj t-val(95%) Adj alpha(95%)
X0 44.000 27.078 3.130 0.002
X1 70.000 16.240 2.974 0.003
X2 512.000 1.634 2.168 0.031
X3 131.000 7.239 2.712 0.007
X4 79.000 13.612 2.918 0.004
X5 498.000 1.525 2.141 0.033
X6 512.000 1.278 2.068 0.039
Diagnostic information
---------------------------------------------------------------------------
Residual sum of squares: 97.986
Effective number of parameters (trace(S)): 68.606
Degree of freedom (n - trace(S)): 445.394
Sigma estimate: 0.469
Log-likelihood: -303.383
AIC: 745.977
AICc: 768.145
BIC: 1041.261
R2 0.809
Adjusted R2 0.780
Summary Statistics For MGWR Parameter Estimates
---------------------------------------------------------------------------
Variable Mean STD Min Median Max
-------------------- ---------- ---------- ---------- ---------- ----------
X0 -0.021 0.626 -0.869 -0.130 1.941
X1 -0.319 0.189 -0.839 -0.364 0.107
X2 0.005 0.007 -0.009 0.006 0.019
X3 -0.049 0.100 -0.231 -0.055 0.315
X4 -0.078 0.140 -0.466 -0.038 0.125
X5 -0.005 0.013 -0.015 -0.013 0.025
X6 -0.028 0.003 -0.033 -0.028 -0.023
===========================================================================
bw_ci=mgwr_results.get_bws_intervals(mgwr_selector)
print(bw_ci)
[(44.0, 44.0), (68.0, 85.0), (402.0, 512.0), (127.0, 154.0), (68.0, 154.0), (402.0, 498.0), (334.0, 512.0)]
#Critical t values for paramter estimates
t_crit=mgwr_results.critical_tval()
#'mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
gdf['b_constant'] = mgwr_results.params[:,0]
gdf['b_mys'] = mgwr_results.params[:,1]
gdf['b_gdpgr'] = mgwr_results.params[:,2]
gdf['b_unemp'] = mgwr_results.params[:,3]
gdf['b_shr_agr'] = mgwr_results.params[:,4]
gdf['b_shr_ind'] = mgwr_results.params[:,5]
gdf['b_inv_shr'] = mgwr_results.params[:,6]
gdf['t_constant'] = mgwr_results.tvalues[:,0]
gdf['t_mys'] = mgwr_results.tvalues[:,1]
gdf['t_gdpgr'] = mgwr_results.tvalues[:,2]
gdf['t_unemp'] = mgwr_results.tvalues[:,3]
gdf['t_shr_agr'] = mgwr_results.tvalues[:,4]
gdf['t_shr_ind'] = mgwr_results.tvalues[:,5]
gdf['t_inv_shr'] = mgwr_results.tvalues[:,6]
#'mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
gdf['constant'] = np.ones_like(df['pov'])
gdf['ln_pov'] = df['ln_pov']
gdf['pov'] = df['pov']
org_cols = ['constant','mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']
b_cols = ['b_constant','b_mys' , 'b_gdpgr' , 'b_unemp', 'b_shr_agr', 'b_shr_ind' , 'b_inv_shr']
bu_cols = ['bu_constant','bu_mys' , 'bu_gdpgr' , 'bu_unemp', 'bu_shr_agr', 'bu_shr_ind' , 'bu_inv_shr']
bt_cols = ['bt_constant','bt_mys' , 'bt_gdpgr' , 'bt_unemp', 'bt_shr_agr', 'bt_shr_ind' , 'bt_inv_shr']
t_cols = ['t_constant','t_mys' , 't_gdpgr' , 't_unemp', 't_shr_agr', 't_shr_ind' , 't_inv_shr']
for i in range(1,7):
gdf[bu_cols[i]] = (gdf[b_cols[i]]*np.std(gdf['pov']))/np.std(df[org_cols[i]])
for i in range(7):
gdf.loc[gdf[t_cols[i]] >=t_crit[i], bt_cols[i]] = gdf[b_cols[i]]
gdf.loc[gdf[t_cols[i]] <=-t_crit[i], bt_cols[i]] = gdf[b_cols[i]]
gdf['bt_inv_shr']
gdf=gdf.fillna(0.0)
#Define a mapping function for consistent maps
def mapp(col,name,title,bw_col=None,vmin=None,vmax=None):
if (vmin==None and vmax==None):
vmin=min(col[name])
vmax=max(col[name])
if ((col[name]==0).all()):
print("All parameters are insignificant")
return None
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,7),facecolor='white')
if bw_col:
ax.set_title(title+' (BW: ' + str(bw_col) +')', fontsize=12)
else:
ax.set_title(title, fontsize=12)
cmap = plt.cm.seismic
if (vmin < 0) & (vmax < 0):
cmap = truncate_colormap(cmap, 0.0, 0.5)
elif (vmin > 0) & (vmax > 0):
cmap = truncate_colormap(cmap, 0.5, 1.0)
else:
cmap = shift_colormap(cmap, start=0.0, midpoint=1 - vmax/(vmax + abs(vmin)), stop=1.)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax))
col.plot(name, cmap=sm.cmap, ax=ax, vmin=vmin, vmax=vmax, **{'edgecolor':None, 'alpha':0.65})
if (col[name] == 0).any():
col[col[name]==0].plot(color='white',ax=ax,**{'edgecolor':'grey', 'alpha':1})
texts = []
fig.tight_layout()
sm._A = []
cbar = fig.colorbar(sm,fraction=0.0175, pad=0.04)
cbar.ax.tick_params(labelsize=12)
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
plt.show()
#'constant,'mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
mgwr_bw
mapp(gdf,'bt_inv_shr',"Public investment share",bw_col=512)
All parameters are insignificant
mapp(gdf,'bt_mys',"Mean years of schooling coefficient",bw_col=70)
mapp(gdf,'bu_mys',"Mean years of schooling coefficient",bw_col=70)
mapp(gdf,'bt_gdpgr',"GDP growth rate",bw_col=512)
All parameters are insignificant
mapp(gdf,'bu_shr_agr',"Share in agriculture",bw_col=79)
mapp(gdf,'bt_shr_agr',"Share in agriculture - only significant parameter estimates",bw_col=79)
CN_gwr=gwr_results.local_collinearity()
gdf['gwr_CN'] = CN_gwr[2]
CN,VDP=mgwr_results.local_collinearity()
gdf['vdp_0']=VDP[:,0]
gdf['vdp_1']=VDP[:,1]
gdf['vdp_2']=VDP[:,2]
gdf['vdp_3']=VDP[:,3]
gdf['vdp_4']=VDP[:,4]
gdf['vdp_5']=VDP[:,5]
gdf['vdp_6']=VDP[:,6]
gdf['CN'] = CN
mapp(gdf,'CN',"Local condition number")
mapp(gdf,'gwr_CN',"GWR-Local condition number")
#spatial_var=mgwr_results.spatial_variability(mgwr_selector)
#0.001, 0.01, 0.065, 0.01, 0.02, 0.54, 0.73
#print(spatial_var)
# Out-of-sample prediction using~GWR
#Split data into calibration and prediction sets
np.random.seed(908)
sample = np.random.choice(range(512), 20)
mask = np.ones_like(pov_y, dtype = bool).flatten()
mask[sample] = False
cal_coords = np.array(coords)[mask]
cal_y = pov_y[mask]
cal_X = X[mask]
pred_coords = np.array(coords)[~mask]
pred_y = pov_y[~mask]
pred_X = X[~mask]
#Calibrate GWR model
gwr_selector = Sel_BW(cal_coords, cal_y, cal_X)
gwr_bw = gwr_selector.search()
print(gwr_bw)
model = GWR(cal_coords, cal_y, cal_X, gwr_bw)
gwr_results = model.fit()
pred_results = model.predict(pred_coords, pred_X)
#Check correlation between known and predicted values
corr = np.corrcoef(pred_results.predictions.flatten(),pred_y.flatten())[0][1]
print(corr)
70.0
0.8214342934333235
df_2018 = pd.read_csv("poverty_2018.csv")
df_2018.columns
pov_y = df['pov'].values.reshape((-1,1))
X = df[['mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']].values
u = df['coord_x']
v = df['coord_y']
coords = np.array(list(zip(u,v)))
pov_y = (pov_y - pov_y.mean(axis=0)) / pov_y.std(axis=0)
X = (X - X.mean(axis=0)) / X.std(axis=0)
#Fitting GWR model for 2010 data
gwr_selector = Sel_BW(coords, pov_y, X)
gwr_bw = gwr_selector.search()
print(gwr_bw)
model = GWR(coords, pov_y, X, gwr_bw)
gwr_results = model.fit()
70.0
#Predicting poverty in 2018
y_2018 = df_2018['pov'].values.reshape((-1,1))
X_2018 = df_2018[['mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']].values
u = df_2018['COORD_X']
v = df_2018['COORD_Y']
coords_2018 = np.array(list(zip(u,v)))
y_2018 = (y_2018 - y_2018.mean(axis=0)) / y_2018.std(axis=0)
X_2018 = (X_2018 - X_2018.mean(axis=0)) / X_2018.std(axis=0)
pred_results = model.predict(coords_2018, X_2018)
#Check correlation between known and predicted values
corr = np.corrcoef(pred_results.predictions.flatten(),y_2018.flatten())[0][1]
print(corr)
0.8360220182069347
gdf['y_2018_pred'] = pred_results.predictions.flatten()
gdf['y_2018_real'] = y_2018
mapp(gdf,'y_2018_pred',"Predicted y - 2018")
mapp(gdf,'y_2018_real',"Real y - 2018",bw_col=None,vmin=-4.5,vmax=3.5)
df_west = pd.read_csv("poverty_west_2010.csv")
gdf_west = gpd.read_file("Indonesia_west/Indonesia_west.shp")
pov_y = df_west['pov'].values.reshape((-1,1))
X = df_west[['mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']].values
u = df_west['coord_x']
v = df_west['coord_y']
coords = list(zip(u,v))
pov_y = (pov_y - pov_y.mean(axis=0)) / pov_y.std(axis=0)
X = (X - X.mean(axis=0)) / X.std(axis=0)
#Calibrating the GWR model
gwr_selector = Sel_BW(coords, pov_y, X)
gwr_bw = gwr_selector.search()
gwr_bw
gwr_results = GWR(coords, pov_y, X, gwr_bw).fit()
gwr_results.summary()
# Calibrating the MGWR model
#Column order: 'constant','mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
mgwr_selector = Sel_BW(coords, pov_y, X, multi=True)
mgwr_bw = mgwr_selector.search(verbose=True)
mgwr_results = MGWR(coords, pov_y, X, mgwr_selector).fit()
mgwr_results.summary()
#Critical t values for paramter estimates
t_crit=mgwr_results.critical_tval()
#'mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
gdf_west['b_constant'] = mgwr_results.params[:,0]
gdf_west['b_mys'] = mgwr_results.params[:,1]
gdf_west['b_gdpgr'] = mgwr_results.params[:,2]
gdf_west['b_unemp'] = mgwr_results.params[:,3]
gdf_west['b_shr_agr'] = mgwr_results.params[:,4]
gdf_west['b_shr_ind'] = mgwr_results.params[:,5]
gdf_west['b_inv_shr'] = mgwr_results.params[:,6]
gdf_west['t_constant'] = mgwr_results.tvalues[:,0]
gdf_west['t_mys'] = mgwr_results.tvalues[:,1]
gdf_west['t_gdpgr'] = mgwr_results.tvalues[:,2]
gdf_west['t_unemp'] = mgwr_results.tvalues[:,3]
gdf_west['t_shr_agr'] = mgwr_results.tvalues[:,4]
gdf_west['t_shr_ind'] = mgwr_results.tvalues[:,5]
gdf_west['t_inv_shr'] = mgwr_results.tvalues[:,6]
#'mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
gdf_west['constant'] = np.ones_like(df_west['pov'])
gdf_west['pov'] = df_west['pov']
org_cols = ['constant','mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']
b_cols = ['b_constant','b_mys' , 'b_gdpgr' , 'b_unemp', 'b_shr_agr', 'b_shr_ind' , 'b_inv_shr']
bu_cols = ['bu_constant','bu_mys' , 'bu_gdpgr' , 'bu_unemp', 'bu_shr_agr', 'bu_shr_ind' , 'bu_inv_shr']
bt_cols = ['bt_constant','bt_mys' , 'bt_gdpgr' , 'bt_unemp', 'bt_shr_agr', 'bt_shr_ind' , 'bt_inv_shr']
t_cols = ['t_constant','t_mys' , 't_gdpgr' , 't_unemp', 't_shr_agr', 't_shr_ind' , 't_inv_shr']
for i in range(1,7):
gdf_west[bu_cols[i]] = (gdf_west[b_cols[i]]*np.std(gdf_west['pov']))/np.std(df_west[org_cols[i]])
for i in range(7):
gdf_west.loc[gdf_west[t_cols[i]] >=t_crit[i], bt_cols[i]] = gdf_west[b_cols[i]]
gdf_west.loc[gdf_west[t_cols[i]] <=-t_crit[i], bt_cols[i]] = gdf_west[b_cols[i]]
gdf_west=gdf_west.fillna(0.0)
===========================================================================
Model type Gaussian
Number of observations: 338
Number of covariates: 7
Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares: 267.204
Log-likelihood: -439.880
AIC: 893.761
AICc: 896.198
BIC: -1660.225
R2: 0.209
Adj. R2: 0.195
Variable Est. SE t(Est/SE) p-value
------------------------------- ---------- ---------- ---------- ----------
X0 -0.000 0.049 -0.000 1.000
X1 -0.386 0.060 -6.403 0.000
X2 -0.062 0.049 -1.259 0.208
X3 -0.035 0.057 -0.618 0.536
X4 -0.167 0.055 -3.067 0.002
X5 -0.077 0.057 -1.362 0.173
X6 -0.021 0.049 -0.429 0.668
Geographically Weighted Regression (GWR) Results
---------------------------------------------------------------------------
Spatial kernel: Adaptive bisquare
Bandwidth used: 70.000
Diagnostic information
---------------------------------------------------------------------------
Residual sum of squares: 110.023
Effective number of parameters (trace(S)): 68.374
Degree of freedom (n - trace(S)): 269.626
Sigma estimate: 0.639
Log-likelihood: -289.923
AIC: 718.595
AICc: 755.079
BIC: 983.814
R2: 0.674
Adjusted R2: 0.592
Adj. alpha (95%): 0.005
Adj. critical t value (95%): 2.818
Summary Statistics For GWR Parameter Estimates
---------------------------------------------------------------------------
Variable Mean STD Min Median Max
-------------------- ---------- ---------- ---------- ---------- ----------
X0 0.000 0.376 -0.803 0.080 0.662
X1 -0.453 0.315 -1.222 -0.490 0.418
X2 -0.050 0.151 -0.404 -0.080 0.374
X3 -0.038 0.178 -0.602 -0.016 0.408
X4 -0.178 0.305 -0.874 -0.086 0.279
X5 -0.030 0.105 -0.260 -0.027 0.312
X6 -0.024 0.159 -0.299 -0.068 0.401
===========================================================================
IPyWidgets are not supported
Current iteration: 1 ,SOC: 0.015898
Bandwidths: 44.0, 44.0, 61.0, 139.0, 71.0, 292.0, 44.0
Current iteration: 2 ,SOC: 0.0090135
Bandwidths: 44.0, 44.0, 184.0, 153.0, 73.0, 336.0, 44.0
Current iteration: 3 ,SOC: 0.0037063
Bandwidths: 44.0, 44.0, 260.0, 155.0, 73.0, 336.0, 44.0
Current iteration: 4 ,SOC: 0.0015815
Bandwidths: 44.0, 44.0, 272.0, 155.0, 73.0, 336.0, 44.0
Current iteration: 5 ,SOC: 0.0008098
Bandwidths: 44.0, 44.0, 272.0, 155.0, 73.0, 336.0, 44.0
Current iteration: 6 ,SOC: 0.0005313
Bandwidths: 44.0, 44.0, 272.0, 162.0, 73.0, 336.0, 44.0
Current iteration: 7 ,SOC: 0.0004264
Bandwidths: 44.0, 44.0, 272.0, 168.0, 73.0, 336.0, 44.0
Current iteration: 8 ,SOC: 0.0002812
Bandwidths: 44.0, 44.0, 272.0, 168.0, 73.0, 336.0, 44.0
Current iteration: 9 ,SOC: 0.0001354
Bandwidths: 44.0, 44.0, 272.0, 168.0, 73.0, 336.0, 44.0
Current iteration: 10 ,SOC: 0.0018976
Bandwidths: 44.0, 44.0, 272.0, 277.0, 73.0, 336.0, 44.0
Current iteration: 11 ,SOC: 0.0015312
Bandwidths: 44.0, 44.0, 272.0, 334.0, 73.0, 336.0, 44.0
Current iteration: 12 ,SOC: 0.0007571
Bandwidths: 44.0, 44.0, 272.0, 336.0, 73.0, 336.0, 44.0
Current iteration: 13 ,SOC: 0.0003061
Bandwidths: 44.0, 44.0, 272.0, 336.0, 73.0, 336.0, 44.0
Current iteration: 14 ,SOC: 0.0001554
Bandwidths: 44.0, 44.0, 272.0, 336.0, 73.0, 336.0, 44.0
Current iteration: 15 ,SOC: 8.88e-05
Bandwidths: 44.0, 44.0, 272.0, 336.0, 73.0, 336.0, 44.0
Current iteration: 16 ,SOC: 5.53e-05
Bandwidths: 44.0, 44.0, 272.0, 336.0, 73.0, 336.0, 44.0
Current iteration: 17 ,SOC: 3.61e-05
Bandwidths: 44.0, 44.0, 272.0, 336.0, 73.0, 336.0, 44.0
Current iteration: 18 ,SOC: 2.4e-05
Bandwidths: 44.0, 44.0, 272.0, 336.0, 73.0, 336.0, 44.0
Current iteration: 19 ,SOC: 1.6e-05
Bandwidths: 44.0, 44.0, 272.0, 336.0, 73.0, 336.0, 44.0
Current iteration: 20 ,SOC: 1.07e-05
Bandwidths: 44.0, 44.0, 272.0, 336.0, 73.0, 336.0, 44.0
Current iteration: 21 ,SOC: 7.2e-06
Bandwidths: 44.0, 44.0, 272.0, 336.0, 73.0, 336.0, 44.0
IPyWidgets are not supported
===========================================================================
Model type Gaussian
Number of observations: 338
Number of covariates: 7
Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares: 267.204
Log-likelihood: -439.880
AIC: 893.761
AICc: 896.198
BIC: -1660.225
R2: 0.209
Adj. R2: 0.195
Variable Est. SE t(Est/SE) p-value
------------------------------- ---------- ---------- ---------- ----------
X0 -0.000 0.049 -0.000 1.000
X1 -0.386 0.060 -6.403 0.000
X2 -0.062 0.049 -1.259 0.208
X3 -0.035 0.057 -0.618 0.536
X4 -0.167 0.055 -3.067 0.002
X5 -0.077 0.057 -1.362 0.173
X6 -0.021 0.049 -0.429 0.668
Multi-Scale Geographically Weighted Regression (MGWR) Results
---------------------------------------------------------------------------
Spatial kernel: Adaptive bisquare
Criterion for optimal bandwidth: AICc
Score of Change (SOC) type: Smoothing f
Termination criterion for MGWR: 1e-05
MGWR bandwidths
---------------------------------------------------------------------------
Variable Bandwidth ENP_j Adj t-val(95%) Adj alpha(95%)
X0 44.000 15.827 2.973 0.003
X1 44.000 16.873 2.993 0.003
X2 272.000 2.646 2.359 0.019
X3 336.000 1.399 2.108 0.036
X4 73.000 9.444 2.807 0.005
X5 336.000 1.504 2.138 0.033
X6 44.000 15.361 2.964 0.003
Diagnostic information
---------------------------------------------------------------------------
Residual sum of squares: 92.632
Effective number of parameters (trace(S)): 63.053
Degree of freedom (n - trace(S)): 274.947
Sigma estimate: 0.580
Log-likelihood: -260.846
AIC: 649.798
AICc: 680.330
BIC: 894.674
R2 0.726
Adjusted R2 0.663
Summary Statistics For MGWR Parameter Estimates
---------------------------------------------------------------------------
Variable Mean STD Min Median Max
-------------------- ---------- ---------- ---------- ---------- ----------
X0 -0.014 0.449 -0.831 0.060 1.015
X1 -0.378 0.260 -1.087 -0.439 0.305
X2 -0.014 0.030 -0.068 -0.017 0.089
X3 -0.059 0.011 -0.078 -0.061 -0.035
X4 -0.125 0.258 -0.749 -0.029 0.182
X5 -0.024 0.006 -0.032 -0.025 -0.005
X6 -0.058 0.191 -0.881 -0.086 0.436
===========================================================================
mapp(gdf_west,'bt_mys',"Mean years of schooling - West")
mapp(gdf,'bt_mys',"Mean years of schooling - Whole")
mapp(gdf_west,'bt_constant',"Local intercept - West")
mapp(gdf,'bt_constant',"Local intercept - Whole")
df_east = pd.read_csv("poverty_east_2010.csv")
gdf_east = gpd.read_file("Indonesia_east/Indonesia_east.shp")
pov_y = df_east['pov'].values.reshape((-1,1))
X = df_east[['mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']].values
u = df_east['coord_x']
v = df_east['coord_y']
coords = list(zip(u,v))
pov_y = (pov_y - pov_y.mean(axis=0)) / pov_y.std(axis=0)
X = (X - X.mean(axis=0)) / X.std(axis=0)
#Calibrating the GWR model
gwr_selector = Sel_BW(coords, pov_y, X)
gwr_bw = gwr_selector.search()
gwr_bw
gwr_results = GWR(coords, pov_y, X, gwr_bw).fit()
gwr_results.summary()
# Calibrating the MGWR model
#Column order: 'constant','mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
mgwr_selector = Sel_BW(coords, pov_y, X, multi=True)
mgwr_bw = mgwr_selector.search(verbose=True)
mgwr_results = MGWR(coords, pov_y, X, mgwr_selector).fit()
mgwr_results.summary()
#Critical t values for paramter estimates
t_crit=mgwr_results.critical_tval()
#'mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
gdf_east['b_constant'] = mgwr_results.params[:,0]
gdf_east['b_mys'] = mgwr_results.params[:,1]
gdf_east['b_gdpgr'] = mgwr_results.params[:,2]
gdf_east['b_unemp'] = mgwr_results.params[:,3]
gdf_east['b_shr_agr'] = mgwr_results.params[:,4]
gdf_east['b_shr_ind'] = mgwr_results.params[:,5]
gdf_east['b_inv_shr'] = mgwr_results.params[:,6]
gdf_east['t_constant'] = mgwr_results.tvalues[:,0]
gdf_east['t_mys'] = mgwr_results.tvalues[:,1]
gdf_east['t_gdpgr'] = mgwr_results.tvalues[:,2]
gdf_east['t_unemp'] = mgwr_results.tvalues[:,3]
gdf_east['t_shr_agr'] = mgwr_results.tvalues[:,4]
gdf_east['t_shr_ind'] = mgwr_results.tvalues[:,5]
gdf_east['t_inv_shr'] = mgwr_results.tvalues[:,6]
#'mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
gdf_east['constant'] = np.ones_like(df_east['pov'])
gdf_east['pov'] = df_east['pov']
org_cols = ['constant','mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']
b_cols = ['b_constant','b_mys' , 'b_gdpgr' , 'b_unemp', 'b_shr_agr', 'b_shr_ind' , 'b_inv_shr']
bu_cols = ['bu_constant','bu_mys' , 'bu_gdpgr' , 'bu_unemp', 'bu_shr_agr', 'bu_shr_ind' , 'bu_inv_shr']
bt_cols = ['bt_constant','bt_mys' , 'bt_gdpgr' , 'bt_unemp', 'bt_shr_agr', 'bt_shr_ind' , 'bt_inv_shr']
t_cols = ['t_constant','t_mys' , 't_gdpgr' , 't_unemp', 't_shr_agr', 't_shr_ind' , 't_inv_shr']
for i in range(1,7):
gdf_east[bu_cols[i]] = (gdf_east[b_cols[i]]*np.std(gdf_east['pov']))/np.std(gdf_east[org_cols[i]])
for i in range(7):
gdf_east.loc[gdf_east[t_cols[i]] >=t_crit[i], bt_cols[i]] = gdf_east[b_cols[i]]
gdf_east.loc[gdf_east[t_cols[i]] <=-t_crit[i], bt_cols[i]] = gdf_east[b_cols[i]]
gdf_east=gdf_east.fillna(0.0)
===========================================================================
Model type Gaussian
Number of observations: 176
Number of covariates: 7
Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares: 103.421
Log-likelihood: -202.946
AIC: 419.892
AICc: 422.754
BIC: -770.391
R2: 0.412
Adj. R2: 0.392
Variable Est. SE t(Est/SE) p-value
------------------------------- ---------- ---------- ---------- ----------
X0 -0.000 0.059 -0.000 1.000
X1 -0.635 0.086 -7.429 0.000
X2 0.036 0.066 0.548 0.584
X3 0.028 0.082 0.346 0.729
X4 -0.106 0.065 -1.630 0.103
X5 0.026 0.067 0.394 0.693
X6 -0.013 0.064 -0.208 0.835
Geographically Weighted Regression (GWR) Results
---------------------------------------------------------------------------
Spatial kernel: Adaptive bisquare
Bandwidth used: 77.000
Diagnostic information
---------------------------------------------------------------------------
Residual sum of squares: 37.839
Effective number of parameters (trace(S)): 31.311
Degree of freedom (n - trace(S)): 144.689
Sigma estimate: 0.511
Log-likelihood: -114.464
AIC: 293.550
AICc: 308.636
BIC: 395.991
R2: 0.785
Adjusted R2: 0.738
Adj. alpha (95%): 0.011
Adj. critical t value (95%): 2.564
Summary Statistics For GWR Parameter Estimates
---------------------------------------------------------------------------
Variable Mean STD Min Median Max
-------------------- ---------- ---------- ---------- ---------- ----------
X0 -0.095 0.577 -0.630 -0.292 0.933
X1 -0.324 0.271 -0.913 -0.325 0.070
X2 -0.038 0.148 -0.569 0.020 0.184
X3 -0.112 0.144 -0.341 -0.095 0.299
X4 -0.108 0.128 -0.351 -0.048 0.036
X5 0.032 0.075 -0.257 0.034 0.145
X6 -0.016 0.096 -0.371 0.008 0.151
===========================================================================
IPyWidgets are not supported
Current iteration: 1 ,SOC: 0.0150516
Bandwidths: 43.0, 61.0, 76.0, 86.0, 110.0, 145.0, 175.0
Current iteration: 2 ,SOC: 0.00676
Bandwidths: 43.0, 64.0, 78.0, 118.0, 147.0, 149.0, 175.0
Current iteration: 3 ,SOC: 0.0061039
Bandwidths: 43.0, 78.0, 78.0, 169.0, 169.0, 149.0, 175.0
Current iteration: 4 ,SOC: 0.0030468
Bandwidths: 43.0, 78.0, 78.0, 175.0, 171.0, 152.0, 175.0
Current iteration: 5 ,SOC: 0.0016207
Bandwidths: 43.0, 78.0, 78.0, 175.0, 171.0, 152.0, 175.0
Current iteration: 6 ,SOC: 0.0008932
Bandwidths: 43.0, 78.0, 78.0, 175.0, 169.0, 152.0, 175.0
Current iteration: 7 ,SOC: 0.0004837
Bandwidths: 43.0, 78.0, 78.0, 175.0, 169.0, 152.0, 175.0
Current iteration: 8 ,SOC: 0.0002753
Bandwidths: 43.0, 78.0, 78.0, 175.0, 169.0, 152.0, 175.0
Current iteration: 9 ,SOC: 0.0001718
Bandwidths: 43.0, 78.0, 78.0, 175.0, 169.0, 152.0, 175.0
Current iteration: 10 ,SOC: 0.0001082
Bandwidths: 43.0, 78.0, 78.0, 175.0, 169.0, 152.0, 175.0
Current iteration: 11 ,SOC: 6.8e-05
Bandwidths: 43.0, 78.0, 78.0, 175.0, 169.0, 152.0, 175.0
Current iteration: 12 ,SOC: 4.25e-05
Bandwidths: 43.0, 78.0, 78.0, 175.0, 169.0, 152.0, 175.0
Current iteration: 13 ,SOC: 2.6e-05
Bandwidths: 43.0, 78.0, 78.0, 175.0, 169.0, 152.0, 175.0
Current iteration: 14 ,SOC: 1.56e-05
Bandwidths: 43.0, 78.0, 78.0, 175.0, 169.0, 152.0, 175.0
Current iteration: 15 ,SOC: 9.1e-06
Bandwidths: 43.0, 78.0, 78.0, 175.0, 169.0, 152.0, 175.0
IPyWidgets are not supported
===========================================================================
Model type Gaussian
Number of observations: 176
Number of covariates: 7
Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares: 103.421
Log-likelihood: -202.946
AIC: 419.892
AICc: 422.754
BIC: -770.391
R2: 0.412
Adj. R2: 0.392
Variable Est. SE t(Est/SE) p-value
------------------------------- ---------- ---------- ---------- ----------
X0 -0.000 0.059 -0.000 1.000
X1 -0.635 0.086 -7.429 0.000
X2 0.036 0.066 0.548 0.584
X3 0.028 0.082 0.346 0.729
X4 -0.106 0.065 -1.630 0.103
X5 0.026 0.067 0.394 0.693
X6 -0.013 0.064 -0.208 0.835
Multi-Scale Geographically Weighted Regression (MGWR) Results
---------------------------------------------------------------------------
Spatial kernel: Adaptive bisquare
Criterion for optimal bandwidth: AICc
Score of Change (SOC) type: Smoothing f
Termination criterion for MGWR: 1e-05
MGWR bandwidths
---------------------------------------------------------------------------
Variable Bandwidth ENP_j Adj t-val(95%) Adj alpha(95%)
X0 43.000 9.444 2.824 0.005
X1 78.000 5.354 2.629 0.009
X2 78.000 4.292 2.549 0.012
X3 175.000 1.459 2.133 0.034
X4 169.000 1.451 2.131 0.034
X5 152.000 2.025 2.266 0.025
X6 175.000 1.233 2.063 0.041
Diagnostic information
---------------------------------------------------------------------------
Residual sum of squares: 33.754
Effective number of parameters (trace(S)): 25.258
Degree of freedom (n - trace(S)): 150.742
Sigma estimate: 0.473
Log-likelihood: -104.411
AIC: 261.338
AICc: 270.962
BIC: 344.590
R2 0.808
Adjusted R2 0.776
Summary Statistics For MGWR Parameter Estimates
---------------------------------------------------------------------------
Variable Mean STD Min Median Max
-------------------- ---------- ---------- ---------- ---------- ----------
X0 -0.047 0.650 -0.747 -0.389 1.244
X1 -0.337 0.121 -0.601 -0.353 -0.170
X2 -0.013 0.092 -0.282 0.007 0.152
X3 -0.071 0.006 -0.083 -0.070 -0.049
X4 -0.042 0.022 -0.099 -0.030 -0.011
X5 0.022 0.049 -0.034 -0.005 0.108
X6 0.014 0.007 -0.006 0.016 0.028
===========================================================================
mapp(gdf_east,'bt_mys',"Mean years of schooling - East")
mapp(gdf,'bt_mys',"Mean years of schooling - Whole")
mapp(gdf_east,'bt_shr_agr',"Investment share in Agriculture - East")
All parameters are insignificant
mapp(gdf_east,'bt_gdpgr',"GDP growth rate - East")
All parameters are insignificant
mapp(gdf_east,'bt_constant',"Local intercept - East")
mapp(gdf,'bt_constant',"Local intercept - Whole")