# Start writing code here..import numpy as np
import pandas as pd
import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import libpysal as ps
from mgwr.gwr import GWR, MGWR
from mgwr.sel_bw import Sel_BW
import scipy as sp
from matplotlib_scalebar.scalebar import ScaleBar
from mgwr.utils import shift_colormap, truncate_colormap
import numpy as np
import itertools
import time
import statsmodels.api as sm
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
import statsmodels.api as statm
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
df = pd.read_csv("poverty_2010.csv")
gdf = gpd.read_file("/vsicurl/https://raw.githubusercontent.com/mehak-sachdeva/Nagoya_University_workshop_2022/main/data/indonesia_shapes/indonesia_shapes.shp")
df['ln_pov'] = np.log(df['pov'])
gdf['pov'] = df['pov']
gdf['ln_pov'] = df['ln_pov']
X = df[['mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr' ]].copy()
X_std = (X-X.mean(axis=0))/X.std(axis=0)
X_std=statm.add_constant(X_std)
y = df['pov']
y_std = (y-y.mean(axis=0))/y.std(axis=0)
model = statm.OLS(y_std,X_std).fit()
predictions=model.predict(X_std)
model.summary()
#Data preparation for GWR and MGWR model run
pov_y = df['pov'].values.reshape((-1,1))
X = df[['mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']].values
u = df['coord_x']
v = df['coord_y']
coords = list(zip(u,v))
pov_y = (pov_y - pov_y.mean(axis=0)) / pov_y.std(axis=0)
X = (X - X.mean(axis=0)) / X.std(axis=0)
#Calibrating the GWR model
gwr_selector = Sel_BW(coords, pov_y, X)
gwr_bw = gwr_selector.search()
gwr_bw
gwr_results = GWR(coords, pov_y, X, gwr_bw).fit()
gwr_results.summary()
# Calibrating the MGWR model
#Column order: 'constant','mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
mgwr_selector = Sel_BW(coords, pov_y, X, multi=True)
mgwr_bw = mgwr_selector.search(verbose=True)
mgwr_results = MGWR(coords, pov_y, X, mgwr_selector).fit()
mgwr_results.summary()
bw_ci=mgwr_results.get_bws_intervals(mgwr_selector)
print(bw_ci)
#Critical t values for paramter estimates
t_crit=mgwr_results.critical_tval()
#'mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
gdf['b_constant'] = mgwr_results.params[:,0]
gdf['b_mys'] = mgwr_results.params[:,1]
gdf['b_gdpgr'] = mgwr_results.params[:,2]
gdf['b_unemp'] = mgwr_results.params[:,3]
gdf['b_shr_agr'] = mgwr_results.params[:,4]
gdf['b_shr_ind'] = mgwr_results.params[:,5]
gdf['b_inv_shr'] = mgwr_results.params[:,6]
gdf['t_constant'] = mgwr_results.tvalues[:,0]
gdf['t_mys'] = mgwr_results.tvalues[:,1]
gdf['t_gdpgr'] = mgwr_results.tvalues[:,2]
gdf['t_unemp'] = mgwr_results.tvalues[:,3]
gdf['t_shr_agr'] = mgwr_results.tvalues[:,4]
gdf['t_shr_ind'] = mgwr_results.tvalues[:,5]
gdf['t_inv_shr'] = mgwr_results.tvalues[:,6]
#'mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
gdf['constant'] = np.ones_like(df['pov'])
gdf['ln_pov'] = df['ln_pov']
gdf['pov'] = df['pov']
org_cols = ['constant','mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']
b_cols = ['b_constant','b_mys' , 'b_gdpgr' , 'b_unemp', 'b_shr_agr', 'b_shr_ind' , 'b_inv_shr']
bu_cols = ['bu_constant','bu_mys' , 'bu_gdpgr' , 'bu_unemp', 'bu_shr_agr', 'bu_shr_ind' , 'bu_inv_shr']
bt_cols = ['bt_constant','bt_mys' , 'bt_gdpgr' , 'bt_unemp', 'bt_shr_agr', 'bt_shr_ind' , 'bt_inv_shr']
t_cols = ['t_constant','t_mys' , 't_gdpgr' , 't_unemp', 't_shr_agr', 't_shr_ind' , 't_inv_shr']
for i in range(1,7):
gdf[bu_cols[i]] = (gdf[b_cols[i]]*np.std(gdf['pov']))/np.std(df[org_cols[i]])
for i in range(7):
gdf.loc[gdf[t_cols[i]] >=t_crit[i], bt_cols[i]] = gdf[b_cols[i]]
gdf.loc[gdf[t_cols[i]] <=-t_crit[i], bt_cols[i]] = gdf[b_cols[i]]
gdf['bt_inv_shr']
gdf=gdf.fillna(0.0)
#Define a mapping function for consistent maps
def mapp(col,name,title,bw_col=None,vmin=None,vmax=None):
if (vmin==None and vmax==None):
vmin=min(col[name])
vmax=max(col[name])
if ((col[name]==0).all()):
print("All parameters are insignificant")
return None
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,7),facecolor='white')
if bw_col:
ax.set_title(title+' (BW: ' + str(bw_col) +')', fontsize=12)
else:
ax.set_title(title, fontsize=12)
cmap = plt.cm.seismic
if (vmin < 0) & (vmax < 0):
cmap = truncate_colormap(cmap, 0.0, 0.5)
elif (vmin > 0) & (vmax > 0):
cmap = truncate_colormap(cmap, 0.5, 1.0)
else:
cmap = shift_colormap(cmap, start=0.0, midpoint=1 - vmax/(vmax + abs(vmin)), stop=1.)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax))
col.plot(name, cmap=sm.cmap, ax=ax, vmin=vmin, vmax=vmax, **{'edgecolor':None, 'alpha':0.65})
if (col[name] == 0).any():
col[col[name]==0].plot(color='white',ax=ax,**{'edgecolor':'grey', 'alpha':1})
texts = []
fig.tight_layout()
sm._A = []
cbar = fig.colorbar(sm,fraction=0.0175, pad=0.04)
cbar.ax.tick_params(labelsize=12)
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
plt.show()
#'constant,'mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
mgwr_bw
mapp(gdf,'bt_inv_shr',"Public investment share",bw_col=512)
mapp(gdf,'bt_mys',"Mean years of schooling coefficient",bw_col=70)
mapp(gdf,'bu_mys',"Mean years of schooling coefficient",bw_col=70)
mapp(gdf,'bt_gdpgr',"GDP growth rate",bw_col=512)
mapp(gdf,'bu_shr_agr',"Share in agriculture",bw_col=79)
mapp(gdf,'bt_shr_agr',"Share in agriculture - only significant parameter estimates",bw_col=79)
CN_gwr=gwr_results.local_collinearity()
gdf['gwr_CN'] = CN_gwr[2]
CN,VDP=mgwr_results.local_collinearity()
gdf['vdp_0']=VDP[:,0]
gdf['vdp_1']=VDP[:,1]
gdf['vdp_2']=VDP[:,2]
gdf['vdp_3']=VDP[:,3]
gdf['vdp_4']=VDP[:,4]
gdf['vdp_5']=VDP[:,5]
gdf['vdp_6']=VDP[:,6]
gdf['CN'] = CN
mapp(gdf,'CN',"Local condition number")
mapp(gdf,'gwr_CN',"GWR-Local condition number")
#spatial_var=mgwr_results.spatial_variability(mgwr_selector)
#0.001, 0.01, 0.065, 0.01, 0.02, 0.54, 0.73
#print(spatial_var)
# Out-of-sample prediction using~GWR
#Split data into calibration and prediction sets
np.random.seed(908)
sample = np.random.choice(range(512), 20)
mask = np.ones_like(pov_y, dtype = bool).flatten()
mask[sample] = False
cal_coords = np.array(coords)[mask]
cal_y = pov_y[mask]
cal_X = X[mask]
pred_coords = np.array(coords)[~mask]
pred_y = pov_y[~mask]
pred_X = X[~mask]
#Calibrate GWR model
gwr_selector = Sel_BW(cal_coords, cal_y, cal_X)
gwr_bw = gwr_selector.search()
print(gwr_bw)
model = GWR(cal_coords, cal_y, cal_X, gwr_bw)
gwr_results = model.fit()
pred_results = model.predict(pred_coords, pred_X)
#Check correlation between known and predicted values
corr = np.corrcoef(pred_results.predictions.flatten(),pred_y.flatten())[0][1]
print(corr)
df_2018 = pd.read_csv("poverty_2018.csv")
df_2018.columns
pov_y = df['pov'].values.reshape((-1,1))
X = df[['mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']].values
u = df['coord_x']
v = df['coord_y']
coords = np.array(list(zip(u,v)))
pov_y = (pov_y - pov_y.mean(axis=0)) / pov_y.std(axis=0)
X = (X - X.mean(axis=0)) / X.std(axis=0)
#Fitting GWR model for 2010 data
gwr_selector = Sel_BW(coords, pov_y, X)
gwr_bw = gwr_selector.search()
print(gwr_bw)
model = GWR(coords, pov_y, X, gwr_bw)
gwr_results = model.fit()
#Predicting poverty in 2018
y_2018 = df_2018['pov'].values.reshape((-1,1))
X_2018 = df_2018[['mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']].values
u = df_2018['COORD_X']
v = df_2018['COORD_Y']
coords_2018 = np.array(list(zip(u,v)))
y_2018 = (y_2018 - y_2018.mean(axis=0)) / y_2018.std(axis=0)
X_2018 = (X_2018 - X_2018.mean(axis=0)) / X_2018.std(axis=0)
pred_results = model.predict(coords_2018, X_2018)
#Check correlation between known and predicted values
corr = np.corrcoef(pred_results.predictions.flatten(),y_2018.flatten())[0][1]
print(corr)
gdf['y_2018_pred'] = pred_results.predictions.flatten()
gdf['y_2018_real'] = y_2018
mapp(gdf,'y_2018_pred',"Predicted y - 2018")
mapp(gdf,'y_2018_real',"Real y - 2018",bw_col=None,vmin=-4.5,vmax=3.5)
df_west = pd.read_csv("poverty_west_2010.csv")
gdf_west = gpd.read_file("Indonesia_west/Indonesia_west.shp")
pov_y = df_west['pov'].values.reshape((-1,1))
X = df_west[['mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']].values
u = df_west['coord_x']
v = df_west['coord_y']
coords = list(zip(u,v))
pov_y = (pov_y - pov_y.mean(axis=0)) / pov_y.std(axis=0)
X = (X - X.mean(axis=0)) / X.std(axis=0)
#Calibrating the GWR model
gwr_selector = Sel_BW(coords, pov_y, X)
gwr_bw = gwr_selector.search()
gwr_bw
gwr_results = GWR(coords, pov_y, X, gwr_bw).fit()
gwr_results.summary()
# Calibrating the MGWR model
#Column order: 'constant','mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
mgwr_selector = Sel_BW(coords, pov_y, X, multi=True)
mgwr_bw = mgwr_selector.search(verbose=True)
mgwr_results = MGWR(coords, pov_y, X, mgwr_selector).fit()
mgwr_results.summary()
#Critical t values for paramter estimates
t_crit=mgwr_results.critical_tval()
#'mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
gdf_west['b_constant'] = mgwr_results.params[:,0]
gdf_west['b_mys'] = mgwr_results.params[:,1]
gdf_west['b_gdpgr'] = mgwr_results.params[:,2]
gdf_west['b_unemp'] = mgwr_results.params[:,3]
gdf_west['b_shr_agr'] = mgwr_results.params[:,4]
gdf_west['b_shr_ind'] = mgwr_results.params[:,5]
gdf_west['b_inv_shr'] = mgwr_results.params[:,6]
gdf_west['t_constant'] = mgwr_results.tvalues[:,0]
gdf_west['t_mys'] = mgwr_results.tvalues[:,1]
gdf_west['t_gdpgr'] = mgwr_results.tvalues[:,2]
gdf_west['t_unemp'] = mgwr_results.tvalues[:,3]
gdf_west['t_shr_agr'] = mgwr_results.tvalues[:,4]
gdf_west['t_shr_ind'] = mgwr_results.tvalues[:,5]
gdf_west['t_inv_shr'] = mgwr_results.tvalues[:,6]
#'mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
gdf_west['constant'] = np.ones_like(df_west['pov'])
gdf_west['pov'] = df_west['pov']
org_cols = ['constant','mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']
b_cols = ['b_constant','b_mys' , 'b_gdpgr' , 'b_unemp', 'b_shr_agr', 'b_shr_ind' , 'b_inv_shr']
bu_cols = ['bu_constant','bu_mys' , 'bu_gdpgr' , 'bu_unemp', 'bu_shr_agr', 'bu_shr_ind' , 'bu_inv_shr']
bt_cols = ['bt_constant','bt_mys' , 'bt_gdpgr' , 'bt_unemp', 'bt_shr_agr', 'bt_shr_ind' , 'bt_inv_shr']
t_cols = ['t_constant','t_mys' , 't_gdpgr' , 't_unemp', 't_shr_agr', 't_shr_ind' , 't_inv_shr']
for i in range(1,7):
gdf_west[bu_cols[i]] = (gdf_west[b_cols[i]]*np.std(gdf_west['pov']))/np.std(df_west[org_cols[i]])
for i in range(7):
gdf_west.loc[gdf_west[t_cols[i]] >=t_crit[i], bt_cols[i]] = gdf_west[b_cols[i]]
gdf_west.loc[gdf_west[t_cols[i]] <=-t_crit[i], bt_cols[i]] = gdf_west[b_cols[i]]
gdf_west=gdf_west.fillna(0.0)
mapp(gdf_west,'bt_mys',"Mean years of schooling - West")
mapp(gdf,'bt_mys',"Mean years of schooling - Whole")
mapp(gdf_west,'bt_constant',"Local intercept - West")
mapp(gdf,'bt_constant',"Local intercept - Whole")
df_east = pd.read_csv("poverty_east_2010.csv")
gdf_east = gpd.read_file("Indonesia_east/Indonesia_east.shp")
pov_y = df_east['pov'].values.reshape((-1,1))
X = df_east[['mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']].values
u = df_east['coord_x']
v = df_east['coord_y']
coords = list(zip(u,v))
pov_y = (pov_y - pov_y.mean(axis=0)) / pov_y.std(axis=0)
X = (X - X.mean(axis=0)) / X.std(axis=0)
#Calibrating the GWR model
gwr_selector = Sel_BW(coords, pov_y, X)
gwr_bw = gwr_selector.search()
gwr_bw
gwr_results = GWR(coords, pov_y, X, gwr_bw).fit()
gwr_results.summary()
# Calibrating the MGWR model
#Column order: 'constant','mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
mgwr_selector = Sel_BW(coords, pov_y, X, multi=True)
mgwr_bw = mgwr_selector.search(verbose=True)
mgwr_results = MGWR(coords, pov_y, X, mgwr_selector).fit()
mgwr_results.summary()
#Critical t values for paramter estimates
t_crit=mgwr_results.critical_tval()
#'mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
gdf_east['b_constant'] = mgwr_results.params[:,0]
gdf_east['b_mys'] = mgwr_results.params[:,1]
gdf_east['b_gdpgr'] = mgwr_results.params[:,2]
gdf_east['b_unemp'] = mgwr_results.params[:,3]
gdf_east['b_shr_agr'] = mgwr_results.params[:,4]
gdf_east['b_shr_ind'] = mgwr_results.params[:,5]
gdf_east['b_inv_shr'] = mgwr_results.params[:,6]
gdf_east['t_constant'] = mgwr_results.tvalues[:,0]
gdf_east['t_mys'] = mgwr_results.tvalues[:,1]
gdf_east['t_gdpgr'] = mgwr_results.tvalues[:,2]
gdf_east['t_unemp'] = mgwr_results.tvalues[:,3]
gdf_east['t_shr_agr'] = mgwr_results.tvalues[:,4]
gdf_east['t_shr_ind'] = mgwr_results.tvalues[:,5]
gdf_east['t_inv_shr'] = mgwr_results.tvalues[:,6]
#'mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr'
gdf_east['constant'] = np.ones_like(df_east['pov'])
gdf_east['pov'] = df_east['pov']
org_cols = ['constant','mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']
b_cols = ['b_constant','b_mys' , 'b_gdpgr' , 'b_unemp', 'b_shr_agr', 'b_shr_ind' , 'b_inv_shr']
bu_cols = ['bu_constant','bu_mys' , 'bu_gdpgr' , 'bu_unemp', 'bu_shr_agr', 'bu_shr_ind' , 'bu_inv_shr']
bt_cols = ['bt_constant','bt_mys' , 'bt_gdpgr' , 'bt_unemp', 'bt_shr_agr', 'bt_shr_ind' , 'bt_inv_shr']
t_cols = ['t_constant','t_mys' , 't_gdpgr' , 't_unemp', 't_shr_agr', 't_shr_ind' , 't_inv_shr']
for i in range(1,7):
gdf_east[bu_cols[i]] = (gdf_east[b_cols[i]]*np.std(gdf_east['pov']))/np.std(gdf_east[org_cols[i]])
for i in range(7):
gdf_east.loc[gdf_east[t_cols[i]] >=t_crit[i], bt_cols[i]] = gdf_east[b_cols[i]]
gdf_east.loc[gdf_east[t_cols[i]] <=-t_crit[i], bt_cols[i]] = gdf_east[b_cols[i]]
gdf_east=gdf_east.fillna(0.0)
mapp(gdf_east,'bt_mys',"Mean years of schooling - East")
mapp(gdf,'bt_mys',"Mean years of schooling - Whole")
mapp(gdf_east,'bt_shr_agr',"Investment share in Agriculture - East")
mapp(gdf_east,'bt_gdpgr',"GDP growth rate - East")
mapp(gdf_east,'bt_constant',"Local intercept - East")
mapp(gdf,'bt_constant',"Local intercept - Whole")