# Start writing code here.
import numpy as np
import pandas as pd
import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import libpysal as ps
from mgwr.gwr import GWR, MGWR
from mgwr.sel_bw import Sel_BW
import scipy as sp
from matplotlib_scalebar.scalebar import ScaleBar
from mgwr.utils import shift_colormap, truncate_colormap
import numpy as np
import itertools
import time
import statsmodels.api as sm
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
import statsmodels.api as statm
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
/opt/conda/lib/python3.9/site-packages/spglm/utils.py:367: SyntaxWarning: "is not" with a literal. Did you mean "!="?
if resetlist is not ():
# Load the poverty attribute table (CSV) and the Indonesia district polygons
# (shapefile) from the Carto SQL API.
df = pd.read_csv("https://msachde1.carto.com/api/v2/sql?filename=poverty_rates&format=CSV&q=SELECT+*+FROM+poverty_rates+as+poverty_rates")
gdf = gpd.read_file("https://msachde1.carto.com/api/v2/sql?filename=indonesia_shapefile&format=SHP&q=SELECT+*+FROM+indonesia_shapes+as+indonesia_shapefile")
# Inspect the columns available in each table (output shown below).
print(df.columns)
print(gdf.columns)
Index(['the_geom', 'cartodb_id', 'the_geom_webmercator', 'field_1',
'district_x', 'year', 'pov', 'gap', 'sev', 'mys', 'gdpgr', 'unemp',
'shr_agr', 'shr_ind', 'inv_shr', 'subs_rice', 'gdi', 'island', 'code',
'coord_x', 'coord_y', 'districtid', 'district_y', 'provinceid',
'province', 'islandid', 'island2', 'mys2010', 'inv2010', 'ln_gdp2010',
'ln_gdp2018', 'g'],
dtype='object')
Index(['cartodb_id', 'districtid', 'district', 'coord_x', 'coord_y',
'provinceid', 'province', 'islandid', 'island', 'geometry'],
dtype='object')
# Quick sanity-check plot of the district geometries.
gdf.plot()
# Candidate response (pov, gap) and predictor columns for the analysis.
df_corr = df[['pov', 'gap', 'sev', 'mys', 'gdpgr', 'unemp','shr_agr', 'shr_ind', 'inv_shr', 'subs_rice', 'gdi']]
#Checking for any nulls in our attribute columns
df_corr.isnull().values.any()
# Pairwise Pearson correlation heatmap with a diverging blue-red palette.
sns.set(rc = {'figure.figsize':(15,10)})
cmap = sns.diverging_palette(240, 10, n=9, as_cmap=True)
ax=sns.heatmap(df_corr.corr(),cmap=cmap)
# Distribution of the raw poverty rate.
# NOTE(review): sns.distplot is deprecated (removed in recent seaborn);
# histplot/displot are the modern replacements — kept as-is for this env.
sns.set_style("whitegrid")
sns.distplot(df['pov'])
#Log transformation check for poverty rate
sns.set_style("whitegrid")
sns.distplot(np.log(df['pov']),color='red')
sns.distplot(df['pov'])
# D'Agostino-Pearson normality test on the raw poverty rate.
from scipy.stats import normaltest

stat, p = normaltest(df['pov'])
print('Statistics=%.3f, p=%.3f' % (stat, p))
# Interpret at the 5% significance level.
alpha = 0.05
verdict = ('Sample looks Gaussian (fail to reject H0)'
           if p > alpha
           else 'Sample does not look Gaussian (reject H0)')
print(verdict)
Statistics=127.263, p=0.000
Sample does not look Gaussian (reject H0)
# Log-transform the response and re-run the normality test; ln_pov is the
# response used by all later models.
df['ln_pov'] = np.log(df['pov'])
from scipy.stats import normaltest

stat, p = normaltest(df['ln_pov'])
print('Statistics=%.3f, p=%.3f' % (stat, p))
# Interpret at the 5% significance level.
alpha = 0.05
verdict = ('Sample looks Gaussian (fail to reject H0)'
           if p > alpha
           else 'Sample does not look Gaussian (reject H0)')
print(verdict)
Statistics=0.667, p=0.716
Sample looks Gaussian (fail to reject H0)
# Show 2 decimal places in displayed frames. 'display.precision' is the
# canonical option name; the bare 'precision' alias is deprecated and was
# removed in pandas 2.0, while the full name works on old pandas too.
pd.set_option('display.precision', 2)
df_corr.describe()
#Log transformation check for poverty gap index
sns.set_style("whitegrid")
sns.distplot(np.log(df['gap']),color='red')
sns.distplot(df['gap'])
# Confirm the response has no NaNs before mapping.
np.isnan(df['pov']).any()
# Carry the two candidate responses onto the geodataframe for mapping.
gdf['poverty_rate'] = df['pov']
gdf['poverty_gap_index'] = df['gap']
#Define a mapping function for consistent maps
def mapp(col,name,title,bw_col=None):
vmin=min(col[name])
vmax=max(col[name])
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(15,7),facecolor='white')
if bw_col:
ax.set_title(title+' (BW: ' + str(bw_col) +')', fontsize=12)
else:
ax.set_title(title, fontsize=12)
cmap = plt.cm.seismic
if (vmin < 0) & (vmax < 0):
cmap = truncate_colormap(cmap, 0.0, 0.5)
elif (vmin > 0) & (vmax > 0):
cmap = truncate_colormap(cmap, 0.5, 1.0)
else:
cmap = shift_colormap(cmap, start=0.0, midpoint=1 - vmax/(vmax + abs(vmin)), stop=1.)
sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax))
col.plot(name, cmap=sm.cmap, ax=ax, vmin=vmin, vmax=vmax, **{'edgecolor':None, 'alpha':0.65})
if (col[name] == 0).any():
col[col[name]==0].plot(color='white',ax=ax,**{'edgecolor':'grey', 'alpha':1})
texts = []
fig.tight_layout()
sm._A = []
cbar = fig.colorbar(sm,fraction=0.0175, pad=0.04)
cbar.ax.tick_params(labelsize=12)
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
plt.show()
# Map both candidate responses with the shared mapping helper.
mapp(gdf,'poverty_rate',"Poverty Rate in Indonesia - 2010")
mapp(gdf,'poverty_gap_index',"Poverty Gap Index in Indonesia - 2010")
df_corr.columns
# Columns most correlated with each response, descending (bare expressions
# display in the notebook).
correlation = df_corr.corr(method='pearson')
columns = correlation.nlargest(10, 'pov').index
columns
correlation = df_corr.corr(method='pearson')
columns = correlation.nlargest(11, 'gap').index
columns
def fit_linear_reg(X, Y):
    """Fit an OLS model on (X, Y) and return its (RSS, R^2) for scoring
    candidate feature subsets."""
    ols = linear_model.LinearRegression(fit_intercept=True)
    ols.fit(X, Y)
    rss = mean_squared_error(Y, ols.predict(X)) * len(Y)
    r_squared = ols.score(X, Y)
    return rss, r_squared
# Best subset selection
#Importing tqdm for the progress bar
import tqdm
#Initialization variables
Y = df.ln_pov
X = df_corr.drop(columns = {'pov','gap'}, axis = 1)
k = 10
RSS_list, R_squared_list, feature_list = [],[], []
numb_features = []
# Loop over subset sizes 1..n_predictors. tqdm.trange is the console-safe
# progress bar; tnrange is the deprecated notebook variant that needs
# ipywidgets (the "IPyWidgets are not supported" message came from it).
for k in tqdm.trange(1, len(X.columns) + 1, desc='Loop...'):
    # All (n choose k) predictor combinations of size k.
    for combo in itertools.combinations(X.columns, k):
        tmp_result = fit_linear_reg(X[list(combo)], Y)  # (RSS, R^2)
        RSS_list.append(tmp_result[0])
        R_squared_list.append(tmp_result[1])
        feature_list.append(combo)
        numb_features.append(len(combo))
#Store in DataFrame
best_sub = pd.DataFrame({'Feature_count': numb_features,'RSS': RSS_list, 'R_squared':R_squared_list,'Covariates':feature_list})
IPyWidgets are not supported
# Rows achieving the minimum RSS within each subset size.
best_min = best_sub[best_sub.groupby('Feature_count')['RSS'].transform(min) == best_sub['RSS']]
display(best_min.head(3))
# Per-size best scores broadcast back onto every row, for the plots below.
best_sub['Min_RSS'] = best_sub.groupby('Feature_count')['RSS'].transform(min)
best_sub['Max_R_squared'] = best_sub.groupby('Feature_count')['R_squared'].transform(max)
best_sub.head()
# Scatter RSS and R^2 for every evaluated subset; the per-size best front
# is drawn in red.
fig = plt.figure(figsize = (16,6),facecolor='white')
ax = fig.add_subplot(1, 2, 1)
ax.scatter(best_sub['Feature_count'],best_sub.RSS, alpha = .2, color = 'darkblue' )
ax.set_xlabel('# Features')
ax.set_ylabel('RSS')
ax.set_title('RSS - Best subset selection')
ax.plot(best_sub['Feature_count'],best_sub.Min_RSS,color = 'r', label = 'Best subset')
ax.grid(False);ax.set_xticks([]);ax.set_yticks([]);ax.set_facecolor('white')
ax.legend()
ax = fig.add_subplot(1, 2, 2)
ax.scatter(best_sub['Feature_count'],best_sub.R_squared, alpha = .2, color = 'darkblue' )
ax.plot(best_sub['Feature_count'],best_sub.Max_R_squared,color = 'r', label = 'Best subset')
ax.set_xlabel('# Features')
ax.set_ylabel('R squared')
ax.set_title('R_squared - Best subset selection')
ax.legend()
# Hide grid lines
ax.grid(False);ax.set_xticks([]);ax.set_yticks([]);ax.set_facecolor('white')
plt.show()
# Overall best single subset by each criterion (one-row frames).
best_Rsq=best_sub.sort_values(by='R_squared',ascending=False)[:1]
best_Rss=best_sub.sort_values(by='RSS',ascending=True)[:1]
best_Rsq['Covariates'].to_list()
best_Rss['Covariates'].to_list()
#Initialization variables
Y = df.ln_pov
X = df_corr.drop(columns = {'pov','gap'}, axis = 1)
k = 9
remaining_features = list(X.columns.values)
features = []
RSS_list, R_squared_list = [np.inf], [np.inf] #Due to 1 indexing of the loop...
features_list = dict()
# Forward stepwise selection: at each step add the single remaining feature
# that most reduces the RSS of the OLS fit on (already-chosen + candidate).
for i in range(1, k + 1):
    best_RSS = np.inf
    for combo in itertools.combinations(remaining_features, 1):
        RSS = fit_linear_reg(X[list(combo) + features], Y)  # (RSS, R^2)
        if RSS[0] < best_RSS:
            best_RSS = RSS[0]
            best_R_squared = RSS[1]
            best_feature = combo[0]
    #Updating variables for next loop
    features.append(best_feature)
    # BUG FIX: the chosen feature must leave the candidate pool; with this
    # line commented out the same feature is re-selected every iteration
    # and X gets duplicate columns.
    remaining_features.remove(best_feature)
    #Saving values for plotting
    RSS_list.append(best_RSS)
    R_squared_list.append(best_R_squared)
    features_list[i] = features.copy()
print('Forward stepwise subset selection')
print('Number of features |', 'Features |', 'RSS')
display([(i,features_list[i], round(RSS_list[i])) for i in range(1,5)])
Forward stepwise subset selection
Number of features | Features | RSS
# Assemble the per-size stepwise results (row 0 is the np.inf seed entry).
df1 = pd.concat([pd.DataFrame({'features':features_list}),pd.DataFrame({'RSS':RSS_list, 'R_squared': R_squared_list})], axis=1, join='inner')
df1['numb_features'] = df1.index
#Initializing useful variables
m = len(Y)
p = 9
# Error-variance estimate from the smallest RSS (i.e. the largest model).
hat_sigma_squared = (1/(m - p -1)) * min(df1['RSS'])
#Computing Mallows' Cp, AIC, BIC and adjusted R^2 per subset size.
df1['C_p'] = (1/m) * (df1['RSS'] + 2 * df1['numb_features'] * hat_sigma_squared )
df1['AIC'] = (1/(m*hat_sigma_squared)) * (df1['RSS'] + 2 * df1['numb_features'] * hat_sigma_squared )
df1['BIC'] = (1/(m*hat_sigma_squared)) * (df1['RSS'] + np.log(m) * df1['numb_features'] * hat_sigma_squared )
df1['R_squared_adj'] = 1 - ( (1 - df1['R_squared'])*(m-1)/(m-df1['numb_features'] -1))
df1
# Plot the four selection criteria against subset size and mark each
# criterion's optimum with an 'x' (max for adjusted R^2, min otherwise).
variables = ['C_p', 'AIC','BIC','R_squared_adj']
fig = plt.figure(figsize=(18, 6))
for idx, crit in enumerate(variables):
    ax = fig.add_subplot(1, 4, idx + 1)
    ax.plot(df1['numb_features'], df1[crit], color='lightblue')
    ax.scatter(df1['numb_features'], df1[crit], color='darkblue')
    if crit == 'R_squared_adj':
        ax.plot(df1[crit].idxmax(), df1[crit].max(), marker='x', markersize=20)
    else:
        ax.plot(df1[crit].idxmin(), df1[crit].min(), marker='x', markersize=20)
    ax.set_xlabel('Number of predictors')
    ax.set_ylabel(crit)
fig.suptitle('Subset selection using C_p, AIC, BIC, Adjusted R2', fontsize = 16)
plt.show()
best_Rsq['Covariates'].to_list()
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Multicollinearity check on the nine candidate predictors.
X = df_corr[[ 'sev','mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr' , 'subs_rice' , 'gdi']]
vif_data = pd.DataFrame()
vif_data["feature"] = X.columns
# One VIF per column of the design matrix.
vif_data["VIF"] = [variance_inflation_factor(X.values, col_idx)
                   for col_idx in range(len(X.columns))]
print(vif_data)
feature VIF
0 sev 1.81
1 mys 43.67
2 gdpgr 5.02
3 unemp 5.84
4 shr_agr 2.31
5 shr_ind 1.78
6 inv_shr 1.91
7 subs_rice 9.52
8 gdi 62.39
# Map the two collinear variables (gdi had VIF ~62, mys ~44) to see the
# shared spatial pattern behind the collinearity.
gdf['gdi'] = df['gdi']
gdf['mys'] = df['mys']
mapp(gdf,'gdi',"gender_development_index")
mapp(gdf,'mys',"mean years of schooling")
sns.set_style("whitegrid")
# Pass data vectors by keyword: positional x/y was deprecated in seaborn
# 0.11 and is rejected by 0.12+; behavior is identical.
sns.regplot(x=df['mys'], y=df['gdi'])
# Re-check VIF after dropping the two worst offenders (gdi, subs_rice).
X = df_corr[[ 'sev','mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr' ]]
# VIF dataframe
vif_data = pd.DataFrame()
vif_data["feature"] = X.columns
# calculating VIF for each feature
vif_data["VIF"] = [variance_inflation_factor(X.values, i)
for i in range(len(X.columns))]
print(vif_data)
feature VIF
0 sev 1.72
1 mys 9.93
2 gdpgr 4.22
3 unemp 5.61
4 shr_agr 2.15
5 shr_ind 1.68
6 inv_shr 1.84
# (Re)create the log response; already computed earlier, kept for cell
# independence in the notebook.
df['ln_pov'] = np.log(df['pov'])
# Z-standardize predictors and response, add an intercept, fit global OLS.
X = df[['sev','mys' , 'gdpgr' , 'unemp', 'shr_agr', 'shr_ind' , 'inv_shr']].copy()
X_std = (X-X.mean(axis=0))/X.std(axis=0)
X_std=statm.add_constant(X_std)
y = df['ln_pov']
y_std = (y-y.mean(axis=0))/y.std(axis=0)
model = statm.OLS(y_std,X_std).fit()
predictions=model.predict(X_std)
model.summary()
# Map the global residuals to eyeball remaining spatial structure.
gdf['global_resid'] = model.resid
mapp(gdf,'global_resid',"Global residuals - OLS")
import esda
from esda.moran import Moran, Moran_Local
import libpysal
from libpysal import weights
from libpysal.weights.util import min_threshold_distance
# Rook-contiguity spatial weights, row-standardized. Indonesia's many
# islands leave several districts with no neighbors (warnings below).
W = weights.Rook.from_dataframe(gdf)
W.transform = 'r'
('WARNING: ', 5, ' is an island (no neighbors)')
('WARNING: ', 19, ' is an island (no neighbors)')
('WARNING: ', 37, ' is an island (no neighbors)')
('WARNING: ', 71, ' is an island (no neighbors)')
('WARNING: ', 75, ' is an island (no neighbors)')
('WARNING: ', 76, ' is an island (no neighbors)')
('WARNING: ', 81, ' is an island (no neighbors)')
('WARNING: ', 99, ' is an island (no neighbors)')
('WARNING: ', 170, ' is an island (no neighbors)')
('WARNING: ', 203, ' is an island (no neighbors)')
('WARNING: ', 218, ' is an island (no neighbors)')
('WARNING: ', 220, ' is an island (no neighbors)')
('WARNING: ', 222, ' is an island (no neighbors)')
('WARNING: ', 223, ' is an island (no neighbors)')
('WARNING: ', 225, ' is an island (no neighbors)')
('WARNING: ', 242, ' is an island (no neighbors)')
('WARNING: ', 367, ' is an island (no neighbors)')
('WARNING: ', 368, ' is an island (no neighbors)')
('WARNING: ', 374, ' is an island (no neighbors)')
('WARNING: ', 379, ' is an island (no neighbors)')
('WARNING: ', 403, ' is an island (no neighbors)')
('WARNING: ', 406, ' is an island (no neighbors)')
('WARNING: ', 407, ' is an island (no neighbors)')
('WARNING: ', 411, ' is an island (no neighbors)')
('WARNING: ', 419, ' is an island (no neighbors)')
('WARNING: ', 422, ' is an island (no neighbors)')
('WARNING: ', 454, ' is an island (no neighbors)')
('WARNING: ', 459, ' is an island (no neighbors)')
('WARNING: ', 477, ' is an island (no neighbors)')
('WARNING: ', 478, ' is an island (no neighbors)')
('WARNING: ', 481, ' is an island (no neighbors)')
('WARNING: ', 484, ' is an island (no neighbors)')
('WARNING: ', 487, ' is an island (no neighbors)')
('WARNING: ', 489, ' is an island (no neighbors)')
import splot
from splot.esda import moran_scatterplot, plot_moran
from splot.libpysal import plot_spatial_weights
plot_spatial_weights(W, gdf);
# Moran's I on the OLS residuals: significant positive spatial
# autocorrelation (I ~= 0.29, p = 0.001) motivates a local GWR/MGWR model.
moran_pov = Moran(gdf["global_resid"], W)
print(moran_pov.I,moran_pov.p_sim)
0.28588071725348124 0.001
df.columns
# GWR design: log-poverty response, five retained predictors, and district
# coordinates; spherical=True treats coord_x/coord_y as lon/lat degrees.
pov_y = df['ln_pov'].values.reshape((-1,1))
X = df[['sev','mys' , 'gdpgr', 'shr_agr' , 'inv_shr']].values
u = df['coord_x']
v = df['coord_y']
coords = list(zip(u,v))
# Standardize so local coefficients are comparable across variables.
pov_y = (pov_y - pov_y.mean(axis=0)) / pov_y.std(axis=0)
X = (X - X.mean(axis=0)) / X.std(axis=0)
# Search for a single optimal adaptive bandwidth, then fit GWR with it.
gwr_selector = Sel_BW(coords, pov_y, X,spherical=True)
gwr_bw = gwr_selector.search()
gwr_bw
gwr_results = GWR(coords, pov_y, X, gwr_bw,spherical=True).fit()
gwr_results.summary()
===========================================================================
Model type Gaussian
Number of observations: 514
Number of covariates: 6
Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares: 253.886
Log-likelihood: -548.062
AIC: 1108.124
AICc: 1110.346
BIC: -2917.164
R2: 0.506
Adj. R2: 0.501
Variable Est. SE t(Est/SE) p-value
------------------------------- ---------- ---------- ---------- ----------
X0 -0.000 0.031 -0.000 1.000
X1 0.488 0.034 14.410 0.000
X2 -0.389 0.034 -11.545 0.000
X3 -0.035 0.033 -1.067 0.286
X4 -0.088 0.032 -2.786 0.005
X5 -0.029 0.032 -0.907 0.365
Geographically Weighted Regression (GWR) Results
---------------------------------------------------------------------------
Spatial kernel: Adaptive bisquare
Bandwidth used: 60.000
Diagnostic information
---------------------------------------------------------------------------
Residual sum of squares: 77.949
Effective number of parameters (trace(S)): 103.138
Degree of freedom (n - trace(S)): 410.862
Sigma estimate: 0.436
Log-likelihood: -244.588
AIC: 697.451
AICc: 751.009
BIC: 1139.228
R2: 0.848
Adjusted R2: 0.810
Adj. alpha (95%): 0.003
Adj. critical t value (95%): 2.992
Summary Statistics For GWR Parameter Estimates
---------------------------------------------------------------------------
Variable Mean STD Min Median Max
-------------------- ---------- ---------- ---------- ---------- ----------
X0 0.216 0.360 -0.425 0.216 1.214
X1 1.004 0.530 0.094 1.025 2.455
X2 -0.366 0.175 -0.801 -0.319 0.015
X3 -0.093 0.218 -0.603 -0.060 0.703
X4 -0.074 0.189 -0.703 -0.061 0.413
X5 -0.001 0.109 -0.441 0.001 0.563
===========================================================================
# MGWR: multi=True searches one bandwidth per covariate via backfitting
# (iteration log below; converges to 44/44/58/95/58/512).
mgwr_selector = Sel_BW(coords, pov_y, X, multi=True,spherical=True)
mgwr_bw = mgwr_selector.search(verbose=True)
IPyWidgets are not supported
Current iteration: 1 ,SOC: 0.0087296
Bandwidths: 44.0, 44.0, 50.0, 95.0, 58.0, 350.0
Current iteration: 2 ,SOC: 0.0033581
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 3 ,SOC: 0.0011059
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 4 ,SOC: 0.0005871
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 5 ,SOC: 0.0003445
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 6 ,SOC: 0.0002181
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 7 ,SOC: 0.0001526
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 8 ,SOC: 0.0001179
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 9 ,SOC: 9.71e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 10 ,SOC: 8.27e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 11 ,SOC: 7.14e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 12 ,SOC: 6.22e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 13 ,SOC: 5.44e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 14 ,SOC: 4.77e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 15 ,SOC: 4.19e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 16 ,SOC: 3.68e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 17 ,SOC: 3.24e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 18 ,SOC: 2.86e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 19 ,SOC: 2.52e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 20 ,SOC: 2.22e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 21 ,SOC: 1.96e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 22 ,SOC: 1.73e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 23 ,SOC: 1.52e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 24 ,SOC: 1.34e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 25 ,SOC: 1.18e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 26 ,SOC: 1.04e-05
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
Current iteration: 27 ,SOC: 9.2e-06
Bandwidths: 44.0, 44.0, 58.0, 95.0, 58.0, 512.0
# Fit MGWR with the per-covariate bandwidths found by the selector.
mgwr_results = MGWR(coords, pov_y, X, mgwr_selector,spherical=True).fit()
mgwr_results.summary()
IPyWidgets are not supported
===========================================================================
Model type Gaussian
Number of observations: 514
Number of covariates: 6
Global Regression Results
---------------------------------------------------------------------------
Residual sum of squares: 253.886
Log-likelihood: -548.062
AIC: 1108.124
AICc: 1110.346
BIC: -2917.164
R2: 0.506
Adj. R2: 0.501
Variable Est. SE t(Est/SE) p-value
------------------------------- ---------- ---------- ---------- ----------
X0 -0.000 0.031 -0.000 1.000
X1 0.488 0.034 14.410 0.000
X2 -0.389 0.034 -11.545 0.000
X3 -0.035 0.033 -1.067 0.286
X4 -0.088 0.032 -2.786 0.005
X5 -0.029 0.032 -0.907 0.365
Multi-Scale Geographically Weighted Regression (MGWR) Results
---------------------------------------------------------------------------
Spatial kernel: Adaptive bisquare
Criterion for optimal bandwidth: AICc
Score of Change (SOC) type: Smoothing f
Termination criterion for MGWR: 1e-05
MGWR bandwidths
---------------------------------------------------------------------------
Variable Bandwidth ENP_j Adj t-val(95%) Adj alpha(95%)
X0 44.000 23.856 3.092 0.002
X1 44.000 23.728 3.090 0.002
X2 58.000 18.939 3.022 0.003
X3 95.000 9.924 2.817 0.005
X4 58.000 17.926 3.005 0.003
X5 512.000 1.273 2.067 0.039
Diagnostic information
---------------------------------------------------------------------------
Residual sum of squares: 72.302
Effective number of parameters (trace(S)): 95.646
Degree of freedom (n - trace(S)): 418.354
Sigma estimate: 0.416
Log-likelihood: -225.263
AIC: 643.818
AICc: 689.151
BIC: 1053.813
R2 0.859
Adjusted R2 0.827
Summary Statistics For MGWR Parameter Estimates
---------------------------------------------------------------------------
Variable Mean STD Min Median Max
-------------------- ---------- ---------- ---------- ---------- ----------
X0 0.249 0.358 -0.473 0.222 1.263
X1 0.989 0.510 0.061 1.084 1.959
X2 -0.361 0.150 -0.742 -0.362 -0.034
X3 -0.089 0.161 -0.393 -0.058 0.327
X4 -0.065 0.143 -0.466 -0.042 0.201
X5 -0.002 0.003 -0.005 -0.003 0.004
===========================================================================
gdf.columns
# Corrected critical t-values (one per covariate) for testing the local
# parameter estimates.
t_crit = mgwr_results.critical_tval()
# Covariate order in the design: constant, sev, mys, gdpgr, shr_agr, inv_shr
names = ['constant', 'sev', 'mys', 'gdpgr', 'shr_agr', 'inv_shr']
# Attach local coefficient surfaces, then local t-value surfaces.
for j, nm in enumerate(names):
    gdf['b_' + nm] = mgwr_results.params[:, j]
for j, nm in enumerate(names):
    gdf['t_' + nm] = mgwr_results.tvalues[:, j]
gdf['constant'] = np.ones_like(df['sev'])
gdf['ln_pov'] = df['ln_pov']
org_cols = list(names)                     # raw data columns
b_cols = ['b_' + nm for nm in names]       # standardized local estimates
bu_cols = ['bu_' + nm for nm in names]     # estimates back on raw scale
bt_cols = ['bt_' + nm for nm in names]     # significance-masked estimates
t_cols = ['t_' + nm for nm in names]       # local t-values
# Un-standardize the slope estimates (intercept at index 0 is skipped).
for i in range(1, 6):
    gdf[bu_cols[i]] = (gdf[b_cols[i]] * np.std(gdf['ln_pov'])) / np.std(df[org_cols[i]])
# Keep a local coefficient only where |t| exceeds its corrected critical
# value; everything else stays NaN for now.
for i in range(6):
    gdf.loc[gdf[t_cols[i]] >= t_crit[i], bt_cols[i]] = gdf[b_cols[i]]
    gdf.loc[gdf[t_cols[i]] <= -t_crit[i], bt_cols[i]] = gdf[b_cols[i]]
gdf['bt_inv_shr']
np.isnan(gdf['bt_inv_shr']).all()
# Non-significant cells become exactly 0 so mapp() renders them white.
gdf = gdf.fillna(0.0)
mapp(gdf,'bt_constant',"Local intercept")
# Back-transform from the log scale: (exp(b)-1)*100 — since the response is
# ln(pov), presumably read as approximate % change in poverty rate.
gdf['conv_mys'] = (np.exp(gdf['bt_mys'])-1)*100
mapp(gdf,'conv_mys',"Mean years of schooling - local estimate")
gdf['conv_shr_agr'] = (np.exp(gdf['bt_shr_agr'])-1)*100
mapp(gdf,'conv_shr_agr',"Share of agriculture to GDP - local estimate")
# MGWR residuals: map and re-test for spatial autocorrelation. Moran's I
# drops to ~0.065 (p = 0.039), much weaker than the OLS residuals.
gdf['local_resid'] = mgwr_results.resid_response
mapp(gdf,'local_resid',"Local errors")
moran_pov = Moran(gdf["local_resid"], W)
print(moran_pov.I,moran_pov.p_sim)