!pip install adjustText
! pip install scikit-gstat
import pandas as pd
import geopandas as gpd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import libpysal as ps
from mgwr.gwr import GWR, MGWR
from mgwr.sel_bw import Sel_BW
import scipy as sp
from matplotlib_scalebar.scalebar import ScaleBar
from mgwr.utils import shift_colormap, truncate_colormap
import numpy as np
import itertools
import time
import statsmodels.api as sm
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
import statsmodels.api as statm
import warnings
warnings.filterwarnings('ignore')
from adjustText import adjust_text
import adjustText as aT
from sklearn import cluster
from skgstat import Variogram
%matplotlib inline
df = pd.read_csv("census_tracts_housing.csv")
df.head()
df.shape
df_points = gpd.GeoDataFrame(
df, geometry=gpd.points_from_xy(df.longitude, df.latitude), crs="EPSG:4326"
)
df_points.plot()
df_points.crs
gdf = gpd.read_file("census_tracts.shp")
gdf.shape
gdf.head()
gdf = gdf.drop(columns={'bu_tech', 'bt_constan', 'bt_age', 'bt_round_b', 'bt_water_d',
'bt_tech', 'bt_index', 'bu_constan', 'bu_sqft', 'bu_age', 'bu_round_b',
'bu_water_d', 'bt_sqft', 'bu_index', 'b_constant', 'b_sqft', 'b_age',
'b_round_ba', 'b_water_di', 'b_tech', 'b_index'})
gdf = gdf.reset_index()
gdf.head()
gdf.crs
gdf.plot()
df_points=df_points.to_crs(gdf.crs)
df_points.plot()
label = pd.read_csv("more_labels_new.csv")
label.head()
label = gpd.GeoDataFrame(
label, geometry=gpd.points_from_xy(label.longitude, label.latitude), crs="EPSG:4326"
)
label=label.to_crs(epsg=3857)
label=label.drop(label.index[3])
label=label.reset_index()
label=label.drop(label.index[5])
label=label.reset_index()
label.head()
label.plot()
plt.hist(df_points.avg_price)
label.head()
label['coords'] = label['geometry'].apply(lambda x: x.representative_point().coords[:])
label['coords'] = [coords[0] for coords in label['coords']]
label.head()
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10,26),facecolor='white')
df_points.plot(ax=ax, column='avg_price', cmap='bwr',legend=True,legend_kwds={'bbox_to_anchor':(0.8, 0.18)},
scheme="user_defined",
classification_kwds={"bins": [250000,350000,500000, 1000000,1500000]})
gdf.plot(ax=ax,facecolor="none",**{'edgecolor':'grey', 'alpha':1,'linewidth':0.2})
label.plot(ax=ax,color='black',markersize=1)
texts =[ax.text(row.coords[0], row.coords[1], s=row["names"],fontname='Helvetica Neue',weight='bold',
horizontalalignment='right',verticalalignment='bottom',fontsize=10, bbox={'facecolor': 'white', 'alpha':0.1, 'pad': 3, 'edgecolor':'none'}) for idx, row in label.iterrows()]
adjust_text(texts,ax=ax,objects=texts,force_explode=10)
fig.tight_layout()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
ax.axis('off')
#fig.savefig("final_maps/local_intercept_publish_final.png",dpi=300)
census_data=gpd.sjoin(df_points, gdf, how="right")
census_data.columns
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10,26),facecolor='white')
census_data.plot(ax=ax, column='avg_price', cmap='bwr',legend=True,legend_kwds={'bbox_to_anchor':(0.8, 0.18)},
scheme="user_defined",
classification_kwds={"bins": [250000,350000,500000, 1000000,1500000]})
gdf.plot(ax=ax,facecolor="none",**{'edgecolor':'grey', 'alpha':1,'linewidth':0.2})
label.plot(ax=ax,color='black',markersize=1)
texts =[ax.text(row.coords[0], row.coords[1], s=row["names"],fontname='Helvetica Neue',weight='bold',
horizontalalignment='right',verticalalignment='bottom',fontsize=10, bbox={'facecolor': 'white', 'alpha':0.75, 'pad': 3, 'edgecolor':'none'}) for idx, row in label.iterrows()]
adjust_text(texts,ax=ax,objects=texts,force_explode=10)
fig.tight_layout()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
ax.axis('off')
#fig.savefig("final_maps/local_intercept_publish_final.png",dpi=300)
census_data.columns
census_sns_pairs = census_data[['avg_tech', 'avg_unemp', 'avg_index', 'avg_price',
'avg_basement', 'avg_sqft', 'avg_water_dist', 'avg_age', 'ind',
'ln_avg_price']]
#Aspatial Correlations and Histograms
def corr(x, y, **kwargs):
# Calculate the value
coef = np.corrcoef(x, y)[0][1]
# Make the label
label = r'$\rho$ = ' + str(round(coef, 2))
# Add the label to the plot
ax = plt.gca()
ax.annotate(label, xy = (0.2, 0.95), size = 20, xycoords = ax.transAxes)
# Create a pair grid instance
grid = sns.PairGrid(data= census_data,
vars = ['avg_tech', 'avg_unemp', 'avg_index', 'avg_price',
'avg_basement', 'avg_sqft', 'avg_water_dist', 'avg_age', 'ind',
'ln_avg_price'])
# Map the plots to the locations
grid = grid.map_upper(plt.scatter, color = 'darkred')
#grid = grid.map_upper(corr)
grid = grid.map_lower(sns.kdeplot, cmap = 'Reds')
grid.map_diag(plt.hist, bins = 10, edgecolor = 'k', color = 'darkred');
kmeans5.fit?
kmeans5 = cluster.KMeans(n_clusters=5)
k5cls = kmeans5.fit(census_data[['avg_price']])
k5cls.labels_
census_data['k5cls'] = k5cls.labels_
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10,26),facecolor='white')
census_data.plot(ax=ax, column='k5cls', categorical=True,legend=True,cmap="Set2")
gdf.plot(ax=ax,facecolor="none",**{'edgecolor':'grey', 'alpha':1,'linewidth':0.2})
label.plot(ax=ax,color='black',markersize=1)
texts =[ax.text(row.coords[0], row.coords[1], s=row["names"],fontname='Helvetica Neue',weight='bold',
horizontalalignment='right',verticalalignment='bottom',fontsize=10, bbox={'facecolor': 'white', 'alpha':0.5, 'pad': 3, 'edgecolor':'none'}) for idx, row in label.iterrows()]
adjust_text(texts,ax=ax,objects=texts,force_explode=10)
fig.tight_layout()
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
ax.axis('off')
V1 = Variogram(census_data[['avg_price']].values,census_data['avg_price'].values, normalize=False)
V1.plot(show=False);
fig, _a = plt.subplots(2,3, figsize=(18, 10), sharex=True, sharey=True)
axes = _a.flatten()
for i, model in enumerate(('spherical', 'exponential', 'gaussian', 'matern', 'stable', 'cubic')):
V1.model = model
V1.plot(axes=axes[i], hist=False, show=False)
axes[i].set_title('Model: %s; RMSE: %.2f' % (model, V1.rmse))
import matplotlib.pyplot as plt
import seaborn
from pysal.viz import splot
from splot.esda import plot_moran
import contextily
# Analysis
import geopandas
import pandas
from pysal.explore import esda
from pysal.lib import weights
from numpy.random import seed
census_data.shape
census_data = census_data.reset_index()
# Generate W from the GeoDataFrame
w = weights.KNN.from_dataframe(census_data,k=8)
# Row-standardization
w.transform = "R"
census_data["Avg_price_lag"] = weights.spatial_lag.lag_spatial(
w, census_data["avg_price"]
)
f, axs = plt.subplots(1, 2, figsize=(12, 6))
ax1, ax2 = axs
census_data.plot(
column="avg_price",
cmap="viridis",
scheme="quantiles",
k=5,
edgecolor="white",
linewidth=0.0,
alpha=0.75,
legend=True,
ax=ax1,legend_kwds={'bbox_to_anchor':(1.3, 0.18)},
)
ax1.set_axis_off()
ax1.set_title("Average house price")
census_data.plot(
column="Avg_price_lag",
cmap="viridis",
scheme="quantiles",
k=5,
edgecolor="white",
linewidth=0.0,
alpha=0.75,
legend=True,
ax=ax2,legend_kwds={'bbox_to_anchor':(1.3, 0.18)},
)
ax2.set_axis_off()
ax2.set_title("Average house price - Spatial Lag")
plt.show()
w.transform = "R"
moran = esda.moran.Moran(census_data["avg_price"], w)
moran.I
moran.p_sim
plot_moran(moran);