# Install spatial packages
!apt install gdal-bin python-gdal python3-gdal --quiet
!apt install python3-rtree --quiet
!pip install --upgrade pip --quiet
!pip install git+git://github.com/geopandas/geopandas.git --quiet
!pip install descartes --quiet
Reading package lists...
Building dependency tree...
Reading state information...
E: Unable to locate package gdal-bin
E: Unable to locate package python-gdal
E: Unable to locate package python3-gdal
Reading package lists...
Building dependency tree...
Reading state information...
E: Unable to locate package python3-rtree
# Install the PySAL packages imported further down: splot, libpysal and esda.
# NOTE(review): mapclassify is imported below as well but is not installed
# explicitly here; presumably it arrives as a dependency of these packages —
# verify.
!pip install splot --quiet
!pip install libpysal --quiet
!pip install esda --quiet
# Import libraries
import numpy as np
import pandas as pd
import seaborn as sns
import geopandas as gpd
import matplotlib.pyplot as plt
from libpysal import weights
import mapclassify
import esda
from esda.moran import Moran, Moran_Local
import splot
from splot.esda import moran_scatterplot, plot_moran, lisa_cluster
!wget https://raw.githubusercontent.com/quarcs-lab/book-2020-spatial-analysis-methods-and-practice/master/Data/City.csv
#!wget https://raw.githubusercontent.com/quarcs-lab/book-2020-spatial-analysis-methods-and-practice/master/Data/City.geojson
--2021-02-07 11:45:45-- https://raw.githubusercontent.com/quarcs-lab/book-2020-spatial-analysis-methods-and-practice/master/Data/City.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 199.232.64.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|199.232.64.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 20654 (20K) [text/plain]
Saving to: ‘City.csv.1’
City.csv.1 100%[===================>] 20.17K --.-KB/s in 0s
2021-02-07 11:45:45 (124 MB/s) - ‘City.csv.1’ saved [20654/20654]
# Load the spatial layer (City.geojson) into gdf and the attribute table
# (City.csv) into ndf.
gdf = gpd.read_file("City.geojson")
ndf = pd.read_csv("City.csv")
# Preview the first rows of ndf (the equivalent preview of gdf is disabled).
#gdf.head()
ndf.head()
# Format floats in pandas output with thousands separators and two decimals.
pd.options.display.float_format = "{:,.2f}".format
# Summary statistics of ndf.
ndf.describe()
# Univariate views of the Income column of ndf.
# A trailing ';' keeps the notebook from echoing the object returned by the
# plotting call.
# Disabled alternative call forms:
#sns.displot(ndf['Income'])
# sns.displot(ndf.Income)
# Distribution plot with seaborn's default displot kind.
sns.displot(data=ndf, x='Income');
# Disabled variant that passes kde=True:
#sns.displot(data=ndf, x="Income", kde=True)
# Distribution plot drawn with kind="kde".
sns.displot(data=ndf, x="Income", kind="kde");
# Box plot of Income along the x axis (the y-axis variant is disabled).
sns.boxplot(data=ndf, x="Income");
#sns.boxplot(data=ndf, y='Income')
# Violin plot and strip plot of Income, each on its own.
sns.violinplot(data=ndf, x="Income");
sns.stripplot(data=ndf, x='Income');
# Violin plot followed by an orange strip plot of the same variable;
# presumably both land on the same axes when run in one cell — verify.
sns.violinplot(data=ndf, x="Income")
sns.stripplot(data=ndf, x="Income", color='orange')
# Bivariate views of University (x) against Expenses (y) from ndf.
# Joint plot with kind='reg'.
sns.jointplot(data=ndf, x='University', y='Expenses', kind='reg');
# Same joint plot again, this time keeping the returned object in g so that
# the axis labels can be replaced with descriptive text.
g = sns.jointplot(data=ndf, x='University', y='Expenses', kind='reg')
g.set_axis_labels(
xlabel = "Population percentage with bachelor degree",
ylabel = "Monthly per-capita expenses in groceries"
);
# Joint plot of the same pair with kind='kde'.
sns.jointplot(data=ndf, x='University', y='Expenses', kind='kde');
# Disabled: pair plot over all of ndf.
#sns.pairplot(ndf)
# Choropleth of Income classified with scheme='NaturalBreaks' into k=3
# classes, coloured with the 'coolwarm' colormap and drawn with a legend.
gdf.plot("Income", scheme='NaturalBreaks', k=3, cmap='coolwarm', legend=True);
# Build the NaturalBreaks classifier directly on the Income column with the
# same k; no ';' here, so a notebook cell ending on this line echoes the result.
mapclassify.NaturalBreaks(gdf.Income, k=3)
# Same map drawn on an explicitly created 9x6 figure/axes pair.
fig, ax = plt.subplots(figsize=(9,6))
gdf.plot(column="Income", scheme='NaturalBreaks', k=3, cmap='coolwarm', legend=True, ax=ax)
plt.tight_layout()
# Turn the axis decorations off after the layout has been tightened.
ax.axis("off")
plt.show()
# Standardise Income as a z-score: (value - mean) / standard deviation.
# The result is stored in a new IncZscore column of gdf.
gdf['IncZscore'] = (gdf['Income'] - gdf['Income'].mean()) / gdf['Income'].std()

# Class boundaries on the z-score scale. They are defined once so that the
# classifier summary and the map below cannot drift apart.
zscore_bins = [-2.5, -1, 1, 2.5]
mapclassify.UserDefined(gdf['IncZscore'], bins=zscore_bins)

# Choropleth of the z-scores using the user-defined bins.
fig, ax = plt.subplots(figsize=(9,6))
gdf.plot(column="IncZscore", scheme='user_defined', classification_kwds={'bins': zscore_bins}, cmap='coolwarm', legend=True, ax=ax)
plt.tight_layout()
ax.axis("off")
# Render explicitly, as the Income map does; this also stops the notebook
# from echoing the value returned by ax.axis("off").
plt.show()
# Regression joint plot of University against Expenses with descriptive
# axis labels. The labels are named first, then applied to the returned grid.
university_label = "Population percentage with bachelor degree"
expenses_label = "Monthly per-capita expenses in groceries"
g = sns.jointplot(x='University', y='Expenses', data=ndf, kind='reg')
g.set_axis_labels(university_label, expenses_label);
# Side-by-side choropleths: University on the first panel, Expenses on the
# second, both classified with NaturalBreaks into three classes.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15,10))
panel_specs = [
    ("University", 'Population percentage with bachelor degree'),
    ("Expenses", 'Monthly per-capita spending'),
]
# First pass: draw every map.
for panel, (variable, heading) in zip(axes, panel_specs):
    gdf.plot(column=variable, scheme='NaturalBreaks', k=3, cmap='coolwarm', legend=True, ax=panel)
# The layout is tightened after all maps are drawn and before the frames
# are hidden and the titles are set.
plt.tight_layout()
# Second pass: hide the axis frame and title each panel.
for panel, (variable, heading) in zip(axes, panel_specs):
    panel.axis("off")
    panel.set_title(heading)
plt.show()
# Pair plots over three columns of ndf, first with kind='reg', then with
# kind='kde'. The column subset is selected once and shared by both calls.
pair_columns = ['Expenses', 'University', 'SecondaryE']
pair_data = ndf[pair_columns]
sns.pairplot(pair_data, kind='reg')
sns.pairplot(pair_data, kind='kde')