Introduction to Markov methods
from IPython.display import IFrame
IFrame(src='https://setosa.io/ev/markov-chains/', width=1200, height=1000)
Set up environment
Load libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pylab import rcParams
import geopandas as gpd
import mapclassify as mc
import libpysal
from esda.moran import Moran
import giddy
The dataset
df = pd.read_csv(libpysal.examples.get_path("usjoin.csv"))
df
df.describe().round(2)
df.shape
pci = df.loc[:,'1929':'2009'].values.T
pci
pci.shape
pci[0, :]
Plot absolute dynamics
years = range(1929,2010)
names = df['Name'].values
order1929 = np.argsort(pci[0,:])
order2009 = np.argsort(pci[-1,:])
names1929 = names[order1929[::-1]]
names2009 = names[order2009[::-1]]
first_last = np.vstack((names1929,names2009))
rcParams['figure.figsize'] = 15,10
plt.plot(years,pci)
for i in range(48):
plt.text(1915,54530-(i*1159), first_last[0][i],fontsize=12)
plt.text(2010.5,54530-(i*1159), first_last[1][i],fontsize=12)
plt.xlim((years[0], years[-1]))
plt.ylim((0, 54530))
plt.ylabel(r"$y_{i,t}$",fontsize=14)
plt.xlabel('Years',fontsize=12)
plt.title('Absolute Dynamics',fontsize=18);
Plot relative dynamics
rpci= (pci.T / pci.mean(axis=1)).T
order1929 = np.argsort(rpci[0,:])
order2009 = np.argsort(rpci[-1,:])
names1929 = names[order1929[::-1]]
names2009 = names[order2009[::-1]]
first_last = np.vstack((names1929,names2009))
rcParams['figure.figsize'] = 15,10
plt.plot(years,rpci)
for i in range(48):
plt.text(1915,1.91-(i*0.041), first_last[0][i],fontsize=12)
plt.text(2010.5,1.91-(i*0.041), first_last[1][i],fontsize=12)
plt.xlim((years[0], years[-1]))
plt.ylim((0, 1.94))
plt.ylabel(r"$y_{i,t}/\bar{y}_t$",fontsize=14)
plt.xlabel('Years',fontsize=12)
plt.title('Relative Dynamics',fontsize=18);
Panel data of quintile membership
# slice first raw and all columns
pci[0, :]
q5 = np.array([mc.Quantiles(y, k=5).yb for y in pci]).transpose() # We need to transpose the results to be used later
# cross-section of quintile membership in the first year (1929)
q5[:, 0]
# time series of quintile membership for Colorado
q5[4, :]
# Rows indicate regional units and columns indicate years
pd.DataFrame(q5)
Classic Markov
m5 = giddy.markov.Markov(q5)
print(m5.transitions)
print(m5.p)
print(m5.steady_state)
print(giddy.ergodic.fmpt(m5.p))
Spatial dependence
geo_table = gpd.read_file(libpysal.examples.get_path('us48.shp'))
income_table = pd.read_csv(libpysal.examples.get_path("usjoin.csv"))
complete_table = geo_table.merge(income_table, left_on='STATE_NAME', right_on='Name')
complete_table.head()
index_year = range(1929,2010,15)
fig, axes = plt.subplots(nrows=2, ncols=3,figsize = (15,7))
for i in range(2):
for j in range(3):
ax = axes[i,j]
complete_table.plot(ax=ax, column=str(index_year[i*3+j]), cmap='OrRd', scheme='quantiles', legend=True)
ax.set_title('Per Capita Income %s Quintiles'%str(index_year[i*3+j]))
ax.axis('off')
leg = ax.get_legend()
leg.set_bbox_to_anchor((0.8, 0.15, 0.16, 0.2))
plt.tight_layout()
w = libpysal.io.open(libpysal.examples.get_path("states48.gal")).read()
w.transform = 'R'
mits = [Moran(cs, w) for cs in pci]
res = np.array([(mi.I, mi.EI, mi.seI_norm, mi.sim[974]) for mi in mits])
years = np.arange(1929,2010)
fig, ax = plt.subplots(nrows=1, ncols=1,figsize = (10,5) )
ax.plot(years, res[:,0], label='Moran\'s I')
#plot(years, res[:,1], label='E[I]')
ax.plot(years, res[:,1]+1.96*res[:,2], label='Upper bound',linestyle='dashed')
ax.plot(years, res[:,1]-1.96*res[:,2], label='Lower bound',linestyle='dashed')
ax.set_title("Global spatial autocorrelation for annual US per capita incomes",fontdict={'fontsize':15})
ax.set_xlim([1929,2009])
ax.legend();
Spatial Markov
help(giddy.markov.Spatial_Markov)
sm = giddy.markov.Spatial_Markov(rpci.T, w, fixed = True, k = 5,m=5) # spatial_markov instance o
print(sm.p)
sm.summary()
#we use seaborn to create a heatmap
sns.set()
fig, ax = plt.subplots(figsize = (5,4))
im = sns.heatmap(sm.p, annot=True, linewidths=.5, ax=ax, cbar=True, vmin=0, vmax=1,
square=True, cmap="YlGn",fmt='.3f')
fig, axes = plt.subplots(2,3,figsize = (15,8))
for i in range(2):
for j in range(3):
ax = axes[i,j]
if i==1 and j==2:
ax.axis('off')
continue
p_temp = sm.P[i*3+j]
im = sns.heatmap(p_temp, annot=True, linewidths=.5, ax=ax, cbar=True, vmin=0, vmax=1,
square=True, cmap="YlGn",fmt='.3f')
ax.set_title("Spatial Lag %d"%(i*3+j),fontsize=13)
fig, axes = plt.subplots(2,3,figsize = (15,8))
for i in range(2):
for j in range(3):
ax = axes[i,j]
if i==0 and j==0:
im = sns.heatmap(sm.p, annot=True, linewidths=.5, ax=ax, cbar=True, vmin=0, vmax=1,
square=True, cmap="YlGn",fmt='.3f')
ax.set_title("Global",fontsize=13)
else:
p_temp = sm.P[i*3+j-1]
im = sns.heatmap(p_temp, annot=True, linewidths=.5, ax=ax, cbar=True, vmin=0, vmax=1,
square=True, cmap="YlGn",fmt='.3f')
ax.set_title("Spatial Lag %d"%(i*3+j),fontsize=13)
print(sm.S)
print(sm.F)
giddy.markov.Homogeneity_Results(sm.T).summary()
print(giddy.markov.kullback(sm.T))
LISA Markov
lm = giddy.markov.LISA_Markov(pci.T, w)
print(lm.classes)
print(lm.transitions)
print(lm.p)
print(lm.steady_state)
print(giddy.ergodic.fmpt(lm.p))
print(lm.chi_2)