# Install libraries
!pip install --upgrade pip --quiet
!pip install statsmodels --quiet
!pip install linearmodels --quiet
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')
sns.set_context('paper', font_scale= 1.3)
import statsmodels.formula.api as smf
import statsmodels.api as sm
import statsmodels.base.model as smclass
import linearmodels as plm
df: Pooled cross-section
# Load the pooled dataset through its short link.
# The short link presumably resolves to the full address below (verify before relying on it):
# https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv
df = pd.read_csv(filepath_or_buffer="https://bit.ly/mendez2020")
df
df1: Panel data ready (multi-index)
# Read the CSV with a (country, year) multi-index, then sort that index
df1 = pd.read_csv(
    "https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv",
    index_col=["country", "year"],
)
df1 = df1.sort_index()
df1
dataDefinitions
# Load the table of variable definitions through its short link.
# The short link presumably resolves to the full address below (verify before relying on it):
# https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat-definitions.csv
dataDefinitions = pd.read_csv(filepath_or_buffer="https://bit.ly/mendez2020d")
dataDefinitions
df2: Determinants of growth
# Keep every row (all countries, all years) but only the identifiers and the
# proximate sources of economic growth
df2 = df.loc[
    :,
    ["country", "region", "year", "hi1990", "GDPpc", "lp", "h", "ky", "TFP"],
]
df2
df3: Determinants of growth 1990
# Rows for the year 1990 only: identifiers plus the proximate sources of economic growth
df3 = df.loc[
    df["year"] == 1990,
    ["country", "region", "hi1990", "GDPpc", "lp", "h", "ky", "TFP"],
]
df3
df4: Determinants of growth 2014
# Rows for the year 2014 only: identifiers plus the proximate sources of economic growth
df4 = df.loc[
    df["year"] == 2014,
    ["country", "region", "hi1990", "GDPpc", "lp", "h", "ky", "TFP"],
]
df4
df5: Determinants of growth 1990 in log form
# Rows for the year 1990 only: identifiers plus the proximate sources of
# economic growth in log form (trend component)
df5 = df.loc[
    df["year"] == 1990,
    ["country", "region", "hi1990", "log_GDPpc", "log_lp", "log_h", "log_ky", "log_tfp"],
]
df5
df6: Determinants of growth 2014 in log form
# Rows for the year 2014 only: identifiers plus the proximate sources of
# economic growth in log form (trend component)
df6 = df.loc[
    df["year"] == 2014,
    ["country", "region", "hi1990", "log_GDPpc", "log_lp", "log_h", "log_ky", "log_tfp"],
]
df6
df7: Wide panel for log lp
# Reshape log labor productivity from long form (one row per country-year)
# to wide form (one column per year)
df7 = df.pivot_table(
    values="log_lp",
    index=["country", "region", "hi1990"],
    columns="year",
)
# Turn the row identifiers back into ordinary columns
df7 = df7.reset_index()
# The year labels come from the data; convert every column name to a string
df7.columns = df7.columns.astype(str)
df7
df8: Wide panel for log ky
# Reshape the log capital-output ratio from long form (one row per country-year)
# to wide form (one column per year)
df8 = df.pivot_table(
    values="log_ky",
    index=["country", "region", "hi1990"],
    columns="year",
)
# Turn the row identifiers back into ordinary columns
df8 = df8.reset_index()
# The year labels come from the data; convert every column name to a string
df8.columns = df8.columns.astype(str)
df9: Wide panel for log h
# Reshape log human capital from long form (one row per country-year)
# to wide form (one column per year)
df9 = df.pivot_table(
    values="log_h",
    index=["country", "region", "hi1990"],
    columns="year",
)
# Turn the row identifiers back into ordinary columns
df9 = df9.reset_index()
# The year labels come from the data; convert every column name to a string
df9.columns = df9.columns.astype(str)
df10: Wide panel for log tfp
# Reshape log total factor productivity from long form (one row per country-year)
# to wide form (one column per year)
df10 = df.pivot_table(
    values="log_tfp",
    index=["country", "region", "hi1990"],
    columns="year",
)
# Turn the row identifiers back into ordinary columns
df10 = df10.reset_index()
# The year labels come from the data; convert every column name to a string
df10.columns = df10.columns.astype(str)
# Summary statistics of the wide log labor productivity panel, rounded to two decimals
df7.describe().round(decimals=2)
# Summary statistics of the wide log capital-output ratio panel, rounded to two decimals
df8.describe().round(decimals=2)
Exploratory analysis
Univariate plots
# Cross-country distribution of log labor productivity in 1990
# (df7 is the wide panel of log_lp; kde=True requests a density curve as well)
sns.displot(df7['1990'], kde = True);
# Kernel density estimate of the 1990 column on its own
sns.kdeplot(df7['1990']);
# Kernel density estimates of the 1990 and 2014 columns
# NOTE(review): presumably these two calls share a notebook cell so the curves overlay — confirm
sns.kdeplot(df7['1990'])
sns.kdeplot(df7['2014']);
Categorical plots
# Number of rows of df7 in each 1990 income group
sns.countplot(data=df7, x="hi1990");
# Log labor productivity in 1990 by region (bar plot with the default estimator)
sns.barplot(data=df7, x="region", y="1990");
# Standard deviation of log labor productivity in 1990 within each region
sns.barplot(data=df7, x="region", y="1990", estimator=np.std);
# Box plots of 1990 log labor productivity: differences across and within regions
sns.boxplot(data=df7, x="region", y="1990");
# 1990 versus 2014 log labor productivity, with a fitted regression line
sns.jointplot(x="1990", y="2014", data=df7, kind="reg");
# 1990 versus 2014 log labor productivity as a bivariate density
sns.jointplot(x="1990", y="2014", data=df7, kind="kde");
# 1990 versus 2014 log labor productivity, colored by region
sns.jointplot(x="1990", y="2014", hue="region", data=df7);
# Same region-colored joint plot, drawn a second time as in the original notebook
sns.jointplot(x="1990", y="2014", hue="region", data=df7);
# One fitted line per region, without confidence bands
sns.lmplot(data=df7, x="1990", y="2014", hue="region", ci=None);
Persistence and mobility
# Persistence and mobility: each figure compares log labor productivity in 1990
# (x axis) with 2014 (y axis) and adds the x = y line as a visual reference.

# Scatter plot with marginal distributions
g = sns.jointplot(data=df7, x="1990", y="2014", kind="scatter")
# Draw the x = y line over the range shared by both axes
x0, x1 = g.ax_joint.get_xlim()
y0, y1 = g.ax_joint.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
g.ax_joint.plot(lims, lims, '-r');
# Scatter plot with a fitted regression line (no confidence band)
g = sns.lmplot(x="1990", y="2014", ci=None, data=df7)
# Draw the x = y line over the range shared by both axes
x0, x1 = g.ax.get_xlim()
y0, y1 = g.ax.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
g.ax.plot(lims, lims, '-r');
# Bivariate density of 1990 versus 2014.
# fill is a boolean switch, so pass True rather than the string "true".
g = sns.jointplot(data=df7, x="1990", y="2014", kind="kde", fill=True, height=6, space=0, levels=5)
# Draw the x = y line over the range shared by both axes
x0, x1 = g.ax_joint.get_xlim()
y0, y1 = g.ax_joint.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
g.ax_joint.plot(lims, lims, '-r');
Evolution of disparities
Labor productivity (Output per worker)
# Yearly values of log labor productivity (columns '1990' through '2014') as a
# NumPy array: rows follow the rows of df7, columns follow the years.
df7a = df7.loc[:, '1990':'2014'].values
df7a
df7a.shape
# Array of years. Note the upper limit: np.arange excludes its stop value, so
# 2015 is required for the array to end in 2014, the last year selected above.
years = np.arange(1990, 2015)
years
years.shape
# Sigma convergence analysis: cross-sectional standard deviation for each year
sigma_df7a = df7a.std(axis=0)
plt.plot(years, sigma_df7a)
plt.title("Sigma Convergence in (log) labor productivity")
plt.ylabel('Stand. Dev.of log labor productivity');
# Beta convergence analysis
# Compute the growth rate between 1990 and 2014 (log approximation method)
df7['g1990_2014'] = df7['2014'] - df7['1990']
sns.lmplot(x="1990", y="g1990_2014", data=df7);
# OLS regression of growth on the initial (1990) level plus a constant
y = df7['g1990_2014']
X = df7['1990']
X_withconst = sm.add_constant(X)
OLS = sm.OLS(y, X_withconst).fit()
print(OLS.summary())
# Same growth-versus-initial-level relationship, fitted separately by region
sns.lmplot(x="1990", y="g1990_2014", hue='region', ci=None, data=df7);
# Time series of every row of the panel on a single figure
for row in df7a:
    plt.plot(years, row)
# Cross-sectional densities at the start and at the end of the period
sns.kdeplot(df7['1990'], label="1990")
sns.kdeplot(df7['2014'], label="2014")
plt.legend()
plt.xlabel('Log labor productivity (output per worker)');
# Divide each observation by the cross-sectional mean of its year
rel_df7a = df7a / df7a.mean(axis=0)
# Column 0 is 1990 and column 24 is 2014 (the columns start at '1990');
# column 1 would be 1991 and would not match the "1990" label.
sns.kdeplot(rel_df7a[:, 0], label="1990")
sns.kdeplot(rel_df7a[:, 24], label="2014")
plt.legend()
plt.xlabel('Relative (log) labor productivity');
# Distributional mobility and convergence
# x and y are passed directly as vectors, so no data= argument is needed
# (an ndarray is not a DataFrame or mapping that x/y could be looked up in).
g = sns.jointplot(x=rel_df7a[:, 0], y=rel_df7a[:, 24], kind="kde", fill=True, height=6, space=0, levels=5, ratio=3)
g.plot_joint(sns.regplot, scatter=False, truncate=False, color="b", line_kws={"lw": 4})
# Draw the x = y line over the range shared by both axes
x0, x1 = g.ax_joint.get_xlim()
y0, y1 = g.ax_joint.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
# Red dashed line; the style is given once instead of both as '-r' and linestyle='--'
g.ax_joint.plot(lims, lims, 'r--')
g.set_axis_labels("Relative (log) labor productivity in 1990", "Relative (log) labor productivity in 2014")
g.ax_marg_x.set_xlim(0.65, 1.3)
g.ax_marg_y.set_ylim(0.65, 1.3);
Physical capital ratio
# Yearly values of the log capital-output ratio (columns '1990' through '2014')
# as a NumPy array: rows follow the rows of df8, columns follow the years.
df8a = df8.loc[:, '1990':'2014'].values
# np.arange excludes its stop value, so 2015 makes the array end in 2014
years = np.arange(1990, 2015)
# Sigma convergence analysis: cross-sectional standard deviation for each year
sigma_df8a = df8a.std(axis=0)
plt.plot(years, sigma_df8a)
plt.title("Sigma Convergence in (log) capital-output ratio")
plt.ylabel('Stand. Dev');
# Beta convergence analysis
# Compute the growth rate between 1990 and 2014 (log approximation method)
df8['g1990_2014'] = df8['2014'] - df8['1990']
sns.lmplot(x="1990", y="g1990_2014", data=df8);
# OLS regression of growth on the initial (1990) level plus a constant
y = df8['g1990_2014']
X = df8['1990']
X_withconst = sm.add_constant(X)
OLS = sm.OLS(y, X_withconst).fit()
print(OLS.summary())
# Same growth-versus-initial-level relationship, fitted separately by region
sns.lmplot(x="1990", y="g1990_2014", hue='region', ci=None, data=df8);
# Time series of every row of the panel on a single figure
for row in df8a:
    plt.plot(years, row)
# Cross-sectional densities at the start and at the end of the period
sns.kdeplot(df8['1990'], label="1990")
sns.kdeplot(df8['2014'], label="2014")
plt.legend()
plt.xlabel('Log physical capital ratio');
# Divide each observation by the cross-sectional mean of its year
rel_df8a = df8a / df8a.mean(axis=0)
# Column 0 is 1990 and column 24 is 2014 (the columns start at '1990');
# column 1 would be 1991 and would not match the "1990" label.
sns.kdeplot(rel_df8a[:, 0], label="1990")
sns.kdeplot(rel_df8a[:, 24], label="2014")
plt.legend()
plt.xlabel('Relative (log) physical capital ratio');
# Distributional mobility and convergence
# x and y are passed directly as vectors, so no data= argument is needed
# (an ndarray is not a DataFrame or mapping that x/y could be looked up in).
g = sns.jointplot(x=rel_df8a[:, 0], y=rel_df8a[:, 24], kind="kde", fill=True, height=6, space=0, levels=5, ratio=3)
g.plot_joint(sns.regplot, scatter=False, truncate=False, color="b", line_kws={"lw": 4})
# Draw the x = y line over the range shared by both axes
x0, x1 = g.ax_joint.get_xlim()
y0, y1 = g.ax_joint.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
# Red dashed line; the style is given once instead of both as '-r' and linestyle='--'
g.ax_joint.plot(lims, lims, 'r--')
g.set_axis_labels("Relative (log) physical capital ratio in 1990", "Relative (log) physical capital ratio in 2014")
g.ax_marg_x.set_xlim(-3, 3)
g.ax_marg_y.set_ylim(-3, 3);
# Distributional mobility and convergence: same figure with a narrower y range
g = sns.jointplot(x=rel_df8a[:, 0], y=rel_df8a[:, 24], kind="kde", fill=True, height=6, space=0, levels=5, ratio=3)
g.plot_joint(sns.regplot, scatter=False, truncate=False, color="b", line_kws={"lw": 4})
# Draw the x = y line over the range shared by both axes
x0, x1 = g.ax_joint.get_xlim()
y0, y1 = g.ax_joint.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
g.ax_joint.plot(lims, lims, 'r--')
g.set_axis_labels("Relative (log) physical capital ratio in 1990", "Relative (log) physical capital ratio in 2014")
g.ax_marg_x.set_xlim(-3, 3)
g.ax_marg_y.set_ylim(-0.2, 2.10);
Human capital per worker
# Yearly values of log human capital (columns '1990' through '2014') as a
# NumPy array: rows follow the rows of df9, columns follow the years.
df9a = df9.loc[:, '1990':'2014'].values
# np.arange excludes its stop value, so 2015 makes the array end in 2014
years = np.arange(1990, 2015)
# Sigma convergence analysis: cross-sectional standard deviation for each year
sigma_df9a = df9a.std(axis=0)
plt.plot(years, sigma_df9a)
plt.title("Sigma Convergence in (log) human capital per worker")
plt.ylabel('Stand. Dev');
# Beta convergence analysis
# Compute the growth rate between 1990 and 2014 (log approximation method)
df9['g1990_2014'] = df9['2014'] - df9['1990']
sns.lmplot(x="1990", y="g1990_2014", data=df9);
# OLS regression of growth on the initial (1990) level plus a constant
y = df9['g1990_2014']
X = df9['1990']
X_withconst = sm.add_constant(X)
OLS = sm.OLS(y, X_withconst).fit()
print(OLS.summary())
# Same growth-versus-initial-level relationship, fitted separately by region
sns.lmplot(x="1990", y="g1990_2014", hue='region', ci=None, data=df9);
# Time series of every row of the panel on a single figure
for row in df9a:
    plt.plot(years, row)
# Cross-sectional densities at the start and at the end of the period
sns.kdeplot(df9['1990'], label="1990")
sns.kdeplot(df9['2014'], label="2014")
plt.legend()
plt.xlabel('Log human capital per worker');
# Divide each observation by the cross-sectional mean of its year
rel_df9a = df9a / df9a.mean(axis=0)
# Column 0 is 1990 and column 24 is 2014 (the columns start at '1990');
# column 1 would be 1991 and would not match the "1990" label.
sns.kdeplot(rel_df9a[:, 0], label="1990")
sns.kdeplot(rel_df9a[:, 24], label="2014")
plt.legend()
plt.xlabel('Relative (log) human capital per worker');
# Distributional mobility and convergence
# x and y are passed directly as vectors, so no data= argument is needed
# (an ndarray is not a DataFrame or mapping that x/y could be looked up in).
g = sns.jointplot(x=rel_df9a[:, 0], y=rel_df9a[:, 24], kind="kde", fill=True, height=6, space=0, levels=5, ratio=3)
g.plot_joint(sns.regplot, scatter=False, truncate=False, color="b", line_kws={"lw": 4})
# Draw the x = y line over the range shared by both axes
x0, x1 = g.ax_joint.get_xlim()
y0, y1 = g.ax_joint.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
# Red dashed line; the style is given once instead of both as '-r' and linestyle='--'
g.ax_joint.plot(lims, lims, 'r--')
g.set_axis_labels("Relative (log) human capital in 1990", "Relative (log) human capital in 2014")
g.ax_marg_x.set_xlim(0, 1.75)
g.ax_marg_y.set_ylim(0, 1.75);
Total factor productivity
# Yearly values of log total factor productivity (columns '1990' through '2014')
# as a NumPy array: rows follow the rows of df10, columns follow the years.
df10a = df10.loc[:, '1990':'2014'].values
# np.arange excludes its stop value, so 2015 makes the array end in 2014
years = np.arange(1990, 2015)
# Sigma convergence analysis: cross-sectional standard deviation for each year.
# Computed on the year-only array df10a (as in the other sections), not on the
# DataFrame df10, which also carries the country, region and hi1990 columns.
sigma_df10a = df10a.std(axis=0)
plt.plot(years, sigma_df10a)
plt.title("Sigma Convergence in (log) total factor productivity")
plt.ylabel('Stand. Dev');
# Beta convergence analysis
# Compute the growth rate between 1990 and 2014 (log approximation method).
# Both terms come from df10: subtracting df7['1990'] would mix the 1990 level
# of labor productivity into the TFP growth rate.
df10['g1990_2014'] = df10['2014'] - df10['1990']
sns.lmplot(x="1990", y="g1990_2014", data=df10);
# OLS regression of growth on the initial (1990) level plus a constant
y = df10['g1990_2014']
X = df10['1990']
X_withconst = sm.add_constant(X)
OLS = sm.OLS(y, X_withconst).fit()
print(OLS.summary())
# Same growth-versus-initial-level relationship, fitted separately by region
sns.lmplot(x="1990", y="g1990_2014", hue='region', ci=None, data=df10);
# Time series of every row of the panel on a single figure
for row in df10a:
    plt.plot(years, row)
# Cross-sectional densities at the start and at the end of the period
sns.kdeplot(df10['1990'], label="1990")
sns.kdeplot(df10['2014'], label="2014")
plt.legend()
plt.xlabel('Log total factor productivity');
# Divide each observation by the cross-sectional mean of its year
rel_df10a = df10a / df10a.mean(axis=0)
# Column 0 is 1990 and column 24 is 2014 (the columns start at '1990');
# column 1 would be 1991 and would not match the "1990" label.
sns.kdeplot(rel_df10a[:, 0], label="1990")
sns.kdeplot(rel_df10a[:, 24], label="2014")
plt.legend()
plt.xlabel('Relative (log) total factor productivity');
# Distributional mobility and convergence
# x and y are passed directly as vectors, so no data= argument is needed
# (an ndarray is not a DataFrame or mapping that x/y could be looked up in).
g = sns.jointplot(x=rel_df10a[:, 0], y=rel_df10a[:, 24], kind="kde", fill=True, height=6, space=0, levels=5, ratio=3)
g.plot_joint(sns.regplot, scatter=False, truncate=False, color="b", line_kws={"lw": 4})
# Draw the x = y line over the range shared by both axes
x0, x1 = g.ax_joint.get_xlim()
y0, y1 = g.ax_joint.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
# Red dashed line; the style is given once instead of both as '-r' and linestyle='--'
g.ax_joint.plot(lims, lims, 'r--')
g.set_axis_labels("Relative (log) TFP in 1990", "Relative (log) TFP in 2014")
g.ax_marg_x.set_xlim(0.6, 1.4)
g.ax_marg_y.set_ylim(0.6, 1.4);