[PYTHON] Economic development

# Install libraries !pip install --upgrade pip --quiet !pip install statsmodels --quiet !pip install linearmodels --quiet

import numpy as np import pandas as pd import matplotlib.pyplot as plt import seaborn as sns sns.set_style('darkgrid') sns.set_context('paper', font_scale= 1.3) import statsmodels.formula.api as smf import statsmodels.api as sm import statsmodels.base.model as smclass import linearmodels as plm

df: Pooled cross-section

# Load the main dataset through the short link.
# Alternative long-form URL kept from the original notebook:
# https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv
df = pd.read_csv(filepath_or_buffer="https://bit.ly/mendez2020")
df

df1: Panel data ready (multi-index)

# Load the dataset as a panel: rows are indexed by (country, year) and the
# resulting MultiIndex is sorted.
df1 = pd.read_csv(
    "https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv",
    index_col=["country", "year"],
)
df1 = df1.sort_index()
df1

dataDefinitions

# Load the variable definitions table through the short link.
# Alternative long-form URL kept from the original notebook:
# https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat-definitions.csv
dataDefinitions = pd.read_csv(filepath_or_buffer="https://bit.ly/mendez2020d")
dataDefinitions

df2: Determinants of growth

# All countries and all years, restricted to the identifier columns and the
# proximate sources of economic growth.
df2 = df.loc[
    :,
    ['country', 'region', 'year', 'hi1990', 'GDPpc', 'lp', 'h', 'ky', 'TFP'],
]
df2

df3: Determinants of growth 1990

# Cross-section for 1990: all countries, proximate sources of economic growth.
df3 = df.loc[
    df['year'] == 1990,
    ['country', 'region', 'hi1990', 'GDPpc', 'lp', 'h', 'ky', 'TFP'],
]
df3

df4: Determinants of growth 2014

# Cross-section for 2014: all countries, proximate sources of economic growth.
df4 = df.loc[
    df['year'] == 2014,
    ['country', 'region', 'hi1990', 'GDPpc', 'lp', 'h', 'ky', 'TFP'],
]
df4

df5: Determinants of growth 1990 in log form

# Cross-section for 1990 using the log-form columns (described in the
# original notebook as the trend component).
df5 = df.loc[
    df['year'] == 1990,
    ['country', 'region', 'hi1990', 'log_GDPpc', 'log_lp', 'log_h', 'log_ky', 'log_tfp'],
]
df5

df6: Determinants of growth 2014 in log form

# Cross-section for 2014 using the log-form columns (described in the
# original notebook as the trend component).
df6 = df.loc[
    df['year'] == 2014,
    ['country', 'region', 'hi1990', 'log_GDPpc', 'log_lp', 'log_h', 'log_ky', 'log_tfp'],
]
df6

df7: Wide panel for log lp

# Reshape log labor productivity from long form (one row per country-year)
# to wide form (one row per country, one column per year).
df7 = (
    df.pivot_table(
        values='log_lp',
        index=['country', 'region', 'hi1990'],
        columns='year',
    )
    .reset_index()
    # Year labels become strings so columns can be addressed as '1990', '2014', ...
    .rename(columns=str)
)

df7

df8: Wide panel for log ky

# Reshape the log capital-output ratio from long form to wide form
# (one row per country, one column per year).
df8 = (
    df.pivot_table(
        values='log_ky',
        index=['country', 'region', 'hi1990'],
        columns='year',
    )
    .reset_index()
    # Year labels become strings so columns can be addressed as '1990', '2014', ...
    .rename(columns=str)
)

df9: Wide panel for log h

# Reshape log human capital from long form to wide form
# (one row per country, one column per year).
df9 = (
    df.pivot_table(
        values='log_h',
        index=['country', 'region', 'hi1990'],
        columns='year',
    )
    .reset_index()
    # Year labels become strings so columns can be addressed as '1990', '2014', ...
    .rename(columns=str)
)

df10: Wide panel for log tfp

# Reshape log total factor productivity from long form to wide form
# (one row per country, one column per year).
df10 = (
    df.pivot_table(
        values='log_tfp',
        index=['country', 'region', 'hi1990'],
        columns='year',
    )
    .reset_index()
    # Year labels become strings so columns can be addressed as '1990', '2014', ...
    .rename(columns=str)
)

# Summary statistics of log labor productivity (df7), rounded to two decimals
df7.describe().round(decimals=2)

# Summary statistics of the log physical-capital-to-output ratio (df8),
# rounded to two decimals
df8.describe().round(decimals=2)

Exploratory analysis

Univariate plots

# Histogram with a KDE overlay: cross-country distribution of log labor
# productivity in 1990.
sns.displot(data=df7, x='1990', kde=True);

# KDE only, 1990
sns.kdeplot(data=df7, x='1990');

# KDEs for 1990 and 2014, issued back to back so they can share one set of axes
sns.kdeplot(data=df7, x='1990')
sns.kdeplot(data=df7, x='2014');

Categorical plots

# Number of countries in each 1990 income group
sns.countplot(data=df7, x='hi1990');

# Log labor productivity by region in 1990 (seaborn's default bar estimator)
sns.barplot(data=df7, x='region', y='1990');

# Within-region standard deviation of log labor productivity in 1990
sns.barplot(data=df7, x='region', y='1990', estimator=np.std);

# Spread of log labor productivity across and within regions in 1990
sns.boxplot(data=df7, x='region', y='1990');

# 1990 vs 2014 log labor productivity, with a fitted regression line
sns.jointplot(data=df7, x="1990", y="2014", kind="reg");

# 1990 vs 2014 as a bivariate kernel density
sns.jointplot(data=df7, x="1990", y="2014", kind="kde");

# 1990 vs 2014, points colored by region
sns.jointplot(data=df7, x="1990", y="2014", hue="region");

# One regression line per region, without confidence bands
sns.lmplot(data=df7, x="1990", y="2014", hue='region', ci=None);

Persistence and mobility

# Persistence and mobility: plot 2014 against 1990 log labor productivity and
# overlay the line y = x. A country on the line has the same value in both
# years; a country above it has a higher value in 2014 than in 1990.

# Scatter plot with marginal distributions
g = sns.jointplot(data=df7, x="1990", y="2014", kind="scatter")
# Draw the line y = x over the range covered by both axes
x0, x1 = g.ax_joint.get_xlim()
y0, y1 = g.ax_joint.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
g.ax_joint.plot(lims, lims, '-r');

# Scatter plot with a fitted regression line (no confidence band)
g = sns.lmplot(x="1990", y="2014", ci=None, data=df7)
# Draw the line y = x over the range covered by both axes
x0, x1 = g.ax.get_xlim()
y0, y1 = g.ax.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
g.ax.plot(lims, lims, '-r');

# Bivariate kernel density of 1990 vs 2014.
# `fill` takes a boolean; the original passed the string "true", which only
# worked because any non-empty string is truthy.
g = sns.jointplot(data=df7, x="1990", y="2014", kind="kde", fill=True,
                  height=6, space=0, levels=5)
# Draw the line y = x over the range covered by both axes
x0, x1 = g.ax_joint.get_xlim()
y0, y1 = g.ax_joint.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
g.ax_joint.plot(lims, lims, '-r');

Evolution of disparities

Labor productivity (Output per worker)

# Year columns 1990..2014 of the wide panel as a 2-D array
# (rows: countries, columns: years)
df7a = df7.loc[:, '1990':'2014'].to_numpy()
df7a

df7a.shape

# Array of years. Note the upper limit: the data is only available up to 2014,
# but np.arange excludes its stop value, so we need to write 2015.
years = np.arange(1990, 2015)
years

years.shape

# Sigma convergence analysis: cross-country standard deviation for each year
sigma_df7a = np.std(df7a, axis=0)
plt.plot(years, sigma_df7a)
plt.title("Sigma Convergence in (log) labor productivity")
plt.ylabel('Stand. Dev.of log labor productivity');

# Beta convergence analysis
# Growth rate between 1990 and 2014 (log approximation method: difference of logs)
df7['g1990_2014'] = df7['2014'].sub(df7['1990'])
sns.lmplot(data=df7, x="1990", y="g1990_2014");

# OLS regression of growth on the initial (1990) level, with an intercept
y = df7['g1990_2014']
X = df7['1990']
X_withconst = sm.add_constant(X)
OLS = sm.OLS(endog=y, exog=X_withconst).fit()
print(OLS.summary())

# Same growth-vs-initial-level relationship, fitted separately for each region
sns.lmplot(data=df7, x="1990", y="g1990_2014", hue='region', ci=None);

# One time-series line per country
for country_series in df7a:
    plt.plot(years, country_series)

# Cross-country distribution at the start and at the end of the period
sns.kdeplot(df7['1990'], label="1990")
sns.kdeplot(df7['2014'], label="2014")
plt.legend()
plt.xlabel('Log labor productivity (output per worker)');

# Standardize each observation by its cross-sectional mean: every value is
# divided by the mean of its year (column) across countries.
rel_df7a = df7a / df7a.mean(axis=0)

# df7a was sliced from columns '1990':'2014', so 1990 is column 0 and 2014 is
# the last column. (The original used column 1, i.e. 1991, under a "1990" label.)
sns.kdeplot(rel_df7a[:, 0], label="1990")
sns.kdeplot(rel_df7a[:, -1], label="2014")
plt.legend()
plt.xlabel('Relative (log) labor productivity');

# Distributional mobility and convergence
# x and y are passed as arrays, so no `data=` argument is needed; `fill` is a
# boolean flag.
g = sns.jointplot(x=rel_df7a[:, 0], y=rel_df7a[:, -1], kind="kde", fill=True,
                  height=6, space=0, levels=5, ratio=3)
g.plot_joint(sns.regplot, scatter=False, truncate=False, color="b",
             line_kws={"lw": 4})
# Draw the line y = x over the range covered by both axes
x0, x1 = g.ax_joint.get_xlim()
y0, y1 = g.ax_joint.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
# Color and line style are given once each; the original combined the '-r'
# format string (solid) with linestyle='--', which is contradictory.
g.ax_joint.plot(lims, lims, color='r', linestyle='--')
g.set_axis_labels("Relative (log) labor productivity in 1990",
                  "Relative (log) labor productivity in 2014")
g.ax_marg_x.set_xlim(0.65, 1.3)
g.ax_marg_y.set_ylim(0.65, 1.3);

Physical capital ratio

# Year columns 1990..2014 of the wide panel as a 2-D array
# (rows: countries, columns: years)
df8a = df8.loc[:, '1990':'2014'].to_numpy()
# np.arange excludes its stop value, hence 2015 for data ending in 2014
years = np.arange(1990, 2015)

# Sigma convergence analysis: cross-country standard deviation for each year
sigma_df8a = np.std(df8a, axis=0)
plt.plot(years, sigma_df8a)
plt.title("Sigma Convergence in (log) capital-output ratio")
plt.ylabel('Stand. Dev');

# Beta convergence analysis
# Growth rate between 1990 and 2014 (log approximation method: difference of logs)
df8['g1990_2014'] = df8['2014'].sub(df8['1990'])
sns.lmplot(data=df8, x="1990", y="g1990_2014");

# OLS regression of growth on the initial (1990) level, with an intercept
y = df8['g1990_2014']
X = df8['1990']
X_withconst = sm.add_constant(X)
OLS = sm.OLS(endog=y, exog=X_withconst).fit()
print(OLS.summary())

# Same growth-vs-initial-level relationship, fitted separately for each region
sns.lmplot(data=df8, x="1990", y="g1990_2014", hue='region', ci=None);

# One time-series line per country
for country_series in df8a:
    plt.plot(years, country_series)

# Cross-country distribution at the start and at the end of the period
sns.kdeplot(df8['1990'], label="1990")
sns.kdeplot(df8['2014'], label="2014")
plt.legend()
plt.xlabel('Log physical capital ratio');

# Standardize each observation by its cross-sectional mean: every value is
# divided by the mean of its year (column) across countries.
rel_df8a = df8a / df8a.mean(axis=0)

# df8a was sliced from columns '1990':'2014', so 1990 is column 0 and 2014 is
# the last column. (The original used column 1, i.e. 1991, under a "1990" label.)
sns.kdeplot(rel_df8a[:, 0], label="1990")
sns.kdeplot(rel_df8a[:, -1], label="2014")
plt.legend()
plt.xlabel('Relative (log) physical capital ratio');

# Distributional mobility and convergence
# x and y are passed as arrays, so no `data=` argument is needed; `fill` is a
# boolean flag.
g = sns.jointplot(x=rel_df8a[:, 0], y=rel_df8a[:, -1], kind="kde", fill=True,
                  height=6, space=0, levels=5, ratio=3)
g.plot_joint(sns.regplot, scatter=False, truncate=False, color="b",
             line_kws={"lw": 4})
# Draw the line y = x over the range covered by both axes
x0, x1 = g.ax_joint.get_xlim()
y0, y1 = g.ax_joint.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
# Color and line style are given once each; the original combined the '-r'
# format string (solid) with linestyle='--', which is contradictory.
g.ax_joint.plot(lims, lims, color='r', linestyle='--')
g.set_axis_labels("Relative (log) physical capital ratio in 1990",
                  "Relative (log) physical capital ratio in 2014")
g.ax_marg_x.set_xlim(-3, 3)
g.ax_marg_y.set_ylim(-3, 3);

Human capital per worker

# Year columns 1990..2014 of the wide panel as a 2-D array
# (rows: countries, columns: years)
df9a = df9.loc[:, '1990':'2014'].to_numpy()
# np.arange excludes its stop value, hence 2015 for data ending in 2014
years = np.arange(1990, 2015)

# Sigma convergence analysis: cross-country standard deviation for each year
sigma_df9a = np.std(df9a, axis=0)
plt.plot(years, sigma_df9a)
plt.title("Sigma Convergence in (log) human capital per worker")
plt.ylabel('Stand. Dev');

# Beta convergence analysis
# Growth rate between 1990 and 2014 (log approximation method: difference of logs)
df9['g1990_2014'] = df9['2014'].sub(df9['1990'])
sns.lmplot(data=df9, x="1990", y="g1990_2014");

# OLS regression of growth on the initial (1990) level, with an intercept
y = df9['g1990_2014']
X = df9['1990']
X_withconst = sm.add_constant(X)
OLS = sm.OLS(endog=y, exog=X_withconst).fit()
print(OLS.summary())

# Same growth-vs-initial-level relationship, fitted separately for each region
sns.lmplot(data=df9, x="1990", y="g1990_2014", hue='region', ci=None);

# One time-series line per country
for country_series in df9a:
    plt.plot(years, country_series)

# Cross-country distribution at the start and at the end of the period
sns.kdeplot(df9['1990'], label="1990")
sns.kdeplot(df9['2014'], label="2014")
plt.legend()
plt.xlabel('Log human capital per worker');

# Standardize each observation by its cross-sectional mean: every value is
# divided by the mean of its year (column) across countries.
rel_df9a = df9a / df9a.mean(axis=0)

# df9a was sliced from columns '1990':'2014', so 1990 is column 0 and 2014 is
# the last column. (The original used column 1, i.e. 1991, under a "1990" label.)
sns.kdeplot(rel_df9a[:, 0], label="1990")
sns.kdeplot(rel_df9a[:, -1], label="2014")
plt.legend()
plt.xlabel('Relative (log) human capital per worker');

# Distributional mobility and convergence
# x and y are passed as arrays, so no `data=` argument is needed; `fill` is a
# boolean flag.
g = sns.jointplot(x=rel_df9a[:, 0], y=rel_df9a[:, -1], kind="kde", fill=True,
                  height=6, space=0, levels=5, ratio=3)
g.plot_joint(sns.regplot, scatter=False, truncate=False, color="b",
             line_kws={"lw": 4})
# Draw the line y = x over the range covered by both axes
x0, x1 = g.ax_joint.get_xlim()
y0, y1 = g.ax_joint.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
# Color and line style are given once each; the original combined the '-r'
# format string (solid) with linestyle='--', which is contradictory.
g.ax_joint.plot(lims, lims, color='r', linestyle='--')
g.set_axis_labels("Relative (log) human capital in 1990",
                  "Relative (log) human capital in 2014")
g.ax_marg_x.set_xlim(0, 1.75)
g.ax_marg_y.set_ylim(0, 1.75);

Total factor productivity

# Year columns 1990..2014 of the wide panel as a 2-D array
# (rows: countries, columns: years)
df10a = df10.loc[:, '1990':'2014'].values
# np.arange excludes its stop value, hence 2015 for data ending in 2014
years = np.arange(1990, 2015)

# Sigma convergence analysis: cross-country standard deviation for each year.
# Computed on the year-only array df10a, as in the other sections; the
# original called .std on the DataFrame df10, which also holds the
# non-year columns country, region and hi1990.
sigma_df10a = df10a.std(axis=0)
plt.plot(years, sigma_df10a)
plt.title("Sigma Convergence in (log) total factor productivity")
plt.ylabel('Stand. Dev');

# Beta convergence analysis
# Growth rate between 1990 and 2014 (log approximation method: difference of logs).
# Both terms come from df10 (TFP); the original subtracted df7['1990'], the
# 1990 labor productivity column, from the 2014 TFP column.
df10['g1990_2014'] = df10['2014'] - df10['1990']
sns.lmplot(x="1990", y="g1990_2014", data=df10);

# OLS regression of growth on the initial (1990) level, with an intercept
y = df10['g1990_2014']
X = df10['1990']
X_withconst = sm.add_constant(X)
OLS = sm.OLS(y, X_withconst).fit()
print(OLS.summary())

# Same growth-vs-initial-level relationship, fitted separately for each region
sns.lmplot(x="1990", y="g1990_2014", hue='region', ci=None, data=df10);

# One time-series line per country
for row in df10a:
    plt.plot(years, row)

# Cross-country distribution at the start and at the end of the period
sns.kdeplot(df10['1990'], label="1990")
sns.kdeplot(df10['2014'], label="2014")
plt.legend()
plt.xlabel('Log total factor productivity');

# Standardize each observation by its cross-sectional mean: every value is
# divided by the mean of its year (column) across countries.
rel_df10a = df10a / df10a.mean(axis=0)

# df10a was sliced from columns '1990':'2014', so 1990 is column 0 and 2014 is
# the last column. (The original used column 1, i.e. 1991, under a "1990" label.)
sns.kdeplot(rel_df10a[:, 0], label="1990")
sns.kdeplot(rel_df10a[:, -1], label="2014")
plt.legend()
plt.xlabel('Relative (log) total factor productivity');

# Distributional mobility and convergence
# x and y are passed as arrays, so no `data=` argument is needed; `fill` is a
# boolean flag.
g = sns.jointplot(x=rel_df10a[:, 0], y=rel_df10a[:, -1], kind="kde", fill=True,
                  height=6, space=0, levels=5, ratio=3)
g.plot_joint(sns.regplot, scatter=False, truncate=False, color="b",
             line_kws={"lw": 4})
# Draw the line y = x over the range covered by both axes
x0, x1 = g.ax_joint.get_xlim()
y0, y1 = g.ax_joint.get_ylim()
lims = [max(x0, y0), min(x1, y1)]
# Color and line style are given once each; the original combined the '-r'
# format string (solid) with linestyle='--', which is contradictory.
g.ax_joint.plot(lims, lims, color='r', linestyle='--')
g.set_axis_labels("Relative (log) TFP in 1990", "Relative (log) TFP in 2014")
g.ax_marg_x.set_xlim(0.6, 1.4)
g.ax_marg_y.set_ylim(0.6, 1.4);

df: Pooled cross-section