Income, Technological Advances and the Convergence Hypothesis in the Asia-Pacific
Introduction
Methodology
Development accounting
(Absolute) Beta convergence
Data
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns; sns.set_theme(color_codes=True)
import plotly.express as px
import plotly.graph_objects as go
#chart_studio.tools.set_credentials_file(username='econdata777', api_key='ADDhere')
import statsmodels.api as sm
import statsmodels.formula.api as smf
!pip install stargazer
from stargazer.stargazer import Stargazer, LineLocation
#from spreg import OLS
#from spreg import MoranRes
#from spreg import ML_Lag
#from spreg import ML_Error
import warnings
#warnings.filterwarnings('ignore')
# Load the raw country-year panel from CSV.
# NOTE(review): the file name suggests Penn World Table 10.0 plus country
# identifiers -- confirm against the data source.
df00 = pd.read_csv("/work/data/PWT100+countryIDs.csv")
# Bare expressions below only display the frame, its column names and its
# dtypes when run interactively; they have no effect on the data.
df00
df00.columns
df00.dtypes
# Import dataset descriptions
# (a companion CSV; displayed for reference only and not used further in the
# code visible here)
df00_def = pd.read_csv("/work/data/PWT100+countryIDs-descriptions.csv")
df00_def
# ---------------------------------------------------------------------------
# Build the analysis sample: derive the accounting variables, keep the columns
# of interest, restrict to 1980-2019, drop incomplete rows and keep only the
# East Asia & Pacific sub-region.
# ---------------------------------------------------------------------------

# First and last year of the study period (both inclusive).
START_YEAR = 1980
END_YEAR = 2019

# Log of real GDP per worker (labor productivity): ln(rgdpo / emp)
df00['ln_lp'] = np.log(df00['rgdpo'] / df00['emp'])
# Log of the ratio of physical capital to output: ln(cn / cgdpo)
df00['ln_k'] = np.log(df00['cn'] / df00['cgdpo'])
# Human capital per worker, copied from the human capital index `hc`.
# NOTE(review): despite the `ln_` prefix, NO logarithm is applied here. The
# column is kept as-is because downstream plots/regressions use these values;
# confirm whether np.log(df00['hc']) was intended.
df00['ln_h'] = df00['hc']
# Aggregate efficiency, copied from the TFP column `cwtfp`.
# NOTE(review): as above, no logarithm is applied even though the name and the
# plot labels say "log"; later axis ranges assume the un-logged values.
df00['ln_A'] = df00['cwtfp']

# Subset the sample to identifiers, groupings and the derived variables
df0 = df00[['CountryCode', 'CountryName', 'year', 'SubRegion_PWT100', 'SubContinent', 'Continent',
            'incomegroup', 'nonoil', 'intermediate', 'OECD', 'pop', 'ln_lp', 'ln_k', 'ln_h', 'ln_A']]

# Select the time period START_YEAR..END_YEAR.
# np.arange excludes its stop value, hence END_YEAR + 1.
df1 = df0[df0['year'].isin(np.arange(START_YEAR, END_YEAR + 1))]

# Eliminate rows with a missing value in any column.
# The defaults (axis=0, how='any') are exactly what is wanted; passing both
# `how` and `thresh` explicitly raises TypeError on pandas >= 2.0.
df2 = df1.dropna()

# Focus on countries in East Asia & Pacific
df = df2[df2["SubRegion_PWT100"].isin(['East Asia & Pacific'])]

# Reset data index so rows are numbered 0..n-1 after filtering
df = df.reset_index(drop=True)
df
df.columns

# Save the final-studied data to the local path
df.to_csv("/work/data/AS&P.csv")
Descriptive statistics
# Summary statistics for the whole panel, rounded to two decimals.
df.describe().round(2)
# Same summary restricted to the first year of the sample ...
df[df['year'] == 1980].describe().round(2)
# ... and to the last year of the sample.
df[df['year'] == 2019].describe().round(2)
Exploratory data analysis
Labor productivity:
# Time series of ln_lp, one line per country.
px.line(
    data_frame=df,
    x='year',
    y='ln_lp',
    color='CountryName',
    hover_name='CountryName',
    hover_data=['pop', 'ln_lp'],
    labels={
        'ln_lp': 'Log of labor productivity',
        'pop': 'Population',
        'CountryName': 'Country',
    },
)
# Same series, split into one panel per income group (two panels per row).
px.line(
    data_frame=df,
    x='year',
    y='ln_lp',
    color='CountryName',
    hover_name='CountryName',
    facet_col='incomegroup',
    facet_col_wrap=2,
    width=900,
    height=600,
    labels={
        'ln_lp': 'Log of labor productivity',
        'incomegroup': 'Income group',
        'CountryName': 'Country',
    },
    title='Labor productivity across income groups',
)
TFP
# Time series of ln_A, one line per country.
px.line(
    data_frame=df,
    x='year',
    y='ln_A',
    color='CountryName',
    hover_name='CountryName',
    hover_data=['pop', 'ln_A'],
    labels={
        'ln_A': 'TFP in logs',
        'pop': 'Population',
        'CountryName': 'Country',
    },
)
# Same series, split into one panel per income group (two panels per row).
px.line(
    data_frame=df,
    x='year',
    y='ln_A',
    color='CountryName',
    hover_name='CountryName',
    facet_col='incomegroup',
    facet_col_wrap=2,
    width=900,
    height=600,
    labels={
        'ln_A': 'TFP in logs',
        'incomegroup': 'Income group',
        'CountryName': 'Country',
    },
    title='Total factor productivity across income groups',
)
Regression analysis
Global relationship: Labor productivity & TFP
# Animated scatter (one frame per year) of ln_lp against ln_A with a single
# OLS trendline fitted across all income groups.
px.scatter(
    data_frame=df,
    x='ln_A',
    y='ln_lp',
    color='incomegroup',
    hover_name='CountryName',
    range_x=[0.1, 1.4],
    range_y=[7, 14],
    trendline='ols',
    trendline_scope='overall',
    labels={
        'ln_A': 'TFP in logs',
        'ln_lp': 'Log of labor productivity',
        'incomegroup': 'Income group',
    },
    animation_frame='year',
)
# Pooled regression of ln_lp on ln_A.
x = df['ln_A']
y = df['ln_lp']

# Building blocks of the closed-form simple-regression estimator:
# sample means, sample variance of x and sample covariance of x and y.
x_bar = np.mean(x)
y_bar = np.mean(y)
var_x = np.var(x, ddof=1)
cov_xy = np.cov(x, y)[1, 0]

# Array interface: the intercept has to be added to the regressors explicitly.
x0 = sm.add_constant(df['ln_A'])
ols = sm.OLS(endog=y, exog=x0).fit()
print(ols.summary())

# Formula interface (intercept is implicit); rendered as a Stargazer table.
mod1 = smf.ols('ln_lp ~ ln_A', data=df).fit()
Stargazer([mod1])
Relationship by income groups
# Animated scatter (one frame per year) of ln_lp against ln_A; without
# trendline_scope, an OLS trendline is drawn per colour group (income group).
px.scatter(
    data_frame=df,
    x='ln_A',
    y='ln_lp',
    color='incomegroup',
    hover_name='CountryName',
    range_x=[0.1, 1.4],
    range_y=[7, 14],
    trendline='ols',
    labels={
        'ln_A': 'TFP in logs',
        'ln_lp': 'Log of labor productivity',
        'incomegroup': 'Income group',
    },
    animation_frame='year',
)
def _ols_lp_on_tfp(sample):
    """Fit ln_lp on a constant and ln_A for the given sub-sample.

    Returns the response series, the design matrix (with constant) and the
    fitted statsmodels results object, in that order.
    """
    response = sample['ln_lp']
    design = sm.add_constant(sample['ln_A'])
    return response, design, sm.OLS(response, design).fit()


# High-income countries
hi = df.query('incomegroup=="High income"')
y, x1, ols_hi = _ols_lp_on_tfp(hi)
print(ols_hi.summary())

# Upper-middle-income countries
upmid = df.query('incomegroup=="Upper middle income"')
y, x2, ols_upmid = _ols_lp_on_tfp(upmid)
print(ols_upmid.summary())

# Lower-middle-income countries
lowmid = df.query('incomegroup=="Lower middle income"')
y, x3, ols_lowmid = _ols_lp_on_tfp(lowmid)
print(ols_lowmid.summary())

# Same four specifications through the formula interface, collected side by
# side (pooled sample first, then each income group) in one Stargazer table.
mod = smf.ols('ln_lp ~ ln_A', data=df).fit()
mod_hi = smf.ols('ln_lp ~ ln_A', data=hi).fit()
mod_upmid = smf.ols('ln_lp ~ ln_A ', data=upmid).fit()
mod_lowmid = smf.ols('ln_lp ~ ln_A', data=lowmid).fit()
Stargazer([mod, mod_hi, mod_upmid, mod_lowmid])
Relationship with controls
# Pooled regression of ln_lp on ln_A, controlling for ln_k and ln_h.
y = df['ln_lp']
x = sm.add_constant(df[['ln_A', 'ln_k', 'ln_h']])
ols = sm.OLS(endog=y, exog=x).fit()
print(ols.summary())

# Nested specifications via the formula interface: TFP only, then adding each
# control separately, then both; shown together in one Stargazer table.
mod1 = smf.ols('ln_lp ~ ln_A', data=df).fit()
mod2 = smf.ols('ln_lp ~ ln_A + ln_k ', data=df).fit()
mod3 = smf.ols('ln_lp ~ ln_A + ln_h ', data=df).fit()
mod4 = smf.ols('ln_lp ~ ln_A + ln_k + ln_h ', data=df).fit()
Stargazer([mod1, mod2, mod3, mod4])
# Pivot the panel from long form (one row per country-year) to wide form (one
# row per country, one column per year) so that first- and last-year values
# can be compared directly in the convergence regressions below.

# For labor productivity: this table is required by the growth computation
# below; without it `df_wide_lp` is undefined and the block raises NameError.
df_wide_lp = df.pivot_table(index=['CountryName'], columns=['year'], values='ln_lp')
# Reset data index so that CountryName becomes a regular column
df_wide_lp = df_wide_lp.reset_index(drop=False)
df_wide_lp

# For TFP: Pivot panel data from long form to wide form
df_wide_A = df.pivot_table(index=['CountryName'], columns=['year'], values='ln_A')
# Reset data index so that CountryName becomes a regular column
df_wide_A = df_wide_A.reset_index(drop=False)
df_wide_A

# Generate the respective growth of labor productivity & TFP over 1980~2019,
# measured as the 2019 value minus the 1980 value and rounded to 4 decimals.
# Countries without an observation in either year get NaN.
df_wide_lp['growth_lp'] = (df_wide_lp[2019] - df_wide_lp[1980]).round(4)
df_wide_A['growth_A'] = (df_wide_A[2019] - df_wide_A[1980]).round(4)
df_wide_lp.columns

# Persist both wide tables
df_wide_lp.to_csv("/work/data/AS&P-LPConv.csv")
df_wide_A.to_csv("/work/data/AS&P-AConv.csv")

# Summary statistics of the wide tables, rounded to two decimals
df_wide_lp.describe().round(2)
df_wide_A.describe().round(2)
# Beta convergence in labor productivity: growth over the period plotted
# against the 1980 value, with one overall OLS trendline and rug marginals.
px.scatter(
    data_frame=df_wide_lp,
    x=1980,
    y="growth_lp",
    hover_name="CountryName",
    trendline="ols",
    trendline_scope="overall",
    marginal_x="rug",
    marginal_y="rug",
    labels={
        "1980": "Log labour productivity in 1980",
        "growth_lp": "Growth rate from 1980 to 2019",
    },
)

# Matching regression: growth on a constant and the 1980 value.
y = df_wide_lp['growth_lp']
x = df_wide_lp[1980]
x_withconst = sm.add_constant(x)
ols = sm.OLS(endog=y, exog=x_withconst).fit()
print(ols.summary())
# Beta convergence in TFP: growth over the period plotted against the 1980
# value, with one overall OLS trendline and rug marginals.
# The axis labels are corrected here: the x label previously repeated
# "in 1980", and the y label named 2009-2019 although growth_A is computed as
# the 2019 value minus the 1980 value.
px.scatter(
    df_wide_A,
    x=1980,
    y="growth_A",
    hover_name="CountryName",
    trendline="ols",
    trendline_scope="overall",
    marginal_x="rug",
    marginal_y="rug",
    labels={"1980": "Log TFP in 1980",
            "growth_A": "Growth rate from 1980 to 2019"}
)

# Matching regression: growth on a constant and the 1980 value.
y = df_wide_A['growth_A']
x = df_wide_A[1980]
x_withconst = sm.add_constant(x)
# The result name `OLS` is kept unchanged in case later cells reference it.
OLS = sm.OLS(y, x_withconst).fit()
print(OLS.summary())