Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
#chart_studio.tools.set_credentials_file(username='econdata777', api_key='ADDhere')
import statsmodels.api as sm
import statsmodels.formula.api as smf
import warnings
#warnings.filterwarnings('ignore')
Import data
# Load the Penn World Table extract (with country identifiers) and show it.
df1 = pd.read_csv(
    filepath_or_buffer="/work/pwt_with_country_identifiers/pwt_with_country_identifiers.csv"
)
df1
countryIDint64
1 - 183
POLY_IDcountryfloat64
2.0 - 256.0
12809
183
179
4759
68
139
1609
23
73
6509
93
13
6089
87
8
4619
66
102
11199
160
148
8049
115
219
1539
22
246
9659
138
178
# Show the column labels of the loaded table.
df1.columns
# Optional inspections, left disabled: column dtypes and the distinct
# values of the country-name column.
#df1.dtypes
#df1['CountryName_pwt100'].unique()
Dataset definitions
# Load the variable descriptions that accompany the dataset and show them.
df1_def = pd.read_csv(
    filepath_or_buffer="/work/pwt_with_country_identifiers/data-descriptions.csv"
)
df1_def
nameobject
countryID1.5%
POLY_IDcountry1.5%
65 others97%
varlabobject
Numeric country ID based on the alphabetical order of PWT1001.5%
61 others91%
Missing7.5%
0
countryID
Numeric country ID based on the alphabetical order of PWT100
1
POLY_IDcountry
nan
2
iso3
3-letter ISO country code
3
CountryName
nan
4
CountryName_pwt
Country name
5
SubContinent
nan
6
SubContinent_pwt100
Region
7
currency_unit
Currency unit
8
year
Year
9
rgdpe
Expenditure-side real GDP at chained PPPs (in mil. 2017US$)
Construct variables
# Development-accounting terms, all in logs; new columns are added to df1.

# y: log of real GDP per worker (rgdpo divided by emp)
df1['y'] = np.log(df1['rgdpo'].div(df1['emp']))

# k: log of the physical capital to output ratio (cn / cgdpo),
#    scaled by 0.33/0.67
df1['k'] = np.log(df1['cn'].div(df1['cgdpo'])).mul(0.33/0.67)

# h: log of human capital per worker
df1['h'] = np.log(df1['hc'])

# a: log of aggregate efficiency (total factor productivity, alpha = 0.33),
#    obtained as the residual y - k - h
df1['a'] = df1['y'].sub(df1['k']).sub(df1['h'])
Prepare data
Select
# Keep the identifier columns, the constructed log variables (y, k, h, a)
# and a few additional columns; every numeric value is rounded to 2 decimals.
df2 = df1.loc[
    :,
    [
        'countryID', 'POLY_IDcountry', 'iso3', 'CountryName',
        'CountryName_pwt100', 'SubContinent',
        'year', 'y', 'k', 'h', 'a',
        'pop', 'incomegroup', 'n', 'i', 'o',
    ],
].round(2)
df2
countryIDint64
1 - 183
POLY_IDcountryfloat64
2.0 - 256.0
0
1
38
12250
176
167
8610
124
15
4130
60
146
8540
123
111
4200
61
127
8470
122
151
4270
62
62
4060
59
149
8400
121
220
Sort
# Western European countries in 2014, ranked from highest to lowest log
# labor productivity (ties ordered by country name, also descending).
# The 2014 cross-section is filtered from df2 directly: no `df_2014` frame
# is defined in the visible part of this notebook, so referencing it raised
# a NameError when the cells were run top to bottom.
# NOTE(review): assumes the missing frame was df2 restricted to year 2014 - confirm.
df2.query("year == 2014 and SubContinent == 'Western Europe'").sort_values(
    by=["y", "CountryName_pwt100"], ascending=False
)
countryIDint64
POLY_IDcountryfloat64
11194
160
148
7064
101
145
1184
17
236
8324
119
41
694
10
150
4544
65
156
4824
69
231
# Subset of df2 restricted to rows whose SubContinent is 'Western Europe';
# reused by the Western Europe line plot.
df_WestEuro = df2.query(expr="SubContinent == 'Western Europe'")
df_WestEuro
Visualize data
Strip plot
# Animated strip plot: one point per country and year, log labor productivity
# on the x axis, grouped and colored by SubContinent.
px.strip(
    df2,
    x='y',
    y='SubContinent',
    hover_name='CountryName_pwt100',
    animation_frame='year',
    # A list (not a set) keeps the hover fields in a fixed y, k, h, a order;
    # set iteration order for strings can differ between kernel sessions.
    hover_data=['y', 'k', 'h', 'a'],
    labels=dict(
        y='Log of labor productivity',
        k="Log of physical capital",
        h="Log of human capital",
        a='Log of aggregate efficiency',
        SubContinent="Continent",
    ),
    # Fixed axis range so points stay comparable across animation frames.
    range_x=[6, 14],
    color='SubContinent',
    title='Evolution of labor productivity (by region)',
)
Line plots
# One line per country: log labor productivity (y) over time.
px.line(
    data_frame=df2,
    x="year",
    y="y",
    color="CountryName_pwt100",
    labels={"y": "Log of labor productivity"},
)
# Log labor productivity over time, one line per Western European country.
px.line(
    df_WestEuro,
    x="year",
    y="y",
    color="CountryName_pwt100",
    # A list (not a set) keeps the hover fields in a fixed y, k, h, a order;
    # set iteration order for strings can differ between kernel sessions.
    hover_data=['y', 'k', 'h', 'a'],
    labels=dict(
        y='Log of labor productivity',
        k="Log of physical capital",
        h="Log of human capital",
        a='Log of aggregate efficiency',
    ),
    title='Evolution of GDP per worker (countries in Western Europe)',
)
# Log labor productivity over time, one line per country, with one facet
# panel per income group (two panels per row).
px.line(
    df2,
    x="year",
    y="y",
    color="CountryName_pwt100",
    facet_col="incomegroup",
    facet_col_wrap=2,
    height=800,
    # A list (not a set) keeps the hover fields in a fixed y, k, h, a order;
    # set iteration order for strings can differ between kernel sessions.
    hover_data=['y', 'k', 'h', 'a'],
    labels=dict(
        y='Log of labor productivity',
        k="Log of physical capital",
        h="Log of human capital",
        a='Log of aggregate efficiency',
        incomegroup='Income group',
    ),
    title='Evolution of GDP per worker (by income group)',
)
Maps
# Animated world map of log labor productivity (y), one frame per year,
# with countries matched through their 3-letter ISO codes.
px.choropleth(
    df2,
    locations="iso3",
    color="y",
    hover_name="CountryName_pwt100",
    animation_frame="year",
    color_continuous_scale=px.colors.sequential.Plasma,
    # Pin the color scale to the full-sample range of y so that a given color
    # represents the same productivity level in every animation frame.
    range_color=[df2["y"].min(), df2["y"].max()],
    projection="natural earth",
    title='Evolution and spatial distribution of labor productivity',
)