Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
#chart_studio.tools.set_credentials_file(username='econdata777', api_key='ADDhere')
import statsmodels.api as sm
import statsmodels.formula.api as smf
import warnings
#warnings.filterwarnings('ignore')
Import data
# Load the main dataset (pwt_with_country_identifiers.csv) into a DataFrame.
df1 = pd.read_csv(
    filepath_or_buffer="/work/pwt_with_country_identifiers/pwt_with_country_identifiers.csv"
)
# Display the raw table, then its column labels.
df1
df1.columns
# Optional inspection helpers, left disabled:
#df1.dtypes
#df1['CountryName_pwt100'].unique()
Dataset definitions
# Read the variable descriptions that accompany the dataset and display them.
df1_def = pd.read_csv(
    filepath_or_buffer="/work/pwt_with_country_identifiers/data-descriptions.csv"
)
df1_def
Construct variables
# Alpha parameter assumed by the decomposition below (0.33, as in the original
# analysis). Kept in one place so the weight on the capital-output ratio and
# the residual stay consistent if the assumption is changed.
ALPHA = 0.33

# Log of real GDP per worker
df1['y'] = np.log(df1['rgdpo'] / df1['emp'])
# Log of the adjusted physical capital to output ratio, weighted by alpha / (1 - alpha)
df1['k'] = (ALPHA / (1 - ALPHA)) * np.log(df1['cn'] / df1['cgdpo'])
# Log of human capital per worker
df1['h'] = np.log(df1['hc'])
# Log of aggregate efficiency (total factor productivity): what remains of y
# after subtracting the physical- and human-capital terms.
df1['a'] = df1['y'] - df1['k'] - df1['h']
Prepare data
Select
#df1.y
# Columns carried into the working table: identifiers, grouping variables,
# the constructed series (y, k, h, a) and a few additional variables.
analysis_columns = [
    'countryID', 'POLY_IDcountry', 'iso3', 'CountryName',
    'CountryName_pwt100', 'SubContinent',
    'year', 'y', 'k', 'h', 'a',
    'pop', 'incomegroup', 'n', 'i', 'o',
]
# Keep only those columns and round numeric values to two decimals.
df2 = df1.loc[:, analysis_columns].round(2)
df2
Sort
# Cross-section for 2014. The ranking below used `df_2014` without it ever
# being defined, which raised a NameError; derive it from df2 here.
# NOTE(review): assumes `year` is stored as a number - confirm against the CSV.
df_2014 = df2.query("year == 2014")

# Western European rows of that cross-section, sorted by y and then by
# country name, both in descending order.
df_2014.query("SubContinent == 'Western Europe'").sort_values(
    by=["y", "CountryName_pwt100"], ascending=False
)

# All years for Western Europe; used by the regional line plot.
df_WestEuro = df2.query("SubContinent == 'Western Europe'")
df_WestEuro
Visualize data
Strip plot
# Animated strip plot of y by SubContinent, one frame per year.
# hover_data is a list rather than a set: a set has no defined order, so the
# order of the hover fields could differ from run to run.
px.strip(
    df2,
    x='y',
    y='SubContinent',
    hover_name='CountryName_pwt100',
    animation_frame='year',
    hover_data=['y', 'k', 'h', 'a'],
    labels=dict(
        y='Log of labor productivity',
        k="Log of physical capital",
        h="Log of human capital",
        a='Log of aggregate efficiency',
        SubContinent="Continent",
    ),
    range_x=[6, 14],
    color='SubContinent',
    title='Evolution of labor productivity (by region)',
)
Line plots
# One line per country: y over time for every row of df2.
px.line(
    df2,
    color="CountryName_pwt100",
    x="year",
    y="y",
    labels={"y": "Log of labor productivity"},
)
# y over time for the Western Europe subset, one line per country.
# hover_data is a list rather than a set so the hover fields keep a fixed order.
px.line(
    df_WestEuro,
    x="year",
    y="y",
    color="CountryName_pwt100",
    hover_data=['y', 'k', 'h', 'a'],
    labels=dict(
        y='Log of labor productivity',
        k="Log of physical capital",
        h="Log of human capital",
        a='Log of aggregate efficiency',
    ),
    title='Evolution of GDP per worker (countries in Western Europe)',
)
# y over time for every country, faceted by income group (two panels per row).
# hover_data is a list rather than a set so the hover fields keep a fixed order.
px.line(
    df2,
    x="year",
    y="y",
    color="CountryName_pwt100",
    facet_col="incomegroup",
    facet_col_wrap=2,
    height=800,
    hover_data=['y', 'k', 'h', 'a'],
    labels=dict(
        y='Log of labor productivity',
        k="Log of physical capital",
        h="Log of human capital",
        a='Log of aggregate efficiency',
        incomegroup='Income group',
    ),
    title='Evolution of GDP per worker (by income group)',
)
Maps
# Animated world map: countries are located by their iso3 code and coloured
# by y, with one frame per year.
px.choropleth(
    df2,
    title='Evolution and spatial distribution of labor productivity',
    projection="natural earth",
    color_continuous_scale=px.colors.sequential.Plasma,
    animation_frame="year",
    hover_name="CountryName_pwt100",
    color="y",
    locations="iso3",
)