New Expansion of Plotly Express — based on Penn World Table version 10.0

Setup

# Standard library.
import warnings
# Warning suppression is intentionally left disabled.
#warnings.filterwarnings('ignore')

# Numerical / data-frame stack.
import numpy as np
import pandas as pd

# Plotting libraries.
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
# Chart Studio credentials are intentionally left disabled; fill in the API
# key and re-enable only if charts need to be published.
#chart_studio.tools.set_credentials_file(username='econdata777', api_key='ADDhere')

# Statistical modelling.
import statsmodels.api as sm
import statsmodels.formula.api as smf

Import data

# Read the PWT 10.0 CSV (with country identifiers) into the working frame.
df = pd.read_csv(filepath_or_buffer="/work/data/PWT100+countryIDs.csv")

# Display the full table.
df

# Column labels available in the file.
df.columns

# Data type of each column.
df.dtypes

# Distinct country names present in the data.
df.CountryName.unique()

Describe variables

# Import dataset descriptions: a companion CSV that sits next to the main
# data file and is displayed here for reference.
df_def = pd.read_csv("/work/data/PWT100+countryIDs-descriptions.csv")
df_def

Transform data & Summarize data

Create key variables & Subset a sample

# Generate log of real GDP per worker (labor productivity)
df['lp'] = np.log(df['rgdpo'] / df['emp'])

# Generate log of the adjusted ratio of physical capital to output.
# The weight is alpha / (1 - alpha), assuming alpha = 1/3.
df['k'] = ((1/3)/(2/3)) * np.log(df['cn'] / df['cgdpo'])

# Generate log of human capital per worker
df['h'] = np.log(df['hc'])

# Generate log of aggregate efficiency (total factor productivity),
# computed as the residual of lp after removing k and h.
df['a'] = df['lp'] - df['k'] - df['h']

# Select time period (1980-2019).
# np.arange excludes its stop value, so pass n + 1 to include year n.
df0 = df[df['year'].isin(np.arange(1980, 2020))]

# Subset the sample: identifiers, groupings, population and the four
# log variables created above.
df1 = df0[['CountryCode', 'CountryName', 'year',
           'SubRegion_PWT100', 'SubContinent', 'Continent', 'incomegroup',
           'nonoil', 'intermediate', 'OECD',
           'pop', 'lp', 'k', 'h', 'a']]

# Eliminate rows with any missing value. The defaults (axis=0, how='any',
# inplace=False) are exactly what is wanted; `how` and `thresh` must not be
# passed together because pandas >= 1.5 raises TypeError for that combination.
df2 = df1.dropna()
df2

Descriptive statistics

# Summary statistics (rounded to two decimals) for the 2014 cross-section.
# Rows are filtered first, then the numeric columns of interest are selected.
(
    df2.query("year == 2014")[
        ["year", "nonoil", "intermediate", "OECD", "pop", "lp", "k", "h", "a"]
    ]
    .describe()
    .round(2)
)

Group data

# Mean, standard deviation, minimum and maximum of lp within each
# PWT sub-region, pooled over all years in df2.
df2.groupby("SubRegion_PWT100")["lp"].agg(["mean", "std", "min", "max"]).round(2)

# Same statistics, grouped by sub-continent instead.
df2.groupby("SubContinent")["lp"].agg(["mean", "std", "min", "max"]).round(2)

Visualize data

Strip plot

# Strip plot of lp with one row per PWT sub-region; one animation frame per year.
px.strip(
    df2,
    x="lp",
    y="SubRegion_PWT100",
    hover_name="CountryName",
    hover_data=["pop", "lp", "k", "h", "a"],
    color="SubRegion_PWT100",
    labels={
        "lp": "Labor productivity (in logs)",
        "SubRegion_PWT100": "Subregion",
    },
    animation_frame="year",
)

# Same plot, with rows and colors given by income group.
px.strip(
    df2,
    x="lp",
    y="incomegroup",
    hover_name="CountryName",
    hover_data=["pop", "lp", "k", "h", "a"],
    color="incomegroup",
    labels={
        "lp": "Labor productivity (in logs)",
        "incomegroup": "Income groups",
    },
    animation_frame="year",
)

Line plots

# One line per country: lp over time for the whole sample.
px.line(
    df2,
    x="year",
    y="lp",
    color="CountryName",
    hover_name="CountryName",
    hover_data=["Continent", "pop", "lp"],
    labels={
        "lp": "Labor productivity (in logs)",
        "CountryName": "Country",
    },
)

# Same series, split into one panel per PWT sub-region (two panels per row).
px.line(
    df2,
    x="year",
    y="lp",
    color="CountryName",
    hover_name="CountryName",
    facet_col="SubRegion_PWT100",
    facet_col_wrap=2,
    width=900,
    height=1200,
    labels={
        "lp": "Log LP",
        "SubRegion_PWT100": "SR",
        "CountryName": "Country",
    },
    title="Labor productivity across sub-regions (defined by PWT 10.0)",
)

# Same series, split into one panel per income group (two panels per row).
px.line(
    df2,
    x="year",
    y="lp",
    color="CountryName",
    hover_name="CountryName",
    facet_col="incomegroup",
    facet_col_wrap=2,
    width=1000,
    height=1000,
    labels={
        "lp": "Log LP",
        "incomegroup": "IC group",
        "CountryName": "Country",
    },
    title="Labor productivity across income groups (defined by JP (2020))",
)

# Close-up of a single sub-region: Latin America & Caribbean.
px.line(
    df2.query("SubRegion_PWT100 == 'Latin America & Caribbean'"),
    x="year",
    y="lp",
    color="CountryName",
    hover_name="CountryName",
    labels={
        "lp": "Labor productivity (in logs)",
        "CountryName": "Country",
    },
    title="Weird performance (?): labor productivity in Latin America & Caribbean",
)

Scatter plots

Regression

# lp against aggregate efficiency, animated by year, with a single OLS
# trendline fitted across all sub-regions and fixed axis ranges.
px.scatter(
    df2,
    x="a",
    y="lp",
    color="SubRegion_PWT100",
    hover_name="CountryName",
    range_x=[3.5, 11.5],
    range_y=[6, 13],
    trendline="ols",
    trendline_scope="overall",
    labels={
        "a": "Aggregate efficiency (in logs)",
        "lp": "Labor productivity (in logs)",
        "SubRegion_PWT100": "Sub-region",
    },
    animation_frame="year",
)

# Bare-bones variant: default trendline scope, default axis ranges and labels.
px.scatter(
    df2,
    x="a",
    y="lp",
    color="SubRegion_PWT100",
    animation_frame="year",
    hover_name="CountryName",
    trendline="ols",
)

Margins

# 1980 cross-section: scatter with OLS trendlines and box plots on both margins.
px.scatter(
    df2.query("year == 1980"),
    x="a",
    y="lp",
    color="SubRegion_PWT100",
    hover_name="CountryName",
    trendline="ols",
    marginal_x="box",
    marginal_y="box",
)

# 2019 cross-section, same layout.
px.scatter(
    df2.query("year == 2019"),
    x="a",
    y="lp",
    color="SubRegion_PWT100",
    hover_name="CountryName",
    trendline="ols",
    marginal_x="box",
    marginal_y="box",
)

Animated

# Bubble chart animated by year: marker size follows `pop`, and each country
# is tracked across frames through animation_group.
px.scatter(
    df2,
    x="a",
    y="lp",
    animation_frame="year",
    animation_group="CountryName",
    size="pop",
    size_max=60,
    color="SubRegion_PWT100",
    hover_name="CountryName",
    range_x=[3.5, 11.5],
    range_y=[6, 13],
    labels={
        "a": "Aggregate efficiency (in logs)",
        "lp": "Labor productivity (in logs)",
        "SubRegion_PWT100": "Sub-region",
    },
)

Facets

# Animated bubble chart with OLS trendlines, one panel per value of `nonoil`.
px.scatter(
    df2,
    x="a",
    y="lp",
    animation_frame="year",
    animation_group="CountryName",
    facet_col="nonoil",
    size="pop",
    size_max=60,
    color="SubRegion_PWT100",
    hover_name="CountryName",
    trendline="ols",
    range_x=[3.5, 11.5],
    range_y=[6, 13],
    labels={
        "a": "Aggregate efficiency (in logs)",
        "lp": "Labor productivity (in logs)",
        "SubRegion_PWT100": "Sub-region",
    },
)

# Same chart, one panel per value of `intermediate`.
px.scatter(
    df2,
    x="a",
    y="lp",
    animation_frame="year",
    animation_group="CountryName",
    facet_col="intermediate",
    size="pop",
    size_max=60,
    color="SubRegion_PWT100",
    hover_name="CountryName",
    trendline="ols",
    range_x=[3.5, 11.5],
    range_y=[6, 13],
    labels={
        "a": "Aggregate efficiency (in logs)",
        "lp": "Labor productivity (in logs)",
        "SubRegion_PWT100": "Sub-region",
    },
)

# Same chart, one panel per value of `OECD`.
px.scatter(
    df2,
    x="a",
    y="lp",
    animation_frame="year",
    animation_group="CountryName",
    facet_col="OECD",
    size="pop",
    size_max=60,
    color="SubRegion_PWT100",
    hover_name="CountryName",
    trendline="ols",
    range_x=[3.5, 11.5],
    range_y=[6, 13],
    labels={
        "a": "Aggregate efficiency (in logs)",
        "lp": "Labor productivity (in logs)",
        "SubRegion_PWT100": "Sub-region",
    },
)

# Rebuild the column subset of df0 (identifiers, groupings, population and
# the log variables lp, k, h, a). Rows with missing values are NOT dropped here.
df1 = df0.loc[
    :,
    [
        "CountryCode", "CountryName", "year",
        "SubRegion_PWT100", "SubContinent", "Continent", "incomegroup",
        "nonoil", "intermediate", "OECD",
        "pop", "lp", "k", "h", "a",
    ],
]

# World map of lp for the 2019 cross-section.
# NOTE(review): `locations` is matched with Plotly's default location mode,
# so CountryCode is presumably ISO-3 -- confirm against the data file.
fig = px.choropleth(
    df2.query("year == 2019"),
    locations='CountryCode',
    color='lp',
    hover_name='CountryName',
    color_continuous_scale=px.colors.sequential.Plasma,
    projection='natural earth',
    labels=dict(CountryCode='ISO country code', lp='Labor productivity'),
)
fig.show()

# Same map for the full sample, with one animation frame per year.
fig = px.choropleth(
    df2,
    locations='CountryCode',
    color='lp',
    animation_frame="year",
    hover_name='CountryName',
    color_continuous_scale=px.colors.sequential.Plasma,
    projection='natural earth',
    labels=dict(CountryCode='ISO country code', lp='Labor productivity'),
)
fig.show()

New Expansion of Plotly Express — based on Penn World Table version 10.0