Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
#chart_studio.tools.set_credentials_file(username='econdata777', api_key='ADDhere')
import statsmodels.api as sm
import statsmodels.formula.api as smf
import warnings
#warnings.filterwarnings('ignore')
Import data
# Read the dataset CSV from the project's GitHub repository into `df1`.
# (Alternative kept for reference: pass parse_dates=["year"] to read_csv
# to have the `year` column parsed as dates.)
df1 = pd.read_csv(
    "https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv"
)

# Show the frame (df1.head() gives a shorter preview), then its column labels.
df1
df1.columns
Dataset definitions
# Load the variable definitions that accompany the dataset into `df1_def`.
# The path uses a single slash after "master", matching the data URL used
# for `df1`.
df1_def = pd.read_csv(
    "https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat-definitions.csv"
)
df1_def
Descriptive statistics
# Summary statistics of df1, rounded to two decimal places.
(
    df1
    .describe()
    .round(2)
)
Prepare data
# Working subset of df1: the country/year identifiers plus the GDPpc, h, ky
# and TFP columns.
# .copy() makes df2 an independent frame, so later assignments to df2 do not
# trigger pandas' SettingWithCopyWarning.
df2 = df1[["country", "year", "GDPpc", "h", "ky", "TFP"]].copy()
df2
Data Visualization
Strip plot
# Animated strip plot of `s` by region: one frame per year, points colored by
# region, with the country name plus `h` and `ky` shown on hover.
px.strip(
    df1,
    x="s",
    y="region",
    color="region",
    animation_frame="year",
    hover_name="country",
    hover_data=["h", "ky"],
)
# Animated strip plot of `GDPpc` by region: one frame per year, points colored
# by region, with the country name plus `h` and `ky` shown on hover.
px.strip(
    df1,
    x="GDPpc",
    y="region",
    color="region",
    animation_frame="year",
    hover_name="country",
    hover_data=["h", "ky"],
)
Line plots
# One `log_lp` line per country over the years, split into one facet per
# region (two facets per row, 800 px tall overall).
px.line(
    df1,
    x="year",
    y="log_lp",
    color="country",
    facet_col="region",
    facet_col_wrap=2,
    height=800,
)
Histogram: Cross-country labor productivity by income group
# Animated histogram of `log_lp`, colored by the `hi1990` grouping, with a
# marginal box plot; one animation frame per year.
px.histogram(
    df1,
    x="log_lp",
    color="hi1990",
    marginal="box",
    animation_frame="year",
    hover_name="country",
)
Box plot of mean years of schooling in high- and low-income countries
# Animated box plot of `s`, one box per `hi1990` group, one frame per year.
px.box(
    df1,
    x="s",
    color="hi1990",
    animation_frame="year",
    hover_name="country",
)
Violin plot of labor productivity in high- and low-income countries
# Animated violin plot of `log_lp` for each `hi1990` group. Every observation
# is drawn as a point, a box plot is embedded in each violin, and the x-axis
# is fixed to [6, 12.5] so the frames stay comparable across years.
px.violin(
    df1,
    x="log_lp",
    y="hi1990",
    color="hi1990",
    box=True,
    points="all",
    range_x=[6, 12.5],
    animation_frame="year",
    hover_name="country",
    title="Evolution of Labor Productivity of High Income Versus Low Income Countries",
)
Treemap of GDP per capita (in logs) and total factor productivity by region, 2014
# Treemap for the year 2014: countries nested inside regions, tile size given
# by `log_GDPpc` and tile color by `TFP`.
px.treemap(
    df1.query("year == 2014"),
    path=["region", "country"],
    values="log_GDPpc",
    color="TFP",
    hover_name="country",
)
Scatter plots
Simple
# Animated scatter of `log_lp` against `log_h`, colored by region, one frame
# per year.
px.scatter(
    df1,
    x="log_h",
    y="log_lp",
    color="region",
    animation_frame="year",
    hover_name="country",
)
Regression
# Scatter of `log_lp` against `log_h` for 2014, colored by `hi1990`, with a
# single OLS trendline fitted over all points (trendline_scope="overall").
px.scatter(
    df1.query("year == 2014"),
    x="log_h",
    y="log_lp",
    color="hi1990",
    trendline="ols",
    trendline_scope="overall",
    hover_name="country",
    hover_data=["region"],
)
Regression by groups
Margins
# Scatter of `log_lp` against `log_h` for 1990, colored by `hi1990`, with OLS
# trendlines and marginal box plots on both axes.
px.scatter(
    df1.query("year == 1990"),
    x="log_h",
    y="log_lp",
    color="hi1990",
    trendline="ols",
    marginal_x="box",
    marginal_y="box",
    hover_name="country",
)
# Bubble chart for 1990: `log_lp` against `log_h`, colored by region, with
# marker size taken from `pop` (capped at 60) and readable axis/legend labels.
px.scatter(
    df1.query("year == 1990"),
    x="log_h",
    y="log_lp",
    color="region",
    size="pop",
    size_max=60,
    hover_name="country",
    labels={
        "log_h": "Human capital index in 1990 (in logs)",
        "log_lp": "Labor productivity in 1990 (in logs)",
        "region": "Continent",
        "pop": "Population",
    },
)
Customized: Labor productivity versus human capital in 1990
# Customized bubble chart for 1990: `log_lp` against `log_h`, colored by
# region and sized by `pop`, with dotted reference lines at the 1990 means of
# both variables.

# Filter once; the same 1990 slice feeds the scatter and both mean lines.
df_1990 = df1.query("year == 1990")

fig = px.scatter(
    df_1990,
    x="log_h",
    y="log_lp",
    log_x=False,
    color="region",
    size="pop",
    size_max=60,
    hover_name="country",
    height=500,
    width=800,
    template="simple_white",
    color_discrete_sequence=px.colors.qualitative.G10,
    title="Year 1990",
    labels={
        "region": "Continent",
        "pop": "Population",
        "log_lp": "Labor productivity (in logs)",
        "log_h": "Human capital index (in logs)",
    },
)

# Horizontal, untitled legend anchored by its bottom-right corner just above
# the plotting area.
fig.update_layout(
    font_family="Rockwell",
    legend=dict(
        orientation="h",
        title="",
        y=1.1,
        x=1,
        xanchor="right",
        yanchor="bottom",
    ),
)

# Dotted reference lines at the sample means of each axis variable.
fig.add_hline(y=df_1990["log_lp"].mean(), line_width=1, line_dash="dot")
fig.add_vline(x=df_1990["log_h"].mean(), line_width=1, line_dash="dot")

fig.show()
# Animated world map: countries located by `isocode`, shaded by `GDPpc` on the
# Plasma color scale, one frame per year, drawn in the natural-earth projection.
px.choropleth(
    df1,
    locations="isocode",
    color="GDPpc",
    color_continuous_scale=px.colors.sequential.Plasma,
    projection="natural earth",
    animation_frame="year",
    hover_name="country",
)