Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
#chart_studio.tools.set_credentials_file(username='econdata777', api_key='ADDhere')
import statsmodels.api as sm
import statsmodels.formula.api as smf
import warnings
#warnings.filterwarnings('ignore')
Import data
# Load the country-year panel directly from the project's GitHub repository.
# Alternative: pass parse_dates=['year'] to read_csv if `year` should be
# parsed as dates instead of being kept as read.
data_url = "https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv"
df1 = pd.read_csv(data_url)
# Quick inspection: the frame itself, its column labels, the column dtypes,
# and the distinct country names.
df1
df1.columns
df1.dtypes
df1["country"].unique()
Dataset definitions
# Load the table of variable definitions that accompanies the dataset.
# The path uses a single slash after `master` (the original had `master//assets`)
# so it has the same canonical form as the main data URL.
df1_def = pd.read_csv("https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat-definitions.csv")
df1_def
Descriptive statistics
# Summary statistics from describe(), rounded to two decimal places.
df1.describe().round(decimals=2)
Select
# Select a single column by label.
df1["log_lp"]
# Select several columns at once by passing a list of labels.
selected_columns = ["country", "year", "log_lp", "s"]
df2 = df1[selected_columns]
df2
Query
# Cross-section for 2014: pick the columns of interest, then filter rows.
df_2014 = (
    df1[["country", "region", "hi1990", "year", "log_lp", "s"]]
    .query("year == 2014")
)
df_2014
# Time series for one country: same pattern, filtering on the country name.
df_vietnam = (
    df1[["country", "year", "log_lp", "s"]]
    .query("country == 'Vietnam'")
)
df_vietnam
Pivot_table
# Pivot the panel from long form to wide form for the variable `s`:
# one row per (country, region, hi1990) combination, one column per year.
df1_s_wide = df1.pivot_table(
    index=["country", "region", "hi1990"],
    columns="year",
    values="s",
).reset_index(drop=False)
# The year labels come from the `year` column; cast every column label to
# str so the identifier columns and the year columns share one label type.
df1_s_wide.columns = df1_s_wide.columns.astype(str)
df1_s_wide
df1_s_wide.describe().round(2)
# After reset_index the frame still contains the string identifier columns
# (country, region). numeric_only=True restricts the standard deviation to
# the numeric columns; without it, pandas >= 2.0 raises TypeError on the
# string columns instead of silently dropping them.
# NOTE(review): if hi1990 is numeric it is included in the plot, exactly as
# older pandas versions did -- confirm its dtype via df1.dtypes.
df1_s_wide.std(numeric_only=True).round(2).plot();
Visualize data
Strip plot
# One-dimensional strip plots of the 2014 cross-section, coloured by region;
# hovering over a point shows the country name.
px.strip(
    data_frame=df_2014,
    x="log_lp",
    color="region",
    hover_name="country",
)
px.strip(
    data_frame=df_2014,
    x="s",
    color="region",
    hover_name="country",
)
Line plots
# Single-country time series for Vietnam: log_lp first, then s.
vietnam_rows = df1.query("country == 'Vietnam'")
px.line(vietnam_rows, x="year", y="log_lp", color="country")
px.line(vietnam_rows, x="year", y="s", color="country")
# The same two series for New Zealand: s first, then log_lp.
new_zealand_rows = df1.query("country == 'New Zealand'")
px.line(new_zealand_rows, x="year", y="s", color="country")
px.line(new_zealand_rows, x="year", y="log_lp", color="country")
# Every country's `s` series, one panel per region, two panels per row.
px.line(
    df1,
    x="year",
    y="s",
    color="country",
    facet_col="region",
    facet_col_wrap=2,
    height=800,
)
# Compare three countries on the same axes. Comparing a column with a list
# inside query() keeps the rows whose country is any of the listed names.
three_country_rows = df1.query("country==['Vietnam','Thailand','Japan']")
px.line(data_frame=three_country_rows, x="year", y="s", color="country")
px.line(data_frame=three_country_rows, x="year", y="log_lp", color="country")
# All countries in the 'Asia' region, one line per country:
# log_lp first, then s.
asia_rows = df1.query("region=='Asia'")
px.line(data_frame=asia_rows, x="year", y="log_lp", color="country")
px.line(data_frame=asia_rows, x="year", y="s", color="country")
Treemap plot
# Treemaps of the 2014 cross-section: rectangles are nested region -> country,
# sized by `pop` and coloured by the variable of interest.
treemap_rows = df1.query("year == 2014")
px.treemap(
    data_frame=treemap_rows,
    path=["region", "country"],
    values="pop",
    color="log_lp",
    hover_name="country",
)
px.treemap(
    data_frame=treemap_rows,
    path=["region", "country"],
    values="pop",
    color="s",
    hover_name="country",
)
Sunburst plot
# Sunburst charts of the 2014 cross-section: inner ring is the region, outer
# ring the country; sectors are sized by `pop` and coloured by the variable.
sunburst_rows = df1.query("year == 2014")
px.sunburst(
    data_frame=sunburst_rows,
    path=["region", "country"],
    values="pop",
    color="log_lp",
    hover_name="country",
)
px.sunburst(
    data_frame=sunburst_rows,
    path=["region", "country"],
    values="pop",
    color="s",
    hover_name="country",
)
Scatter plots
Simple
# Scatter of s against log_lp for the 2014 cross-section, coloured by region.
# NOTE(review): the data are filtered to a single year, so animation_frame
# can only produce one frame here -- confirm whether the filter or the
# animation is the intended one.
px.scatter(
    data_frame=df1.query("year == 2014"),
    x="log_lp",
    y="s",
    color="region",
    hover_name="country",
    animation_frame="year",
)
Regression by groups
# Scatter of s against log_lp in 1990 with an OLS trend line; points are
# coloured by hi1990.
# NOTE(review): hi1990 is presumably a 1990 group indicator -- check the
# definitions table for its exact meaning.
px.scatter(
    data_frame=df1.query("year == 1990"),
    x="log_lp",
    y="s",
    color="hi1990",
    trendline="ols",
    hover_name="country",
    animation_frame="year",
)
Multivariate
# Bubble chart for 1990: position encodes log_lp and s, colour the region and
# marker size `pop` (capped at size_max). `labels` replaces the raw column
# names with readable titles in the figure.
readable_labels = {
    "log_lp": "Labor productivity in 1990 (in logs)",
    "s": "Years of Schooling",
    "region": "Continent",
    "pop": "Population",
}
px.scatter(
    data_frame=df1.query("year == 1990"),
    x="log_lp",
    y="s",
    color="region",
    size="pop",
    size_max=60,
    hover_name="country",
    labels=readable_labels,
)
# Same bubble chart, split into one panel per hi1990 value and with an OLS
# trend line added.
px.scatter(
    data_frame=df1.query("year == 1990"),
    x="log_lp",
    y="s",
    color="region",
    size="pop",
    size_max=60,
    trendline="ols",
    hover_name="country",
    facet_col="hi1990",
    animation_frame="year",
)
3D
# Three-dimensional scatter for 1990 with log_lp, s and pop on the x, y and
# z axes, coloured by region.
rows_1990 = df1.query("year == 1990")
px.scatter_3d(
    data_frame=rows_1990,
    x="log_lp",
    y="s",
    z="pop",
    color="region",
    hover_name="country",
)
# Choropleth maps animated over the years, one for log_lp and one for s.
# Both maps share every setting except the colour variable.
# NOTE(review): `isocode` is presumably a three-letter ISO country code, as
# px.choropleth's default location mode expects -- confirm against the data.
map_options = dict(
    locations="isocode",
    hover_name="country",
    animation_frame="year",
    color_continuous_scale=px.colors.sequential.Plasma,
    projection="natural earth",
)
px.choropleth(df1, color="log_lp", **map_options)
px.choropleth(df1, color="s", **map_options)