Setup

# Standard library
import warnings

# Third-party: numerics, data handling, plotting, and regression modelling
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Disabled in the original notebook and kept disabled here.
# NOTE(review): chart_studio is not imported anywhere in the visible source,
# so enabling the first line would also require `import chart_studio`.
#chart_studio.tools.set_credentials_file(username='econdata777', api_key='ADDhere')
#warnings.filterwarnings('ignore')

Import data

# Load the dataset from the GitHub repository into df1.
# Disabled alternative from the original, which also parses 'year' as dates:
#df1 = pd.read_csv("https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv", parse_dates =['year'])
df1 = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv"
)

# Display the full DataFrame.
df1

# Disabled in the original: preview of the first rows only.
#df1.head()

# Column labels of df1.
df1.columns

# Data type of each column.
df1.dtypes

# Distinct values found in the 'country' column.
df1['country'].unique()

Dataset definitions

# Import definitions of dataset
# The path uses a single slash after "master", matching the data file's URL
# (the original had "master//assets").
df1_def = pd.read_csv(
    "https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat-definitions.csv"
)
df1_def

Descriptive statistics

# Summary statistics of df1 as produced by describe(), rounded to two decimals.
df1.describe().round(decimals=2)

Prepare data

select

# Pick out the single column 'GDPpc' from df1.
df1['GDPpc']

# Select a subset of the columns of df1 into df2.
# The per-column notes below come from the original cell; columns without a
# note had none there.
df2 = df1[
    [
        'country',
        'year',
        'region',
        'hi1990',     # high-income country (original note says "as of 1999";
                      # the column name suggests 1990 -- TODO confirm)
        'log_GDPpc',
        'GDPpc',
        'pop',
        'log_lp',     # Labor Productivity
        'log_ky',     # Capital-Output Ratio
        'h',
        'log_h',      # Human Capital Index
        's',          # Years of Schooling
        'log_tfp',    # Aggregate Efficiency
        'isocode',
    ]
]
df2

query

# Split df2 into two single-year subsets: rows for 2014 and rows for 1990.
df_2014 = df2.query("year == 2014")
df_1990 = df2.query("year==1990")

# Restrict each single-year subset to rows whose region is 'Asia'.
df_asia_1990 = df_1990.query("region == 'Asia'")
df_asia_2014 = df_2014.query("region=='Asia'")

# Country, year and 'h' for the Asian rows of 2014.
# NOTE(review): the original (Chinese) comment described this as "income",
# but the column selected is 'h' -- confirm which variable was intended.
df_asia_2014_h = df_asia_2014[['country', 'year', 'h']]

# Asian rows of 1990 whose hi1990 flag is 'yes'.
# (The original comment said 2014; the code filters the 1990 subset.)
df_asia_HighIncome_1990 = df_asia_1990.query("hi1990=='yes'")
df_asia_HighIncome_1990

# Define the logical conditions (one boolean mask per criterion, built on df1)
is_Asia = df1['region'] == 'Asia'
is_HighIncome = df1['hi1990'] == 'yes'
is_1990 = df1['year'] == 1990

# Apply the logical conditions: keep rows where all three masks are True
df1[is_Asia & is_HighIncome & is_1990]

Visualize data

# Histogram of log_lp, coloured by hi1990, with a box-plot marginal and one
# animation frame per year; hovering shows the country name.
px.histogram(
    data_frame=df1,
    x="log_lp",
    color="hi1990",
    marginal='box',
    hover_name='country',
    animation_frame='year',
)

8. The relationship between GDP per capita (GDPpc) and labor productivity

# Scatter of log_GDPpc (y) against log_lp (x), coloured by region, with one
# animation frame per year; hovering shows the country name.
px.scatter(
    data_frame=df1,
    x="log_lp",
    y="log_GDPpc",
    color="region",
    hover_name="country",
    animation_frame='year',
)

# Scatter of log_GDPpc against log_lp for the 1990 rows only, coloured by
# hi1990. A single OLS trendline is fitted across all points
# (trendline_scope="overall") rather than one per colour group.
px.scatter(
    data_frame=df1.query("year == 1990"),
    x="log_lp",
    y="log_GDPpc",
    color="hi1990",
    hover_name="country",
    hover_data=['region'],
    trendline="ols",
    trendline_scope="overall",
)

# Scatter of log_GDPpc against log_lp over all years, coloured by hi1990,
# animated by year, with OLS trendlines requested.
px.scatter(
    data_frame=df1,
    x="log_lp",
    y="log_GDPpc",
    color="hi1990",
    hover_name="country",
    trendline="ols",
    animation_frame='year',
)

.css-15w88e5{color:var(--chakra-colors-fg-neutral-primary);font-weight:inherit;letter-spacing:-0.09px;}