Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
#chart_studio.tools.set_credentials_file(username='econdata777', api_key='ADDhere')
import statsmodels.api as sm
import statsmodels.formula.api as smf
import warnings
#warnings.filterwarnings('ignore')
Import data
# Load the main dataset (dat.csv) from the
# quarcs-lab/mendez2020-convergence-clubs-code-data GitHub repository.
data_url = "https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv"
df1 = pd.read_csv(data_url)
# Alternative load of the same file that asks pandas to parse 'year' as dates:
#df1 = pd.read_csv("https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv", parse_dates =['year'])
# Show the whole table (the commented-out head() gives a short preview instead).
df1
#df1.head()
# Structural overview: column labels, column dtypes, and the distinct values
# found in the 'country' column.
df1.columns
df1.dtypes
df1["country"].unique()
Dataset definitions
# Load the companion table dat-definitions.csv from the same GitHub repository
# as the main dataset.
# The path has exactly one slash between the branch name and "assets" (same
# layout as the dat.csv URL), so the request does not depend on the server
# collapsing an empty path segment.
df1_def = pd.read_csv("https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat-definitions.csv")
# Display the definitions table.
df1_def
Descriptive statistics
# Summary statistics produced by DataFrame.describe(), shown rounded to
# two decimal places.
summary_stats = df1.describe()
summary_stats.round(2)
Prepare data
select
# Look at the GDPpc column on its own.
df1['GDPpc']
# Columns kept for the rest of the analysis. Meanings as noted by the author:
#   hi1990  : high-income country flag (the original note reads "as of 1999";
#             the column name suggests 1990 -- TODO confirm)
#   log_lp  : labor productivity
#   log_ky  : capital-output ratio
#   log_h   : human capital index
#   s       : years of schooling
#   log_tfp : aggregate efficiency
selected_columns = [
    'country', 'year', 'region', 'hi1990',
    'log_GDPpc', 'GDPpc', 'pop',
    'log_lp', 'log_ky', 'h', 'log_h', 's', 'log_tfp',
    'isocode',
]
df2 = df1[selected_columns]
# Display the reduced table.
df2
query
# Cross-sections of the selected columns for the two reference years.
df_1990 = df2.query("year==1990")
df_2014 = df2.query("year == 2014")
# Restrict each cross-section to the rows whose region is 'Asia'.
df_asia_1990 = df_1990.query("region == 'Asia'")
df_asia_2014 = df_2014.query("region=='Asia'")
# Asian rows in 2014, keeping only the country, year and 'h' columns.
df_asia_2014_h = df_asia_2014[['country', 'year', 'h']]
# Asian rows in 1990 whose hi1990 flag equals 'yes'.
# NOTE(review): earlier notes described this step as a 2014 income /
# 2014 high-income selection, but the filter runs on the 1990
# cross-section -- confirm which year is intended.
df_asia_HighIncome_1990 = df_asia_1990.query("hi1990=='yes'")
df_asia_HighIncome_1990
# The same row filter written as boolean masks on df1 (all columns).
is_1990 = df1['year'] == 1990
is_Asia = df1['region'] == 'Asia'
is_HighIncome = df1['hi1990'] == 'yes'
# Combine the three masks and show the matching rows.
asia_high_income_1990_mask = is_Asia & is_HighIncome & is_1990
df1[asia_high_income_1990_mask]
Visualize data
# Histogram of log_lp coloured by the hi1990 flag, with a box plot in the
# margin and one animation frame per value of 'year'.
px.histogram(
    df1,
    x="log_lp",
    color="hi1990",
    marginal='box',
    hover_name='country',
    animation_frame='year',
)
8. The relationship between GDPpc and labor productivity
# Scatter of log_GDPpc (y) against log_lp (x), coloured by region, with the
# country shown on hover and one animation frame per value of 'year'.
px.scatter(
    df1,
    x="log_lp",
    y="log_GDPpc",
    color="region",
    hover_name="country",
    animation_frame='year',
)
# Rows of df1 for the year 1990 only.
cross_section_1990 = df1.query("year == 1990")
# Scatter of log_GDPpc against log_lp for that year, coloured by hi1990, with
# region added to the hover box and an OLS trendline requested with
# trendline_scope="overall".
px.scatter(
    cross_section_1990,
    x="log_lp",
    y="log_GDPpc",
    color="hi1990",
    hover_name="country",
    hover_data=['region'],
    trendline="ols",
    trendline_scope="overall",
)
# Animated scatter of log_GDPpc against log_lp (one frame per 'year'),
# coloured by hi1990, with OLS trendlines requested.
px.scatter(
    df1,
    x="log_lp",
    y="log_GDPpc",
    color="hi1990",
    hover_name="country",
    trendline="ols",
    animation_frame='year',
)