Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
#chart_studio.tools.set_credentials_file(username='econdata777', api_key='ADDhere')
import statsmodels.api as sm
import statsmodels.formula.api as smf
import warnings
#warnings.filterwarnings('ignore')
Import data
# Read the dataset directly from the GitHub-hosted CSV.
# (Pass parse_dates=['year'] to read_csv to have `year` parsed as a date.)
df1 = pd.read_csv(
    "https://raw.githubusercontent.com/quarcs-lab/"
    "mendez2020-convergence-clubs-code-data/master/assets/dat.csv"
)
# Show the table (use df1.head() for just the first rows).
df1
# Column names, followed by the dtype of each column.
df1.columns
df1.dtypes
# Distinct values of the `country` column.
df1.country.unique()
Dataset definitions
# Load the variable definitions that accompany the dataset.
# The path mirrors the one used for dat.csv: a single "/" between "master"
# and "assets", so the request does not depend on the server tolerating an
# empty path segment.
df1_def = pd.read_csv(
    "https://raw.githubusercontent.com/quarcs-lab/"
    "mendez2020-convergence-clubs-code-data/master/assets/dat-definitions.csv"
)
df1_def
Descriptive statistics
# Summary statistics of df1, rounded to two decimals.
df1.describe().round(decimals=2)
Prepare data
select
# A single column can be pulled out by name.
df1["country"]
# Several columns at once: index with a list of names to get a new frame.
df2 = df1[
    [
        "country",
        "year",
        "GDPpc",
        "h",
        "lp",
        "s",
        "log_GDPpc",
    ]
]
df2
query
# Keep only the rows for China and Japan, then the columns of interest.
dfcj = df1.query("country == ['China','Japan']")[
    ["country", "region", "hi1990", "year", "GDPpc", "h", "s", "lp", "log_GDPpc"]
]
dfcj
# Per-country summary of `s`: mean, standard deviation, minimum and maximum.
dfcj.groupby("country")["s"].agg(["mean", "std", "min", "max"]).round(2)
Visualize data
Strip plot
# Strip plot of `s`, one colour per country.
px.strip(data_frame=dfcj, x="s", color="country", hover_name="country")
#fig1 = px.strip(df_2014, x = 'GDPpc', hover_name= 'country')
#save2cs.plot(fig1, filename = 'figureName', auto_open=True)
# Strip plot of GDPpc with each country on its own row of the y axis.
px.strip(data_frame=dfcj, x="GDPpc", y="country", color="country")
Line plots
# Time series of GDPpc and of `s`, one line per country.
px.line(data_frame=dfcj, x="year", y="GDPpc", color="country")
px.line(data_frame=dfcj, x="year", y="s", color="country")
# Japan-only series of log_lp, saved as a standalone HTML file.
fig1 = px.line(
    data_frame=df1.query("country=='Japan'"),
    x="year",
    y="log_lp",
)
fig1.write_html("fig1.html")
Histogram
# Histogram of `s`, coloured by country.
px.histogram(data_frame=dfcj, x="s", color="country", hover_name="country")
# Histogram of GDPpc, coloured by country.
px.histogram(data_frame=dfcj, x="GDPpc", color="country", hover_name="country")
Box plot
# Box plot of GDPpc with one box per country.
px.box(data_frame=dfcj, x="GDPpc", color="country", hover_name="country")
Violin plot
# Violin plot of `s` by country, with an inner box plot and every
# observation drawn as a point; the x axis is fixed to [6, 12].
px.violin(
    data_frame=dfcj,
    x="s",
    y="country",
    color="country",
    hover_name="country",
    box=True,
    points="all",
    range_x=[6, 12],
)
ECDF
# Empirical cumulative distribution of `s` per country, x axis fixed to [6, 12].
px.ecdf(
    data_frame=dfcj,
    x="s",
    color="country",
    hover_name="country",
    range_x=[6, 12],
)
Scatter plots
Simple
# Plain scatter of GDPpc against `s`, coloured by country.
px.scatter(data_frame=dfcj, x="s", y="GDPpc", color="country", hover_name="country")
Regression
# Scatter of GDPpc against `s` with an OLS trendline per country.
px.scatter(
    data_frame=dfcj,
    x="s",
    y="GDPpc",
    color="country",
    trendline="ols",
    hover_name="country",
    hover_data=["country"],
)
# Same scatter with box-plot marginals on both axes.
px.scatter(
    data_frame=dfcj,
    x="s",
    y="GDPpc",
    color="country",
    trendline="ols",
    marginal_x="box",
    marginal_y="box",
    hover_name="country",
)
Customized
# Bubble chart of GDP per capita against years of schooling for China and
# Japan; marker size also encodes GDPpc.
fig = px.scatter(
    df1.query("country == ['China','Japan']"),
    x="s",
    y="GDPpc",
    log_x=False,
    color="country",
    size="GDPpc",
    size_max=60,
    hover_name="country",
    height=500,
    width=800,
    template="simple_white",
    color_discrete_sequence=px.colors.qualitative.G10,
    # title="Year 1990",
    labels=dict(
        s="Years of Schooling",
        GDPpc="GDP Per Capita",
    ),
)
fig.update_layout(
    font_family="Rockwell",
    # Horizontal legend without a title, anchored above the top-right corner.
    legend=dict(
        orientation="h",
        title="",
        y=1.1,
        x=1,
        xanchor="right",
        yanchor="bottom",
    ),
)
# Dotted reference lines at the 1990 cross-country means. They must use the
# same variables as the axes (GDPpc on y, s on x); means of log_lp / log_h
# would be in different units and land at meaningless positions.
fig.add_hline(y=df1.query("year == 1990")["GDPpc"].mean(), line_width=1, line_dash="dot")
fig.add_vline(x=df1.query("year == 1990")["s"].mean(), line_width=1, line_dash="dot")
fig.show()
# Animated bubble chart: one frame per year, log_GDPpc against `s`,
# marker size proportional to GDPpc. Axis ranges are fixed so the frames
# are comparable across the animation.
px.scatter(
    dfcj,
    animation_frame="year",
    x="s",
    y="log_GDPpc",
    range_x=[2, 18],
    range_y=[7, 14],
    color="country",
    size="GDPpc",
    size_max=30,
    hover_name="country",
    hover_data=["s", "GDPpc"],
    labels=dict(
        s="Years of Schooling",
        GDPpc="GDP Per Capita",
        # The y axis plots log_GDPpc, so it needs its own entry; without it
        # the axis title falls back to the raw column name.
        log_GDPpc="Log GDP Per Capita",
    ),
)
# Animated density contours of log_GDPpc against `s` (one frame per year),
# coloured by country, with rug marginals on both axes.
px.density_contour(
    data_frame=dfcj,
    x="s",
    y="log_GDPpc",
    color="country",
    animation_frame="year",
    marginal_x="rug",
    marginal_y="rug",
)
3D
# 3D scatter with `s`, GDPpc and log_GDPpc on the x, y and z axes.
px.scatter_3d(
    data_frame=dfcj,
    x="s",
    y="GDPpc",
    z="log_GDPpc",
    color="country",
    hover_name="country",
)