The relationship between Years of Schooling and GDP
Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
#chart_studio.tools.set_credentials_file(username='econdata777', api_key='ADDhere')
import statsmodels.api as sm
import statsmodels.formula.api as smf
import warnings
#warnings.filterwarnings('ignore')
Import data
# Load the main dataset (CSV hosted in the public GitHub repository) into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv"
)
# Bare expression: renders the DataFrame when this is run as a notebook cell.
df
Dataset definitions
# Load the table of variable definitions that accompanies the dataset.
# The path uses a single slash after "master" so that it matches the form of
# the data URL used for `df` (the original had a stray "//").
data_definitions = pd.read_csv(
    "https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat-definitions.csv"
)
# Bare expression: renders the definitions table when run as a notebook cell.
data_definitions
Descriptive statistics
# Summary statistics of the dataset, rounded to two decimals for readability.
df.describe().round(decimals=2)
Prepare data
Select the variables of interest
# Keep only the columns used by the charts below.
df1 = df.loc[
    :,
    [
        "country",
        "year",
        "log_GDPpc",
        "region",
        "hi1990",
        "s",
        "log_lp",
        "pop",
        "isocode",
    ],
]
# Bare expression: renders the selection when run as a notebook cell.
df1
Data Visualization
# Treemap of the 1990 rows: tiles are nested region -> country,
# sized by `pop` and coloured by `log_GDPpc`.
px.treemap(
    data_frame=df1[df1["year"] == 1990],
    path=("region", "country"),
    values="pop",
    color="log_GDPpc",
    hover_name="country",
    height=500,
)
# World map of the 1990 rows, coloured by `s` and located by the `isocode` column.
# NOTE(review): the data is filtered to year == 1990, so `animation_frame="year"`
# can only ever produce a single frame — confirm whether the filter or the
# animation is the intended behaviour.
px.choropleth(
    data_frame=df1[df1["year"] == 1990],
    locations="isocode",
    color="s",
    hover_name="country",
    animation_frame="year",
    projection="natural earth",
)
# Time series of `log_GDPpc`: one line per country, one panel (facet column) per region.
px.line(
    data_frame=df1,
    x="year",
    y="log_GDPpc",
    facet_col="region",
    color="country",
)
# Animated bubble chart, one frame per year: `s` on the x-axis against `log_lp`
# on the y-axis, bubbles sized by `pop` and coloured by region.
# Axis ranges are fixed so the frames stay comparable while the animation plays.
px.scatter(
    data_frame=df1,
    x="s",
    y="log_lp",
    color="region",
    size="pop",
    size_max=60,
    hover_name="country",
    animation_frame="year",
    range_x=[0, 16],
    range_y=[7, 12.2],
    labels={
        "s": "Years of Schooling",
        "log_lp": "Labor productivity(in Logs)",
        "region": "Continent",
        "pop": "Population",
    },
)
# Animated scatter, one frame per year: `s` against `log_GDPpc`, coloured by the
# `hi1990` column, with an OLS trendline requested from Plotly Express.
px.scatter(
    data_frame=df1,
    x="s",
    y="log_GDPpc",
    color="hi1990",
    hover_name="country",
    animation_frame="year",
    trendline="ols",
)
References
Plotly Express: https://deepnote.com/@carlos-mendez/PYTHON-Plotly-Express-3e900d54-8143-405b-8be2-266157d4ec75