Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import inequality
from inequality.gini import Gini_Spatial
import plotly.express as px
import plotly.graph_objects as go
#chart_studio.tools.set_credentials_file(username='econdata777', api_key='ADDhere')
import statsmodels.api as sm
import statsmodels.formula.api as smf
import warnings
#warnings.filterwarnings('ignore')
Import data
# Load the country-year panel directly from the project's GitHub repository.
# (An earlier, commented-out variant additionally passed parse_dates=['year'].)
df1 = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv"
)

# Quick inspection: the frame itself, its column labels, dtypes and the
# distinct values of the 'country' column.
df1
df1.columns
df1.dtypes
df1.country.unique()
Dataset definitions
# Import definitions of dataset
# Load the table of variable definitions that accompanies the dataset.
# The path uses a single "/" between "master" and "assets" (the original had
# an accidental "//"), which is the canonical form of the repository path.
df1_def = pd.read_csv("https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat-definitions.csv")
df1_def
Descriptive statistics
# Summary statistics of the panel, rounded to two decimals.
df1.describe().round(decimals=2)
select
# Select a single column (GDPpc) as a Series.
df1['GDPpc']
pivot_table
# Pivot panel data from long form to wide form
# Reshape GDPpc from long form (one row per country-year) to wide form
# (one row per country, one column per year).
df1_gdp_wide = df1.pivot_table(
    values='GDPpc',
    index=['country', 'region', 'hi1990'],
    columns='year',
).reset_index()
# Convert every column label to a string so year columns can be selected
# by name (e.g. '1990').
df1_gdp_wide.columns = df1_gdp_wide.columns.astype(str)
df1_gdp_wide
# Pivot panel data from long form to wide form
# Same long-to-wide reshape as for GDPpc, but for log_GDPpc.
df1_lngdp_wide = df1.pivot_table(
    values='log_GDPpc',
    index=['country', 'region', 'hi1990'],
    columns='year',
).reset_index()
# Convert every column label to a string so year columns can be selected by name.
df1_lngdp_wide.columns = df1_lngdp_wide.columns.astype(str)
# Growth over the period: log GDPpc in 2014 minus log GDPpc in 1990.
df1_lngdp_wide['g9014'] = df1_lngdp_wide['2014'].sub(df1_lngdp_wide['1990'])
# Split both wide panels by the hi1990 flag: 'yes' rows first, then 'no' rows.
df_high, df_low = (
    df1_gdp_wide.query(flag) for flag in ("hi1990 == 'yes'", "hi1990 == 'no'")
)
df_highl, df_lowl = (
    df1_lngdp_wide.query(flag) for flag in ("hi1990 == 'yes'", "hi1990 == 'no'")
)
# Display the wide log-GDPpc panel and its summary statistics.
df1_lngdp_wide
df1_lngdp_wide.describe().round(2)

# Standard deviation across countries of each year column of GDPpc, for the
# full sample and for the hi1990 == 'yes' / 'no' subsets.
# numeric_only=True is required because these frames still carry non-numeric
# identifier columns (e.g. country); pandas >= 2.0 raises TypeError on them
# instead of silently dropping them as older versions did.
df1_gdp_wide.std(numeric_only=True).round(2).plot();
df_high.std(numeric_only=True).round(2).plot();
df_low.std(numeric_only=True).round(2).plot();

# Same dispersion measure for log GDPpc, restricted to the year columns
# (this also leaves out the derived 'g9014' growth column).
df1_lngdp_wide.loc[:, '1990':'2014'].std().round(2).plot();
Inequality
# Year column labels '1990' ... '2014' as strings, matching the wide frames.
years = np.arange(1990, 2014 + 1).astype(np.str_)
Gini
def gini_by_col(column):
    """Return the Gini coefficient of one column of values.

    Intended for use with ``DataFrame.apply`` so that one coefficient is
    computed per column.

    Parameters
    ----------
    column : object exposing ``.values`` (e.g. a pandas Series)
        The observations to measure.

    Returns
    -------
    The ``g`` attribute of ``inequality.gini.Gini`` built from
    ``column.values``.
    """
    return inequality.gini.Gini(column.values).g
# Gini coefficient of GDPpc for every year column, full sample.
inequalities = (
    df1_gdp_wide.loc[:, years]
    .apply(gini_by_col)
    .to_frame(name='gini')
)
inequalities.plot();
high income
# Gini coefficient of GDPpc for every year column, hi1990 == 'yes' subset.
ine = (
    df_high.loc[:, years]
    .apply(gini_by_col)
    .to_frame(name='gini')
)
ine.plot();
low income
# Gini coefficient of GDPpc for every year column, hi1990 == 'no' subset.
inel = (
    df_low.loc[:, years]
    .apply(gini_by_col)
    .to_frame(name='gini')
)
Theil index
def theil(column):
    """Return the Theil index of one column of values.

    Intended for use with ``DataFrame.apply`` so that one index value is
    computed per column.

    Parameters
    ----------
    column : object exposing ``.values`` (e.g. a pandas Series)
        The observations to measure.

    Returns
    -------
    The ``T`` attribute of ``inequality.theil.Theil`` built from
    ``column.values``.
    """
    return inequality.theil.Theil(column.values).T
# Add a 'theil' column (Theil index per year) next to 'gini' in each of the
# three result frames: full sample, hi1990 == 'yes', hi1990 == 'no'.
inequalities['theil'] = (
    df1_gdp_wide.loc[:, years].apply(theil).to_frame(name='theil')
)
ine['theil'] = df_high.loc[:, years].apply(theil).to_frame(name='theil')
inel['theil'] = df_low.loc[:, years].apply(theil).to_frame(name='theil')

# Plot the full-sample Theil index over time.
inequalities.loc[:, 'theil'].plot();
CV
# Add a 'CV' column (scipy.stats.variation applied to each year column) to
# each of the three result frames.
inequalities['CV'] = (
    df1_gdp_wide.loc[:, years].apply(stats.variation).to_frame(name='CV')
)
ine['CV'] = df_high.loc[:, years].apply(stats.variation).to_frame(name='CV')
inel['CV'] = df_low.loc[:, years].apply(stats.variation).to_frame(name='CV')

# Plot the full-sample coefficient of variation over time.
inequalities.loc[:, 'CV'].plot();
# Collect the three measures into tidy frames with a numeric 'year' column:
# df (full sample), dfh (hi1990 == 'yes') and dfl (hi1990 == 'no').
df = pd.DataFrame({
    'year': range(1990, 2015, 1),
    **{measure: inequalities[measure] for measure in ('gini', 'theil', 'CV')},
})
dfh = pd.DataFrame({
    'year': range(1990, 2015, 1),
    **{measure: ine[measure] for measure in ('gini', 'theil', 'CV')},
})
dfl = pd.DataFrame({
    'year': range(1990, 2015, 1),
    **{measure: inel[measure] for measure in ('gini', 'theil', 'CV')},
})
# One figure per sample, all measures against year, with a tick every
# second year.

# Full sample.
df.set_index(keys='year').plot(figsize=(10, 10));
plt.xticks(ticks=range(1990, 2015, 2))

# hi1990 == 'yes' subset.
dfh.set_index(keys='year').plot(figsize=(10, 10));
plt.xticks(ticks=range(1990, 2015, 2))

# hi1990 == 'no' subset.
dfl.set_index(keys='year').plot(figsize=(10, 10));
plt.xticks(ticks=range(1990, 2015, 2))
Visualize data
# Animated strip plot: GDPpc by region, one animation frame per year.
px.strip(
    data_frame=df1,
    x='GDPpc',
    y='region',
    color='region',
    hover_name='country',
    hover_data=['h', 'ky'],
    animation_frame='year',
)
Line plots
# One line per country of log GDPpc over time, faceted by the hi1990 flag.
px.line(
    data_frame=df1,
    x='year',
    y='log_GDPpc',
    color='country',
    facet_col='hi1990',
    facet_col_wrap=2,
    height=800,
)
Regression
Overall beta convergence
# Beta convergence, full sample: growth 1990-2014 against initial log GDPpc,
# with a single OLS trendline fitted over all points (points are still
# coloured by the hi1990 flag).
# The y-axis label says "GDPpc" (the original said "GNIpc"): g9014 is the
# change in log_GDPpc, and the other convergence plots in this file label it
# as GDPpc.
px.scatter(
    df1_lngdp_wide,
    x="1990",
    y="g9014",
    color="hi1990",
    hover_name="country",
    hover_data=['region'],
    trendline="ols",
    trendline_scope="overall",
    labels={"1990": "Log GDPpc in 1990",
            "g9014": "Growth GDPpc 1990-2014"}
)
Beta convergence by income group (high vs. low income in 1990)
# Growth 1990-2014 against initial log GDPpc for the full sample, coloured by
# the hi1990 flag and with an OLS trendline requested.
px.scatter(
    data_frame=df1_lngdp_wide,
    x="1990",
    y="g9014",
    color="hi1990",
    hover_name="country",
    trendline="ols",
    labels={
        "1990": "Log GDPpc in 1990",
        "g9014": "Growth GDPpc 1990-2014",
    },
)
Beta convergence within high-income and low-income countries
px.scatter(
df_highl,
x="1990",
y="g9014",
color="region",
hover_name="country",
trendline="ols",
labels={"1990": "Log GDPpc in 1990",
"g9014": "Growth GDPpc 1990-2014"})
# Growth 1990-2014 against initial log GDPpc for the hi1990 == 'no' subset,
# coloured by region and with an OLS trendline requested.
px.scatter(
    data_frame=df_lowl,
    x="1990",
    y="g9014",
    color="region",
    hover_name="country",
    trendline="ols",
    labels={
        "1990": "Log GDPpc in 1990",
        "g9014": "Growth GDPpc 1990-2014",
    },
)