Setup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import inequality
from inequality.gini import Gini_Spatial
import plotly.express as px
import plotly.graph_objects as go
#chart_studio.tools.set_credentials_file(username='econdata777', api_key='ADDhere')
import statsmodels.api as sm
import statsmodels.formula.api as smf
import warnings
#warnings.filterwarnings('ignore')
Import data
# Load the country-year dataset straight from the project's GitHub repository.
# (To have pandas parse the year column as dates instead, add parse_dates=['year'].)
df1 = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv",
)
# Bare expression: shown as the cell output when run in a notebook.
df1
id int64
1 - 108
country object
Zambia 0.9%
Italy 0.9%
106 others 98.1%
2699
108
Zambia
1199
48
Italy
1324
53
Kyrgyz Republic
1299
52
Kenya
1274
51
Kazakhstan
2649
106
Vietnam
1249
50
Japan
1224
49
Jamaica
1174
47
Israel
1374
55
Lithuania
# Quick structural checks on df1: column labels, per-column dtypes, and the
# distinct country names. In a notebook only the last expression is displayed.
# (Uncomment the next line for a preview of the first rows.)
#df1.head()
df1.columns
df1.dtypes
df1.country.unique()
Dataset definitions
# Load the data dictionary for the dataset: one row per variable, giving its
# short name (var_name) and its human-readable definition (var_def).
# The URL uses the same single-slash ".../master/assets/..." path as the main
# dataset load; the previous "master//assets" contained a stray extra slash.
df1_def = pd.read_csv("https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat-definitions.csv")
# Bare expression: shown as the cell output when run in a notebook.
df1_def
var_name object
country 3.6%
year 3.6%
26 others 92.9%
var_def object
Standardized country name (from PWT) 3.6%
Year 3.6%
26 others 92.9%
0
country
Standardized country name (from PWT)
1
year
Year
2
Y
GDP
3
K
Physical Capital
4
pop
Population
5
L
Labor Force
6
s
Years of Schooling
7
alpha_it
Variable Capital Share
8
GDPpc
GDP per capita
9
lp
Labor Productivity
Descriptive statistics
# Summary statistics (count, mean, std, min, quartiles, max) for df1's
# numeric columns, rounded to two decimal places for readability.
df1.describe().round(decimals=2)
id float64
year float64
count
2700
2700
mean
54.5
2002
std
31.18
7.21
min
1
1990
25%
27.75
1996
50%
54.5
2002
75%
81.25
2008
max
108
2014
select
# Select a single column (GDP per capita) as a Series, using bracket access.
df1['GDPpc']
pivot_table
# Reshape the long panel (one row per country-year) into wide form: one row per
# (country, region, hi1990) combination and one GDPpc column per year.
# pivot_table aggregates with its default function if a combination repeats.
df1_gdp_wide = pd.pivot_table(
    df1,
    values='GDPpc',
    index=['country', 'region', 'hi1990'],
    columns='year',
).reset_index()  # move the index levels back into ordinary columns

# The year labels come from the data and are not strings; convert every column
# label to str so all columns can be addressed uniformly by string name.
df1_gdp_wide.columns = df1_gdp_wide.columns.astype(str)

# Bare expression: shown as the cell output when run in a notebook.
df1_gdp_wide
country object
Albania 0.9%
Algeria 0.9%
106 others 98.1%
region object
Europe 27.8%
Asia 25.9%
3 others 46.3%
0
Albania
Europe
1
Algeria
Africa
2
Argentina
Americas
3
Armenia