# Basic Packages
from __future__ import division
import os
from datetime import datetime
# Web & file access
import requests
import io
# Import display options for showing websites
from IPython.display import IFrame, HTML
# Plotting
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
%pylab --no-import-all
%matplotlib inline
import seaborn as sns
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_context("talk")
import plotly.express as px
import plotly.graph_objects as go
from plotnine import ggplot, geom_point, aes, stat_smooth, facet_wrap
# Next line can import all of plotnine, but may overwrite things? Better import each function/object you need
#from plotnine import *
# Data
import pandas as pd
import numpy as np
from pandas_datareader import data, wb
# GIS & maps
import geopandas as gpd
gp = gpd
import georasters as gr
import geoplot as gplt
import geoplot.crs as gcrs
import mapclassify as mc
import textwrap
# Data Munging
from itertools import product, combinations
import difflib
import pycountry
import geocoder
from geonamescache.mappers import country
mapper = country(from_key='name', to_key='iso3')
mapper2 = country(from_key='iso3', to_key='iso')
mapper3 = country(from_key='iso3', to_key='name')
# Regressions & Stats
from scipy.stats import norm
import statsmodels.formula.api as smf
from stargazer.stargazer import Stargazer, LineLocation
# Paths
pathout = './data/'
if not os.path.exists(pathout):
os.mkdir(pathout)
pathgraphs = './graphs/'
if not os.path.exists(pathgraphs):
os.mkdir(pathgraphs)
currentYear = datetime.now().year
year = min(2019, currentYear-3)
year
url = 'https://febpwt.webhosting.rug.nl/Dmn/AggregateXs/SelectInit'
IFrame(url, width=800, height=400)
pwt_version = '100'
pwt = pd.read_stata('https://www.rug.nl/ggdc/docs/pwt' + pwt_version + '.dta')
pwt_labels = pd.io.stata.StataReader('https://www.rug.nl/ggdc/docs/pwt' + pwt_version + '.dta').variable_labels()
pwt.head()
pwt_labels
pwt['gdp_pc_e'] = pwt['rgdpe'] / pwt['pop']
pwt['gdp_pc_o'] = pwt['rgdpo'] / pwt['pop']
pwt['ln_gdp_pc_e'] = pwt['gdp_pc_e'].apply(np.log)
pwt['ln_gdp_pc_o'] = pwt['gdp_pc_o'].apply(np.log)
pwt['ln_pop'] = pwt['pop'].apply(np.log)
wbcountries = wb.get_countries()
wbcountries['name'] = wbcountries.name.str.strip()
wbcountries = wbcountries.loc[wbcountries.region.isin(['Aggregates'])==False].reset_index(drop=True)
wbcountries['incomeLevel'] = wbcountries['incomeLevel'].str.title()
wbcountries.loc[wbcountries.iso3c=='VEN', 'incomeLevel'] = 'Upper Middle Income'
df = pwt.merge(wbcountries, left_on='countrycode', right_on='iso3c')
df.head()
url = 'https://www.statsmodels.org/stable/index.html'
IFrame(url, width=800, height=400)
dffig = df.loc[df.year==year]\
.dropna(subset=['ln_gdp_pc_o', 'ln_gdp_pc_e', 'ln_pop'])\
.sort_values(by=['region', 'iso3c']).reset_index(drop=True)
dffig.head()
mod = smf.ols(formula='ln_gdp_pc_o ~ ln_pop', data=dffig, missing='drop').fit()
mod.summary2()
pred_ols = mod.get_prediction()
iv_l = pred_ols.summary_frame()["mean_ci_lower"]
iv_u = pred_ols.summary_frame()["mean_ci_upper"]
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(dffig.ln_pop, dffig.ln_gdp_pc_o, "o", label="data")
ax.plot(dffig.ln_pop, mod.fittedvalues, "r--.", label="OLS")
ax.plot(dffig.ln_pop, iv_u, "r--")
ax.plot(dffig.ln_pop, iv_l, "r--")
ax.legend(loc="best")
fig
mod2 = smf.ols(formula='ln_gdp_pc_o ~ ln_pop + C(region)', data=dffig, missing='drop').fit()
mod2.summary2()
mod3 = smf.ols(formula='ln_gdp_pc_e ~ ln_pop', data=dffig, missing='drop').fit()
mod3.summary2()
mod4 = smf.ols(formula='ln_gdp_pc_e ~ ln_pop + C(region)', data=dffig, missing='drop').fit()
mod4.summary2()
url = 'https://nbviewer.org/github/mwburke/stargazer/blob/master/examples.ipynb'
IFrame(url, width=800, height=400)
stargazer = Stargazer([mod, mod2, mod3, mod4])
stargazer.significant_digits(2)
stargazer.show_degrees_of_freedom(False)
stargazer.dep_var_name = 'Dependent Variable:'
stargazer.dependent_variable = ' Log[GDP per capita (' + str(year) + ')]'
stargazer.custom_columns(['Output Based GDP', 'Expenditure Based GDP'], [2, 2])
#stargazer.show_model_numbers(False)
stargazer.rename_covariates({'ln_pop':'Log[Population]'}),
stargazer.add_line('WB Region FE', ['No', 'Yes', 'No', 'Yes'], LineLocation.FOOTER_TOP)
stargazer.covariate_order(['ln_pop'])
stargazer.cov_spacing = 2
stargazer
HTML(stargazer.render_html())
file_name = "table-pwt.html" #Include directory path if needed
html_file = open(pathgraphs + file_name, "w" ) #This will overwrite an existing file
html_file.write( stargazer.render_html() )
html_file.close()
url = pathgraphs + 'table-pwt.html'
url = 'https://smu-econ-growth.github.io/EconGrowthUG-Slides-Working-with-PWT/table-pwt.html'
IFrame(url, width=500, height=300)
url = 'https://seaborn.pydata.org/examples/index.html'
IFrame(url, width=800, height=400)
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_context("talk")
g = sns.relplot(x="ln_pop",
y="ln_gdp_pc_e",
data=dffig,
hue="region",
hue_order = dffig.region.drop_duplicates().sort_values(),
style="region",
style_order = dffig.region.drop_duplicates().sort_values(),
size="ln_gdp_pc_o",
sizes=(10, 400),
alpha=.5,
height=6,
aspect=2,
palette="muted",
)
g.set_axis_labels('Log[Population (' + str(year) + ')]', 'Log[GDP per capita (' + str(year) + ')]')
g.fig
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_context("talk")
fig, ax = plt.subplots()
sns.scatterplot(x="ln_pop",
y="ln_gdp_pc_e",
data=dffig,
hue="region",
hue_order = dffig.region.drop_duplicates().sort_values(),
style="region",
style_order = dffig.region.drop_duplicates().sort_values(),
size="ln_gdp_pc_o",
sizes=(10, 400),
alpha=.5,
palette="muted",
ax=ax
)
ax.set_xlabel('Log[Population (' + str(year) + ')]')
ax.set_ylabel('Log[GDP per capita (' + str(year) + ')]')
ax.legend(fontsize=10)
fig
def my_xy_plot(dfin,
x='ln_pop',
y='ln_gdp_pc_o',
labelvar='iso3c',
dx=0.006125,
dy=0.006125,
xlogscale=False,
ylogscale=False,
xlabel='Log[Population ' + str(year) + ']',
ylabel='Log[Income per capita in ' + str(year) + ']',
labels=False,
xpct = False,
ypct = False,
OLS=False,
OLSlinelabel='OLS',
ssline=False,
sslinelabel='45 Degree Line',
filename='income-pop-growth.pdf',
hue='region',
hue_order=['East Asia & Pacific', 'Europe & Central Asia',
'Latin America & Caribbean ', 'Middle East & North Africa',
'North America', 'South Asia', 'Sub-Saharan Africa '],
style='incomeLevel',
style_order=['High Income', 'Upper Middle Income', 'Lower Middle Income', 'Low Income'],
palette=None,
size=None,
sizes=None,
fontsize=10,
label_font_size=12,
save=True):
'''
Plot the association between x and var in dataframe using labelvar for labels.
'''
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_context("talk")
df = dfin.copy()
df = df.dropna(subset=[x, y]).reset_index(drop=True)
# Plot
k = 0
fig, ax = plt.subplots()
sns.scatterplot(x=x, y=y, data=df, ax=ax,
hue=hue,
hue_order=hue_order,
#hue='incomeLevel',
#hue_order=['High Income', 'Upper Middle Income', 'Lower Middle Income', 'Low Income'],
#hue_order=['East Asia & Pacific', 'Europe & Central Asia',
# 'Latin America & Caribbean ', 'Middle East & North Africa',
# 'North America', 'South Asia', 'Sub-Saharan Africa '],
alpha=1,
style=style,
style_order=style_order,
palette=palette,
size=size,
sizes=sizes,
#palette=sns.color_palette("Blues_r", df[hue].unique().shape[0]+6)[:df[hue].unique().shape[0]*2:2],
)
if OLS:
sns.regplot(x=x, y=y, data=df, ax=ax, label=OLSlinelabel, scatter=False)
if ssline:
ax.plot([df[x].min()*.99, df[x].max()*1.01], [df[x].min()*.99, df[x].max()*1.01], c='r', label=sslinelabel)
if labels:
movex = df[x].mean() * dx
movey = df[y].mean() * dy
for line in range(0,df.shape[0]):
ax.text(df[x][line]+movex, df[y][line]+movey, df[labelvar][line], horizontalalignment='left', fontsize=label_font_size, color='black')
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
if xpct:
fmt = '%.0f%%' # Format you want the ticks, e.g. '40%'
xticks = mtick.FormatStrFormatter(fmt)
ax.xaxis.set_major_formatter(xticks)
if ypct:
fmt = '%.0f%%' # Format you want the ticks, e.g. '40%'
yticks = mtick.FormatStrFormatter(fmt)
ax.yaxis.set_major_formatter(yticks)
if ylogscale:
ax.set(yscale="log")
if xlogscale:
ax.set(xscale="log")
handles, labels = ax.get_legend_handles_labels()
handles = np.array(handles)
labels = np.array(labels)
handles = list(handles[(labels!=hue) & (labels!=style) & (labels!=size)])
labels = list(labels[(labels!=hue) & (labels!=style) & (labels!=size)])
ax.legend(handles=handles, labels=labels, fontsize=fontsize)
if save:
plt.savefig(pathgraphs + filename, dpi=300, bbox_inches='tight')
return fig
g = my_xy_plot(dffig,
x='ln_pop',
y='ln_gdp_pc_e',
xlabel='Log[Population in ' + str(year) + ']',
ylabel='Log[GDP per capita (' + str(year) +')]',
OLS=True,
labels=True,
filename='ln-gdp-pc-pop-pwt.pdf')
g
g = my_xy_plot(dffig.loc[dffig.iso3c!='VEN'],
x='ln_gdp_pc_o',
y='ln_gdp_pc_e',
xlabel='Log[GDP per capita (' + str(year) +') Output Based]',
ylabel='Log[GDP per capita (' + str(year) +') Expenditure Based]',
OLS=True,
ssline=True,
labels=True,
label_font_size=12,
filename='ln-gdp-pc-out-exp.pdf')
g
def my_xy_line_plot(dfin,
x='year',
y='ln_gdp_pc_e',
labelvar='iso3c',
dx=0.006125,
dy=0.006125,
xlogscale=False,
ylogscale=False,
xlabel='Growth Rate of Population',
ylabel='Log[Income per capita in ' + str(year) + ']',
labels=False,
xpct = False,
ypct = False,
OLS=False,
OLSlinelabel='OLS',
ssline=False,
sslinelabel='45 Degree Line',
filename='income-pop-growth.pdf',
hue='region',
hue_order=['East Asia & Pacific', 'Europe & Central Asia',
'Latin America & Caribbean ', 'Middle East & North Africa',
'North America', 'South Asia', 'Sub-Saharan Africa '],
style='incomeLevel',
style_order=['High Income', 'Upper Middle Income', 'Lower Middle Income', 'Low Income'],
palette=None,
legend_fontsize=10,
label_font_size=12,
loc=None,
save=True):
'''
Plot the association between x and var in dataframe using labelvar for labels.
'''
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_context("talk")
df = dfin.copy()
df = df.dropna(subset=[x, y]).reset_index(drop=True)
# Plot
k = 0
fig, ax = plt.subplots()
sns.lineplot(x=x, y=y, data=df, ax=ax,
hue=hue,
hue_order=hue_order,
alpha=1,
style=style,
style_order=style_order,
palette=palette,
)
if OLS:
sns.regplot(x=x, y=y, data=df, ax=ax, label=OLSlinelabel, scatter=False)
if ssline:
ax.plot([df[x].min()*.99, df[x].max()*1.01], [df[x].min()*.99, df[x].max()*1.01], c='r', label=sslinelabel)
if labels:
movex = df[x].mean() * dx
movey = df[y].mean() * dy
for line in range(0,df.shape[0]):
ax.text(df[x][line]+movex, df[y][line]+movey, df[labelvar][line], horizontalalignment='left', fontsize=label_fontsize, color='black')
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
if xpct:
fmt = '%.0f%%' # Format you want the ticks, e.g. '40%'
xticks = mtick.FormatStrFormatter(fmt)
ax.xaxis.set_major_formatter(xticks)
if ypct:
fmt = '%.0f%%' # Format you want the ticks, e.g. '40%'
yticks = mtick.FormatStrFormatter(fmt)
ax.yaxis.set_major_formatter(yticks)
if ylogscale:
ax.set(yscale="log")
if xlogscale:
ax.set(xscale="log")
handles, labels = ax.get_legend_handles_labels()
handles = np.array(handles)
labels = np.array(labels)
handles = list(handles[(labels!='region') & (labels!='incomeLevel')])
labels = list(labels[(labels!='region') & (labels!='incomeLevel')])
ax.legend(handles=handles, labels=labels, fontsize=legend_fontsize, loc=loc)
if save:
plt.savefig(pathgraphs + filename, dpi=300, bbox_inches='tight')
return fig
palette=sns.color_palette("Blues_r", df['incomeLevel'].unique().shape[0]+6)[:df['incomeLevel'].unique().shape[0]*2:2]
fig = my_xy_line_plot(df,
x='year',
y='ln_gdp_pc_e',
xlabel='Year',
ylabel='Log[GDP per capita]',
filename='ln-gdp-pc-income-groups-TS-pwt.pdf',
hue='incomeLevel',
hue_order=['High Income', 'Upper Middle Income', 'Lower Middle Income', 'Low Income'],
palette=palette,
OLS=False,
labels=False,
legend_fontsize=16,
loc='lower right',
save=True)
fig
#palette=sns.color_palette("Blues_r", wdi['region'].unique().shape[0]+6)[:wdi['region'].unique().shape[0]*2:2]
fig = my_xy_line_plot(df,
x='year',
y='gdp_pc_e',
xlabel='Year',
ylabel='GDP per capita',
ylogscale=True,
filename='ln-gdp-pc-regions-TS-pwt.pdf',
style='region',
style_order=['East Asia & Pacific', 'Europe & Central Asia',
'Latin America & Caribbean ', 'Middle East & North Africa',
'North America', 'South Asia', 'Sub-Saharan Africa '],
#palette=palette,
OLS=False,
labels=False,
legend_fontsize=12,
loc='lower right',
save=True)
fig
url = 'https://plotly.com/python/'
IFrame(url, width=800, height=400)
symbols = ['circle', 'x', 'square', 'cross', 'diamond', 'star-diamond', 'triangle-up']
fig = px.scatter(dffig,
x="ln_pop",
y="ln_gdp_pc_e",
color='region',
symbol='region',
symbol_sequence=symbols,
hover_name='name',
hover_data=['iso3c', 'ln_pop', 'gdp_pc_o', 'gdp_pc_e'],
size='ln_gdp_pc_o',
size_max=15,
trendline="ols",
trendline_scope="overall",
trendline_color_override="black",
labels={
"ln_pop": "Log[Population (" + str(year) + ")]",
"ln_gdp_pc_o": "Log[GDP per capita (" + str(year) + ")] (Output Based)",
"ln_gdp_pc_e": "Log[GDP per capita (" + str(year) + ")] (Expenditure Based)",
"gdp_pc_o": "GDP per capita (" + str(year) + ") (Output Based)",
"gdp_pc_e": "GDP per capita (" + str(year) + ") (Expenditure Based)",
"region": "WB Region"
},
opacity=0.75,
height=800,
)
fig.show()
fig.update_traces(marker=dict(#size=12,
line=dict(width=2,
color='DarkSlateGrey')),
selector=dict(mode='markers'))
fig.show()
tr_line=[]
for k, trace in enumerate(fig.data):
if trace.mode is not None and trace.mode == 'lines':
tr_line.append(k)
print(tr_line)
for id in tr_line:
fig.data[id].update(line_width=3)
fig.show()
fig.update_layout(legend=dict(
yanchor="bottom",
y=0.01,
xanchor="left",
x=0.01
))
fig.show()
fig.write_image(pathgraphs + "ln-gdp-pc-pop-plotly-pwt.pdf", height=1000, width=1500, scale=4)
results = px.get_trendline_results(fig)
results.px_fit_results.iloc[0].summary()
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'}
url = 'https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip'
r = requests.get(url, headers=headers)
countries = gp.read_file(io.BytesIO(r.content))
fig, ax = plt.subplots(figsize=(15,10))
countries.plot(ax=ax)
ax.set_title("WGS84 (lat/lon)", fontdict={'fontsize':34})
dffig2 = countries.merge(dffig, left_on='ADM0_A3', right_on='iso3c')
fig, ax = plt.subplots(figsize=(15,10))
dffig2.plot(column='gdp_pc_e', ax=ax, cmap='Reds')
ax.set_title("WGS84 (lat/lon)", fontdict={'fontsize':34})
url = 'https://residentmario.github.io/geoplot/'
IFrame(url, width=800, height=400)
gplt.polyplot(
countries, projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
edgecolor='white', facecolor='lightgray',
rasterized=True,
extent=[-180, -90, 180, 90],
)
gplt.choropleth(dffig2, hue='gdp_pc_e',
projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
edgecolor='white',
linewidth=1,
cmap='Reds',
legend=True,
scheme='FisherJenks',
legend_kwargs={'bbox_to_anchor':(0.3, 0.5),
'frameon': True,
'title':'GDP per capita',
},
figsize=(12,8),
rasterized=True,
)
ax = gplt.choropleth(dffig2, hue='gdp_pc_e', projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
edgecolor='white', linewidth=1,
cmap='Reds', legend=True,
scheme='FisherJenks',
legend_kwargs={'bbox_to_anchor':(0.3, 0.5),
'frameon': True,
'title':'GDP per capita',
},
figsize=(15,10),
rasterized=True,
)
gplt.polyplot(countries, projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
edgecolor='white', facecolor='lightgray',
ax=ax,
rasterized=True,
extent=[-180, -90, 180, 90],
)
# Functions for plotting
def center_wrap(text, cwidth=32, **kw):
'''Center Text (to be used in legend)'''
lines = text
#lines = textwrap.wrap(text, **kw)
return "\n".join(line.center(cwidth) for line in lines)
def MyChloropleth(mydf, myfile='fig', myvar='gdp_pc_e',
mylegend='GDP per capita',
k=5,
extent=[-180, -90, 180, 90],
bbox_to_anchor=(0.25, 0.5),
edgecolor='white', facecolor='lightgray',
scheme='FisherJenks',
save=True,
percent=False,
rn=0,
**kwargs):
# Chloropleth
# Color scheme
if scheme=='EqualInterval':
scheme = mc.EqualInterval(mydf[myvar], k=k)
elif scheme=='Quantiles':
scheme = mc.Quantiles(mydf[myvar], k=k)
elif scheme=='BoxPlot':
scheme = mc.BoxPlot(mydf[myvar], k=k)
elif scheme=='FisherJenks':
scheme = mc.FisherJenks(mydf[myvar], k=k)
elif scheme=='FisherJenksSampled':
scheme = mc.FisherJenksSampled(mydf[myvar], k=k)
elif scheme=='HeadTailBreaks':
scheme = mc.HeadTailBreaks(mydf[myvar], k=k)
elif scheme=='JenksCaspall':
scheme = mc.JenksCaspall(mydf[myvar], k=k)
elif scheme=='JenksCaspallForced':
scheme = mc.JenksCaspallForced(mydf[myvar], k=k)
elif scheme=='JenksCaspallSampled':
scheme = mc.JenksCaspallSampled(mydf[myvar], k=k)
elif scheme=='KClassifiers':
scheme = mc.KClassifiers(mydf[myvar], k=k)
# Format legend
upper_bounds = scheme.bins
# get and format all bounds
bounds = []
for index, upper_bound in enumerate(upper_bounds):
if index == 0:
lower_bound = mydf[myvar].min()
else:
lower_bound = upper_bounds[index-1]
# format the numerical legend here
if percent:
bound = f'{lower_bound:.{rn}%} - {upper_bound:.{rn}%}'.format(width=rn)
else:
bound = f'{float(lower_bound):,.{rn}f} - {float(upper_bound):,.{rn}f}'.format(width=rn)
bounds.append(bound)
legend_labels = bounds
#Plot
ax = gplt.choropleth(
mydf, hue=myvar, projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
edgecolor='white', linewidth=1,
cmap='Reds', legend=True,
scheme=scheme,
legend_kwargs={'bbox_to_anchor': bbox_to_anchor,
'frameon': True,
'title':mylegend,
},
legend_labels = legend_labels,
figsize=(24, 16),
rasterized=True,
)
gplt.polyplot(
countries, projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
edgecolor=edgecolor, facecolor=facecolor,
ax=ax,
rasterized=True,
extent=extent,
)
if save:
plt.savefig(pathgraphs + myfile + '.pdf', dpi=300, bbox_inches='tight')
plt.savefig(pathgraphs + myfile + '.png', dpi=300, bbox_inches='tight')
pass
mylegend = center_wrap(["GDP per capita in " + str(year), "(Expenditure Based)"], cwidth=32, width=32)
MyChloropleth(dffig2, myfile='fig-gdp-pc-' + str(year) + '-pwt', myvar='gdp_pc_e', mylegend=mylegend, k=10, scheme='Quantiles', save=False)
url = 'https://plotly.com/python/maps/'
IFrame(url, width=800, height=400)
scheme = mc.Quantiles(dffig2['gdp_pc_e'], k=5)
classifier = mc.Quantiles.make(k=5, rolling=True)
dffig2['gdp_pc_q'] = classifier(dffig2['gdp_pc_e'])
dffig2['gdp_pc_qc'] = dffig2['gdp_pc_q'].apply(lambda x: scheme.get_legend_classes()[x].replace('[ ', '[').replace('( ', '('))
fig = px.choropleth(dffig2.sort_values('gdp_pc_q', ascending=True),
locations="iso3c",
color="gdp_pc_qc",
hover_name='name',
hover_data=['iso3c', 'ln_pop', 'gdp_pc_o', 'gdp_pc_e'],
labels={
"gdp_pc_qc": "GDP per capita (" + str(year) + ") Range",
'iso3c':'ISO code',
"ln_pop": "Log[Population (" + str(year) + ")]",
"ln_gdp_pc_o": "Log[GDP per capita (" + str(year) + ")] (Output Based)",
"ln_gdp_pc_e": "Log[GDP per capita (" + str(year) + ")] (Expenditure Based)",
"gdp_pc_o": "GDP per capita (" + str(year) + ") (Output Based)",
"gdp_pc_e": "GDP per capita (" + str(year) + ") (Expenditure Based)",
},
color_discrete_sequence=px.colors.sequential.Reds,
height=600,
width=1100,
)
# Change legend position
fig.update_layout(legend=dict(
yanchor="bottom",
y=0.15,
xanchor="left",
x=0.05
))
fig.show()
fig = px.choropleth(dffig2.sort_values('gdp_pc_q', ascending=True),
locations="iso3c",
color="gdp_pc_qc",
hover_name='name',
hover_data=['iso3c', 'ln_pop', 'gdp_pc_o', 'gdp_pc_e'],
labels={
"gdp_pc_qc": "GDP per capita (" + str(year) + ") Range",
'iso3c':'ISO code',
"ln_pop": "Log[Population (" + str(year) + ")]",
"ln_gdp_pc_o": "Log[GDP per capita (" + str(year) + ")] (Output Based)",
"ln_gdp_pc_e": "Log[GDP per capita (" + str(year) + ")] (Expenditure Based)",
"gdp_pc_o": "GDP per capita (" + str(year) + ") (Output Based)",
"gdp_pc_e": "GDP per capita (" + str(year) + ") (Expenditure Based)",
},
color_discrete_sequence=px.colors.sequential.Blues,
height=600,
width=1100,
)
# Change legend position
fig.update_layout(legend=dict(
yanchor="bottom",
y=0.15,
xanchor="left",
x=0.05
))
fig.show()
fig = px.choropleth(dffig,
locations="iso3c",
color="ln_gdp_pc_e",
hover_name='name',
hover_data=['iso3c', 'ln_pop', 'gdp_pc_o', 'gdp_pc_e'],
labels={
"gdp_pc_qc": "GDP per capita (" + str(year) + ") Range",
'iso3c':'ISO code',
"ln_pop": "Log[Population (" + str(year) + ")]",
"ln_gdp_pc_o": "Log[GDP per capita (" + str(year) + ")] (Output Based)",
"ln_gdp_pc_e": "Log[GDP per capita (" + str(year) + ")] (Expenditure Based)",
"gdp_pc_o": "GDP per capita (" + str(year) + ") (Output Based)",
"gdp_pc_e": "GDP per capita (" + str(year) + ") (Expenditure Based)",
},
#color_continuous_scale=px.colors.sequential.Plasma,
color_continuous_scale="Reds",
height=800,
width=1100,
)
fig.show()
fig.update_layout(coloraxis_colorbar=dict(
orientation = 'h',
yanchor="bottom",
xanchor="left",
y=-.01,
x=0,
))
fig.update_coloraxes(colorbar_title_side='top')
fig.show()
# Change legend position
fig.update_layout(legend=dict(
yanchor="top",
y=0.99,
xanchor="center",
x=0.01,
orientation='h',
))
fig.show()
fig = go.Figure(data=go.Choropleth(
locations = dffig['iso3c'],
z = dffig['gdp_pc_e'],
text = dffig['name'],
colorscale = 'Blues',
autocolorscale=False,
reversescale=True,
marker_line_color='darkgray',
marker_line_width=0.5,
colorbar_tickprefix = '$',
colorbar_title = 'GDP pc',
)
)
fig.update_layout(
autosize=False,
width=800,
height=400,
margin=dict(
l=5,
r=5,
b=10,
t=10,
pad=1
),
paper_bgcolor="LightSteelBlue",
)
fig.show()
fig = go.Figure(data=go.Choropleth(
locations = dffig['iso3c'],
z = dffig['gdp_pc_e'],
text = dffig['name'],
colorscale = 'Blues',
autocolorscale=False,
reversescale=True,
marker_line_color='darkgray',
marker_line_width=0.5,
colorbar_tickprefix = '$',
colorbar_title = 'GDP per capita',
)
)
fig.update_layout(
autosize=False,
width=1000,
height=600,
margin=dict(
l=1,
r=1,
b=1,
t=1,
pad=.1
),
paper_bgcolor="LightSteelBlue",
)
# Change legend position
cb = fig.data[0].colorbar
cb.orientation = 'h'
cb.yanchor = 'bottom'
cb.xanchor = 'center'
cb.y = .1
cb.title.side = 'top'
fig.show()
import pandas as pd
pwt_data['Capital per Capita'] = pwt_data['Capital stock at constant 2017 national prices (in mil. 2017US$)'] / pwt_data['Population']
pwt_data['Capital per Worker'] = pwt_data['Capital stock at constant 2017 national prices (in mil. 2017US$)'] / pwt_data['Number of persons engaged (in millions)']
pwt_data['Capital per Effective Worker'] = pwt_data['Capital stock at constant 2017 national prices (in mil. 2017US$)'] / (pwt_data['Number of persons engaged (in millions)'] * pwt_data['Average annual hours worked by persons engaged'])
pwt_data['GDP per Capita'] = pwt_data['GDP at constant 2017 national prices (in mil. 2017US$)'] / pwt_data['Population']
pwt_data['GDP per Worker'] = pwt_data['GDP at constant 2017 national prices (in mil. 2017US$)'] / pwt_data['Number of persons engaged (in millions)']
pwt_data['GDP per Human Capital'] = pwt_data['GDP at constant 2017 national prices (in mil. 2017US$)'] / pwt_data['Human capital index']
pwt_data['GDP per Unit of Effective Labor'] = pwt_data['GDP at constant 2017 national prices (in mil. 2017US$)'] / (pwt_data['Number of persons engaged (in millions)'] * pwt_data['Average annual hours worked by persons engaged'])
print(pwt_data[['Country', 'Capital per Capita', 'Capital per Worker', 'Capital per Effective Worker', 'GDP per Capita', 'GDP per Worker', 'GDP per Human Capital', 'GDP per Unit of Effective Labor']])
import pandas as pd
import matplotlib.pyplot as plt
years_of_interest = [1950, 1970, 1990, 2010, df.index.max()]
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(15, 10))
for i, year in enumerate(years_of_interest):
subset = df[df.index.year == year]
axes[0, 0].scatter(subset['GDP'], subset['Capital'], label=year)
axes[0, 0].set_title('GDP vs Capital (Aggregate)')
axes[0, 1].scatter(subset['GDP'] / subset['Population'], subset['Capital'] / subset['Population'], label=year)
axes[0, 1].set_title('GDP per Capita vs Capital per Capita')
axes[0, 2].scatter(subset['GDP'] / subset['Number of persons engaged (in millions)'],
subset['Capital'] / subset['Number of persons engaged (in millions)'], label=year)
axes[0, 2].set_title('GDP per Worker vs Capital per Worker')
axes[1, 0].scatter(subset['GDP'] / (subset['Number of persons engaged (in millions)'] * subset['Average annual hours worked by persons engaged']),
subset['Capital'] / (subset['Number of persons engaged (in millions)'] * subset['Average annual hours worked by persons engaged']), label=year)
axes[1, 0].set_title('GDP per Effective Worker vs Capital per Effective Worker')
axes[1, 1].scatter(subset['GDP'] / (subset['Number of persons engaged (in millions)'] * subset['Average annual hours worked by persons engaged']),
subset['Capital'] / (subset['Number of persons engaged (in millions)'] * subset['Average annual hours worked by persons engaged']), label=year)
axes[1, 1].set_title('GDP per Unit of Effective Labor vs Capital per Unit of Effective Labor')
for ax in axes.flatten():
ax.legend()
plt.tight_layout()
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
df['Year'] = df.index.year
measures = ['Aggregate', 'Per Capita', 'Per Worker', 'Per Effective Worker', 'Per Unit of Effective Labor']
for measure in measures:
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(15, 10))
for i, income_group in enumerate(df['Income Group'].unique()):
subset = df[df['Income Group'] == income_group]
axes[0, i].plot(subset['Year'], subset[f'GDP {measure}'], label=f'{income_group} - GDP')
axes[0, i].set_title(f'GDP {measure} - {income_group}')
axes[1, i].plot(subset['Year'], subset[f'Capital {measure}'], label=f'{income_group} - Capital')
axes[1, i].set_title(f'Capital {measure} - {income_group}')
for i, region in enumerate(df['Region'].unique()):
subset = df[df['Region'] == region]
axes[0, i + 3].plot(subset['Year'], subset[f'GDP {measure}'], label=f'{region} - GDP')
axes[0, i + 3].set_title(f'GDP {measure} - {region}')
axes[1, i + 3].plot(subset['Year'], subset[f'Capital {measure}'], label=f'{region} - Capital')
axes[1, i + 3].set_title(f'Capital {measure} - {region}')
for ax in axes.flatten():
ax.legend()
plt.tight_layout()
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
df['Year'] = df.index.year
measures = ['Aggregate', 'Per Capita', 'Per Worker', 'Per Effective Worker', 'Per Unit of Effective Labor']
for measure in measures:
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(15, 10))
for i, income_group in enumerate(df['Income Group'].unique()):
subset = df[df['Income Group'] == income_group]
axes[0, i].plot(subset['Year'], subset[f'GDP {measure}'], label=f'{income_group} - GDP')
axes[0, i].set_title(f'GDP {measure} - {income_group}')
axes[1, i].plot(subset['Year'], subset[f'Capital {measure}'], label=f'{income_group} - Capital')
axes[1, i].set_title(f'Capital {measure} - {income_group}')
for i, region in enumerate(df['Region'].unique()):
subset = df[df['Region'] == region]
axes[0, i + 3].plot(subset['Year'], subset[f'GDP {measure}'], label=f'{region} - GDP')
axes[0, i + 3].set_title(f'GDP {measure} - {region}')
axes[1, i + 3].plot(subset['Year'], subset[f'Capital {measure}'], label=f'{region} - Capital')
axes[1, i + 3].set_title(f'Capital {measure} - {region}')
for ax in axes.flatten():
ax.legend()
plt.tight_layout()
plt.show()
def my_kde_plots_joint(dataset='pwt', myyears=[2000, 2020], myvar='gdp_pc_e', myvar_label='Income per capita'):
'''
This function plots the KDE-plot of log income per capita in two years, where
dataset: is either pwt or wdi (you need to ensure the variable 'gdp_pc' exists)
myyears: is a list of years
'''
df = eval(dataset + '.loc[' + dataset +'.year.isin(myyears)].copy().reset_index(drop=True)')
# Plot
mycolors = sns.color_palette("Paired",n_colors=len(myyears))
fig, ax = plt.subplots()
for t in range(len(myyears)):
sns.kdeplot(df.loc[df.year==myyears[t], myvar], ax=ax, fill=True, label=str(myyears[t]), linewidth=2, color=mycolors[t])
ax.set_xlabel(myvar_label)
ax.set_ylabel('Density of Countries')
ax.legend()
plt.savefig(pathgraphs + dataset +'_' + myvar + '_' + str(myyears[0]) + '_' + str(myyears[-1]) + '.pdf', dpi=300, bbox_inches='tight')
plt.savefig(pathgraphs + dataset +'_' + myvar + '_' + str(myyears[0]) + '_' + str(myyears[-1]) + '.png', dpi=300, bbox_inches='tight')
plt.draw()
return "Done"
my_kde_plots_joint(dataset='pwt', myyears=[2000, 2010, 2019], myvar='ln_pop', myvar_label='Log[Population]')
import geopandas as gpd
import matplotlib.pyplot as plt
world_with_data = world.merge(df[df['Year'] == 2015], how='left', left_on='country_code', right_index=True)
fig, ax = plt.subplots(1, 1, figsize=(15, 10))
world_with_data.plot(column='Capital Measure 2015', cmap='Blues', linewidth=0.8, ax=ax, edgecolor='0.8', legend=True)
ax.set_title('Capital Measure in 2015 Across the World')
plt.show()
import pandas as pd
import statsmodels.api as sm
last_year_data = df[df['Year'] == df['Year'].max()]
variable_pairs = [('GDP per Capita', 'Capital per Capita'),
('GDP per Worker', 'Capital per Worker'),
('GDP per Effective Worker', 'Capital per Effective Worker'),
('GDP per Unit of Effective Labor', 'Capital per Unit of Effective Labor')]
regression_results = pd.DataFrame(columns=['Variable Pair', 'Coefficient', 'Elasticity'])
for pair in variable_pairs:
x_var, y_var = pair
x_data = last_year_data[x_var].values
y_data = last_year_data[y_var].values
x_data = sm.add_constant(x_data)
model = sm.OLS(y_data, x_data).fit()
coefficient = model.params[1]
elasticity = coefficient * (x_data.mean() / y_data.mean())
regression_results = regression_results.append({'Variable Pair': f'{x_var} vs {y_var}',
'Coefficient': coefficient,
'Elasticity': elasticity}, ignore_index=True)
print(regression_results)
coefs = []
models = [mod, mod2, mod3, mod4]
model_names = ['Model 1', 'Model 2', 'Model 3', 'Model 4']
for m in range(len(models)):
coefs2 = pd.concat([models[m].params, models[m].bse, models[m].conf_int()], axis=1).reset_index()
coefs2.columns = ['variable', 'b', 'se', 'lo', 'hi']
coefs2['model'] = model_names[m]
coefs.append(coefs2)
coefs = pd.concat(coefs).reset_index(drop=True)
alpha = 0.05
variable = 'ln_pop'
variable_label = 'Log[Population]'
fig, ax = plt.subplots()
ax.errorbar(coefs.loc[coefs.variable==variable].model,
coefs.loc[coefs.variable==variable].b,
yerr=coefs.loc[coefs.variable==variable].apply(lambda x: norm.ppf(1-alpha/2)*x.se, axis=1),
c=sns.color_palette("YlGnBu_d",n_colors=1)[0], capsize=50, fmt='none', label='')
sns.pointplot(data=coefs.loc[coefs.variable==variable],
x="model", y="b", hue="variable",
capsize=.5, palette="YlGnBu_d",
join=False,
ax = ax, label=variable_label)
ax.set_xlabel('Model')
ax.set_ylabel('Estimate')
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles, [variable_label], title='Variable', )