Motivation

Setup

#!pip install --quiet wbgapi

# Numerical / data-handling stack
import numpy as np
import pandas as pd

# Plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

# Chart Studio credentials must be registered before figures can be uploaded.
# NOTE(review): 'HERE' is a placeholder; supply the real key at run time and
# never commit an actual API key to version control.
import chart_studio
chart_studio.tools.set_credentials_file(username='econdata777', api_key='HERE')
import chart_studio.plotly as save2cs

# Statistics and inequality measures
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats
import inequality

# World Bank data API client
import wbgapi as wb

WBGAPI library

Sources

# Show the available sources; the `db=` argument used below selects one of them.
wb.source.info()

wb.series.info(db = 12) # Series in the Education Statistics Database

Economies

# Summary listing of the economies known to the API.
wb.economy.info()

# The same economy table as a pandas DataFrame; its 'incomeLevel' column is
# joined onto indicator data later in this notebook.
wb.economy.DataFrame()

wb.economy.info(wb.income.members('HIC')) # high-income economies

Regions

# List the regions; their codes can be passed to wb.region.members().
wb.region.info()

# Population for African countries, every other year (2010, 2012, ..., 2018)
wb.data.DataFrame('SP.POP.TOTL', wb.region.members('AFR'), range(2010, 2020, 2))

Income groups

# List the income groups (e.g. 'HIC', passed to wb.income.members() earlier).
wb.income.info()

Topics

# List the topics; a topic id can be passed as wb.series.info(topic=...).
wb.topic.info()

Series

# Browse the series (indicator) catalogue; `db` and `topic` narrow the listing.
wb.series.info() # WDI by default

wb.series.info(db = 6) # Series in the Debt Statistics database

wb.series.info(topic = 3) # Series in the topic Economy & Growth

Search series

# Not working:
# wb.series.info(q='gdp')

Metadata of series

# Fetch the metadata record of an individual series by its indicator code.
wb.series.metadata.get('NY.GNS.ICTR.GN.ZS')

wb.series.metadata.get('NY.GDP.PCAP.PP.KD')

Import data

Function DataFrame

# Print the documentation of the main data-download function.
help(wb.data.DataFrame)

# 5 years of population data (with economy names) from 2010 to 2014
wb.data.DataFrame('SP.POP.TOTL', time=range(2010, 2015), labels=True)

# Most recent poverty and income data for Latin American countries (LAC)
wb.data.DataFrame(
    ['SI.POV.NAHC', 'NY.GDP.PCAP.CD'],
    economy=wb.region.members('LAC'),
    mrnev=1,           # most recent non-empty value (time period varies)
    timeColumns=True,  # show the time dimension for each series/economy
    labels=True,
)

# Metadata for the two indicators used in the surrounding examples.
wb.series.metadata.get('SI.POV.NAHC') # Note: national poverty lines are not necessarily comparable across countries

wb.series.metadata.get('EN.ATM.CO2E.PC')

# Most recent CO2 emissions per capita for each country, joined with its income group
(
    wb.data.DataFrame('EN.ATM.CO2E.PC', mrnev=1, labels=True)
    .join(wb.economy.DataFrame()['incomeLevel'])
)

# Top 10 emitters per capita
(
    wb.data.DataFrame('EN.ATM.CO2E.PC', mrnev=1, labels=True)
    .sort_values('EN.ATM.CO2E.PC', ascending=False)
    .head(10)
)

Long and wide panel data

# Import GDP per capita, PPP (current international $).
# NOTE: 'NY.GDP.PCAP.PP.CD' is the current-price series; the constant-price
# counterpart is 'NY.GDP.PCAP.PP.KD', which is used further below.
GDPpc_long = wb.data.DataFrame(
    ['NY.GDP.PCAP.PP.CD'],
    time=range(2000, 2021),
    labels=True,
    skipAggs=True,
    skipBlanks=True,
    columns='series',
).reset_index()
GDPpc_long

# Rearrange the data in wide form (one row per economy, one column per period)
# and drop economies that do not have data for every year.
GDPpc_wide = pd.pivot_table(
    GDPpc_long,
    values='NY.GDP.PCAP.PP.CD',
    index='economy',
    columns='time',
).dropna()
GDPpc_wide

# GDP per capita (current US$) for the members of region 'EMU', 1960-2019.
# Download once and reuse: the second view only differs by dropping rows that
# contain missing values.
emu_gdppc = wb.data.DataFrame('NY.GDP.PCAP.CD', wb.region.members('EMU'), time=range(1960, 2020))
emu_gdppc

emu_gdppc.dropna()

# Indicator 'EG.ELC.RNEW.ZS' for five economies, every five years (2000-2015)
ren = wb.data.DataFrame('EG.ELC.RNEW.ZS', ['DEU', 'FRA', 'ESP', 'GBR', 'USA'], time=range(2000, 2016, 5))
ren

Countries and regions

# Economy attributes (aggregates excluded) with the economy code moved out of
# the index into a regular column so it can be used as a merge key.
regionalIndentifiers = wb.economy.DataFrame(skipAggs=True).reset_index()
regionalIndentifiers

Merge datasets

# Merge regional identifiers with long-form panel data (merge one to many)
# NOTE(review): right_on="index" assumes reset_index() on the economy table
# produced a column literally named 'index' - confirm against the installed
# wbgapi version.
GDPpc_longWITHri = pd.merge(
    GDPpc_long,
    regionalIndentifiers,
    how="left",
    left_on="economy",
    right_on="index",
)
GDPpc_longWITHri

# GDP per capita over time, one line per country.
# The commented-out options are optional toggles (log axis, facets by income group).
px.line(
    GDPpc_longWITHri,
    x='Time',
    y='NY.GDP.PCAP.PP.CD',
    #log_y= True,
    color='Country',
    #facet_col = 'incomeLevel',
    #facet_col_wrap= 2,
    labels={'NY.GDP.PCAP.PP.CD': 'GDP per capita'},
)

# Same chart on a log scale, split into panels by income level (two per row).
px.line(
    GDPpc_longWITHri,
    x='Time',
    y='NY.GDP.PCAP.PP.CD',
    log_y=True,
    color='Country',
    facet_col='incomeLevel',
    facet_col_wrap=2,
    labels={'NY.GDP.PCAP.PP.CD': 'GDP per capita'},
)

# Save to Chart Studio
# Build the faceted log-scale figure and keep a handle on it so it can be
# uploaded; the upload call itself is left disabled.
figPlotly20220520 = px.line(
    GDPpc_longWITHri,
    x='Time',
    y='NY.GDP.PCAP.PP.CD',
    log_y=True,
    color='Country',
    facet_col='incomeLevel',
    facet_col_wrap=2,
    labels={'NY.GDP.PCAP.PP.CD': 'GDP per capita'},
)
#save2cs.plot(figPlotly20220520, filename = 'figPlotly20220520', auto_open=True)

Visualize data

# Metadata of the indicator plotted in the bar and line examples below.
wb.series.metadata.get('EG.ELC.RNEW.ZS')

Bar plots

# Bar chart drawn straight from the downloaded frame with pandas' .plot.bar().
# The trailing semicolon is presumably there to suppress the notebook's echo
# of the returned Axes object.
wb.data.DataFrame('EG.ELC.RNEW.ZS', ['DEU','FRA','ESP','GBR','USA'], time=range(2000,2016,5)).plot.bar();

Line plots

# Transpose so that time runs along the x axis and each economy becomes a line.
wb.data.DataFrame('EG.ELC.RNEW.ZS', ['DEU', 'FRA', 'ESP', 'GBR', 'USA'], time=range(2000, 2016, 5)).T.plot();

# Same data prepared for Plotly: numeric time keys, transposed, and with the
# time index moved into a column named 'index'.
df8 = wb.data.DataFrame(
    'EG.ELC.RNEW.ZS',
    ['DEU', 'FRA', 'ESP', 'GBR', 'USA'],
    time=range(2000, 2016, 5),
    numericTimeKeys=True,
).T.reset_index()
df8

px.line(
    df8,
    x='index',
    y=['DEU', 'FRA', 'ESP', 'GBR', 'USA'],
    labels=dict(index='', value='Renewable electricity share', variable='Country'),
)

# Compare the metadata of two GDP-per-capita indicators.
wb.series.metadata.get('NY.GDP.PCAP.CD')

# What about this other indicator?
wb.series.metadata.get('NY.GDP.PCAP.PP.KD')

# Basic chart of income growth for countries in the South Asia region.
# Country names become the index, then the frame is transposed so that each
# country is drawn as one line over time.
wb.data.DataFrame(
    'NY.GDP.PCAP.PP.KD',
    economy=wb.region.members('SAS'),
    time=range(2000, 2021),
    numericTimeKeys=True,
    labels=True,
).set_index('Country').transpose().plot(title='GDP per capita in South Asia');

# The underlying table (here restricted to 2000-2015).
wb.data.DataFrame(
    'NY.GDP.PCAP.PP.KD',
    economy=wb.region.members('SAS'),
    time=range(2000, 2016),
    numericTimeKeys=True,
    labels=True,
)

# Import GDP per capita (PPP Constant international $)
# Long form for South Asia, sorted so that each country's observations are
# in chronological order when drawn as a line.
GDPpc_long_SAS = wb.data.DataFrame(
    ['NY.GDP.PCAP.PP.KD'],
    economy=wb.region.members('SAS'),
    time=range(2000, 2021),
    labels=True,
    skipAggs=True,
    #skipBlanks=True,
    columns='series',
).reset_index().sort_values(['economy', 'Time'])
GDPpc_long_SAS

px.line(
    GDPpc_long_SAS,
    x='Time',
    y='NY.GDP.PCAP.PP.KD',
    color='Country',
    labels={"NY.GDP.PCAP.PP.KD": "GDP per capita"},
)

Scatter plots

# Import GDP per capita and secondary school enrollment
# NOTE(review): range(2017, 2019) yields 2017 and 2018 only; if 2019 is meant
# to be included (as the variable name suggests) the stop value should be 2020.
df2017_2019 = wb.data.DataFrame(
    ['NY.GDP.PCAP.PP.KD', 'SE.SEC.NENR'],
    time=range(2017, 2019),
    labels=True,
    skipAggs=True,
    skipBlanks=True,
    columns='series',
).reset_index()
df2017_2019

# Secondary school enrollment vs. GDP per capita for 2017, with an OLS trend
# line fitted on the log of the y variable.
# NOTE(review): in the flattened source `size_max=60` follows the disabled
# `size` option on the same line; it is kept disabled with it here - confirm.
px.scatter(
    df2017_2019.query("Time == '2017'"),
    x="SE.SEC.NENR",
    y="NY.GDP.PCAP.PP.KD",
    log_y=True,  # log scale for the y axis
    trendline="ols",
    trendline_options=dict(log_y=True),
    #color="region",
    #size="pop", size_max=60,
    hover_name="Country",
    labels={
        "SE.SEC.NENR": "School enrollment, secondary (% net)",
        "NY.GDP.PCAP.PP.KD": "GDP per capita",
    },
)

Maps

# World map of GDP per capita for 2017; economies are located by the codes
# in the 'economy' column.
px.choropleth(
    df2017_2019.query("Time == '2017'"),
    locations="economy",
    color="NY.GDP.PCAP.PP.KD",
    hover_name="Country",
    color_continuous_scale=px.colors.sequential.Plasma,
    projection="natural earth",
    labels={"NY.GDP.PCAP.PP.KD": "GDP per capita"},
)

Cross-country inequality

# GDP per capita ('NY.GDP.PCAP.KD') for the members of 'WLD', 1970-2020,
# keeping only economies with a complete series.
# NOTE: this rebinds GDPpc_wide, replacing the PPP-based table built earlier.
GDPpc_wide = wb.data.DataFrame(
    'NY.GDP.PCAP.KD',
    wb.region.members('WLD'),
    skipAggs=True,
    time=range(1970, 2021),
).dropna()
GDPpc_wide

# Coefficient of variation, computed separately for every column of GDPpc_wide
# (presumably one column per year - confirm against the downloaded frame).
cv = GDPpc_wide.apply(stats.variation, axis=0)


def gini_by_col(column):
    """Return the Gini index of one column.

    Parameters
    ----------
    column : pandas.Series
        Column handed over by ``DataFrame.apply``; only its ``.values`` are used.

    Returns
    -------
    The ``g`` attribute of ``inequality.gini.Gini`` built from those values.
    """
    return inequality.gini.Gini(column.values).g


gini = GDPpc_wide.apply(gini_by_col, axis=0)


def theil_by_col(column):
    """Return the Theil index of one column.

    Parameters
    ----------
    column : pandas.Series
        Column handed over by ``DataFrame.apply``; only its ``.values`` are used.

    Returns
    -------
    The ``T`` attribute of ``inequality.theil.Theil`` built from those values.
    """
    return inequality.theil.Theil(column.values).T


theil = GDPpc_wide.apply(theil_by_col, axis=0)

# Collect the three measures side by side, indexed by the columns of GDPpc_wide.
df = pd.DataFrame({'cv': cv, 'gini': gini, 'theil': theil})
df.round(2)

# Plot each inequality measure against the index of `df`
# (i.e. the columns of GDPpc_wide), first one by one and then two together.
px.line(df, x=df.index, y="gini")

px.line(df, x=df.index, y="cv")

px.line(df, x=df.index, y= 'theil')

px.line(df, x=df.index, y= ['theil','gini'])

World evolution

# Unweighted cross-economy average of GDP per capita for every column.
# NOTE: this rebinds `df`, replacing the inequality table built earlier.
df = GDPpc_wide.mean(axis=0)

px.line(
    df,
    x=df.index,
    y=df.values,
)

Motivation