3. Working with World Development Indicators

# Basic Packages from __future__ import division import os from datetime import datetime # Web & file access import requests import io # Import display options for showing websites from IPython.display import IFrame, HTML

Run to view results

# Plotting import matplotlib as mpl import matplotlib.pyplot as plt import matplotlib.ticker as mtick %pylab --no-import-all %matplotlib inline import seaborn as sns sns.set(rc={'figure.figsize':(11.7,8.27)}) sns.set_context("talk") import plotly.express as px import plotly.graph_objects as go from plotnine import ggplot, geom_point, aes, stat_smooth, facet_wrap # Next line can import all of plotnine, but may overwrite things? Better import each function/object you need #from plotnine import *

Run to view results

# Data import pandas as pd import numpy as np from pandas_datareader import data, wb # GIS & maps import geopandas as gpd gp = gpd import georasters as gr import geoplot as gplt import geoplot.crs as gcrs import mapclassify as mc import textwrap

Run to view results

# Data Munging from itertools import product, combinations import difflib import pycountry import geocoder from geonamescache.mappers import country mapper = country(from_key='name', to_key='iso3') mapper2 = country(from_key='iso3', to_key='iso') mapper3 = country(from_key='iso3', to_key='name') # Regressions & Stats from scipy.stats import norm import statsmodels.formula.api as smf from stargazer.stargazer import Stargazer, LineLocation

Run to view results

# Paths
# Output folders: downloaded data goes to pathout, figures/tables to pathgraphs.
pathout = './data/'
pathgraphs = './graphs/'
for _path in (pathout, pathgraphs):
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it cannot
    # fail if the folder appears between the check and the creation, and it
    # also creates missing parent folders.
    os.makedirs(_path, exist_ok=True)

Run to view results

currentYear = datetime.now().year year = min(2020, currentYear-2)

Run to view results

url = 'https://data.worldbank.org/share/widget?indicators=NY.GDP.PCAP.PP.KD' IFrame(url, width=500, height=300)

Run to view results

url = 'https://pandas-datareader.readthedocs.io/en/latest/remote_data.html#remote-data-wb' IFrame(url, width=800, height=400)

Run to view results

# Country metadata table from the World Bank API.
wbcountries = wb.get_countries()

# Drop the rows whose region is 'Aggregates' and renumber the index.
is_aggregate = wbcountries.region.isin(['Aggregates'])
wbcountries = wbcountries.loc[~is_aggregate].reset_index(drop=True)

# Normalise the text columns: strip surrounding whitespace from names and
# title-case the income level.
wbcountries['name'] = wbcountries.name.str.strip()
wbcountries['incomeLevel'] = wbcountries['incomeLevel'].str.title()

# Manual override of Venezuela's income level.
# NOTE(review): presumably the API does not report a usable group for VEN - confirm.
wbcountries.loc[wbcountries.iso3c == 'VEN', 'incomeLevel'] = 'Upper Middle Income'

Run to view results

wdi_indicators = ['NY.GDP.PCAP.PP.KD', 'NY.GDP.PCAP.KD', 'SL.GDP.PCAP.EM.KD', 'SP.POP.GROW', 'SP.POP.TOTL', 'SP.DYN.WFRT', 'SP.DYN.TFRT.IN']

Run to view results

popvars = wb.search(string='population') popvars.head()

Run to view results

wdi = wb.download(indicator=wdi_indicators, country=wbcountries.iso2c.values, start=1950, end=year) wdi = wdi.reset_index() wdi['year'] = wdi.year.astype(int) wdi['gdp_pc'] = wdi['NY.GDP.PCAP.PP.KD'] wdi['ln_gdp_pc'] = wdi['NY.GDP.PCAP.PP.KD'].apply(np.log) wdi['ln_pop'] = wdi['SP.POP.TOTL'].apply(np.log) wdi.head()

Run to view results

wdi = wbcountries.merge(wdi, left_on='name', right_on='country') wdi.head()

Run to view results

url = 'https://www.statsmodels.org/stable/index.html' IFrame(url, width=800, height=400)

Run to view results

dffig = wdi.loc[wdi.year==year]\ .dropna(subset=['ln_gdp_pc', 'latitude', 'ln_pop'])\ .sort_values(by='region').reset_index()

Run to view results

mod = smf.ols(formula='ln_gdp_pc ~ latitude', data=dffig, missing='drop').fit()

Run to view results

mod.summary2()

Run to view results

# Plot the data, the fitted OLS line and the confidence band for the mean.
pred_ols = mod.get_prediction()
# summary_frame() is computed once and reused for both bounds.
pred_frame = pred_ols.summary_frame()
iv_l = pred_frame["mean_ci_lower"]
iv_u = pred_frame["mean_ci_upper"]

# dffig is sorted by region, not by latitude. Lines join consecutive points,
# so draw them in latitude order; otherwise the band is connected out of order.
latitude = dffig.latitude.to_numpy()
order = np.argsort(latitude)
latitude_sorted = latitude[order]

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(dffig.latitude, dffig.ln_gdp_pc, "o", label="data")
ax.plot(latitude_sorted, mod.fittedvalues.to_numpy()[order], "r--.", label="OLS")
ax.plot(latitude_sorted, iv_u.to_numpy()[order], "r--")
ax.plot(latitude_sorted, iv_l.to_numpy()[order], "r--")
ax.legend(loc="best")

Run to view results

fig

Run to view results

mod2 = smf.ols(formula='ln_gdp_pc ~ latitude + C(region)', data=dffig, missing='drop').fit()

Run to view results

mod2.summary2()

Run to view results

mod3 = smf.ols(formula='ln_gdp_pc ~ np.abs(latitude) + C(region)', data=dffig, missing='drop').fit()

Run to view results

mod3.summary2()

Run to view results

mod4 = smf.ols(formula='ln_gdp_pc ~ np.log(np.abs(latitude)) + C(region)', data=dffig, missing='drop').fit()

Run to view results

mod4.summary2()

Run to view results

url = 'https://nbviewer.org/github/mwburke/stargazer/blob/master/examples.ipynb' IFrame(url, width=800, height=400)

Run to view results

stargazer = Stargazer([mod, mod2, mod3, mod4])

Run to view results

stargazer.significant_digits(2) stargazer.show_degrees_of_freedom(False) #stargazer.dep_var_name = '' stargazer.dependent_variable = ' Log[GDP per capita (' + str(year) + ')]' stargazer.custom_columns(['Latitude', 'Abs(Latitude)', 'Log[Abs(Latitude)]'], [2, 1, 1]) #stargazer.show_model_numbers(False) stargazer.rename_covariates({'latitude':'Latitude', 'np.abs(latitude)':'Absolute Latitude', 'np.log(np.abs(latitude))':'Log[Absolute Latitude]',}) stargazer.add_line('WB Region FE', ['No', 'Yes', 'Yes', 'Yes'], LineLocation.FOOTER_TOP) stargazer.covariate_order(['latitude', 'np.abs(latitude)', 'np.log(np.abs(latitude))']) stargazer.cov_spacing = 2

Run to view results

stargazer

Run to view results

HTML(stargazer.render_html())

Run to view results

file_name = "table.html"  # Include directory path if needed

# Mode "w" overwrites an existing file. The context manager closes the file
# even if rendering or writing raises, and the explicit encoding keeps the
# output independent of the platform default.
with open(pathgraphs + file_name, "w", encoding="utf-8") as html_file:
    html_file.write(stargazer.render_html())

Run to view results

url = pathgraphs + 'table.html' url = 'https://smu-econ-growth.github.io/EconGrowthUG-Slides-Working-with-WDI/table.html' IFrame(url, width=500, height=300)

Run to view results

url = 'https://seaborn.pydata.org/examples/index.html' IFrame(url, width=800, height=400)

Run to view results

# Seaborn defaults for this figure: figure size and the "talk" context.
sns.set(rc={'figure.figsize': (11.7, 8.27)})
sns.set_context("talk")

# Regions in sorted order; the same ordering drives both colour and marker.
region_order = dffig.region.drop_duplicates().sort_values()

# Latitude vs. log GDP per capita; marker size follows ln_pop.
g = sns.relplot(
    data=dffig,
    x="latitude",
    y="ln_gdp_pc",
    hue="region",
    hue_order=region_order,
    style="region",
    style_order=region_order,
    size="ln_pop",
    sizes=(10, 400),
    alpha=.5,
    height=6,
    aspect=2,
    palette="muted",
)
g.set_axis_labels('Latitude', 'Log[GDP per capita (' + str(year) + ')]')

Run to view results

g.fig

Run to view results

# Seaborn defaults for this figure: figure size and the "talk" context.
sns.set(rc={'figure.figsize': (11.7, 8.27)})
sns.set_context("talk")

# Regions in sorted order; the same ordering drives both colour and marker.
region_order = dffig.region.drop_duplicates().sort_values()

# Same scatter as the relplot version, drawn on an explicit matplotlib Axes
# so that labels and legend can be set through the Axes API.
fig, ax = plt.subplots()
sns.scatterplot(
    data=dffig,
    x="latitude",
    y="ln_gdp_pc",
    hue="region",
    hue_order=region_order,
    style="region",
    style_order=region_order,
    size="ln_pop",
    sizes=(10, 400),
    alpha=.5,
    palette="muted",
    ax=ax,
)
ax.set_xlabel('Latitude')
ax.set_ylabel('Log[GDP per capita (' + str(year) + ')]')
ax.legend(fontsize=10)

Run to view results

fig

Run to view results

def my_xy_plot(dfin,
               x='SP.POP.GROW',
               y='ln_gdp_pc',
               labelvar='iso3c',
               dx=0.006125,
               dy=0.006125,
               xlogscale=False,
               ylogscale=False,
               xlabel='Growth Rate of Population',
               ylabel='Log[Income per capita in ' + str(year) + ']',
               labels=False,
               xpct = False,
               ypct = False,
               OLS=False,
               OLSlinelabel='OLS',
               ssline=False,
               sslinelabel='45 Degree Line',
               filename='income-pop-growth.pdf',
               hue='region',
               hue_order=['East Asia & Pacific', 'Europe & Central Asia',
                          'Latin America & Caribbean ', 'Middle East & North Africa',
                          'North America', 'South Asia', 'Sub-Saharan Africa '],
               style='incomeLevel',
               style_order=['High Income', 'Upper Middle Income', 'Lower Middle Income', 'Low Income'],
               palette=None,
               size=None,
               sizes=None,
               legend_fontsize=10,
               label_font_size=12,
               save=True):
    '''
    Scatter plot of the association between x and y in dfin.

    Rows with missing x or y are dropped before plotting.

    Parameters
    ----------
    dfin : DataFrame holding the columns named by x, y, labelvar and by
        hue/style/size when those are column names.
    x, y : column names for the horizontal and vertical axes.
    labelvar : column whose values are written next to each point when
        labels is True.
    dx, dy : label offsets, as a fraction of the mean of x and of y.
    xlogscale, ylogscale : use a log scale on the respective axis.
    xlabel, ylabel : axis labels.
    labels : write the labelvar text next to every point.
    xpct, ypct : format the axis ticks as percentages ('%.0f%%').
    OLS, OLSlinelabel : overlay a seaborn regression line and its legend label.
    ssline, sslinelabel : overlay a red line through (v, v) spanning the range
        of x, and its legend label.
    filename : file name (inside pathgraphs) used when save is True.
    hue, hue_order, style, style_order, palette, size, sizes : passed to
        seaborn.scatterplot.
    legend_fontsize, label_font_size : font sizes of the legend and of the
        point labels.
    save : write the figure to pathgraphs + filename.

    Returns
    -------
    The matplotlib Figure.
    '''
    sns.set(rc={'figure.figsize':(11.7,8.27)})
    sns.set_context("talk")
    # dropna() already returns a new frame, so dfin itself is never modified.
    df = dfin.dropna(subset=[x, y]).reset_index(drop=True)

    # Plot
    fig, ax = plt.subplots()
    sns.scatterplot(x=x, y=y, data=df, ax=ax,
                    hue=hue,
                    hue_order=hue_order,
                    alpha=1,
                    style=style,
                    style_order=style_order,
                    palette=palette,
                    size=size,
                    sizes=sizes,
                    )
    if OLS:
        sns.regplot(x=x, y=y, data=df, ax=ax, label=OLSlinelabel, scatter=False)
    if ssline:
        # Same coordinates on both axes, slightly wider than the range of x.
        line_min = df[x].min()*.99
        line_max = df[x].max()*1.01
        ax.plot([line_min, line_max], [line_min, line_max], c='r', label=sslinelabel)

    if labels:
        movex = df[x].mean() * dx
        movey = df[y].mean() * dy
        for x_val, y_val, text in zip(df[x], df[y], df[labelvar]):
            ax.text(x_val+movex, y_val+movey, text,
                    horizontalalignment='left', fontsize=label_font_size, color='black')

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    if xpct:
        fmt = '%.0f%%' # Format you want the ticks, e.g. '40%'
        ax.xaxis.set_major_formatter(mtick.FormatStrFormatter(fmt))
    if ypct:
        fmt = '%.0f%%' # Format you want the ticks, e.g. '40%'
        ax.yaxis.set_major_formatter(mtick.FormatStrFormatter(fmt))
    if ylogscale:
        ax.set(yscale="log")
    if xlogscale:
        ax.set(xscale="log")

    # Remove the legend entries that only repeat the hue/style/size column
    # names. A plain list filter (rather than numpy boolean masks) also works
    # when size is None or when the legend is empty, and it does not reuse the
    # name of the `labels` argument.
    section_titles = {name for name in (hue, style, size) if isinstance(name, str)}
    handles, legend_labels = ax.get_legend_handles_labels()
    kept = [(handle, label) for handle, label in zip(handles, legend_labels)
            if label not in section_titles]
    ax.legend(handles=[handle for handle, _ in kept],
              labels=[label for _, label in kept],
              fontsize=legend_fontsize)
    if save:
        # Save this figure explicitly instead of whichever figure is current.
        fig.savefig(pathgraphs + filename, dpi=300, bbox_inches='tight')
    return fig

Run to view results

g = my_xy_plot(dffig, x='latitude', y='ln_gdp_pc', xlabel='Latitude', ylabel='Log[GDP per capita (' + str(year) +')]', OLS=True, labels=True, size="ln_pop", sizes=(10, 400), filename='ln-gdp-pc-latitude.pdf')

Run to view results

g

Run to view results

def my_xy_line_plot(dfin,
                    x='year',
                    y='ln_gdp_pc',
                    labelvar='iso3c',
                    dx=0.006125,
                    dy=0.006125,
                    xlogscale=False,
                    ylogscale=False,
                    xlabel='Growth Rate of Population',
                    ylabel='Log[Income per capita in ' + str(year) + ']',
                    labels=False,
                    xpct = False,
                    ypct = False,
                    OLS=False,
                    OLSlinelabel='OLS',
                    ssline=False,
                    sslinelabel='45 Degree Line',
                    filename='income-pop-growth.pdf',
                    hue='region',
                    hue_order=['East Asia & Pacific', 'Europe & Central Asia',
                               'Latin America & Caribbean ', 'Middle East & North Africa',
                               'North America', 'South Asia', 'Sub-Saharan Africa '],
                    style='incomeLevel',
                    style_order=['High Income', 'Upper Middle Income', 'Lower Middle Income', 'Low Income'],
                    palette=None,
                    legend_fontsize=10,
                    label_fontsize=12,
                    loc=None,
                    save=True):
    '''
    Line plot of y against x in dfin, grouped by hue and style.

    Rows with missing x or y are dropped before plotting.

    Parameters
    ----------
    dfin : DataFrame holding the columns named by x, y, labelvar, hue and style.
    x, y : column names for the horizontal and vertical axes.
    labelvar : column whose values are written next to each observation when
        labels is True.
    dx, dy : label offsets, as a fraction of the mean of x and of y.
    xlogscale, ylogscale : use a log scale on the respective axis.
    xlabel, ylabel : axis labels. NOTE: the default xlabel does not describe
        the default x ('year'); pass xlabel explicitly.
    labels : write the labelvar text next to every observation.
    xpct, ypct : format the axis ticks as percentages ('%.0f%%').
    OLS, OLSlinelabel : overlay a seaborn regression line and its legend label.
    ssline, sslinelabel : overlay a red line through (v, v) spanning the range
        of x, and its legend label.
    filename : file name (inside pathgraphs) used when save is True.
    hue, hue_order, style, style_order, palette : passed to seaborn.lineplot.
    legend_fontsize, label_fontsize : font sizes of the legend and of the
        observation labels.
    loc : legend location passed to Axes.legend.
    save : write the figure to pathgraphs + filename.

    Returns
    -------
    The matplotlib Figure.
    '''
    sns.set(rc={'figure.figsize':(11.7,8.27)})
    sns.set_context("talk")
    # dropna() already returns a new frame, so dfin itself is never modified.
    df = dfin.dropna(subset=[x, y]).reset_index(drop=True)

    # Plot
    fig, ax = plt.subplots()
    sns.lineplot(x=x, y=y, data=df, ax=ax,
                 hue=hue,
                 hue_order=hue_order,
                 alpha=1,
                 style=style,
                 style_order=style_order,
                 palette=palette,
                 )
    if OLS:
        sns.regplot(x=x, y=y, data=df, ax=ax, label=OLSlinelabel, scatter=False)
    if ssline:
        # Same coordinates on both axes, slightly wider than the range of x.
        line_min = df[x].min()*.99
        line_max = df[x].max()*1.01
        ax.plot([line_min, line_max], [line_min, line_max], c='r', label=sslinelabel)

    if labels:
        movex = df[x].mean() * dx
        movey = df[y].mean() * dy
        for x_val, y_val, text in zip(df[x], df[y], df[labelvar]):
            ax.text(x_val+movex, y_val+movey, text,
                    horizontalalignment='left', fontsize=label_fontsize, color='black')

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    if xpct:
        fmt = '%.0f%%' # Format you want the ticks, e.g. '40%'
        ax.xaxis.set_major_formatter(mtick.FormatStrFormatter(fmt))
    if ypct:
        fmt = '%.0f%%' # Format you want the ticks, e.g. '40%'
        ax.yaxis.set_major_formatter(mtick.FormatStrFormatter(fmt))
    if ylogscale:
        ax.set(yscale="log")
    if xlogscale:
        ax.set(xscale="log")

    # Remove the legend entries that only repeat the grouping column names.
    # The names come from the hue/style arguments (as in my_xy_plot) instead of
    # being hard-coded to 'region'/'incomeLevel', so other grouping columns are
    # handled as well.
    section_titles = {name for name in (hue, style) if isinstance(name, str)}
    handles, legend_labels = ax.get_legend_handles_labels()
    kept = [(handle, label) for handle, label in zip(handles, legend_labels)
            if label not in section_titles]
    ax.legend(handles=[handle for handle, _ in kept],
              labels=[label for _, label in kept],
              fontsize=legend_fontsize, loc=loc)
    if save:
        # Save this figure explicitly instead of whichever figure is current.
        fig.savefig(pathgraphs + filename, dpi=300, bbox_inches='tight')
    return fig

Run to view results

palette=sns.color_palette("Blues_r", wdi['incomeLevel'].unique().shape[0]+6)[:wdi['incomeLevel'].unique().shape[0]*2:2] fig = my_xy_line_plot(wdi, x='year', y='ln_gdp_pc', xlabel='Year', ylabel='Log[GDP per capita]', filename='ln-gdp-pc-income-groups-TS.pdf', hue='incomeLevel', hue_order=['High Income', 'Upper Middle Income', 'Lower Middle Income', 'Low Income'], palette=palette, OLS=False, labels=False, legend_fontsize=16, loc='lower right', save=True)

Run to view results

fig

Run to view results

#palette=sns.color_palette("Blues_r", wdi['region'].unique().shape[0]+6)[:wdi['region'].unique().shape[0]*2:2] fig = my_xy_line_plot(wdi, x='year', y='gdp_pc', xlabel='Year', ylabel='GDP per capita', ylogscale=True, filename='ln-gdp-pc-regions-TS.pdf', style='region', style_order=['East Asia & Pacific', 'Europe & Central Asia', 'Latin America & Caribbean ', 'Middle East & North Africa', 'North America', 'South Asia', 'Sub-Saharan Africa '], #palette=palette, OLS=False, labels=False, legend_fontsize=12, loc='lower right', save=True)

Run to view results

fig

Run to view results

url = 'https://plotly.com/python/' IFrame(url, width=800, height=400)

Run to view results

symbols = ['circle', 'x', 'square', 'cross', 'diamond', 'star-diamond', 'triangle-up'] fig = px.scatter(dffig, x="latitude", y="ln_gdp_pc", color='region', symbol='region', symbol_sequence=symbols, hover_name='name', hover_data=['iso3c', 'ln_pop', 'gdp_pc'], size='ln_pop', size_max=15, trendline="ols", trendline_scope="overall", trendline_color_override="black", labels={ "latitude": "Latitude", "ln_gdp_pc": "Log[GDP per capita (" + str(year) + ")]", "gdp_pc": "GDP per capita (" + str(year) + ")", "region": "WB Region" }, opacity=0.75, height=800, )

Run to view results

fig.show()

Run to view results

fig.update_traces(marker=dict(#size=12, line=dict(width=2, color='DarkSlateGrey')), selector=dict(mode='markers'))

Run to view results

fig.show()

Run to view results

# Positions of the traces drawn in 'lines' mode
# (presumably the trendline added by px.scatter - confirm).
tr_line = [k for k, trace in enumerate(fig.data) if trace.mode == 'lines']
print(tr_line)

# Thicken those lines. The loop variable is not called `id`, which would
# shadow the builtin for the rest of the notebook session.
for trace_idx in tr_line:
    fig.data[trace_idx].update(line_width=3)

Run to view results

fig.show()

Run to view results

fig.update_layout(legend=dict( yanchor="top", y=0.25, xanchor="left", x=0.9 ))

Run to view results

fig.show()

Run to view results

fig.write_image(pathgraphs + "ln-gdp-pc-latitude-plotly.pdf", height=1000, width=1500, scale=4)

Run to view results

results = px.get_trendline_results(fig) results.px_fit_results.iloc[0].summary()

Run to view results

# Browser-like request headers sent with the download.
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
           'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'}

url = 'https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip'
# The timeout stops the cell from hanging indefinitely on a stalled connection.
r = requests.get(url, headers=headers, timeout=60)
# Fail here on an HTTP error instead of handing an error page to read_file.
r.raise_for_status()
# Read the downloaded archive directly from memory.
countries = gp.read_file(io.BytesIO(r.content))

Run to view results

countries.head()

Run to view results

fig, ax = plt.subplots(figsize=(15,10)) countries.plot(ax=ax) ax.set_title("WGS84 (lat/lon)", fontdict={'fontsize':34})

Run to view results

dffig2 = countries.merge(dffig, left_on='ADM0_A3', right_on='iso3c')

Run to view results

fig, ax = plt.subplots(figsize=(15,10)) dffig2.plot(column='gdp_pc', ax=ax, cmap='Reds') ax.set_title("WGS84 (lat/lon)", fontdict={'fontsize':34})

Run to view results

url = 'https://residentmario.github.io/geoplot/' IFrame(url, width=800, height=400)

Run to view results

gplt.polyplot( countries, projection=gcrs.PlateCarree(central_longitude=0.0, globe=None), edgecolor='white', facecolor='lightgray', rasterized=True, extent=[-180, -90, 180, 90], )

Run to view results

gplt.choropleth(dffig2, hue='gdp_pc', projection=gcrs.PlateCarree(central_longitude=0.0, globe=None), edgecolor='white', linewidth=1, cmap='Reds', legend=True, scheme='FisherJenks', legend_kwargs={'bbox_to_anchor':(0.3, 0.5), 'frameon': True, 'title':'GDP per capita', }, figsize=(12,8), rasterized=True, )

Run to view results

ax = gplt.choropleth(dffig2, hue='gdp_pc', projection=gcrs.PlateCarree(central_longitude=0.0, globe=None), edgecolor='white', linewidth=1, cmap='Reds', legend=True, scheme='FisherJenks', legend_kwargs={'bbox_to_anchor':(0.3, 0.5), 'frameon': True, 'title':'GDP per capita', }, figsize=(15,10), rasterized=True, ) gplt.polyplot(countries, projection=gcrs.PlateCarree(central_longitude=0.0, globe=None), edgecolor='white', facecolor='lightgray', ax=ax, rasterized=True, extent=[-180, -90, 180, 90], )

Run to view results

# Functions for plotting
def center_wrap(text, cwidth=32, **kw):
    '''Center text (to be used in legend).

    text   : list of lines; a single string is treated as one line.
    cwidth : width in which each line is centered.
    **kw   : accepted for backward compatibility; not used.

    Returns the centered lines joined by newlines.
    '''
    # Iterating over a bare string would center every character on its own line.
    lines = [text] if isinstance(text, str) else text
    return "\n".join(line.center(cwidth) for line in lines)


def _build_classifier(values, scheme, k):
    '''Return the mapclassify classifier named by scheme, fitted to values.'''
    takes_k = ('EqualInterval', 'Quantiles', 'FisherJenks', 'FisherJenksSampled',
               'JenksCaspall', 'JenksCaspallForced', 'JenksCaspallSampled')
    if scheme in takes_k:
        return getattr(mc, scheme)(values, k=k)
    # These classifiers choose the number of classes themselves and do not
    # accept k.
    if scheme in ('BoxPlot', 'HeadTailBreaks'):
        return getattr(mc, scheme)(values)
    if scheme == 'KClassifiers':
        # KClassifiers compares several classifiers; `.best` is the selected
        # one and is what provides `.bins`.
        return mc.KClassifiers(values).best
    raise ValueError('Unknown classification scheme: ' + repr(scheme))


def MyChloropleth(mydf, myfile='fig', myvar='gdp_pc',
                  mylegend='GDP per capita',
                  k=5,
                  extent=[-180, -90, 180, 90],
                  bbox_to_anchor=(0.25, 0.5),
                  edgecolor='white', facecolor='lightgray',
                  scheme='FisherJenks',
                  rasterized=False,
                  save=True,
                  percent=False,
                  rn=0,
                  **kwargs):
    '''
    Draw a choropleth of mydf[myvar] on top of a background map of `countries`.

    mydf           : GeoDataFrame with the column myvar.
    myfile         : base file name (inside pathgraphs) used when save is True.
    myvar          : column to map.
    mylegend       : legend title.
    k              : number of classes, for the schemes that take it.
    extent         : map extent passed to the background polyplot.
    bbox_to_anchor : legend anchor.
    edgecolor, facecolor : colours of the background polyplot (the choropleth
                     itself always uses a white edge).
    scheme         : name of a mapclassify classifier, or an already built
                     classifier object.
    rasterized     : passed to both plotting calls.
    save           : write the figure as .jpg, .png and .pdf.
    percent        : format the legend bounds as percentages.
    rn             : number of decimals in the legend bounds.
    **kwargs       : accepted but not used.

    Reads the module-level `countries` GeoDataFrame and `pathgraphs`.
    '''
    # Chloropleth
    # Color scheme: names are turned into classifiers; anything else is assumed
    # to be a classifier already and is used as is.
    if isinstance(scheme, str):
        scheme = _build_classifier(mydf[myvar], scheme, k)

    # Format legend: each class runs from the previous upper bound (the data
    # minimum for the first class) to its own upper bound.
    upper_bounds = scheme.bins
    lower_bounds = [mydf[myvar].min(), *upper_bounds[:-1]]
    number_format = f'.{rn}%' if percent else f',.{rn}f'
    legend_labels = [f'{float(lower):{number_format}} - {float(upper):{number_format}}'
                     for lower, upper in zip(lower_bounds, upper_bounds)]

    #Plot
    ax = gplt.choropleth(
        mydf, hue=myvar, projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
        edgecolor='white', linewidth=1,
        cmap='Reds', legend=True,
        scheme=scheme,
        legend_kwargs={'bbox_to_anchor': bbox_to_anchor,
                       'frameon': True,
                       'title':mylegend,
                       },
        legend_labels = legend_labels,
        figsize=(24, 16),
        rasterized=rasterized,
    )
    # Background: all countries, so that countries without data are still drawn.
    gplt.polyplot(
        countries, projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
        edgecolor=edgecolor, facecolor=facecolor,
        ax=ax,
        rasterized=rasterized,
        extent=extent,
    )
    if save:
        for extension in ('.jpg', '.png', '.pdf'):
            plt.savefig(pathgraphs + myfile + extension, dpi=300, bbox_inches='tight')

Run to view results

mylegend = center_wrap(["GDP per capita in " + str(year)], cwidth=32, width=32) MyChloropleth(dffig2, myfile='fig-gdp-pc-' + str(year), myvar='gdp_pc', mylegend=mylegend, k=10, scheme='Quantiles', save=True)

Run to view results

url = 'https://plotly.com/python/maps/' IFrame(url, width=800, height=400)

Run to view results

# Quintiles of GDP per capita: `scheme` supplies the legend text, `classifier`
# assigns every row its class number.
scheme = mc.Quantiles(dffig2['gdp_pc'], k=5)
classifier = mc.Quantiles.make(k=5, rolling=True)
dffig2['gdp_pc_q'] = classifier(dffig2['gdp_pc'])

# Clean up the legend text once, instead of rebuilding the list for every row.
legend_classes = [label.replace('[ ', '[').replace('( ', '(')
                  for label in scheme.get_legend_classes()]
dffig2['gdp_pc_qc'] = dffig2['gdp_pc_q'].apply(lambda q: legend_classes[q])

Run to view results

fig = px.choropleth(dffig2.sort_values('gdp_pc_q', ascending=True), locations="iso3c", color="gdp_pc_qc", hover_name='name', hover_data=['iso3c', 'ln_pop'], labels={ "gdp_pc_qc": "GDP per capita (" + str(year) + ")", }, color_discrete_sequence=px.colors.sequential.Reds, height=600, width=1000, ) # Change legend position fig.update_layout(legend=dict( yanchor="bottom", y=0.15, xanchor="left", x=0.05 ))

Run to view results

fig.show()

Run to view results

fig = px.choropleth(dffig2.sort_values('gdp_pc_q', ascending=True), locations="iso3c", color="gdp_pc_qc", hover_name='name', hover_data=['iso3c', 'gdp_pc' ,'ln_pop'], labels={ "gdp_pc_qc": "GDP per capita (" + str(year) + ")", "gdp_pc": "GDP per capita (" + str(year) + ")", 'iso3c':'ISO code', "ln_pop": "Log[Population (" + str(year) + ")]", }, color_discrete_sequence=px.colors.sequential.Blues, height=600, width=1000, ) # Change legend position fig.update_layout(legend=dict( yanchor="bottom", y=0.15, xanchor="left", x=0.05 ))

Run to view results

fig.show()

Run to view results

fig = px.choropleth(dffig, locations="iso3c", color="ln_gdp_pc", hover_name='name', hover_data=['iso3c', 'ln_pop'], labels={ "ln_gdp_pc": "Log[GDP per capita (" + str(year) + ")]", }, #color_continuous_scale=px.colors.sequential.Plasma, color_continuous_scale="Reds", height=600, width=1100, )

Run to view results

fig.show()

Run to view results

fig.update_layout(coloraxis_colorbar=dict( orientation = 'h', yanchor="bottom", xanchor="left", y=-.2, x=0, )) fig.update_coloraxes(colorbar_title_side='top')

Run to view results

fig.show()

Run to view results

# Change legend position fig.update_layout(legend=dict( yanchor="top", y=0.99, xanchor="center", x=0.01, orientation='h', ))

Run to view results

fig.show()

Run to view results

fig = go.Figure(data=go.Choropleth( locations = dffig['iso3c'], z = dffig['gdp_pc'], text = dffig['name'], colorscale = 'Blues', autocolorscale=False, reversescale=True, marker_line_color='darkgray', marker_line_width=0.5, colorbar_tickprefix = '$', colorbar_title = 'GDP pc', ) ) fig.update_layout( autosize=False, width=800, height=400, margin=dict( l=5, r=5, b=10, t=10, pad=1 ), paper_bgcolor="LightSteelBlue", )

Run to view results

fig.show()

Run to view results

fig = go.Figure(data=go.Choropleth( locations = dffig['iso3c'], z = dffig['gdp_pc'], text = dffig['name'], colorscale = 'Blues', autocolorscale=False, reversescale=True, marker_line_color='darkgray', marker_line_width=0.5, colorbar_tickprefix = '$', colorbar_title = 'GDP per capita', ) ) fig.update_layout( autosize=False, width=1000, height=600, margin=dict( l=1, r=1, b=1, t=1, pad=.1 ), paper_bgcolor="LightSteelBlue", ) # Change legend position cb = fig.data[0].colorbar cb.orientation = 'h' cb.yanchor = 'bottom' cb.xanchor = 'center' cb.y = .1 cb.title.side = 'top'

Run to view results

fig.show()

Run to view results

import pandas as pd
import wbdata

# World Bank indicator codes mapped to the column names used in `data`.
indicators = {'IP.PAT.RESD': 'Resident Patent Applications',
              'IP.PAT.NRES': 'Non-Resident Patent Applications'}

# Not called `countries`: that name holds the GeoDataFrame which MyChloropleth
# reads as a global, and overwriting it with a string would break the map code.
patent_countries = 'all'

start_date = '2000'
end_date = '2022'

# NOTE: `data` also replaces the `data` module imported from pandas_datareader
# earlier in the notebook; the name is kept because the following cells use it.
data = wbdata.get_dataframe(indicators, country=patent_countries,
                            data_date=(start_date, end_date), convert_date=False)

data['Total Patents'] = data['Resident Patent Applications'] + data['Non-Resident Patent Applications']

print(data)

data.to_csv('patent_data_all_countries.csv')

Run to view results

import pandas as pd
import matplotlib.pyplot as plt

years_of_interest = ['1990', '1995', '2000', '2010', '2020']

filtered_data = data[data.index.get_level_values('date').isin(years_of_interest)]
# Years that are actually present after filtering.
available_years = set(filtered_data.index.get_level_values('date'))

# NOTE(review): the download cell only creates the two patent columns and
# 'Total Patents'; 'GDP per capita' must be added to `data` before this runs - confirm.
plt.figure(figsize=(10, 6))
# The loop variable is not called `year`, which would overwrite the
# notebook-wide `year` used for labels and file names.
for yr in years_of_interest:
    if yr not in available_years:
        # Requested year is outside the downloaded range; nothing to plot.
        continue
    # 'date' is one level of the index, so select it by level name
    # (assumes a multi-level index, e.g. country/date - confirm).
    subset = filtered_data.xs(yr, level='date')
    plt.scatter(subset['GDP per capita'], subset['Total Patents'], label=yr)

plt.title('Relation between GDP per capita and Total Patents')
plt.xlabel('GDP per capita')
plt.ylabel('Total Patents')
plt.legend()
plt.show()

Run to view results

import pandas as pd import matplotlib.pyplot as plt income_groups = data['Income Group'].unique() for income_group in income_groups: subset = data[data['Income Group'] == income_group] plt.figure(figsize=(10, 6)) plt.plot(subset.index, subset['GDP per capita'], label='GDP per capita', marker='o') plt.plot(subset.index, subset['Total Patents'], label='Total Patents', marker='o') plt.title(f'Evolution of GDP per capita and Total Patents - {income_group}') plt.xlabel('Year') plt.ylabel('Values') plt.legend() plt.show() regions = data['Region'].unique() for region in regions: subset = data[data['Region'] == region] plt.figure(figsize=(10, 6)) plt.plot(subset.index, subset['GDP per capita'], label='GDP per capita', marker='o') plt.plot(subset.index, subset['Total Patents'], label='Total Patents', marker='o') plt.title(f'Evolution of GDP per capita and Total Patents - {region}') plt.xlabel('Year') plt.ylabel('Values') plt.legend() plt.show()

Run to view results

import pandas as pd
import matplotlib.pyplot as plt

# Observations for 2015 only.
data_2015 = data[data.index.get_level_values('date') == '2015']
resident = data_2015['Resident Patent Applications']
non_resident = data_2015['Non-Resident Patent Applications']

# End point of the 45-degree line: the largest value among the two plotted
# series only. Taking the maximum over every column would also include
# 'Total Patents' and stretch the line beyond the data shown.
line_max = pd.concat([resident, non_resident]).max()

plt.figure(figsize=(8, 8))
plt.scatter(resident, non_resident, label='Patent Activity (2015)')
plt.plot([0, line_max], [0, line_max], linestyle='--', color='red', label='45-Degree Line')

plt.title('Relation between Patenting Activity by Residents and Non-Residents (2015)')
plt.xlabel('Resident Patent Applications')
plt.ylabel('Non-Resident Patent Applications')
plt.legend()
plt.grid(True)
plt.show()

Run to view results

import geopandas as gpd import matplotlib.pyplot as plt # Assuming 'world' is a GeoDataFrame with geometry and a column for patenting activity (e.g., 'Total Patents') # Replace 'Total Patents' with the actual column name in your DataFrame # Filter data for the year 2015 data_2015 = data[data.index.get_level_values('date') == '2015'] # Merge the GeoDataFrame with the patent data world_with_data = world.merge(data_2015, how='left', left_on='country_code', right_index=True) # Plotting the static map fig, ax = plt.subplots(1, 1, figsize=(15, 10)) world_with_data.plot(column='Total Patents', cmap='Blues', linewidth=0.8, ax=ax, edgecolor='0.8', legend=True) ax.set_title('Patenting Activity Across the World (2015)') plt.show()

Run to view results

import pandas as pd import seaborn as sns import matplotlib.pyplot as plt import numpy as np data['Total Patents'] = data['Resident Patent Applications'] + data['Non-Resident Patent Applications'] plt.figure(figsize=(12, 8)) sns.scatterplot(x='Log GDP per capita', y='Resident Patent Applications', data=data, label='Residents', color='blue', marker='o') sns.scatterplot(x='Log GDP per capita', y='Non-Resident Patent Applications', data=data, label='Non-Residents', color='orange', marker='o') sns.scatterplot(x='Log GDP per capita', y='Total Patents', data=data, label='Total', color='green', marker='o') plt.title('Relation between Log[GDP per capita] and Patenting Activity') plt.xlabel('Log[GDP per capita]') plt.ylabel('Patent Applications') plt.legend() plt.show() summary_table = data[['Log GDP per capita', 'Resident Patent Applications', 'Non-Resident Patent Applications', 'Total Patents']] print(summary_table.describe()) sns.pairplot(data[['Log GDP per capita', 'Resident Patent Applications', 'Non-Resident Patent Applications', 'Total Patents']]) plt.show()

Run to view results