# Basic Packages
from __future__ import division
import os
from datetime import datetime
# Web & file access
import requests
import io
# Import display options for showing websites
from IPython.display import IFrame, HTML
# Plotting
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
%pylab --no-import-all
%matplotlib inline
import seaborn as sns
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_context("talk")
import plotly.express as px
import plotly.graph_objects as go
from plotnine import ggplot, geom_point, aes, stat_smooth, facet_wrap
# Next line can import all of plotnine, but may overwrite things? Better import each function/object you need
#from plotnine import *
# Data
import pandas as pd
import numpy as np
from pandas_datareader import data, wb
# GIS & maps
import geopandas as gpd
gp = gpd
import georasters as gr
import geoplot as gplt
import geoplot.crs as gcrs
import mapclassify as mc
import textwrap
# Data Munging
from itertools import product, combinations
import difflib
import pycountry
import geocoder
from geonamescache.mappers import country
# Country-key converters built with geonamescache's `country` mapper factory.
# Each call fixes the key to look up by (`from_key`) and the key to return
# (`to_key`).
mapper = country(from_key='name', to_key='iso3')  # 'name' -> 'iso3'
mapper2 = country(from_key='iso3', to_key='iso')  # 'iso3' -> 'iso'
mapper3 = country(from_key='iso3', to_key='name')  # 'iso3' -> 'name'
# Regressions & Stats
from scipy.stats import norm
import statsmodels.formula.api as smf
from stargazer.stargazer import Stargazer, LineLocation
# Paths
# Output folders for downloaded data and generated figures.  `os.makedirs`
# with `exist_ok=True` creates a folder only when it is missing, which avoids
# the check-then-create race of `os.path.exists` followed by `os.mkdir`.
pathout = './data/'
os.makedirs(pathout, exist_ok=True)
pathgraphs = './graphs/'
os.makedirs(pathgraphs, exist_ok=True)

# Reference year used throughout the analysis: two years before the current
# calendar year, capped at 2020.
currentYear = datetime.now().year
year = min(2020, currentYear - 2)
# Embed the World Bank widget for indicator NY.GDP.PCAP.PP.KD.
url = 'https://data.worldbank.org/share/widget?indicators=NY.GDP.PCAP.PP.KD'
IFrame(url, width=500, height=300)

# Embed the pandas-datareader documentation for the World Bank reader.
url = 'https://pandas-datareader.readthedocs.io/en/latest/remote_data.html#remote-data-wb'
IFrame(url, width=800, height=400)

# Country metadata: drop the rows whose region is 'Aggregates', tidy the name
# and income-level strings, and set Venezuela's income level explicitly.
wbcountries = wb.get_countries()
wbcountries = wbcountries.loc[
    ~wbcountries.region.isin(['Aggregates'])
].reset_index(drop=True)
wbcountries['name'] = wbcountries.name.str.strip()
wbcountries['incomeLevel'] = wbcountries['incomeLevel'].str.title()
wbcountries.loc[wbcountries.iso3c == 'VEN', 'incomeLevel'] = 'Upper Middle Income'

# Indicators to download.
wdi_indicators = [
    'NY.GDP.PCAP.PP.KD',
    'NY.GDP.PCAP.KD',
    'SL.GDP.PCAP.EM.KD',
    'SP.POP.GROW',
    'SP.POP.TOTL',
    'SP.DYN.WFRT',
    'SP.DYN.TFRT.IN',
]

# Example of searching the indicator catalogue by keyword.
popvars = wb.search(string='population')
popvars.head()

# Download the panel for every retained country, from 1950 to `year`, and
# flatten the index into ordinary columns.
wdi = wb.download(
    indicator=wdi_indicators,
    country=wbcountries.iso2c.values,
    start=1950,
    end=year,
).reset_index()
wdi['year'] = wdi['year'].astype(int)

# Convenience columns: GDP per capita level, its log, and log population.
wdi['gdp_pc'] = wdi['NY.GDP.PCAP.PP.KD']
wdi['ln_gdp_pc'] = np.log(wdi['NY.GDP.PCAP.PP.KD'])
wdi['ln_pop'] = np.log(wdi['SP.POP.TOTL'])
wdi.head()

# Attach the country metadata by matching the country name on both sides.
wdi = wbcountries.merge(wdi, left_on='name', right_on='country')
wdi.head()
# Embed the statsmodels documentation.
url = 'https://www.statsmodels.org/stable/index.html'
IFrame(url, width=800, height=400)

# Cross-section for the reference year, keeping only rows with the variables
# used below, ordered by region.
dffig = (
    wdi.loc[wdi.year == year]
    .dropna(subset=['ln_gdp_pc', 'latitude', 'ln_pop'])
    .sort_values(by='region')
    .reset_index()
)

# Bivariate OLS of log GDP per capita on latitude.
mod = smf.ols(formula='ln_gdp_pc ~ latitude', data=dffig, missing='drop').fit()
mod.summary2()

# Confidence band of the fitted mean; build the summary frame only once.
pred_ols = mod.get_prediction()
pred_frame = pred_ols.summary_frame()
iv_l = pred_frame["mean_ci_lower"]
iv_u = pred_frame["mean_ci_upper"]

# `dffig` is ordered by region, not latitude.  Lines drawn in that order run
# back and forth over themselves and smear the dash pattern, so the fitted
# line and the band are plotted in latitude order instead.
lat_order = np.argsort(dffig.latitude.to_numpy())
lat_sorted = dffig.latitude.to_numpy()[lat_order]

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(dffig.latitude, dffig.ln_gdp_pc, "o", label="data")
ax.plot(lat_sorted, np.asarray(mod.fittedvalues)[lat_order], "r--.", label="OLS")
ax.plot(lat_sorted, np.asarray(iv_u)[lat_order], "r--")
ax.plot(lat_sorted, np.asarray(iv_l)[lat_order], "r--")
ax.legend(loc="best")
fig
# Add World Bank region fixed effects to the latitude regression.
mod2 = smf.ols(
    formula='ln_gdp_pc ~ latitude + C(region)',
    data=dffig,
    missing='drop',
).fit()
mod2.summary2()

# Same specification using the absolute value of latitude.
mod3 = smf.ols(
    formula='ln_gdp_pc ~ np.abs(latitude) + C(region)',
    data=dffig,
    missing='drop',
).fit()
mod3.summary2()

# Same specification using the log of absolute latitude.
mod4 = smf.ols(
    formula='ln_gdp_pc ~ np.log(np.abs(latitude)) + C(region)',
    data=dffig,
    missing='drop',
).fit()
mod4.summary2()
# Embed the Stargazer examples notebook.
url = 'https://nbviewer.org/github/mwburke/stargazer/blob/master/examples.ipynb'
IFrame(url, width=800, height=400)

# Regression table combining the four models estimated above.
stargazer = Stargazer([mod, mod2, mod3, mod4])
stargazer.significant_digits(2)
stargazer.show_degrees_of_freedom(False)
stargazer.dependent_variable = ' Log[GDP per capita (' + str(year) + ')]'
# Column headers: the first spans two models, the other two span one each.
stargazer.custom_columns(['Latitude', 'Abs(Latitude)', 'Log[Abs(Latitude)]'], [2, 1, 1])
stargazer.rename_covariates({
    'latitude': 'Latitude',
    'np.abs(latitude)': 'Absolute Latitude',
    'np.log(np.abs(latitude))': 'Log[Absolute Latitude]',
})
stargazer.add_line('WB Region FE', ['No', 'Yes', 'Yes', 'Yes'], LineLocation.FOOTER_TOP)
# Listing only the latitude terms keeps the region dummies out of the table.
stargazer.covariate_order(['latitude', 'np.abs(latitude)', 'np.log(np.abs(latitude))'])
stargazer.cov_spacing = 2
stargazer

# Render the table once and reuse it for both the display and the file.
table_html = stargazer.render_html()
HTML(table_html)

# Write the table to the graphs folder, overwriting any existing file.  The
# context manager closes the handle even if the write fails.
file_name = "table.html"
with open(pathgraphs + file_name, "w") as html_file:
    html_file.write(table_html)

# Display the published copy of the table; the local copy written above is at
# pathgraphs + file_name.
url = 'https://smu-econ-growth.github.io/EconGrowthUG-Slides-Working-with-WDI/table.html'
IFrame(url, width=500, height=300)
# Embed the seaborn example gallery.
url = 'https://seaborn.pydata.org/examples/index.html'
IFrame(url, width=800, height=400)

# Figure-level scatter: colour and marker by region, marker size by log
# population.
sns.set(rc={'figure.figsize': (11.7, 8.27)})
sns.set_context("talk")
g = sns.relplot(
    data=dffig,
    x="latitude",
    y="ln_gdp_pc",
    hue="region",
    hue_order=dffig.region.drop_duplicates().sort_values(),
    style="region",
    style_order=dffig.region.drop_duplicates().sort_values(),
    size="ln_pop",
    sizes=(10, 400),
    alpha=.5,
    palette="muted",
    height=6,
    aspect=2,
)
g.set_axis_labels('Latitude', 'Log[GDP per capita (' + str(year) + ')]')
g.fig
# Axes-level version of the same scatter, drawn on an explicitly created axis
# so that the labels and legend can be adjusted afterwards.
sns.set(rc={'figure.figsize': (11.7, 8.27)})
sns.set_context("talk")
fig, ax = plt.subplots()
sns.scatterplot(
    data=dffig,
    x="latitude",
    y="ln_gdp_pc",
    hue="region",
    hue_order=dffig.region.drop_duplicates().sort_values(),
    style="region",
    style_order=dffig.region.drop_duplicates().sort_values(),
    size="ln_pop",
    sizes=(10, 400),
    alpha=.5,
    palette="muted",
    ax=ax,
)
ax.set_xlabel('Latitude')
ax.set_ylabel('Log[GDP per capita (' + str(year) + ')]')
ax.legend(fontsize=10)
fig
def my_xy_plot(dfin,
               x='SP.POP.GROW',
               y='ln_gdp_pc',
               labelvar='iso3c',
               dx=0.006125,
               dy=0.006125,
               xlogscale=False,
               ylogscale=False,
               xlabel='Growth Rate of Population',
               ylabel='Log[Income per capita in ' + str(year) + ']',
               labels=False,
               xpct = False,
               ypct = False,
               OLS=False,
               OLSlinelabel='OLS',
               ssline=False,
               sslinelabel='45 Degree Line',
               filename='income-pop-growth.pdf',
               hue='region',
               hue_order=['East Asia & Pacific', 'Europe & Central Asia',
                          'Latin America & Caribbean ', 'Middle East & North Africa',
                          'North America', 'South Asia', 'Sub-Saharan Africa '],
               style='incomeLevel',
               style_order=['High Income', 'Upper Middle Income', 'Lower Middle Income', 'Low Income'],
               palette=None,
               size=None,
               sizes=None,
               legend_fontsize=10,
               label_font_size=12,
               save=True):
    '''
    Scatter plot of the association between x and y in dfin.

    Rows with a missing x or y are dropped before plotting.

    Parameters
    ----------
    dfin : DataFrame holding the columns named by the other arguments.
    x, y : columns for the horizontal and vertical axes.
    labelvar : column whose values annotate each point when `labels` is true.
    dx, dy : label offsets, as fractions of the mean of x and of y.
    xlogscale, ylogscale : put the corresponding axis on a log scale.
    xlabel, ylabel : axis labels (the default ylabel is built from the global
        `year` at definition time).
    labels : annotate every point with its `labelvar` value.
    xpct, ypct : format the corresponding axis ticks as percentages.
    OLS, OLSlinelabel : overlay a seaborn regression line with this label.
    ssline, sslinelabel : overlay a 45-degree line spanning the range of x.
    filename : file name, inside the global `pathgraphs`, used when saving.
    hue, hue_order, style, style_order, palette, size, sizes : forwarded to
        `sns.scatterplot`.
    legend_fontsize, label_font_size : font sizes of the legend and of the
        point labels.
    save : write the figure to `pathgraphs + filename`.

    Returns
    -------
    The matplotlib figure.
    '''
    sns.set(rc={'figure.figsize':(11.7,8.27)})
    sns.set_context("talk")
    df = dfin.dropna(subset=[x, y]).reset_index(drop=True)

    # Plot
    fig, ax = plt.subplots()
    sns.scatterplot(x=x, y=y, data=df, ax=ax,
                    hue=hue,
                    hue_order=hue_order,
                    alpha=1,
                    style=style,
                    style_order=style_order,
                    palette=palette,
                    size=size,
                    sizes=sizes,
                    )
    if OLS:
        sns.regplot(x=x, y=y, data=df, ax=ax, label=OLSlinelabel, scatter=False)
    if ssline:
        low, high = df[x].min()*.99, df[x].max()*1.01
        ax.plot([low, high], [low, high], c='r', label=sslinelabel)
    if labels:
        movex = df[x].mean() * dx
        movey = df[y].mean() * dy
        for xval, yval, text in zip(df[x], df[y], df[labelvar]):
            ax.text(xval+movex, yval+movey, text, horizontalalignment='left',
                    fontsize=label_font_size, color='black')

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    pct_format = '%.0f%%'  # tick format, e.g. '40%'
    if xpct:
        ax.xaxis.set_major_formatter(mtick.FormatStrFormatter(pct_format))
    if ypct:
        ax.yaxis.set_major_formatter(mtick.FormatStrFormatter(pct_format))
    if ylogscale:
        ax.set(yscale="log")
    if xlogscale:
        ax.set(xscale="log")

    # seaborn adds the hue/style/size column names to the legend as section
    # titles; drop those entries and keep only the category entries.
    section_titles = [name for name in (hue, style, size) if isinstance(name, str)]
    legend_handles, legend_labels = ax.get_legend_handles_labels()
    kept = [(handle, label)
            for handle, label in zip(legend_handles, legend_labels)
            if label not in section_titles]
    ax.legend(handles=[handle for handle, _ in kept],
              labels=[label for _, label in kept],
              fontsize=legend_fontsize)
    if save:
        fig.savefig(pathgraphs + filename, dpi=300, bbox_inches='tight')
    return fig
# Labelled scatter of log GDP per capita against latitude with an OLS line;
# marker size follows log population.  my_xy_plot saves by default, here under
# the given filename.
g = my_xy_plot(
    dffig,
    x='latitude',
    y='ln_gdp_pc',
    xlabel='Latitude',
    ylabel='Log[GDP per capita (' + str(year) +')]',
    OLS=True,
    labels=True,
    size="ln_pop",
    sizes=(10, 400),
    filename='ln-gdp-pc-latitude.pdf',
)
g
def my_xy_line_plot(dfin,
                    x='year',
                    y='ln_gdp_pc',
                    labelvar='iso3c',
                    dx=0.006125,
                    dy=0.006125,
                    xlogscale=False,
                    ylogscale=False,
                    xlabel='Growth Rate of Population',
                    ylabel='Log[Income per capita in ' + str(year) + ']',
                    labels=False,
                    xpct = False,
                    ypct = False,
                    OLS=False,
                    OLSlinelabel='OLS',
                    ssline=False,
                    sslinelabel='45 Degree Line',
                    filename='income-pop-growth.pdf',
                    hue='region',
                    hue_order=['East Asia & Pacific', 'Europe & Central Asia',
                               'Latin America & Caribbean ', 'Middle East & North Africa',
                               'North America', 'South Asia', 'Sub-Saharan Africa '],
                    style='incomeLevel',
                    style_order=['High Income', 'Upper Middle Income', 'Lower Middle Income', 'Low Income'],
                    palette=None,
                    legend_fontsize=10,
                    label_fontsize=12,
                    loc=None,
                    save=True):
    '''
    Line plot of y against x in dfin, grouped by hue and style.

    Rows with a missing x or y are dropped before plotting.

    Parameters
    ----------
    dfin : DataFrame holding the columns named by the other arguments.
    x, y : columns for the horizontal and vertical axes.
    labelvar : column whose values annotate each row when `labels` is true.
    dx, dy : label offsets, as fractions of the mean of x and of y.
    xlogscale, ylogscale : put the corresponding axis on a log scale.
    xlabel, ylabel : axis labels (the default ylabel is built from the global
        `year` at definition time).
    labels : annotate every row with its `labelvar` value.
    xpct, ypct : format the corresponding axis ticks as percentages.
    OLS, OLSlinelabel : overlay a seaborn regression line with this label.
    ssline, sslinelabel : overlay a 45-degree line spanning the range of x.
    filename : file name, inside the global `pathgraphs`, used when saving.
    hue, hue_order, style, style_order, palette : forwarded to `sns.lineplot`.
    legend_fontsize, label_fontsize : font sizes of the legend and of the
        row labels.
    loc : legend location passed to `ax.legend`.
    save : write the figure to `pathgraphs + filename`.

    Returns
    -------
    The matplotlib figure.
    '''
    sns.set(rc={'figure.figsize':(11.7,8.27)})
    sns.set_context("talk")
    df = dfin.dropna(subset=[x, y]).reset_index(drop=True)

    # Plot
    fig, ax = plt.subplots()
    sns.lineplot(x=x, y=y, data=df, ax=ax,
                 hue=hue,
                 hue_order=hue_order,
                 alpha=1,
                 style=style,
                 style_order=style_order,
                 palette=palette,
                 )
    if OLS:
        sns.regplot(x=x, y=y, data=df, ax=ax, label=OLSlinelabel, scatter=False)
    if ssline:
        low, high = df[x].min()*.99, df[x].max()*1.01
        ax.plot([low, high], [low, high], c='r', label=sslinelabel)
    if labels:
        movex = df[x].mean() * dx
        movey = df[y].mean() * dy
        for xval, yval, text in zip(df[x], df[y], df[labelvar]):
            ax.text(xval+movex, yval+movey, text, horizontalalignment='left',
                    fontsize=label_fontsize, color='black')

    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    pct_format = '%.0f%%'  # tick format, e.g. '40%'
    if xpct:
        ax.xaxis.set_major_formatter(mtick.FormatStrFormatter(pct_format))
    if ypct:
        ax.yaxis.set_major_formatter(mtick.FormatStrFormatter(pct_format))
    if ylogscale:
        ax.set(yscale="log")
    if xlogscale:
        ax.set(xscale="log")

    # seaborn adds the hue/style column names to the legend as section titles.
    # Filter on the columns actually passed in (rather than the fixed names
    # 'region' and 'incomeLevel') so the titles are removed for any grouping.
    section_titles = [name for name in (hue, style) if isinstance(name, str)]
    legend_handles, legend_labels = ax.get_legend_handles_labels()
    kept = [(handle, label)
            for handle, label in zip(legend_handles, legend_labels)
            if label not in section_titles]
    ax.legend(handles=[handle for handle, _ in kept],
              labels=[label for _, label in kept],
              fontsize=legend_fontsize, loc=loc)
    if save:
        fig.savefig(pathgraphs + filename, dpi=300, bbox_inches='tight')
    return fig
# Take every second colour from a reversed Blues palette that has six more
# entries than there are distinct income-level values.
palette = sns.color_palette(
    "Blues_r",
    wdi['incomeLevel'].unique().shape[0]+6,
)[:wdi['incomeLevel'].unique().shape[0]*2:2]

# Time series of log GDP per capita, one line per income group.
fig = my_xy_line_plot(
    wdi,
    x='year',
    y='ln_gdp_pc',
    xlabel='Year',
    ylabel='Log[GDP per capita]',
    filename='ln-gdp-pc-income-groups-TS.pdf',
    hue='incomeLevel',
    hue_order=['High Income', 'Upper Middle Income', 'Lower Middle Income', 'Low Income'],
    palette=palette,
    OLS=False,
    labels=False,
    legend_fontsize=16,
    loc='lower right',
    save=True,
)
fig

# Time series of GDP per capita on a log axis, one line per region, using the
# default palette.
#palette=sns.color_palette("Blues_r", wdi['region'].unique().shape[0]+6)[:wdi['region'].unique().shape[0]*2:2]
fig = my_xy_line_plot(
    wdi,
    x='year',
    y='gdp_pc',
    xlabel='Year',
    ylabel='GDP per capita',
    ylogscale=True,
    filename='ln-gdp-pc-regions-TS.pdf',
    style='region',
    style_order=['East Asia & Pacific', 'Europe & Central Asia',
                 'Latin America & Caribbean ', 'Middle East & North Africa',
                 'North America', 'South Asia', 'Sub-Saharan Africa '],
    #palette=palette,
    OLS=False,
    labels=False,
    legend_fontsize=12,
    loc='lower right',
    save=True,
)
fig
# Embed the plotly documentation.
url = 'https://plotly.com/python/'
IFrame(url, width=800, height=400)

# Interactive scatter of log GDP per capita against latitude: one colour and
# symbol per region, marker size from log population, and a single OLS
# trendline fitted over all points.
symbols = ['circle', 'x', 'square', 'cross', 'diamond', 'star-diamond', 'triangle-up']
fig = px.scatter(dffig,
                 x="latitude",
                 y="ln_gdp_pc",
                 color='region',
                 symbol='region',
                 symbol_sequence=symbols,
                 hover_name='name',
                 hover_data=['iso3c', 'ln_pop', 'gdp_pc'],
                 size='ln_pop',
                 size_max=15,
                 trendline="ols",
                 trendline_scope="overall",
                 trendline_color_override="black",
                 labels={
                     "latitude": "Latitude",
                     "ln_gdp_pc": "Log[GDP per capita (" + str(year) + ")]",
                     "gdp_pc": "GDP per capita (" + str(year) + ")",
                     "region": "WB Region"
                 },
                 opacity=0.75,
                 height=800,
                 )
fig.show()

# Outline the markers; the selector restricts the update to marker traces.
fig.update_traces(marker=dict(line=dict(width=2,
                                        color='DarkSlateGrey')),
                  selector=dict(mode='markers'))
fig.show()

# Positions of the line traces (the trendline) within the figure.  A
# comprehension keeps the loop variables out of the notebook namespace.
tr_line = [position for position, trace in enumerate(fig.data)
           if trace.mode == 'lines']
print(tr_line)

# Thicken each line trace.  The loop variable avoids the name `id`, which
# would shadow the builtin for the rest of the session.
for trace_position in tr_line:
    fig.data[trace_position].update(line_width=3)
fig.show()

# Move the legend.
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.25,
    xanchor="left",
    x=0.9
))
fig.show()

# Export a static copy and show the summary of the trendline regression.
fig.write_image(pathgraphs + "ln-gdp-pc-latitude-plotly.pdf", height=1000, width=1500, scale=4)
results = px.get_trendline_results(fig)
results.px_fit_results.iloc[0].summary()
# Download the Natural Earth 10m country boundaries.  The request sends
# browser-like headers.
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'}
url = 'https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip'
# The timeout keeps a stalled connection from hanging the notebook, and
# raise_for_status stops an HTTP error page from being parsed as a shapefile.
r = requests.get(url, headers=headers, timeout=60)
r.raise_for_status()
countries = gp.read_file(io.BytesIO(r.content))
countries.head()

# Plain map of the country polygons.
fig, ax = plt.subplots(figsize=(15,10))
countries.plot(ax=ax)
ax.set_title("WGS84 (lat/lon)", fontdict={'fontsize':34})

# Attach the reference-year data by ISO3 code and shade by GDP per capita.
dffig2 = countries.merge(dffig, left_on='ADM0_A3', right_on='iso3c')
fig, ax = plt.subplots(figsize=(15,10))
dffig2.plot(column='gdp_pc', ax=ax, cmap='Reds')
ax.set_title("WGS84 (lat/lon)", fontdict={'fontsize':34})
# Embed the geoplot documentation.
url = 'https://residentmario.github.io/geoplot/'
IFrame(url, width=800, height=400)

# Base map: every country polygon in light gray.
gplt.polyplot(
    countries,
    projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
    edgecolor='white',
    facecolor='lightgray',
    rasterized=True,
    extent=[-180, -90, 180, 90],
)

# Choropleth of GDP per capita on its own figure.
gplt.choropleth(
    dffig2,
    hue='gdp_pc',
    projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
    edgecolor='white',
    linewidth=1,
    cmap='Reds',
    legend=True,
    scheme='FisherJenks',
    legend_kwargs={
        'bbox_to_anchor': (0.3, 0.5),
        'frameon': True,
        'title': 'GDP per capita',
    },
    figsize=(12,8),
    rasterized=True,
)

# Choropleth again, this time keeping the axis so the gray base map can be
# drawn onto the same figure.
ax = gplt.choropleth(
    dffig2,
    hue='gdp_pc',
    projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
    edgecolor='white',
    linewidth=1,
    cmap='Reds',
    legend=True,
    scheme='FisherJenks',
    legend_kwargs={
        'bbox_to_anchor': (0.3, 0.5),
        'frameon': True,
        'title': 'GDP per capita',
    },
    figsize=(15,10),
    rasterized=True,
)
gplt.polyplot(
    countries,
    projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
    edgecolor='white',
    facecolor='lightgray',
    ax=ax,
    rasterized=True,
    extent=[-180, -90, 180, 90],
)
# Functions for plotting
def center_wrap(text, cwidth=32, **kw):
    '''Center text line by line (to be used in a legend title).

    Parameters
    ----------
    text : either an iterable of lines, used as given, or a single string,
        which is first split into lines with `textwrap.wrap(text, **kw)`.
    cwidth : width each line is centered in.
    **kw : options forwarded to `textwrap.wrap` (for example `width`); only
        used when `text` is a string.

    Returns
    -------
    The centered lines joined with newlines.
    '''
    # Iterating over a bare string would center each character on its own
    # line, so a string is wrapped into lines first.
    if isinstance(text, str):
        lines = textwrap.wrap(text, **kw)
    else:
        lines = text
    return "\n".join(line.center(cwidth) for line in lines)
def MyChloropleth(mydf, myfile='fig', myvar='gdp_pc',
                  mylegend='GDP per capita',
                  k=5,
                  extent=[-180, -90, 180, 90],
                  bbox_to_anchor=(0.25, 0.5),
                  edgecolor='white', facecolor='lightgray',
                  scheme='FisherJenks',
                  rasterized=False,
                  save=True,
                  percent=False,
                  rn=0,
                  **kwargs):
    '''
    Draw a choropleth of mydf[myvar] over a gray base map of all countries.

    The base map is drawn from the module-level `countries` GeoDataFrame, and
    files are written to the module-level `pathgraphs` folder.

    Parameters
    ----------
    mydf : GeoDataFrame holding the geometries and the column `myvar`.
    myfile : file name (without extension) used when saving.
    myvar : column to map.
    mylegend : legend title.
    k : number of classes, for the schemes that take one.
    extent : map extent passed to the base-map `polyplot`.
    bbox_to_anchor : legend anchor.
    edgecolor, facecolor : colours of the base-map polygons.
    scheme : name of a mapclassify classifier, or an already built
        classifier object exposing `.bins`.
    rasterized : passed to both plotting calls.
    save : write the figure as .jpg, .png and .pdf.
    percent : format legend bounds as percentages instead of plain numbers.
    rn : number of decimals in the legend bounds.
    **kwargs : accepted for backward compatibility; currently unused.

    Raises
    ------
    ValueError if `scheme` is a string that is not a supported classifier.
    '''
    # Classifier: build it from its name; anything that is not a string is
    # assumed to be a ready-made classifier and is used as given.
    if isinstance(scheme, str):
        takes_k = ('EqualInterval', 'Quantiles', 'FisherJenks',
                   'FisherJenksSampled', 'JenksCaspall',
                   'JenksCaspallForced', 'JenksCaspallSampled')
        if scheme in takes_k:
            scheme = getattr(mc, scheme)(mydf[myvar], k=k)
        elif scheme in ('BoxPlot', 'HeadTailBreaks'):
            # These classifiers determine the number of classes themselves
            # and do not accept `k`.
            scheme = getattr(mc, scheme)(mydf[myvar])
        elif scheme == 'KClassifiers':
            # KClassifiers evaluates several classifiers; the chosen one is
            # exposed as `.best`.
            scheme = mc.KClassifiers(mydf[myvar]).best
        else:
            raise ValueError('Unknown classification scheme: ' + repr(scheme))

    # Legend labels: one "lower - upper" entry per class.  The first class
    # starts at the minimum of the data, each later one at the previous
    # upper bound.
    upper_bounds = scheme.bins
    lower_bounds = [mydf[myvar].min(), *upper_bounds[:-1]]
    legend_labels = []
    for lower_bound, upper_bound in zip(lower_bounds, upper_bounds):
        if percent:
            bound = f'{lower_bound:.{rn}%} - {upper_bound:.{rn}%}'
        else:
            bound = f'{float(lower_bound):,.{rn}f} - {float(upper_bound):,.{rn}f}'
        legend_labels.append(bound)

    # Plot the choropleth, then the base map on the same axis.
    ax = gplt.choropleth(
        mydf, hue=myvar, projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
        edgecolor='white', linewidth=1,
        cmap='Reds', legend=True,
        scheme=scheme,
        legend_kwargs={'bbox_to_anchor': bbox_to_anchor,
                       'frameon': True,
                       'title': mylegend,
                       },
        legend_labels=legend_labels,
        figsize=(24, 16),
        rasterized=rasterized,
    )
    gplt.polyplot(
        countries, projection=gcrs.PlateCarree(central_longitude=0.0, globe=None),
        edgecolor=edgecolor, facecolor=facecolor,
        ax=ax,
        rasterized=rasterized,
        extent=extent,
    )
    if save:
        for extension in ('.jpg', '.png', '.pdf'):
            plt.savefig(pathgraphs + myfile + extension, dpi=300, bbox_inches='tight')
# Centered legend title, then a ten-class quantile map of GDP per capita that
# is saved under the name 'fig-gdp-pc-<year>'.
mylegend = center_wrap(
    ["GDP per capita in " + str(year)],
    cwidth=32,
    width=32,
)
MyChloropleth(
    dffig2,
    myfile='fig-gdp-pc-' + str(year),
    myvar='gdp_pc',
    mylegend=mylegend,
    k=10,
    scheme='Quantiles',
    save=True,
)
# Embed the plotly maps documentation.
url = 'https://plotly.com/python/maps/'
IFrame(url, width=800, height=400)

# Quintile class of GDP per capita for each country (`gdp_pc_q`) and the
# matching interval label (`gdp_pc_qc`), with the padding after the opening
# bracket removed.
scheme = mc.Quantiles(dffig2['gdp_pc'], k=5)
classifier = mc.Quantiles.make(k=5, rolling=True)
dffig2['gdp_pc_q'] = classifier(dffig2['gdp_pc'])
dffig2['gdp_pc_qc'] = dffig2['gdp_pc_q'].apply(
    lambda x: scheme.get_legend_classes()[x].replace('[ ', '[').replace('( ', '(')
)

# Categorical choropleth in reds; rows are sorted by class before plotting.
fig = px.choropleth(
    dffig2.sort_values('gdp_pc_q', ascending=True),
    locations="iso3c",
    color="gdp_pc_qc",
    hover_name='name',
    hover_data=['iso3c', 'ln_pop'],
    labels={
        "gdp_pc_qc": "GDP per capita (" + str(year) + ")",
    },
    color_discrete_sequence=px.colors.sequential.Reds,
    height=600,
    width=1000,
)
# Change legend position
fig.update_layout(legend={
    'yanchor': "bottom",
    'y': 0.15,
    'xanchor': "left",
    'x': 0.05,
})
fig.show()

# Same map in blues, with GDP per capita added to the hover box and readable
# labels for the hover fields.
fig = px.choropleth(
    dffig2.sort_values('gdp_pc_q', ascending=True),
    locations="iso3c",
    color="gdp_pc_qc",
    hover_name='name',
    hover_data=['iso3c', 'gdp_pc' ,'ln_pop'],
    labels={
        "gdp_pc_qc": "GDP per capita (" + str(year) + ")",
        "gdp_pc": "GDP per capita (" + str(year) + ")",
        'iso3c':'ISO code',
        "ln_pop": "Log[Population (" + str(year) + ")]",
    },
    color_discrete_sequence=px.colors.sequential.Blues,
    height=600,
    width=1000,
)
# Change legend position
fig.update_layout(legend={
    'yanchor': "bottom",
    'y': 0.15,
    'xanchor': "left",
    'x': 0.05,
})
fig.show()
# Choropleth of log GDP per capita on a continuous red colour scale.
fig = px.choropleth(
    dffig,
    locations="iso3c",
    color="ln_gdp_pc",
    hover_name='name',
    hover_data=['iso3c', 'ln_pop'],
    labels={
        "ln_gdp_pc": "Log[GDP per capita (" + str(year) + ")]",
    },
    #color_continuous_scale=px.colors.sequential.Plasma,
    color_continuous_scale="Reds",
    height=600,
    width=1100,
)
fig.show()

# Lay the colour bar out horizontally below the map, with its title on top.
fig.update_layout(coloraxis_colorbar={
    'orientation': 'h',
    'yanchor': "bottom",
    'xanchor': "left",
    'y': -.2,
    'x': 0,
})
fig.update_coloraxes(colorbar_title_side='top')
fig.show()

# Change legend position
fig.update_layout(legend={
    'yanchor': "top",
    'y': 0.99,
    'xanchor': "center",
    'x': 0.01,
    'orientation': 'h',
})
fig.show()
# Choropleth of GDP per capita built from the graph-objects API: reversed
# Blues scale, thin gray borders, and a '$' prefix on the colour-bar ticks.
fig = go.Figure(
    data=go.Choropleth(
        locations=dffig['iso3c'],
        z=dffig['gdp_pc'],
        text=dffig['name'],
        colorscale='Blues',
        autocolorscale=False,
        reversescale=True,
        marker_line_color='darkgray',
        marker_line_width=0.5,
        colorbar_tickprefix='$',
        colorbar_title='GDP pc',
    )
)
# Fixed figure size with narrow margins on a coloured background.
fig.update_layout(
    autosize=False,
    width=800,
    height=400,
    margin={'l': 5, 'r': 5, 'b': 10, 't': 10, 'pad': 1},
    paper_bgcolor="LightSteelBlue",
)
fig.show()

# Larger version of the same map with a longer colour-bar title.
fig = go.Figure(
    data=go.Choropleth(
        locations=dffig['iso3c'],
        z=dffig['gdp_pc'],
        text=dffig['name'],
        colorscale='Blues',
        autocolorscale=False,
        reversescale=True,
        marker_line_color='darkgray',
        marker_line_width=0.5,
        colorbar_tickprefix='$',
        colorbar_title='GDP per capita',
    )
)
fig.update_layout(
    autosize=False,
    width=1000,
    height=600,
    margin={'l': 1, 'r': 1, 'b': 1, 't': 1, 'pad': .1},
    paper_bgcolor="LightSteelBlue",
)
# Change legend position: adjust the colour bar of the first trace directly.
cb = fig.data[0].colorbar
cb.orientation = 'h'
cb.yanchor = 'bottom'
cb.xanchor = 'center'
cb.y = .1
cb.title.side = 'top'
fig.show()
import pandas as pd
import wbdata

# World Bank indicators to download, mapped to the column names used below.
indicators = {'IP.PAT.RESD': 'Resident Patent Applications',
              'IP.PAT.NRES': 'Non-Resident Patent Applications'}

# The country selector gets its own name so that it does not overwrite the
# `countries` GeoDataFrame that MyChloropleth reads when drawing its base map.
patent_countries = 'all'
start_date = '2000'
end_date = '2022'

# NOTE(review): the name `data` also shadows the `data` module imported from
# pandas_datareader at the top of the file; it is kept because the later
# cells refer to this frame as `data`.
# NOTE(review): `data_date` / `convert_date` are keyword names of this wbdata
# call as written; confirm they match the installed wbdata version.
data = wbdata.get_dataframe(indicators, country=patent_countries,
                            data_date=(start_date, end_date), convert_date=False)

# Total applications; the sum is missing whenever either component is missing.
data['Total Patents'] = data['Resident Patent Applications'] + data['Non-Resident Patent Applications']
print(data)
data.to_csv('patent_data_all_countries.csv')
import pandas as pd
import matplotlib.pyplot as plt

# Years to compare.  Only the years actually present in `data` are plotted.
years_of_interest = ['1990', '1995', '2000', '2010', '2020']
filtered_data = data[data.index.get_level_values('date').isin(years_of_interest)]

# NOTE(review): the column 'GDP per capita' is not created anywhere in the
# visible code; confirm `data` carries it before running this cell.
plt.figure(figsize=(10, 6))
# Group on the 'date' index level: `.loc[<year>]` would look the year up in
# the first index level instead, and would raise for years with no rows.  The
# loop variable is not called `year`, so the global reference year used by the
# earlier cells is left untouched.
for patent_year, subset in filtered_data.groupby(level='date'):
    plt.scatter(subset['GDP per capita'], subset['Total Patents'], label=patent_year)
plt.title('Relation between GDP per capita and Total Patents')
plt.xlabel('GDP per capita')
plt.ylabel('Total Patents')
plt.legend()
plt.show()
import pandas as pd
import matplotlib.pyplot as plt

# NOTE(review): the columns 'Income Group', 'Region' and 'GDP per capita' are
# not created anywhere in the visible code; confirm `data` carries them.

# One figure per income group showing both series against the frame's index.
income_groups = data['Income Group'].unique()
for income_group in income_groups:
    subset = data[data['Income Group'] == income_group]
    plt.figure(figsize=(10, 6))
    for series_name in ('GDP per capita', 'Total Patents'):
        plt.plot(subset.index, subset[series_name], label=series_name, marker='o')
    plt.title(f'Evolution of GDP per capita and Total Patents - {income_group}')
    plt.xlabel('Year')
    plt.ylabel('Values')
    plt.legend()
    plt.show()

# The same figure, one per region.
regions = data['Region'].unique()
for region in regions:
    subset = data[data['Region'] == region]
    plt.figure(figsize=(10, 6))
    for series_name in ('GDP per capita', 'Total Patents'):
        plt.plot(subset.index, subset[series_name], label=series_name, marker='o')
    plt.title(f'Evolution of GDP per capita and Total Patents - {region}')
    plt.xlabel('Year')
    plt.ylabel('Values')
    plt.legend()
    plt.show()
import pandas as pd
import matplotlib.pyplot as plt

# Rows for 2015 only.
data_2015 = data[data.index.get_level_values('date') == '2015']

plt.figure(figsize=(8, 8))
plt.scatter(data_2015['Resident Patent Applications'], data_2015['Non-Resident Patent Applications'], label='Patent Activity (2015)')

# End the 45-degree line at the largest value of the two plotted columns.
# Taking the maximum over every column would also include 'Total Patents',
# which pushes the line well beyond the plotted points.
line_max = data_2015[['Resident Patent Applications',
                      'Non-Resident Patent Applications']].max().max()
plt.plot([0, line_max], [0, line_max], linestyle='--', color='red', label='45-Degree Line')
plt.title('Relation between Patenting Activity by Residents and Non-Residents (2015)')
plt.xlabel('Resident Patent Applications')
plt.ylabel('Non-Resident Patent Applications')
plt.legend()
plt.grid(True)
plt.show()
import geopandas as gpd
import matplotlib.pyplot as plt
# Static world map shaded by the 'Total Patents' column for 2015.
# NOTE(review): `world` is not defined anywhere in the visible code. It is
# assumed to be a GeoDataFrame with a geometry column and a 'country_code'
# column; confirm where it is created before running this cell.
# Replace 'Total Patents' with the actual column name in your DataFrame
# Filter data for the year 2015
data_2015 = data[data.index.get_level_values('date') == '2015']
# Merge the GeoDataFrame with the patent data, keeping every row of `world`
# (left join) and matching its 'country_code' column against the index of
# `data_2015`.
# NOTE(review): `data_2015` is filtered on an index level named 'date', so its
# index appears to have more than one level; confirm that matching a single
# column against that index works as intended.
world_with_data = world.merge(data_2015, how='left', left_on='country_code', right_index=True)
# Plotting the static map
fig, ax = plt.subplots(1, 1, figsize=(15, 10))
world_with_data.plot(column='Total Patents', cmap='Blues', linewidth=0.8, ax=ax, edgecolor='0.8', legend=True)
ax.set_title('Patenting Activity Across the World (2015)')
plt.show()
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# NOTE(review): the column 'Log GDP per capita' is not created anywhere in
# the visible code; confirm `data` carries it before running this cell.

# Total applications as the sum of the two components.
data['Total Patents'] = data['Resident Patent Applications'] + data['Non-Resident Patent Applications']

# The three patent series against log GDP per capita on one figure.
plt.figure(figsize=(12, 8))
sns.scatterplot(
    data=data,
    x='Log GDP per capita',
    y='Resident Patent Applications',
    label='Residents',
    color='blue',
    marker='o',
)
sns.scatterplot(
    data=data,
    x='Log GDP per capita',
    y='Non-Resident Patent Applications',
    label='Non-Residents',
    color='orange',
    marker='o',
)
sns.scatterplot(
    data=data,
    x='Log GDP per capita',
    y='Total Patents',
    label='Total',
    color='green',
    marker='o',
)
plt.title('Relation between Log[GDP per capita] and Patenting Activity')
plt.xlabel('Log[GDP per capita]')
plt.ylabel('Patent Applications')
plt.legend()
plt.show()

# Descriptive statistics and pairwise scatter plots of the same four columns.
summary_table = data[['Log GDP per capita', 'Resident Patent Applications', 'Non-Resident Patent Applications', 'Total Patents']]
print(summary_table.describe())
sns.pairplot(summary_table)
plt.show()