Social and Economic Comparison: Exploring the Americas
# Load computational modules
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
!pip install statsmodels
!pip install --upgrade pip
import statsmodels.api as sm
import statsmodels.formula.api as smf
Import Data
# Read the dataset directly from its public GitHub location into `df`.
data_url = "https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv"
df = pd.read_csv(data_url)
# Bare expression: displays the frame when run as a notebook cell.
df
Overall view.
# Animated bar chart: GDP per capita by region, one animation frame per year.
px.bar(
    df,
    x="region",
    y="GDPpc",
    color="region",
    animation_frame="year",
    hover_name="country",
    title="GDP PerCapita, Regional distribution",
    labels=dict(region="Region"),
)

# Animated map coloured by GDP per capita, located by the `isocode` column.
# The label key has to name the column that is actually plotted ('GDPpc');
# the earlier key ('log_GDPpc_raw') matched no column used by this figure,
# so the display label was silently ignored.
px.choropleth(
    df,
    locations="isocode",
    color="GDPpc",
    animation_frame="year",
    hover_name="country",
    projection="natural earth",
    labels=dict(GDPpc="GDP PerCapita"),
)

# Treemap nested region -> country, sized by `pop`, coloured by `GDPpc`.
# This used to read `df1`, which is only assigned further down in the
# Latin America section, so a top-to-bottom run failed with NameError.
# The full frame `df` is used because this section is the overall view.
# NOTE(review): if an Americas-only treemap was intended, pass
# df.query("region == 'Americas'") instead - confirm with the author.
px.treemap(
    df,
    path=["region", "country"],
    values="pop",
    color="GDPpc",
    hover_name="country",
    height=500,
    labels=dict(GDPpc="GDP Percap"),
)
Latin America
# Keep only the rows whose region is 'Americas'.
df1 = df.query("region == 'Americas'")

# Drop two 25-row ranges of index labels, one after the other.
# NOTE(review): presumably these labels are the Canada and United States
# rows, leaving Latin America only - verify against the data. The line
# chart below filters by country name rather than using `df3`.
df2 = df1.drop(index=range(375, 400))
df3 = df2.drop(index=range(2550, 2575))
df3
df3.describe()

# GDP per capita over time, one line per country, with the United States
# and Canada filtered out of the Americas subset.
px.line(
    df1.query("country != ['United States', 'Canada']"),
    x="year",
    y="GDPpc",
    color="country",
    hover_name="country",
    labels={"country": "Country"},
    title="GDP Percapita Latin America",
)
North America
# United States and Canada rows of the Americas subset.
# `df4` was referenced here without ever being assigned, which raised
# NameError; it is now built with the same country filter the line chart
# in this section already used.
df4 = df1.query("country == ['United States', 'Canada']")
df4.describe()

# GDP per capita over time for the two North American countries.
px.line(
    df4,
    x="year",
    y="GDPpc",
    color="country",
    labels=dict(country="Country"),
    title="GDP Percapita North America",
)

# Standard deviation of each numeric column within each `hi1990` group.
# numeric_only=True is required on pandas 2.x: the frame also holds text
# columns (country, isocode, region), and std() raises TypeError on them
# instead of dropping them as older pandas releases did.
df1.groupby("hi1990").std(numeric_only=True)

# GDP per capita over time for the whole Americas subset, coloured by the
# `hi1990` flag.
px.line(
    df1,
    x="year",
    y="GDPpc",
    color="hi1990",
    hover_name="country",
    labels=dict(hi1990="Northern countries"),
    title="GDP Percapita Latin America vs North America",
)
# Scatter of GDP per capita against `s`, with one OLS trendline per
# `hi1990` group.
px.scatter(
    df1,
    x="s",
    y="GDPpc",
    color="hi1990",
    trendline="ols",
    hover_name="country",
    title="Regression Education-GDPpc, Latin America vs North America",
    labels={"s": "Years of Schooling", "hi1990": "Northern countries"},
)

# Histogram over `s`, split by `hi1990`.
px.histogram(
    df1,
    x="s",
    y="country",
    color="hi1990",
    title="Years of Schooling, Latin America vs North America",
    labels={
        "hi1990": "Northern countries",
        "s": "Years of schooling",
        "country": "Countries",
    },
)

# Same histogram layout, this time over `h`.
px.histogram(
    df1,
    x="h",
    y="country",
    color="hi1990",
    title="Human capital, Latin America vs North America",
    labels={
        "hi1990": "Northern countries",
        "h": "Human Capital",
        "country": "Countries",
    },
)
# Scatter of `log_GDPpc` for the 2014 rows only, with an OLS trendline per
# `hi1990` group.
# NOTE(review): no `y` column is passed - confirm which variable was meant
# for the vertical axis.
px.scatter(
    df1.query("year == 2014"),
    x="log_GDPpc",
    color="hi1990",
    trendline="ols",
    hover_name="country",
    title="Trend of Log GDPpc, Latin America vs North America",
    labels={"log_GDPpc": "Trend Log GDPpc", "hi1990": "Northern countries"},
)

# Animated map of the Americas subset, coloured by the `lp` column.
# NOTE(review): the label key 'log_GDPpc_raw' is not a column used by this
# figure, so it looks like the label never takes effect - confirm whether
# the key should be 'lp' (with a matching label text) or whether the
# colour column should change.
px.choropleth(
    df1,
    locations="isocode",
    color="lp",
    animation_frame="year",
    hover_name="country",
    projection="natural earth",
    labels={"log_GDPpc_raw": "GDP PerCapita"},
)