Set up
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import statsmodels.api as sm
import statsmodels.formula.api as smf
Import data
# Load the main dataset and the companion dat-definitions.csv table directly
# from raw.githubusercontent.com (network access is required).
# A bare name on its own line is only rendered when run as a notebook cell.
df = pd.read_csv(
    "https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv"
)
df
data_definitions = pd.read_csv(
    "https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat-definitions.csv"
)
data_definitions
Transform and subset data
# Keep only the rows whose `year` column equals 2014.
df2014 = df.query("year == 2014")
# Bare name: shows the subset when run as a notebook cell.
df2014
Describe data
# Summary statistics of the 2014 subset (rendered when run as a notebook cell).
df2014.describe()
Analyze one numerical variable
# Summary statistics of `log_lp` in the 2014 subset: first at full precision,
# then rounded to two decimals.
df2014["log_lp"].describe()
df2014["log_lp"].describe().round(2)

# Three views of the same variable (strip plot, histogram, box plot).
# `hover_name` takes the hover title from the `country` column.
px.strip(df2014, x="log_lp", hover_name="country")
px.histogram(df2014, x="log_lp", hover_name="country")
px.box(df2014, x="log_lp", hover_name="country")
Analyze two numerical variables
# Scatter of `log_GDPpc` against `log_lp` for the 2014 subset.
px.scatter(df2014, x="log_lp", y="log_GDPpc", hover_name="country")

# Same scatter with an OLS trendline overlaid.
px.scatter(
    df2014,
    x="log_lp",
    y="log_GDPpc",
    hover_name="country",
    trendline="ols",
)
Analyze one categorical variable
# Bar chart over the values of the `hi1990` column.
px.bar(df2014, x="hi1990")
Analyze two categorical variables
# Bar chart over `hi1990`, with the bars coloured by `region`.
px.bar(df2014, x="hi1990", color="region")
Analyze one categorical variable and one numerical variable
# Box plots of `log_lp`, one box per value of `hi1990`.
px.box(df2014, x="log_lp", y="hi1990")

# Disabled variant that also renames the axes.
# NOTE(review): its `labels` dict is keyed by `lp`, while the plotted column
# is `log_lp` -- confirm the key before re-enabling.
# px.box(df2014, x='log_lp', y='hi1990', labels=dict(lp = 'Labor Productivity', hi1990 = 'Is it a developed country?'))

# Histogram of column `s`, coloured by `hi1990`, with a rug plot in the margin.
fig = px.histogram(
    df2014,
    x="s",
    color="hi1990",
    marginal="rug",
    hover_data=df2014.columns,  # expose every column in the hover box
)
fig.show()

# Box plots of `log_lp`, one box per value of `region`.
px.box(df2014, x="log_lp", y="region")
Analyze two numerical variables and one categorical variable
# Scatter of `log_GDPpc` against `log_lp`, coloured by `hi1990`, with OLS
# trendlines requested via `trendline`.
px.scatter(
    df2014,
    x="log_lp",
    y="log_GDPpc",
    hover_name="country",
    trendline="ols",
    color="hi1990",
)
Gapminder dataset
# Load the Gapminder sample dataset shipped with Plotly Express.
gapminder = px.data.gapminder()
# Bare expressions below are rendered only when run as notebook cells.
gapminder
gapminder.describe()
# Keep only the rows whose `country` column equals 'China'.
gapminder_china = gapminder.query("country=='China'")
gapminder_china
# Line chart of `lifeExp` over `year` for the China subset.
fig = px.line(gapminder_china, x="year", y="lifeExp")
fig.show()
# Bar chart of `pop` by `year` for the China subset. Bars are coloured by
# `lifeExp`; the hover box additionally lists `lifeExp` and `gdpPercap`.
fig = px.bar(
    gapminder_china,
    x="year",
    y="pop",
    hover_data=["lifeExp", "gdpPercap"],
    color="lifeExp",
    labels={"pop": "population of China"},  # display name for the `pop` column
)
fig.show()
# Animated bubble map of the full Gapminder data on an orthographic
# projection: one frame per `year`, markers located by `iso_alpha`, coloured
# by `continent` and sized by `gdpPercap`.
fig = px.scatter_geo(
    gapminder,
    locations="iso_alpha",
    color="continent",
    hover_name="country",  # hover title
    size="gdpPercap",
    animation_frame="year",
    projection="orthographic",
)
fig.show()
# Animated choropleth of `pop`, one frame per `year`, with regions located by
# `iso_alpha`. `range_color` pins the colour scale to explicit bounds
# instead of letting it be chosen automatically.
fig = px.choropleth(
    gapminder,
    locations="iso_alpha",
    color="pop",
    hover_name="country",  # hover title
    animation_frame="year",
    range_color=[60000, 1300000000],
)
fig.show()