Load libraries
# Load libraries
suppressWarnings(suppressMessages({
library(knitr)
library(kableExtra)
library(htmltools)
library(tidyverse)
library(scales)
library(ExPanDaR)
library(plotly)
}))
# Notebook-wide settings: centre knitr figures and set the session options.
knitr::opts_chunk$set(fig.align = "center")
options(
  warn = -1,            # a negative value makes R ignore every warning
  scipen = 10000,       # large penalty: print numbers in fixed notation
  repr.plot.width = 6,  # plot size; presumably read by the Jupyter R kernel
  repr.plot.height = 4
)
Import data
# Data set from the book of Mendez (2020), read from the book's repository
dat <- read_csv(
  file = "https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv"
)
# Preview the first rows
head(dat)

# Companion table holding the data definitions
dat_def <- read_csv(
  file = "https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat-definitions.csv"
)
dat_def
Bar chart
# Filled bar chart: share of rows in each hi1990 category, one bar per year.
df <- dat
# Both variables are plotted as categories, so convert them to factors
factor_cols <- c("year", "hi1990")
df[factor_cols] <- lapply(df[factor_cols], as.factor)

p <- ggplot(df, aes(x = year)) +
  geom_bar(aes(fill = hi1990), position = "fill") +
  labs(x = "year", fill = "hi1990", y = "Percent") +
  scale_y_continuous(labels = percent_format()) +
  # Only label a handful of "pretty" years so the axis stays readable
  scale_x_discrete(
    breaks = pretty(as.numeric(as.character(df$year)), n = 10)
  )
ggplotly(p)
Missing values
# Missing-values graph with year as the time identifier
df <- dat
p <- prepare_missing_values_graph(df, ts_id = "year")
p            # print the plot object as returned
ggplotly(p)  # same plot converted to an interactive plotly widget
Descriptive statistics
# Descriptive statistics for the rows where year is 1990.
# The row filter is built from `dat` itself rather than from whatever `df`
# holds after an earlier section, so this step no longer depends on the
# order in which the sections were run.
df <- dat[dat$year == "1990", ]
t <- prepare_descriptive_table(df)
#' Round every numeric column of a data frame
#'
#' Columns that are not numeric (character, factor, logical, Date, ...) are
#' returned unchanged.
#'
#' @param x A data frame.
#' @param digits Number of decimal places, passed on to `round()`.
#' @return `x` with its numeric columns rounded to `digits` decimals.
round_df <- function(x, digits) {
  # is.numeric() is FALSE for factors and dates, whereas mode() reports
  # "numeric" for them and round() then fails on those columns.
  numeric_columns <- vapply(x, is.numeric, logical(1))
  if (any(numeric_columns)) {
    x[numeric_columns] <- round(x[numeric_columns], digits)
  }
  x
}
# Show the descriptive table's data frame rounded to two decimals
round_df(t[["df"]], digits = 2)
Histogram
# Histogram of log_lp for the rows where year is 1990
var <- as.numeric(dat[["log_lp"]][dat[["year"]] == "1990"])
hist(
  x = var,
  breaks = 10,
  right = FALSE,  # cells are left-closed, right-open intervals
  col = "red",
  main = "",
  xlab = "log_lp"
)
Extreme values table
# Extreme-observations table for log_lp (n = 10), with rows identified by
# country (cross-section id) and year (time id)
t <- prepare_ext_obs_table(
  dat,
  n = 10,
  cs_id = "country",
  ts_id = "year",
  var = "log_lp"
)
t[["df"]]
By group: Bar graph
# Mean of lp by hi1990 group, restricted to the rows where year is 1990
df <- dat[dat$year == "1990", ]
p <- prepare_by_group_bar_graph(
  df,
  by_var = "hi1990",
  var = "lp",
  stat_fun = mean,
  order_by_stat = TRUE
)[["plot"]] +
  ylab("mean lp")
ggplotly(p)
By group: Violin plot
# Violin plot of log_lp for each region
df <- dat
p <- prepare_by_group_violin_graph(
  df,
  by_var = "region",
  var = "log_lp",
  order_by_mean = TRUE
)
ggplotly(p)
Trend graph
# Trend graph of lp over year; only the plot element of the result is kept
df <- dat
p <- prepare_trend_graph(df, ts_id = "year", var = "lp")[["plot"]]
ggplotly(p)
Quantile trend graph
# Quantile trend graph of lp over year for five quantiles, without points
df <- dat
p <- prepare_quantile_trend_graph(
  df,
  ts_id = "year",
  quantiles = c(0.05, 0.25, 0.5, 0.75, 0.95),
  var = "lp",
  points = FALSE
)[["plot"]]
ggplotly(p)
Correlation matrix
# Correlation graph for the whole data set ...
df <- dat
ret <- prepare_correlation_graph(df)
# ... and again for the columns at positions 10 to 16 only
ret2 <- prepare_correlation_graph(df[, 10:16])
Scatterplot
# Scatter plot of log_lp against log_GDPpc, coloured by region and sized by
# pop, drawn on a random sample of 1000 complete rows.
df <- dat %>%
  select(country, year, log_lp, log_GDPpc, region, pop) %>%
  drop_na() %>%
  mutate(region = as.factor(region))

# Fixed seed so the same rows are sampled on every run
set.seed(42)
df <- sample_n(df, 1000)

p <- prepare_scatter_plot(
  df,
  x = "log_lp",
  y = "log_GDPpc",
  color = "region",
  size = "pop",
  loess = 1
)
ggplotly(p)
# Animated scatter plot on all complete rows: year is passed as the frame
# aesthetic and country as the ids aesthetic for ggplotly().
df <- dat %>%
  select(country, year, log_lp, log_GDPpc, region, pop) %>%
  drop_na() %>%
  mutate(region = as.factor(region))

ggplotly(
  ggplot(df, aes(x = log_lp, y = log_GDPpc, color = region)) +
    geom_point(aes(size = pop, frame = year, ids = country))
)
# Animated scatter plot with log_GDPpc on the x axis and log_lp on the y axis.
# Mapping log_GDPpc to both axes would only draw the 45-degree line, so the
# y aesthetic uses log_lp, the other continuous measure kept in `df`.
# NOTE(review): assumes log_lp is the intended y variable; confirm.
ggplotly(
  ggplot(df, aes(x = log_GDPpc, y = log_lp, color = region)) +
    geom_point(aes(size = pop, frame = year, ids = country))
)
Regression table
# Regression of log_lp on log_ky, log_h and log_tfp, estimated separately
# for each hi1990 group, using complete rows only.
df <- dat %>%
  select(log_lp, log_ky, log_h, log_tfp, country, year, hi1990) %>%
  drop_na() %>%
  mutate(hi1990 = as.factor(hi1990)) %>%
  droplevels()

t <- prepare_regression_table(
  df,
  dvs = "log_lp",
  idvs = c("log_ky", "log_h", "log_tfp"),
  feffects = c("country", "year"),
  clusters = c("country", "year"),
  byvar = "hi1990",
  models = "ols",
  format = "text"
)
t