[R] Explore panel data

Load libraries

# Load libraries ----
# Attach every package quietly: startup messages and warnings raised while
# loading are muted.
suppressWarnings(suppressMessages({
  library(knitr)
  library(kableExtra)
  library(htmltools)
  library(tidyverse)
  library(scales)
  library(ExPanDaR)
  library(plotly)
}))

# Centre figures produced by knitr chunks.
knitr::opts_chunk$set(fig.align = "center")

# Session-wide options, set in a single call:
#   warn = -1          silences all warnings for the rest of the session
#   scipen = 10000     strongly favours fixed over scientific notation
#   repr.plot.*        plot size for the notebook front end
#                      (presumably inches, as read by the repr package -
#                      TODO confirm)
options(
  warn = -1,
  scipen = 10000,
  repr.plot.width = 6,
  repr.plot.height = 4
)

Import data

# Import data ----
# Panel data set from the book of Mendez (2020), read straight from GitHub.
dat <- read_csv(
  file = "https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat.csv"
)

# Preview the first rows.
head(dat)

# Import the data definitions that accompany `dat`, then display them.
dat_def <- read_csv(
  file = "https://raw.githubusercontent.com/quarcs-lab/mendez2020-convergence-clubs-code-data/master/assets/dat-definitions.csv"
)
dat_def

Bar chart

# Bar chart ----
# Share of observations per year, split by hi1990.
# Both variables are turned into factors so that year is a discrete axis and
# hi1990 a discrete fill.
df <- dat %>%
  mutate(
    year = as.factor(year),
    hi1990 = as.factor(hi1990)
  )

# position = "fill" stacks the bars to a height of 1, hence the percent labels.
# Only a "pretty" subset of about 10 years is labelled on the x axis.
p <- ggplot(df, aes(x = year)) +
  geom_bar(aes(fill = hi1990), position = "fill") +
  scale_y_continuous(labels = percent_format()) +
  scale_x_discrete(
    breaks = pretty(as.numeric(as.character(df$year)), n = 10)
  ) +
  labs(x = "year", y = "Percent", fill = "hi1990")

# Interactive version of the chart.
ggplotly(p)

Missing values

# Missing values ----
# Missing-value graph of all variables, with year as the time dimension.
df <- dat
p <- prepare_missing_values_graph(df, ts_id = "year")
p

# Interactive version of the same graph.
ggplotly(p)

Descriptive statistics

# Descriptive statistics ----
# Keep only the 1990 cross-section. The row mask is built from `dat` itself:
# building it from `df` would silently depend on whichever cell last assigned
# `df` (e.g. a sampled or column-subset copy), giving a wrong or mismatched
# filter.
df <- dat[dat$year == "1990", ]
t <- prepare_descriptive_table(df)

# Round all numeric columns of a data frame.
#   x:      data frame
#   digits: number of decimal places handed to round()
# Returns `x` with its numeric columns rounded; other columns are unchanged.
round_df <- function(x, digits) {
  # is.numeric() is FALSE for factors and Dates (whose mode() is "numeric"),
  # so only columns that round() can actually handle are selected.
  numeric_columns <- vapply(x, is.numeric, logical(1))
  x[numeric_columns] <- round(x[numeric_columns], digits)
  x
}

# Show the descriptive table rounded to two decimals.
round_df(t$df, 2)

Histogram

# Histogram ----
# Distribution of log_lp in 1990.
var <- as.numeric(dat$log_lp[dat$year == "1990"])

# right = FALSE makes the bins left-closed, i.e. [a, b).
hist(
  var,
  breaks = 10,
  right = FALSE,
  col = "red",
  main = "",
  xlab = "log_lp"
)

Extreme values table

# Extreme values table ----
# Extreme observations of log_lp (n = 10), identified by country and year.
t <- prepare_ext_obs_table(
  dat,
  var = "log_lp",
  cs_id = "country",
  ts_id = "year",
  n = 10
)
t$df

By group: Bar graph

# By group: Bar graph ----
# Mean lp per hi1990 group in 1990, with bars ordered by the statistic.
df <- dat[dat$year == "1990", ]
p <- prepare_by_group_bar_graph(
  df,
  by_var = "hi1990",
  var = "lp",
  stat_fun = mean,
  order_by_stat = TRUE
)$plot +
  ylab("mean lp")

# Interactive version of the chart.
ggplotly(p)

By group: Violin plot

# By group: Violin plot ----
# Distribution of log_lp within each region, regions ordered by their mean.
df <- dat
p <- prepare_by_group_violin_graph(
  df,
  by_var = "region",
  var = "log_lp",
  order_by_mean = TRUE
)

# Interactive version of the chart.
ggplotly(p)

Trend graph

# Trend graph ----
# Trend of lp over the years.
df <- dat
p <- prepare_trend_graph(df, ts_id = "year", var = "lp")$plot

# Interactive version of the chart.
ggplotly(p)

Quantile trend graph

# Quantile trend graph ----
# Evolution over time of selected quantiles of lp (5%, 25%, median, 75%, 95%),
# drawn without point markers.
df <- dat
p <- prepare_quantile_trend_graph(
  df,
  ts_id = "year",
  quantiles = c(0.05, 0.25, 0.5, 0.75, 0.95),
  var = "lp",
  points = FALSE
)$plot

# Interactive version of the chart.
ggplotly(p)

Correlation matrix

# Correlation matrix ----
# Correlation graph over the full data set.
df <- dat
ret <- prepare_correlation_graph(df)

# Same graph restricted to columns 10 through 16 of the data set.
ret2 <- prepare_correlation_graph(df[, 10:16])

Scatterplot

# Scatterplot ----
# Keep the plotted variables, drop incomplete rows and make region a factor.
df <- dat %>%
  select(country, year, log_lp, log_GDPpc, region, pop) %>%
  filter(complete.cases(.)) %>%
  mutate(region = as.factor(region))

# Draw a reproducible random sample of 1000 observations.
set.seed(42)
df <- sample_n(df, 1000)

# log_lp against log_GDPpc, coloured by region and sized by pop, with a
# LOESS smoother added.
p <- prepare_scatter_plot(
  df,
  x = "log_lp",
  y = "log_GDPpc",
  color = "region",
  size = "pop",
  loess = 1
)

# Interactive version of the chart.
ggplotly(p)

# Full (unsampled) data for the animated scatterplots: keep the plotted
# variables, drop incomplete rows and make region a factor.
df <- dat %>%
  select(country, year, log_lp, log_GDPpc, region, pop) %>%
  filter(complete.cases(.)) %>%
  mutate(region = as.factor(region))

# Animated scatter: one frame per year, points keyed by country.
ggplotly(
  ggplot(df, aes(x = log_lp, y = log_GDPpc, color = region)) +
    geom_point(aes(size = pop, frame = year, ids = country))
)

# Animated scatter with log_GDPpc on the x axis: one frame per year, points
# keyed by country.
# NOTE(review): the original mapped log_GDPpc to BOTH axes, which only draws a
# 45-degree line. log_lp on the y axis is assumed to be the intended variable
# (the other variable kept in `df`) - confirm.
ggplotly(
  ggplot(df, aes(x = log_GDPpc, y = log_lp, color = region)) +
    geom_point(aes(size = pop, frame = year, ids = country))
)

Regression table

# Regression table ----
# Keep the model variables, drop incomplete rows, make hi1990 a factor and
# discard unused factor levels.
df <- dat %>%
  select(log_lp, log_ky, log_h, log_tfp, country, year, hi1990) %>%
  filter(complete.cases(.)) %>%
  mutate(hi1990 = as.factor(hi1990)) %>%
  droplevels()

# OLS of log_lp on log_ky, log_h and log_tfp with country and year fixed
# effects and clustering, estimated separately for each hi1990 group and
# rendered as text.
t <- prepare_regression_table(
  df,
  dvs = "log_lp",
  idvs = c("log_ky", "log_h", "log_tfp"),
  feffects = c("country", "year"),
  clusters = c("country", "year"),
  byvar = "hi1990",
  models = "ols",
  format = "text"
)
t

Load libraries

Import data

Bar chart

Missing values

Descriptive statistics

Histogram

Extreme values table

By group: Bar graph

By group: Violin plot

Trend graph

Quantile trend graph

Correlation matrix

Scatterplot

Regression table

Load libraries