Load libraries
# Load libraries
suppressWarnings(suppressMessages({
library(knitr)
library(kableExtra)
library(htmltools)
library(tidyverse)
library(scales)
library(ExPanDaR)
library(plotly)
}))
# Centre every figure produced by knitr chunks
knitr::opts_chunk$set(fig.align = "center")
# Session-wide options, set in a single call:
#   warn   - a negative value makes R ignore all warnings
#   scipen - large penalty so numbers print in fixed rather than
#            scientific notation
#   repr.plot.* - plot size options (presumably read by the Jupyter R
#            kernel's repr package -- confirm for the front end in use)
options(
  warn = -1,
  scipen = 10000,
  repr.plot.width = 6,
  repr.plot.height = 4
)
Import data
# Read the panel data set and preview its first rows
dat <- read_csv("olahpanel1.csv")
head(dat)
# Read the accompanying variable definitions and display them
dat_def <- read_csv("definition.csv")
dat_def
Bar chart
# Stacked bar chart: share of observations per Region within each year.
df <- dat
# Treat both plotting variables as categorical
df[["year"]] <- as.factor(df[["year"]])
df[["Region"]] <- as.factor(df[["Region"]])
p <- ggplot(df, aes(x = year)) +
  # position = "fill" stacks the bars to a common height so the y axis
  # reads as a proportion
  geom_bar(aes(fill = Region), position = "fill") +
  # Thin out the x-axis labels to roughly ten "pretty" year values
  scale_x_discrete(
    breaks = pretty(as.numeric(as.character(df$year)), n = 10)
  ) +
  scale_y_continuous(labels = percent_format()) +
  labs(x = "year", y = "Percent", fill = "Region")
ggplotly(p)
Missing values
# Missing-values overview: build the ExPanDaR missing-values graph for a copy
# of the data, passing "year" as the second argument (presumably the time
# dimension -- confirm against the ExPanDaR documentation).
df <- dat
p <- prepare_missing_values_graph(df, "year")
# Show the plot object as returned, then again converted by ggplotly()
p
ggplotly(p)
Descriptive statistics
# Descriptive statistics for a copy of the data; the `df` element of the
# result is what gets displayed further down.
# NOTE(review): `t` is also the name of base R's transpose function.
df <- dat
t <- prepare_descriptive_table(df)
# Round every numeric column of a data frame, leaving all other columns
# untouched.
#
# x:      data frame
# digits: number of decimal places, passed on to round()
#
# Returns `x` with its numeric (integer or double) columns rounded.
round_df <- function(x, digits) {
  # Select columns with is.numeric() rather than mode(): mode() reports
  # "numeric" for factors and dates too, and round() on a data frame that
  # contains such columns fails. vapply() also keeps the mask type-stable.
  numeric_columns <- vapply(x, is.numeric, logical(1))
  x[numeric_columns] <- round(x[numeric_columns], digits)
  x
}
# Display the descriptive table's data frame with numeric columns rounded to
# two digits
round_df(t$df, 2)
Histogram
# Histogram of LQ with no title.
var <- as.numeric(dat[["LQ"]])
hist(
  var,
  breaks = 10,    # suggested number of cells; hist() treats it as a hint
  right = FALSE,  # cells are left-closed, right-open intervals
  col = "red",
  main = "",
  xlab = "LQ"
)
Extreme values table
# Table of extreme observations of gross_cap (n = 10), identified by
# Province (cross-section id) and year (time-series id).
t <- prepare_ext_obs_table(
  dat,
  n = 10,
  cs_id = "Province",
  ts_id = "year",
  var = "gross_cap"
)
# Display the underlying data frame of the result
t[["df"]]
By group: Bar graph
# Bar graph of LQ aggregated with mean() for each Region.
# NOTE(review): the trailing TRUE is passed positionally; presumably it asks
# for bars ordered by the statistic -- confirm against the ExPanDaR docs.
df <- dat
p <- prepare_by_group_bar_graph(df, "Region", "LQ", mean, TRUE)[["plot"]]
# Relabel the y axis to name the statistic shown
p <- p + ylab("mean LQ")
ggplotly(p)
By group: Violin plot
# Violin plot of LQ by Region via ExPanDaR, shown as an interactive widget.
# NOTE(review): the trailing TRUE is passed positionally; presumably it is the
# order-by-mean flag -- confirm against the ExPanDaR documentation.
df <- dat
p <- prepare_by_group_violin_graph(df, "Region", "LQ", TRUE)
ggplotly(p)
Trend graph
# Trend of LQ over year; only the `plot` element of the result is used.
df <- dat
p <- prepare_trend_graph(df, "year", "LQ")[["plot"]]
ggplotly(p)
Quantile trend graph
# Quantile trend of LQ over year for the 5%, 25%, 50%, 75% and 95%
# quantiles, with points = FALSE.
df <- dat
p <- prepare_quantile_trend_graph(
  df,
  "year",
  c(0.05, 0.25, 0.5, 0.75, 0.95),
  "LQ",
  points = FALSE
)[["plot"]]
ggplotly(p)
Correlation matrix
# Correlation graph over a copy of the full data set.
# NOTE(review): the result is kept in `ret` but not used afterwards in the
# visible code; presumably the call draws the graph as a side effect -- confirm.
df <- dat
ret <- prepare_correlation_graph(df)
Scatterplot
# Scatter plot of growth against LQ, coloured by Region and sized by
# gross_cap, rendered as an interactive widget.
df <- dat
df <- df[, c("Province", "year", "LQ", "growth", "Region", "gross_cap")]
# Keep only rows without a missing value in any selected column
df <- df[complete.cases(df), ]
# Convert the colour variable itself; writing to a lower-case `region` column
# would leave the `Region` column used by the plot unconverted.
df$Region <- as.factor(df$Region)
# Fixed seed so the sampled rows are reproducible
set.seed(42)
# Cap the sample size at the rows actually available: sample_n() raises an
# error when asked for more rows than the data frame holds.
df <- sample_n(df, min(408, nrow(df)))
p <- prepare_scatter_plot(df, "LQ", "growth", color = "Region", size = "gross_cap", loess = 1)
# Render once; a second identical ggplotly(p) call only duplicated the output
ggplotly(p)
# Animated scatter plot of growth against LQ: one animation frame per year,
# colour by Region, point size by gross_cap.
df <- dat
df <- df[, c("Province", "year", "LQ", "growth", "Region", "gross_cap")]
# Keep only rows without a missing value in any selected column
df <- df[complete.cases(df), ]
df$Region <- as.factor(df$Region)
ggplotly(
  ggplot(df, aes(LQ, growth, color = Region)) +
    # `ids` is the key plotly uses to follow the same object from frame to
    # frame, so it should identify a single point. Key on Province, as the
    # tourist/growth animation does; Region is presumably shared by several
    # points within a frame.
    geom_point(aes(size = gross_cap, frame = year, ids = Province))
)
# Animated scatter plot of growth against tourist: one animation frame per
# year, colour by Region, point size by gov_exp, points keyed by Province.
df <- dat[, c("Province", "year", "tourist", "growth", "Region", "gov_exp")]
# Keep only rows without a missing value in any selected column
df <- df[complete.cases(df), ]
df[["Region"]] <- as.factor(df[["Region"]])
(
  ggplot(df, aes(x = tourist, y = growth, color = Region)) +
    geom_point(aes(size = gov_exp, frame = year, ids = Province))
) %>%
  ggplotly()