Economic Growth Rates Analysis (Before-After) Covid-19
Project Description & Motivation
Problem Description
Data Sources
Project Value
Preliminary Data Exploration
Data Collection and Wrangling
library(readxl)
library(ggplot2)
library(dplyr)
library(tibble)
library(stringr)
library(tidyr)
#load data from xls file into Data Frame
df_2018 <- read_excel("/work/Economic Growth/[Seri 2010] Laju Pertumbuhan PDB Seri 2018.xlsx", skip = 3)
New names:
• `` -> `...1`
• `Triwulan I` -> `Triwulan I...2`
• `Triwulan II` -> `Triwulan II...3`
• `Triwulan III` -> `Triwulan III...4`
• `Triwulan IV` -> `Triwulan IV...5`
• `Tahunan` -> `Tahunan...6`
• `Triwulan I` -> `Triwulan I...7`
• `Triwulan II` -> `Triwulan II...8`
• `Triwulan III` -> `Triwulan III...9`
• `Triwulan IV` -> `Triwulan IV...10`
• `Tahunan` -> `Tahunan...11`
• `Triwulan I` -> `Triwulan I...12`
• `Triwulan II` -> `Triwulan II...13`
• `Triwulan III` -> `Triwulan III...14`
• `Triwulan IV` -> `Triwulan IV...15`
• `Tahunan` -> `Tahunan...16`
New names:
• `` -> `...1`
• `Triwulan I` -> `Triwulan I...2`
• `Triwulan II` -> `Triwulan II...3`
• `Triwulan III` -> `Triwulan III...4`
• `Triwulan IV` -> `Triwulan IV...5`
• `Tahunan` -> `Tahunan...6`
• `Triwulan I` -> `Triwulan I...7`
• `Triwulan II` -> `Triwulan II...8`
• `Triwulan III` -> `Triwulan III...9`
• `Triwulan IV` -> `Triwulan IV...10`
• `Tahunan` -> `Tahunan...11`
• `Triwulan I` -> `Triwulan I...12`
• `Triwulan II` -> `Triwulan II...13`
• `Triwulan III` -> `Triwulan III...14`
• `Triwulan IV` -> `Triwulan IV...15`
• `Tahunan` -> `Tahunan...16`
New names:
• `` -> `...1`
• `Triwulan I` -> `Triwulan I...2`
• `Triwulan II` -> `Triwulan II...3`
• `Triwulan III` -> `Triwulan III...4`
• `Triwulan IV` -> `Triwulan IV...5`
• `Tahunan` -> `Tahunan...6`
• `Triwulan I` -> `Triwulan I...7`
• `Triwulan II` -> `Triwulan II...8`
• `Triwulan III` -> `Triwulan III...9`
• `Triwulan IV` -> `Triwulan IV...10`
• `Tahunan` -> `Tahunan...11`
• `Triwulan I` -> `Triwulan I...12`
• `Triwulan II` -> `Triwulan II...13`
• `Triwulan III` -> `Triwulan III...14`
• `Triwulan IV` -> `Triwulan IV...15`
• `Tahunan` -> `Tahunan...16`
New names:
• `` -> `...1`
• `Triwulan I` -> `Triwulan I...2`
• `Triwulan II` -> `Triwulan II...3`
• `Triwulan III` -> `Triwulan III...4`
• `Triwulan IV` -> `Triwulan IV...5`
• `Tahunan` -> `Tahunan...6`
• `Triwulan I` -> `Triwulan I...7`
• `Triwulan II` -> `Triwulan II...8`
• `Triwulan III` -> `Triwulan III...9`
• `Triwulan IV` -> `Triwulan IV...10`
• `Tahunan` -> `Tahunan...11`
• `Triwulan I` -> `Triwulan I...12`
• `Triwulan II` -> `Triwulan II...13`
• `Triwulan III` -> `Triwulan III...14`
• `Triwulan IV` -> `Triwulan IV...15`
• `Tahunan` -> `Tahunan...16`
New names:
• `` -> `...1`
• `Triwulan I` -> `Triwulan I...2`
• `Triwulan II` -> `Triwulan II...3`
• `Triwulan III` -> `Triwulan III...4`
• `Triwulan IV` -> `Triwulan IV...5`
• `Tahunan` -> `Tahunan...6`
• `Triwulan I` -> `Triwulan I...7`
• `Triwulan II` -> `Triwulan II...8`
• `Triwulan III` -> `Triwulan III...9`
• `Triwulan IV` -> `Triwulan IV...10`
• `Tahunan` -> `Tahunan...11`
• `Triwulan I` -> `Triwulan I...12`
• `Triwulan II` -> `Triwulan II...13`
• `Triwulan III` -> `Triwulan III...14`
• `Triwulan IV` -> `Triwulan IV...15`
• `Tahunan` -> `Tahunan...16`
View(df_2018)
# Get Column Name
names(df_2018)
colnames(df_2018)[1] <- "LapanganUsaha"
View(df_2018$LapanganUsaha)
#Select only sectoral names by using String Detection Function and Regex
selected_values <- df_2018[str_detect(df_2018$LapanganUsaha, "(?:[A-Z][.,])+"), ]
View(selected_values)
#Rearrangement Categorical Names and Cleansing
pattern_mapping <- c(
"(?:[A-Z][.,])+" = ""
)
selected_values$LapanganUsaha <- str_replace_all(selected_values$LapanganUsaha, pattern_mapping)
selected_values <- na.omit(selected_values)
#View Clean Categorical Names
View(selected_values$LapanganUsaha)
#Selection Column
selected_columns <- c(names(selected_values)[1], tail(names(selected_values), 5))
selected_values <- selected_values[selected_columns]
colnames(selected_values)[2] <- "TriwulanI"
colnames(selected_values)[3] <- "TriwulanII"
colnames(selected_values)[4] <- "TriwulanIII"
colnames(selected_values)[5] <- "TriwulanIV"
colnames(selected_values)[6] <- "Tahunan"
View(selected_values)
#Visualize Data Sectoral for Economic Growth Rate in 2018 (y-on-y)
negative_color <- 'red'
positive_color <- 'blue'
images1 <- ggplot(selected_values, aes(x = LapanganUsaha, y = TriwulanI, fill = TriwulanI>0)) +
geom_bar(stat = 'identity', color = 'black') +
scale_fill_manual(values = c(negative_color, positive_color), guide = "none") +
xlab("Sectoral") +
ylab("Growth Rate") +
ggtitle("Sectoral Growth Rate in 1st Quarter 2018") +
theme_minimal()+theme(axis.text.x = element_text(angle = 45, hjust = 1))
images1
#Save Graph Data to Image
ggsave("growth_rate_q1_2018.png",images1, width = 15, height = 10)
Data Exploration
# Function to load and process dataframe
process_data <- function(data_path, year) {
# Read the Excel file
df <- read_excel(data_path, range = "A4:P69")
# Rename the first column to "LapanganUsaha"
colnames(df)[1] <- "LapanganUsaha"
# Subset the dataframe based on the LapanganUsaha column
df_sector <- df[str_detect(df$LapanganUsaha, "(?:[A-Z][.,])+"), ]
# Define pattern mapping for cleaning strings
pattern_mapping <- c(
"(?:[A-Z][.,])+" = "",
"^\\s+|\\s+$" = ""
)
# Apply pattern mapping to clean the selected dataframe
df_sector$LapanganUsaha <- str_replace_all(df_sector$LapanganUsaha, pattern_mapping)
# Select the desired columns
selected_columns <- c(names(df_sector)[1], tail(names(df_sector), 5))
df_sector <- df_sector[selected_columns]
# Rename the columns based on the specified year
colnames(df_sector)[2] <- sprintf("TriwulanI_%d", year)
colnames(df_sector)[3] <- sprintf("TriwulanII_%d", year)
colnames(df_sector)[4] <- sprintf("TriwulanIII_%d", year)
colnames(df_sector)[5] <- sprintf("TriwulanIV_%d", year)
colnames(df_sector)[6] <- sprintf("Tahunan_%d", year)
# Return the processed dataframe
return(df_sector)
}
year <- 2018
data_path = "/work/Economic Growth/[Seri 2010] Laju Pertumbuhan PDB Seri 2018.xlsx"
df_sector_2018 <- process_data(data_path, year)
New names:
• `` -> `...1`
• `Triwulan I` -> `Triwulan I...2`
• `Triwulan II` -> `Triwulan II...3`
• `Triwulan III` -> `Triwulan III...4`
• `Triwulan IV` -> `Triwulan IV...5`
• `Tahunan` -> `Tahunan...6`
• `Triwulan I` -> `Triwulan I...7`
• `Triwulan II` -> `Triwulan II...8`
• `Triwulan III` -> `Triwulan III...9`
• `Triwulan IV` -> `Triwulan IV...10`
• `Tahunan` -> `Tahunan...11`
• `Triwulan I` -> `Triwulan I...12`
• `Triwulan II` -> `Triwulan II...13`
• `Triwulan III` -> `Triwulan III...14`
• `Triwulan IV` -> `Triwulan IV...15`
• `Tahunan` -> `Tahunan...16`
df_sector_2018
year <- 2019
data_path = "/work/Economic Growth/[Seri 2010] Laju Pertumbuhan PDB Seri 2019.xlsx"
df_sector_2019 <- process_data(data_path, year)
New names:
• `` -> `...1`
• `Triwulan I` -> `Triwulan I...2`
• `Triwulan II` -> `Triwulan II...3`
• `Triwulan III` -> `Triwulan III...4`
• `Triwulan IV` -> `Triwulan IV...5`
• `Tahunan` -> `Tahunan...6`
• `Triwulan I` -> `Triwulan I...7`
• `Triwulan II` -> `Triwulan II...8`
• `Triwulan III` -> `Triwulan III...9`
• `Triwulan IV` -> `Triwulan IV...10`
• `Tahunan` -> `Tahunan...11`
• `Triwulan I` -> `Triwulan I...12`
• `Triwulan II` -> `Triwulan II...13`
• `Triwulan III` -> `Triwulan III...14`
• `Triwulan IV` -> `Triwulan IV...15`
• `Tahunan` -> `Tahunan...16`
df_sector_2019
year <- 2020
data_path = "/work/Economic Growth/[Seri 2010] Laju Pertumbuhan PDB Seri 2020.xlsx"
df_sector_2020 <- process_data(data_path, year)
df_sector_2020
New names:
• `` -> `...1`
• `Triwulan I` -> `Triwulan I...2`
• `Triwulan II` -> `Triwulan II...3`
• `Triwulan III` -> `Triwulan III...4`
• `Triwulan IV` -> `Triwulan IV...5`
• `Tahunan` -> `Tahunan...6`
• `Triwulan I` -> `Triwulan I...7`
• `Triwulan II` -> `Triwulan II...8`
• `Triwulan III` -> `Triwulan III...9`
• `Triwulan IV` -> `Triwulan IV...10`
• `Tahunan` -> `Tahunan...11`
• `Triwulan I` -> `Triwulan I...12`
• `Triwulan II` -> `Triwulan II...13`
• `Triwulan III` -> `Triwulan III...14`
• `Triwulan IV` -> `Triwulan IV...15`
• `Tahunan` -> `Tahunan...16`
year <- 2021
data_path = "/work/Economic Growth/[Seri 2010] Laju Pertumbuhan PDB Seri 2021.xlsx"
df_sector_2021 <- process_data(data_path, year)
df_sector_2021
New names:
• `` -> `...1`
• `Triwulan I` -> `Triwulan I...2`
• `Triwulan II` -> `Triwulan II...3`
• `Triwulan III` -> `Triwulan III...4`
• `Triwulan IV` -> `Triwulan IV...5`
• `Tahunan` -> `Tahunan...6`
• `Triwulan I` -> `Triwulan I...7`
• `Triwulan II` -> `Triwulan II...8`
• `Triwulan III` -> `Triwulan III...9`
• `Triwulan IV` -> `Triwulan IV...10`
• `Tahunan` -> `Tahunan...11`
• `Triwulan I` -> `Triwulan I...12`
• `Triwulan II` -> `Triwulan II...13`
• `Triwulan III` -> `Triwulan III...14`
• `Triwulan IV` -> `Triwulan IV...15`
• `Tahunan` -> `Tahunan...16`
year <- 2022
data_path = "/work/Economic Growth/[Seri 2010] Laju Pertumbuhan PDB Seri 2022.xlsx"
df_sector_2022 <- process_data(data_path, year)
df_sector_2022
New names:
• `` -> `...1`
• `Triwulan I` -> `Triwulan I...2`
• `Triwulan II` -> `Triwulan II...3`
• `Triwulan III` -> `Triwulan III...4`
• `Triwulan IV` -> `Triwulan IV...5`
• `Tahunan` -> `Tahunan...6`
• `Triwulan I` -> `Triwulan I...7`
• `Triwulan II` -> `Triwulan II...8`
• `Triwulan III` -> `Triwulan III...9`
• `Triwulan IV` -> `Triwulan IV...10`
• `Tahunan` -> `Tahunan...11`
• `Triwulan I` -> `Triwulan I...12`
• `Triwulan II` -> `Triwulan II...13`
• `Triwulan III` -> `Triwulan III...14`
• `Triwulan IV` -> `Triwulan IV...15`
• `Tahunan` -> `Tahunan...16`
# Join or merge all dataframe
result_dfs <- list(df_sector_2018, df_sector_2019, df_sector_2020, df_sector_2021, df_sector_2022)
df_merged <- Reduce(function(x, y) merge(x, y, by = 'LapanganUsaha', all=TRUE), result_dfs)
df_merged
# Reshape the dataframe
df_melted <- gather(df_merged, key = 'ReportingPeriods', value = "GrowthRate", -LapanganUsaha)
df_melted
# Create new columns to indicate Triwulan and Year
df_melted <- separate(df_melted,ReportingPeriods, into = c("Triwulan", "Year"), sep = "_", remove = FALSE )
df_melted$Period <- ifelse(df_melted$Year %in% c(2018, 2019), "Before Covid",
ifelse(df_melted$Year %in% c(2020, 2021), "During Covid",
ifelse(df_melted$Year %in% 2022, "After Covid", NA)))
df_melted
df_melted <- df_melted %>%
mutate(
GrowthRate = as.numeric(as.character(GrowthRate)),
GrowthRate = round(GrowthRate, 2)
)
df_tahunan <- df_melted[df_melted$Triwulan == "Tahunan", ]
line_chart <- ggplot(df_tahunan[c('LapanganUsaha', 'Year', 'GrowthRate')], aes(x = Year, y = GrowthRate, color = LapanganUsaha, group = LapanganUsaha)) +
geom_line() +
labs(title = "Sectoral Economic Growth Rate by Year",
x = "Year",
y = "Growth Rate") +
theme_minimal()
line_chart
ggsave("sectoral_economic_growth.png", plot = line_chart, width = 16, height = 6, units = "in", dpi = 100)
df_pdb <- df_melted[df_melted$LapanganUsaha == 'PRODUK DOMESTIK BRUTO' & df_melted$Triwulan != 'Tahunan',]
ggplot(df_pdb, aes(x = Year, y = GrowthRate, fill = Triwulan)) +
geom_bar(stat = "identity", position = "dodge") +
labs(x = "Year", y = "Growth Rate", fill = "Triwulan") +
ggtitle("Growth Rate by Year and Triwulan") +
theme_minimal() +
ylim(min(df_pdb$GrowthRate) - 1, max(df_pdb$GrowthRate) + 1)
bar_chart <- ggplot(df_pdb, aes(x = interaction(Year, Triwulan, lex.order = TRUE), y = GrowthRate, fill = Period)) +
geom_bar(stat = "identity", position = "dodge") +
labs(x = "Year", y = "Growth Rate", fill = "Perioid") +
ggtitle("Growth Rate by Year") +
theme_minimal() +
ylim(min(df_pdb$GrowthRate) - 1, max(df_pdb$GrowthRate) + 1) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
bar_chart
ggsave("economic_growth_by_pandemic_period.png", plot = bar_chart, width = 16, height = 6, units = "in", dpi = 100)
Project Progress
Current:
Next Plan:
Continue exploration till sub-sectoral level
Find out any possibility relationship between the deteriorating rate of economic growth during Pandemic with Covid-19
Find out any possibility relationship between sectors affected during Pandemic with creditworthiness of someone working in it.