Club convergence of regional wage across Indonesian provinces in R and conditioning factors
library(ConvergenceClubs) # For implementing log-t-tes and identify club convergence
library(mFilter) # For filtering series using HP filter
library(tidyverse) # Modern data manipulation
library(tibble) # Modern data manipulation
library(data.table) # Modern data manipulation
library(ggpubr) # Provides some easy-to-use functions for creating and customizing ggplot2
library(plyr) # Modern data manipulation
library(plotly) # For interactive visualization
library(sf) # Encodes spatial vector data
# Adjustments
knitr::opts_chunk$set(fig.align = 'center')
options(warn = -1)
options(scipen = 10000)
options(repr.plot.width = 6, repr.plot.height = 4)
Part 1. Identifying club convergence
Load the data
nominal.wage <- read_delim("https://raw.githubusercontent.com/haginta/Project_Club_convergence_wage_Indonesia/main/nominal_wage_A.csv", ";", escape_double = FALSE, trim_ws = TRUE)
nominal.wage
glimpse(nominal.wage)
head(nominal.wage)
Select complete data only from 2008 onwards and remove "Region" column, and remove the observation of Indonesia
nominal.wage.complete <- nominal.wage %>%
select(-c(1,3:5)) %>%
subset(Province!="National")
nominal.wage.complete
Transform the wage series to log
nominal.wage.log <- log(nominal.wage.complete[,-1])
nominal.wage.log
Remove short-run noise using HP filter
f.nominal.wage.log <- apply(nominal.wage.log, 1,
function(x){hpfilter(x, freq=6.25, type="lambda")$trend} )
f.nominal.wage.log <- data.frame(Province = nominal.wage.complete[,1], t(f.nominal.wage.log), stringsAsFactors=FALSE )
colnames(f.nominal.wage.log) <- colnames(nominal.wage.complete)
Inspect the filtered data
head(f.nominal.wage.log)
Run the log-t-test
H.nominal <- computeH(f.nominal.wage.log[,-1], quantity = "H")
round(estimateMod(H.nominal, time_trim=0.333, HACmethod = "FQSB"), 3)
Identify convergence clubs
clubs.nominal <- findClubs(f.nominal.wage.log, dataCols=2:14, unit_names = 1, refCol=14,
time_trim=0.333, cstar=0, HACmethod = 'FQSB')
summary(clubs.nominal)
print(clubs.nominal)
plot(clubs.nominal)
plot(clubs.nominal, clubs=NULL, avgTP = TRUE, legend=TRUE)
Merge clubs
mclubs.nominal <- mergeClubs(clubs.nominal, mergeMethod='PS')
summary(mclubs.nominal)
print(mclubs.nominal)
plot(mclubs.nominal)
plot(mclubs.nominal, clubs=NULL, avgTP = TRUE, legend=TRUE)
Part 2. Generate relative values for transition path plot
Convert "m.clubs.nom" from list to table with name "df.mclubs.nom"
table.mclubs.nom <- map(mclubs.nominal, as.data.table)
df.mclubs.nom <- rbindlist(table.mclubs.nom, fill = T, idcol = T)
df.mclubs.nom
colnames(df.mclubs.nom)[c(1,5)] <- c("Club","Province")
df.mclubs.nom
Merge the "df.mclubs.nom" with "f.nominal.wage.log", which is the log of wage after HP filtered and save as dataframe with name "nominal.wage.club"
nominal.wage.club <- as.data.frame(
inner_join(f.nominal.wage.log, df.mclubs.nom, by="Province") %>%
select(-contains(c("clubs","id","model")))
)
nominal.wage.club
Generate relative values of wage for each year by dividing the wage of province i at time t to the mean wage of all provinces at each time.
relative <- list()
for(a in 2:14) {
relative[[a]] <- data.frame(nominal.wage.club$Province, nominal.wage.club[,a]/mean(nominal.wage.club[,a]))
colnames(relative[[a]])[1]<- "Province"
colnames(relative[[a]])[2]<-
paste("rel",colnames(nominal.wage.club)[a],sep="_")
}
for (x in 2:13) {
relative[[x+1]]<-left_join(relative[[x]],relative[[x+1]], by="Province")
}
table.relative <- map(relative, as.data.table)
df.relative <- rbindlist(table.relative, fill = T, idcol = T) %>% drop_na()
df.relative <- join(df.relative,nominal.wage.club,by=c('Province')) %>%
select(Province,contains("rel"),Club)
df.relative <- as.data.frame(df.relative)
colnames(df.relative) <- gsub("rel_","",colnames(df.relative))
df.relative
Transform "df.relative" to long format dataframe so the ggplot would be happy to plot.
df.relative.long <- df.relative %>% pivot_longer(-c(Province,Club), names_to = "Time", values_to="Rel_Wage")
df.relative.long
Calculate the mean by clubs
df.relative.path <- aggregate(Rel_Wage ~ Club + Time, df.relative.long, mean) %>%
arrange(Club)
df.relative.path
Plot all clubs
path_all <- df.relative.path %>%
ggplot(aes(x=Time,y=Rel_Wage, group=Club, col=Club)) + geom_line() +
labs(title = "Transition path of all clubs", caption = "Rel_Wage = Relative Wage") +
theme_bw()
path_all
Plot transition path within club
Generate relative values Club 1
club1 <- nominal.wage.club %>%
filter(Club == "club1")
relative.club1 <- list()
for(a in 2:14) {
relative.club1[[a]] <- data.frame(club1$Province, club1[,a]/mean(club1[,a]))
colnames(relative.club1[[a]])[1]<- "Province"
colnames(relative.club1[[a]])[2]<-
paste("rel",colnames(club1)[a],sep="_")
}
for (x in 2:13) {
relative.club1[[x+1]]<-left_join(relative.club1[[x]],relative.club1[[x+1]], by="Province")
}
table.relative.club1 <- map(relative.club1, as.data.table)
df.relative.club1 <- rbindlist(table.relative.club1, fill = T, idcol = T) %>% drop_na()
library(plyr)
df.relative.club1 <- join(df.relative.club1,club1,by=c('Province')) %>%
select(Province,contains("rel"),Club)
df.relative.club1 <- as.data.frame(df.relative.club1)
colnames(df.relative.club1) <- gsub("rel_","",colnames(df.relative.club1))
df.relative.club1
df.relative.club1.long <- df.relative.club1 %>% pivot_longer(-c(Province,Club), names_to = "Time", values_to="Rel_Wage")
df.relative.club1.long
df.relative.club1.path <- aggregate(Rel_Wage ~ Province + Time, df.relative.club1.long, mean) %>%
arrange(Province)
Plot transition path of regions in Club 1
path_club1 <- df.relative.club1.path %>%
ggplot(aes(x=Time,y=Rel_Wage, group=Province, col=Province)) + geom_line() +
labs(title = "Transition path of regions in Club 1", caption = "Rel_Wage = Relative Wage") +
theme_bw() +
theme(legend.title = element_text(size = 8),
legend.text = element_text(size = 8),
axis.text.x = element_text(angle = 90))
path_club1
ggplotly(path_club1)
Generate relative values Club 2
club2 <- nominal.wage.club %>%
filter(Club == "club2")
relative.club2 <- list()
for(a in 2:14) {
relative.club2[[a]] <- data.frame(club2$Province, club2[,a]/mean(club2[,a]))
colnames(relative.club2[[a]])[1]<- "Province"
colnames(relative.club2[[a]])[2]<-
paste("rel",colnames(club2)[a],sep="_")
}
for (x in 2:13) {
relative.club2[[x+1]]<-left_join(relative.club2[[x]],relative.club2[[x+1]], by="Province")
}
table.relative.club2 <- map(relative.club2, as.data.table)
df.relative.club2 <- rbindlist(table.relative.club2, fill = T, idcol = T) %>% drop_na()
library(plyr)
df.relative.club2 <- join(df.relative.club2,club2,by=c('Province')) %>%
select(Province,contains("rel"),Club)
df.relative.club2 <- as.data.frame(df.relative.club2)
colnames(df.relative.club2) <- gsub("rel_","",colnames(df.relative.club2))
df.relative.club2
df.relative.club2.long <- df.relative.club2 %>% pivot_longer(-c(Province,Club), names_to = "Time", values_to="Rel_Wage")
df.relative.club2.long
df.relative.club2.path <- aggregate(Rel_Wage ~ Province + Time, df.relative.club2.long, mean) %>%
arrange(Province)
Plot transition path of regions in Club 2
path_club2 <- df.relative.club2.path %>%
ggplot(aes(x=Time,y=Rel_Wage, group=Province, col=Province)) + geom_line() +
labs(title = "Transition path of regions in Club 2", caption = "Rel_Wage = Relative Wage") +
theme_bw() +
theme(legend.title = element_text(size = 8),
legend.text = element_text(size = 8),
axis.text.x = element_text(angle = 90))
path_club2
ggplotly(path_club2)
Part 3. Analyzing conditioning factors of convergence clubs
ologit <- read_delim("https://raw.githubusercontent.com/haginta/Project_Club_convergence_wage_Indonesia/main/wage_convergence_ologit.csv", ";", escape_double = FALSE, trim_ws = TRUE)
ologit
Interactive data exploration
explore <- ggplot(ologit, aes(Region, grdp_g, label=Province, color=Region)) + geom_point() + xlab("Province") + ylab("GDP growth 2008-2020 (%, yoy)") + theme_bw()
ggplotly(explore)
ggbarplot(ologit, x = "Region", y = "grdp_g",
add = c("mean_se", "point"),
color = "Region", alpha = 0.5) +
xlab("Region") + ylab("GDP growth 2020 (%, yoy)") +
ggrepel::geom_text_repel(aes(label = Province), size = 2) +
theme(axis.text.y = element_text(size = 8), axis.text.x = element_text(size = 8, angle = 90, vjust = 0.5, hjust=1),legend.position = "none")
explore_box <- ggbarplot(ologit, x = "Region", y = "grdp_g", color="Region",
add = c("mean_se","point")) +
geom_point(aes(label=Province, color=Region)) +
xlab("Region") + ylab("GDP growth 2008-2020 (%, yoy)") +
theme(axis.text.y = element_text(size = 8), axis.text.x = element_text(size = 8, angle = 90, vjust = 0.5, hjust=1),legend.position = "none")
ggplotly(explore_box)
scatter <- ggplot(ologit, aes(x=emp_manu, y=grdp_g, size=grdp)) +
geom_point(aes(label=Province)) + geom_smooth(method = "lm", se = F) +
labs(x = "Manufacturing employment share", y="Average GDP growth 2008-2020")
ggplotly(scatter)
Load shape file
IDNmap <- read_sf("https://gist.githubusercontent.com/haginta/22a1d27cf8ae440530758a5715f129c4/raw/1506205198c4337a6dbb6430b070fc24ba80e940/IDN_poly34_simple.JSON")
colnames(IDNmap)
colnames(IDNmap)[4] <- 'Province'
colnames(IDNmap)
ggplot(IDNmap) +
geom_sf() + theme_bw() +
ggtitle("Provinces level boundaries map of Indonesia", subtitle = paste0("(", length(unique(IDNmap$POLY_ID)), " provinces)")) +
labs(x = "Longitude", y="Latitude")
Merge non-spatial and spatial data
df_map <- merge(IDNmap, ologit, by=c("Province"))
df_map <- st_make_valid(df_map)
glimpse(df_map)
Show interactive map
club_map <- ggplot(df_map) + geom_sf(aes(fill=factor(club_real), label=Province), lwd = 0.05) +
scale_fill_manual(values = c("red", "yellow", "blue"), name = "Clubs") + theme_bw() +
labs(x = "Longitude", y="Latitude", title = "Club convergence of regional wage in Indonesia, 2008-2020")
club_inter <- ggplotly(club_map)
club_inter
gdp_map <- ggplot(df_map) + geom_sf(aes(fill=grdp_g, label=Province), lwd = 0.05) +
scale_fill_viridis_c(name = "GDP growth") + theme_bw() +
labs(x = "Longitude", y="Latitude", title = "GDP growth of 34 provinces in Indonesia, average 2008-2020")
gdp_inter <- ggplotly(gdp_map)
gdp_inter
Perform ordered logit model
ologit$clubs <- factor(ologit$clubs)
ologit <- ologit %>%
mutate(lngrdp = log(grdp))
glimpse(ologit)
mylogit <- glm(clubs ~ real_wage_2008 + emp_manu + pmtb_gdrp + + tpak + lngrdp, data = ologit, family = "binomial")
summary(mylogit)