[R] Club convergence of regional wage across Indonesian provinces in R

Club convergence of regional wage across Indonesian provinces in R and conditioning factors

library(ConvergenceClubs) # For implementing log-t-tes and identify club convergence library(mFilter) # For filtering series using HP filter library(tidyverse) # Modern data manipulation library(tibble) # Modern data manipulation library(data.table) # Modern data manipulation library(ggpubr) # Provides some easy-to-use functions for creating and customizing ggplot2 library(plyr) # Modern data manipulation library(plotly) # For interactive visualization library(sf) # Encodes spatial vector data # Adjustments knitr::opts_chunk$set(fig.align = 'center') options(warn = -1) options(scipen = 10000) options(repr.plot.width = 6, repr.plot.height = 4)

Part 1. Identifying club convergence

Load the data

nominal.wage <- read_delim("https://raw.githubusercontent.com/haginta/Project_Club_convergence_wage_Indonesia/main/nominal_wage_A.csv", ";", escape_double = FALSE, trim_ws = TRUE) nominal.wage

glimpse(nominal.wage) head(nominal.wage)

Select complete data only from 2008 onwards and remove "Region" column, and remove the observation of Indonesia

nominal.wage.complete <- nominal.wage %>% select(-c(1,3:5)) %>% subset(Province!="National") nominal.wage.complete

Transform the wage series to log

nominal.wage.log <- log(nominal.wage.complete[,-1]) nominal.wage.log

Remove short-run noise using HP filter

f.nominal.wage.log <- apply(nominal.wage.log, 1, function(x){hpfilter(x, freq=6.25, type="lambda")$trend} ) f.nominal.wage.log <- data.frame(Province = nominal.wage.complete[,1], t(f.nominal.wage.log), stringsAsFactors=FALSE ) colnames(f.nominal.wage.log) <- colnames(nominal.wage.complete)

Inspect the filtered data

head(f.nominal.wage.log)

Run the log-t-test

H.nominal <- computeH(f.nominal.wage.log[,-1], quantity = "H") round(estimateMod(H.nominal, time_trim=0.333, HACmethod = "FQSB"), 3)

Identify convergence clubs

clubs.nominal <- findClubs(f.nominal.wage.log, dataCols=2:14, unit_names = 1, refCol=14, time_trim=0.333, cstar=0, HACmethod = 'FQSB')

summary(clubs.nominal)

print(clubs.nominal)

plot(clubs.nominal)

plot(clubs.nominal, clubs=NULL, avgTP = TRUE, legend=TRUE)

Merge clubs

mclubs.nominal <- mergeClubs(clubs.nominal, mergeMethod='PS') summary(mclubs.nominal)

print(mclubs.nominal)

plot(mclubs.nominal)

plot(mclubs.nominal, clubs=NULL, avgTP = TRUE, legend=TRUE)

Part 2. Generate relative values for transition path plot

Convert "m.clubs.nom" from list to table with name "df.mclubs.nom"

table.mclubs.nom <- map(mclubs.nominal, as.data.table) df.mclubs.nom <- rbindlist(table.mclubs.nom, fill = T, idcol = T) df.mclubs.nom

colnames(df.mclubs.nom)[c(1,5)] <- c("Club","Province") df.mclubs.nom

Merge the "df.mclubs.nom" with "f.nominal.wage.log", which is the log of wage after HP filtered and save as dataframe with name "nominal.wage.club"

nominal.wage.club <- as.data.frame( inner_join(f.nominal.wage.log, df.mclubs.nom, by="Province") %>% select(-contains(c("clubs","id","model"))) ) nominal.wage.club

Generate relative values of wage for each year by dividing the wage of province i at time t to the mean wage of all provinces at each time.

relative <- list() for(a in 2:14) { relative[[a]] <- data.frame(nominal.wage.club$Province, nominal.wage.club[,a]/mean(nominal.wage.club[,a])) colnames(relative[[a]])[1]<- "Province" colnames(relative[[a]])[2]<- paste("rel",colnames(nominal.wage.club)[a],sep="_") } for (x in 2:13) { relative[[x+1]]<-left_join(relative[[x]],relative[[x+1]], by="Province") }

table.relative <- map(relative, as.data.table) df.relative <- rbindlist(table.relative, fill = T, idcol = T) %>% drop_na() df.relative <- join(df.relative,nominal.wage.club,by=c('Province')) %>% select(Province,contains("rel"),Club) df.relative <- as.data.frame(df.relative)

colnames(df.relative) <- gsub("rel_","",colnames(df.relative)) df.relative

Transform "df.relative" to long format dataframe so the ggplot would be happy to plot.

df.relative.long <- df.relative %>% pivot_longer(-c(Province,Club), names_to = "Time", values_to="Rel_Wage") df.relative.long

Calculate the mean by clubs

df.relative.path <- aggregate(Rel_Wage ~ Club + Time, df.relative.long, mean) %>% arrange(Club) df.relative.path

Plot all clubs

path_all <- df.relative.path %>% ggplot(aes(x=Time,y=Rel_Wage, group=Club, col=Club)) + geom_line() + labs(title = "Transition path of all clubs", caption = "Rel_Wage = Relative Wage") + theme_bw() path_all

Plot transition path within club

Generate relative values Club 1

club1 <- nominal.wage.club %>% filter(Club == "club1")

relative.club1 <- list() for(a in 2:14) { relative.club1[[a]] <- data.frame(club1$Province, club1[,a]/mean(club1[,a])) colnames(relative.club1[[a]])[1]<- "Province" colnames(relative.club1[[a]])[2]<- paste("rel",colnames(club1)[a],sep="_") } for (x in 2:13) { relative.club1[[x+1]]<-left_join(relative.club1[[x]],relative.club1[[x+1]], by="Province") }

table.relative.club1 <- map(relative.club1, as.data.table) df.relative.club1 <- rbindlist(table.relative.club1, fill = T, idcol = T) %>% drop_na() library(plyr) df.relative.club1 <- join(df.relative.club1,club1,by=c('Province')) %>% select(Province,contains("rel"),Club) df.relative.club1 <- as.data.frame(df.relative.club1)

colnames(df.relative.club1) <- gsub("rel_","",colnames(df.relative.club1)) df.relative.club1

df.relative.club1.long <- df.relative.club1 %>% pivot_longer(-c(Province,Club), names_to = "Time", values_to="Rel_Wage") df.relative.club1.long

df.relative.club1.path <- aggregate(Rel_Wage ~ Province + Time, df.relative.club1.long, mean) %>% arrange(Province)

Plot transition path of regions in Club 1

path_club1 <- df.relative.club1.path %>% ggplot(aes(x=Time,y=Rel_Wage, group=Province, col=Province)) + geom_line() + labs(title = "Transition path of regions in Club 1", caption = "Rel_Wage = Relative Wage") + theme_bw() + theme(legend.title = element_text(size = 8), legend.text = element_text(size = 8), axis.text.x = element_text(angle = 90)) path_club1

ggplotly(path_club1)

Generate relative values Club 2

club2 <- nominal.wage.club %>% filter(Club == "club2")

relative.club2 <- list() for(a in 2:14) { relative.club2[[a]] <- data.frame(club2$Province, club2[,a]/mean(club2[,a])) colnames(relative.club2[[a]])[1]<- "Province" colnames(relative.club2[[a]])[2]<- paste("rel",colnames(club2)[a],sep="_") } for (x in 2:13) { relative.club2[[x+1]]<-left_join(relative.club2[[x]],relative.club2[[x+1]], by="Province") }

table.relative.club2 <- map(relative.club2, as.data.table) df.relative.club2 <- rbindlist(table.relative.club2, fill = T, idcol = T) %>% drop_na() library(plyr) df.relative.club2 <- join(df.relative.club2,club2,by=c('Province')) %>% select(Province,contains("rel"),Club) df.relative.club2 <- as.data.frame(df.relative.club2)

colnames(df.relative.club2) <- gsub("rel_","",colnames(df.relative.club2)) df.relative.club2

df.relative.club2.long <- df.relative.club2 %>% pivot_longer(-c(Province,Club), names_to = "Time", values_to="Rel_Wage") df.relative.club2.long

df.relative.club2.path <- aggregate(Rel_Wage ~ Province + Time, df.relative.club2.long, mean) %>% arrange(Province)

Plot transition path of regions in Club 2

path_club2 <- df.relative.club2.path %>% ggplot(aes(x=Time,y=Rel_Wage, group=Province, col=Province)) + geom_line() + labs(title = "Transition path of regions in Club 2", caption = "Rel_Wage = Relative Wage") + theme_bw() + theme(legend.title = element_text(size = 8), legend.text = element_text(size = 8), axis.text.x = element_text(angle = 90)) path_club2

ggplotly(path_club2)

Part 3. Analyzing conditioning factors of convergence clubs

ologit <- read_delim("https://raw.githubusercontent.com/haginta/Project_Club_convergence_wage_Indonesia/main/wage_convergence_ologit.csv", ";", escape_double = FALSE, trim_ws = TRUE) ologit

Interactive data exploration

explore <- ggplot(ologit, aes(Region, grdp_g, label=Province, color=Region)) + geom_point() + xlab("Province") + ylab("GDP growth 2008-2020 (%, yoy)") + theme_bw() ggplotly(explore)

ggbarplot(ologit, x = "Region", y = "grdp_g", add = c("mean_se", "point"), color = "Region", alpha = 0.5) + xlab("Region") + ylab("GDP growth 2020 (%, yoy)") + ggrepel::geom_text_repel(aes(label = Province), size = 2) + theme(axis.text.y = element_text(size = 8), axis.text.x = element_text(size = 8, angle = 90, vjust = 0.5, hjust=1),legend.position = "none")

explore_box <- ggbarplot(ologit, x = "Region", y = "grdp_g", color="Region", add = c("mean_se","point")) + geom_point(aes(label=Province, color=Region)) + xlab("Region") + ylab("GDP growth 2008-2020 (%, yoy)") + theme(axis.text.y = element_text(size = 8), axis.text.x = element_text(size = 8, angle = 90, vjust = 0.5, hjust=1),legend.position = "none") ggplotly(explore_box)

scatter <- ggplot(ologit, aes(x=emp_manu, y=grdp_g, size=grdp)) + geom_point(aes(label=Province)) + geom_smooth(method = "lm", se = F) + labs(x = "Manufacturing employment share", y="Average GDP growth 2008-2020") ggplotly(scatter)

Load shape file

IDNmap <- read_sf("https://gist.githubusercontent.com/haginta/22a1d27cf8ae440530758a5715f129c4/raw/1506205198c4337a6dbb6430b070fc24ba80e940/IDN_poly34_simple.JSON") colnames(IDNmap)

colnames(IDNmap)[4] <- 'Province' colnames(IDNmap)

ggplot(IDNmap) + geom_sf() + theme_bw() + ggtitle("Provinces level boundaries map of Indonesia", subtitle = paste0("(", length(unique(IDNmap$POLY_ID)), " provinces)")) + labs(x = "Longitude", y="Latitude")

Merge non-spatial and spatial data

df_map <- merge(IDNmap, ologit, by=c("Province")) df_map <- st_make_valid(df_map) glimpse(df_map)

Show interactive map

club_map <- ggplot(df_map) + geom_sf(aes(fill=factor(club_real), label=Province), lwd = 0.05) + scale_fill_manual(values = c("red", "yellow", "blue"), name = "Clubs") + theme_bw() + labs(x = "Longitude", y="Latitude", title = "Club convergence of regional wage in Indonesia, 2008-2020") club_inter <- ggplotly(club_map) club_inter

gdp_map <- ggplot(df_map) + geom_sf(aes(fill=grdp_g, label=Province), lwd = 0.05) + scale_fill_viridis_c(name = "GDP growth") + theme_bw() + labs(x = "Longitude", y="Latitude", title = "GDP growth of 34 provinces in Indonesia, average 2008-2020") gdp_inter <- ggplotly(gdp_map) gdp_inter

Perform ordered logit model

ologit$clubs <- factor(ologit$clubs)

ologit <- ologit %>% mutate(lngrdp = log(grdp))

glimpse(ologit)

mylogit <- glm(clubs ~ real_wage_2008 + emp_manu + pmtb_gdrp + + tpak + lngrdp, data = ologit, family = "binomial") summary(mylogit)

.css-15w88e5{color:var(--chakra-colors-fg-neutral-primary);font-weight:inherit;letter-spacing:-0.09px;}Club convergence of regional wage across Indonesian provinces in R and conditioning factors