Denmark's CPI Forecasting (ARIMA)
library(xts);library(utils);library(urca);library(quantmod);library(car);library(TTR);library(fpp2);library(tidyverse);library(AER);library(zoo);library("readxl");library(gridExtra);library(strucchange)
#library(bit64)
# Load the CPI observations from the Excel workbook.
my_data <- read_excel(path = "CPI_DK.xlsx")

# Build a monthly time series running from Feb 2001 to Jul 2021.
# NOTE(review): the whole sheet is handed to ts(); this presumably expects a
# single numeric CPI column whose length matches that span -- TODO confirm.
cpi <- ts(
  data = my_data,
  start = c(2001, 2),
  end = c(2021, 7),
  frequency = 12
)

# First visual check of the raw series.
autoplot(cpi)
CPI clearly increases over the years.
# Autocorrelation and partial autocorrelation of the raw CPI level.
acf(cpi)
pacf(cpi)

# Log transformation; all later modelling is done on this scale.
log.cpi <- log(cpi)

# Raw series next to its log-transformed counterpart.
autoplot(cpi) +
  ylab("CPI")
autoplot(log.cpi) +
  ylab("Transformed CPI")

# Autocorrelation and partial autocorrelation of the log series.
acf(log.cpi)
pacf(log.cpi)

# Additive decomposition of the log series.
autoplot(decompose(log.cpi, type = "additive"))

# Stationarity checks on the log level. KPSS and ADF are run in pairs:
# first with a deterministic trend ("tau" / "trend"), then with a constant
# only ("mu" / "drift"). The ADF lag length is selected by AIC.
summary(ur.kpss(log.cpi, type = "tau"))
summary(ur.df(log.cpi, type = "trend", selectlags = "AIC"))
summary(ur.kpss(log.cpi, type = "mu"))
summary(ur.df(log.cpi, type = "drift", selectlags = "AIC"))

# First difference of the log series.
diff.log.cpi <- diff(log.cpi)
autoplot(diff.log.cpi) +
  ylab("Diff LogCPI")

# Same checks on the differenced series: constant-only KPSS and ADF, plus an
# ADF regression without deterministic terms (ur.df's default type, written
# out explicitly here).
summary(ur.kpss(diff.log.cpi, type = "mu"))
summary(ur.df(diff.log.cpi, type = "drift", selectlags = "AIC"))
summary(ur.df(diff.log.cpi, type = "none", selectlags = "AIC"))
# Structural breaks: sup-F (QLR) test on a regression built from the
# differenced log series.
#
# Regression data: the change in diff.log.cpi (Lag0) and diff.log.cpi
# shifted back by one period (Lag1).
# NOTE(review): the response is diff(diff.log.cpi), not diff.log.cpi itself --
# confirm that this specification is intended.
chowtest <- cbind(
  Lag0 = diff(diff.log.cpi),
  Lag1 = stats::lag(diff.log.cpi, k = -1)
)

# Sequence of F statistics over candidate break dates (from = 0.15).
qlr <- Fstats(Lag0 ~ Lag1, data = chowtest, from = 0.15)

# sup-F test on that sequence.
# NOTE(review): the result is stored in `test` but never printed in this
# section.
test <- sctest(qlr, type = "supF")

# Estimated break date and plot of the F statistics.
breakpoints(qlr, alpha = 0.05)
plot(qlr, alpha = 0.05, main = "F Statistics")
# ACF and PACF of the differenced log series, side by side, up to 36 lags.
# `lag.max` is written in full rather than relying on partial matching of the
# abbreviated `lag` argument name.
grid.arrange(
  ggAcf(diff.log.cpi, lag.max = 36) +
    ylab("") +
    ggtitle("ACF"),
  ggPacf(diff.log.cpi, lag.max = 36) +
    ylab("") +
    ggtitle("PACF"),
  nrow = 1
)

# Automatic ARIMA order selection, on the differenced series and on the
# log level, as a reference for the manual specifications.
auto.arima(diff.log.cpi)
auto.arima(log.cpi)

# Series after one regular and one seasonal (lag 12) difference, shown with
# its ACF and PACF.
log.cpi %>%
  diff() %>%
  diff(lag = 12) %>%
  ggtsdisplay()
# Train/test split of the log series.
# Hold-out sample: Mar 2019 onward; its length is the forecast horizon.
log.cpi.test <- window(log.cpi, start = c(2019, 3))
h <- length(log.cpi.test)

# Estimation sample: everything up to and including Feb 2019.
log.cpi.train <- window(log.cpi, end = c(2019, 2))
# ARIMA 1: ARIMA(0,1,0)(0,1,1) fitted to the training sample.
arima_1 <- Arima(
  log.cpi.train,
  order = c(0, 1, 0),
  seasonal = c(0, 1, 1)
)
summary(arima_1)

# Forecast over the hold-out horizon, then inspect the residuals.
fc.arima_1 <- forecast(arima_1, h = h)
autoplot(fc.arima_1)
checkresiduals(arima_1)
mean(residuals(arima_1))

# ARIMA 2: same non-seasonal part, with a seasonal AR term added:
# ARIMA(0,1,0)(1,1,1).
arima_2 <- Arima(
  log.cpi.train,
  order = c(0, 1, 0),
  seasonal = c(1, 1, 1)
)
summary(arima_2)

# Forecast over the hold-out horizon, then inspect the residuals.
fc.arima_2 <- forecast(arima_2, h = h)
autoplot(fc.arima_2)
checkresiduals(arima_2)
mean(residuals(arima_2))
# ETS 1: model "AAN" (additive error, additive trend, no seasonality),
# without damping.
ets1 <- ets(
  log.cpi.train,
  model = "AAN",
  damped = FALSE
)
summary(ets1)

# Forecast over the hold-out horizon, then inspect the residuals.
fc.ets1 <- forecast(ets1, h = h)
autoplot(fc.ets1)
checkresiduals(ets1)
mean(residuals(ets1))

# ETS 2: model "AAA" (additive error, trend and seasonality), without damping.
ets2 <- ets(
  log.cpi.train,
  model = "AAA",
  damped = FALSE
)
summary(ets2)

# Forecast over the hold-out horizon, then inspect the residuals.
fc.ets2 <- forecast(ets2, h = h)
autoplot(fc.ets2)
checkresiduals(ets2)
mean(residuals(ets2))
The residuals of the ARIMA models behave like white noise, which indicates that the models captured enough of the information in the series. The ETS models, on the other hand, do not seem appropriate for the task: their residuals still show autocorrelation and are not normally distributed.
Forecasts with the ARIMA and ETS models
# Point forecasts of all four models (prediction intervals switched off)
# drawn over the log series from Jan 2015, together with the test set.
autoplot(window(log.cpi, start = c(2015, 1))) +
  autolayer(log.cpi.test, series = "Test Set") +
  autolayer(fc.arima_1, PI = FALSE, series = "ARIMA(0,1,0)(0,1,1)") +
  autolayer(fc.arima_2, PI = FALSE, series = "ARIMA(0,1,0)(1,1,1)") +
  autolayer(fc.ets1, PI = FALSE, series = "ETS AAN") +
  autolayer(fc.ets2, PI = FALSE, series = "ETS AAA") +
  ggtitle("29 months forecast Arima and ETS") +
  ylab("")
# Forecast errors (point forecast minus test value) for each model.
# The base layer, test set minus itself, draws the zero reference line.
autoplot(log.cpi.test - log.cpi.test) +
  autolayer(fc.arima_1$mean - log.cpi.test, series = "ARIMA(0,1,0)(0,1,1)") +
  autolayer(fc.arima_2$mean - log.cpi.test, series = "ARIMA(0,1,0)(1,1,1)") +
  autolayer(fc.ets1$mean - log.cpi.test, series = "ETS AAN") +
  autolayer(fc.ets2$mean - log.cpi.test, series = "ETS AAA") +
  ggtitle("Difference between forecasts and test set") +
  ylab("")
# Accuracy measures of each forecast, evaluated against the test set.
# Order: ARIMA(0,1,0)(0,1,1), ARIMA(0,1,0)(1,1,1), ETS AAN, ETS AAA.
accuracy(fc.arima_1, x = log.cpi.test)
accuracy(fc.arima_2, x = log.cpi.test)
accuracy(fc.ets1, x = log.cpi.test)
accuracy(fc.ets2, x = log.cpi.test)
# One plot per model: forecast with prediction intervals, overlaid with the
# held-out test observations.
#
# The second argument of autoplot() for a forecast object is `include`, the
# number of trailing historical observations to draw. The earlier
# `range(start = c(2015, 1))` evaluates to c(1, 2015), so it never restricted
# the history to Jan 2015 onward. The number of training observations since
# Jan 2015 is counted here and passed as `include` instead.
n_history <- NROW(window(log.cpi.train, start = c(2015, 1)))

autoplot(fc.arima_1, include = n_history) +
  autolayer(log.cpi.test, series = "Test Set") +
  ggtitle("29 months forecast ARIMA(0,1,0)(0,1,1)") +
  ylab("")

autoplot(fc.arima_2, include = n_history) +
  autolayer(log.cpi.test, series = "Test Set") +
  ggtitle("29 months forecast ARIMA(0,1,0)(1,1,1)") +
  ylab("")

autoplot(fc.ets1, include = n_history) +
  autolayer(log.cpi.test, series = "Test Set") +
  ggtitle("29 months forecast ETS(A,A,N)") +
  ylab("")

autoplot(fc.ets2, include = n_history) +
  autolayer(log.cpi.test, series = "Test Set") +
  ggtitle("29 months forecast ETS(A,A,A)") +
  ylab("")