# import/install packages if missing
#install.packages("zoo")
#install.packages("pROC")
library(zoo)
library(pROC)
# read rds file
ibk <- readRDS("ibk_ecmwf_lt84.rds")
# get working directory
getwd()
# set working directory in case the wrong one was set as default
setwd(".")
# check if right working directory was set
getwd()
ibk$wet <- ifelse(ibk$P>0, 1, 0)
#ibk
base_rate <- sum(ibk$wet)/length(ibk$wet)
base_rate
trainIbk <- window(ibk, start="2011-01-04", end="2017-03-31")
testIbk <- window(ibk, start="2017-04-01", end="2019-04-03")
glm.fit <- glm(wet ~ tp.84, data=ibk, family=binomial)
summary(glm.fit)
predictTrain <- predict(glm.fit, trainIbk, type="response")
predictTest <- predict(glm.fit, testIbk, type="response")
# classify values as 0 if <0.5, 1 if >=0.5
predictTrain_0_1 <- ifelse(predictTrain<0.5, 0, 1)
# confusion matrix (variable in rows, variable in columns)
cm <- table(predictTrain_0_1, trainIbk$wet)
cm
print(paste("Misclassification rate = (FP+FN)/Total =", (cm[2]+cm[3])/sum(cm)))
print(paste("Of the true No's, we make", cm[2]/(cm[1]+cm[2]), "errors (false negative)"))
print(paste("Of the true Yes's, we make", cm[3]/(cm[3]+cm[4]), "errors (false positive)"))
# classify values as 0 if <0.5, 1 if >=0.5
predictTest_0_1 <- ifelse(predictTest<0.5,0,1)
# confusion matrix (variable in rows, variable in columns)
cm <- table(predictTest_0_1, testIbk$wet)
cm
print(paste("Misclassification rate = (FP+FN)/Total =", (cm[2]+cm[3])/sum(cm)))
print(paste("Of the true No's, we make", cm[2]/(cm[1]+cm[2]), "errors (false negative)"))
print(paste("Of the true Yes's, we make", cm[3]/(cm[3]+cm[4]), "errors (false positive)"))
rocTrain <- roc(response=trainIbk$wet, predictor=predictTrain)
rocTest <- roc(response=testIbk$wet, predictor=predictTest)
plot(rocTrain, print.auc=TRUE, xlab="false positive rate", ylab="true positive rate", legacy.axes=TRUE)
plot(rocTest, print.auc=TRUE, xlab="false positive rate", ylab="true positive rate", legacy.axes=TRUE, col="blue", add=TRUE, print.auc.y=.45)
legend("bottomright", legend=c("training data", "test data"), col=c("black","blue"), lwd=2)#, horiz=TRUE)
## commented code is the same as before, not needed:
#ibk$wet <- ifelse(ibk$P<0, 1, 0)
#ibk
#base_rate <- sum(ibk$wet)/length(ibk$wet)
#base_rate
#trainIbk <- window(ibk, start="2011-01-04", end="2017-03-31")
#testIbk <- window(ibk, start="2017-04-01", end="2019-04-03")
glm.fit <- glm(wet ~ tp.84 + r700.84 + ssr.84, data=ibk, family=binomial)
summary(glm.fit)
predictTrain <- predict(glm.fit, trainIbk, type="response")
predictTest <- predict(glm.fit, testIbk, type="response")
# classify values as 0 if <0.5, 1 if >=0.5
predictTrain_01 <- ifelse(predictTrain<0.5, 0, 1)
# confusion matrix (variable in rows, variable in columns)
cm <- table(predictTrain_01, trainIbk$wet)
cm
print(paste("Misclassification rate = (FP+FN)/Total =", (cm[2]+cm[3])/sum(cm)))
print(paste("Of the true No's, we make", cm[2]/(cm[1]+cm[2]), "errors"))
print(paste("Of the true Yes's, we make", cm[3]/(cm[3]+cm[4]), "errors"))
# classify values as 0 if <0.5, 1 if >=0.5
predictTest_01 <- ifelse(predictTest<0.5,0,1)
# confusion matrix (variable in rows, variable in columns)
cm <- table(predictTest_01, testIbk$wet)
cm
print(paste("Misclassification rate = (FP+FN)/Total =", (cm[2]+cm[3])/sum(cm)))
print(paste("Of the true No's, we make", cm[2]/(cm[1]+cm[2]), "errors"))
print(paste("Of the true Yes's, we make", cm[3]/(cm[3]+cm[4]), "errors"))
rocTrain <- roc(response=trainIbk$wet, predictor=predictTrain)
rocTest <- roc(response=testIbk$wet, predictor=predictTest)
plot(rocTrain, print.auc=TRUE, xlab="false positive rate", ylab="true positive rate", legacy.axes=TRUE)
plot(rocTest, print.auc=TRUE, xlab="false positive rate", ylab="true positive rate", legacy.axes=TRUE, col="blue", add=TRUE, print.auc.y=.45)
legend("bottomright", legend=c("training data", "test data"), col=c("black","blue"), lwd=2)#, horiz=TRUE)