# import/install packages if missing
#install.packages("zoo")
#install.packages("pROC")
library(zoo)
library(pROC)
# Console messages printed when attaching the packages (output, not code):
# Attaching package: ‘zoo’
# The following objects are masked from ‘package:base’:
#     as.Date, as.Date.numeric
# Type 'citation("pROC")' for a citation.
# Attaching package: ‘pROC’
# The following objects are masked from ‘package:stats’:
#     cov, smooth, var
# Read the prepared ECMWF forecast data for Innsbruck (lead time 84 h).
# NOTE: the path is relative, so report the working directory for checking.
# (The original also called setwd("."), which is a no-op -- setwd() in
# scripts is an anti-pattern anyway, so it and the duplicate getwd() were
# removed.)
getwd()
ibk <- readRDS("ibk_ecmwf_lt84.rds")
# Binary target: wet day (any observed precipitation P > 0) = 1, dry = 0
ibk$wet <- ifelse(ibk$P > 0, 1, 0)
#ibk
# Climatological base rate: fraction of wet days in the whole record
base_rate <- sum(ibk$wet) / length(ibk$wet)
base_rate
# Chronological train/test split (no shuffling -- this is a time series)
trainIbk <- window(ibk, start = "2011-01-04", end = "2017-03-31")
testIbk <- window(ibk, start = "2017-04-01", end = "2019-04-03")
# Logistic regression: wet-day occurrence vs forecast total precipitation.
# FIX: fit on the training window only. The original used data = ibk, i.e.
# the full record including the test period, so the "test" scores below were
# contaminated by data leakage.
glm.fit <- glm(wet ~ tp.84, data = trainIbk, family = binomial)
summary(glm.fit)
# Predicted wet-day probabilities for both windows
predictTrain <- predict(glm.fit, trainIbk, type = "response")
predictTest <- predict(glm.fit, testIbk, type = "response")
# Classify with a 0.5 probability threshold: 0 if < 0.5, 1 if >= 0.5
predictTrain_0_1 <- ifelse(predictTrain < 0.5, 0, 1)
# Confusion matrix: rows = predicted class, columns = observed class, so
# cm[1] = TN, cm[2] = FP (pred 1, obs 0), cm[3] = FN (pred 0, obs 1), cm[4] = TP
cm <- table(predictTrain_0_1, trainIbk$wet)
cm
print(paste("Misclassification rate = (FP+FN)/Total =", (cm[2]+cm[3])/sum(cm)))
# FIX: the two labels were swapped. cm[2]/(cm[1]+cm[2]) is the error rate
# among true No's, i.e. FALSE POSITIVES; cm[3]/(cm[3]+cm[4]) is the error
# rate among true Yes's, i.e. FALSE NEGATIVES.
print(paste("Of the true No's, we make", cm[2]/(cm[1]+cm[2]), "errors (false positive)"))
print(paste("Of the true Yes's, we make", cm[3]/(cm[3]+cm[4]), "errors (false negative)"))
# Recorded output from a previous run (pasted as bare text; commented out so the script parses):
# [1] "Misclassification rate = (FP+FN)/Total = 0.166964285714286"
# [1] "Of the true No's, we make 0.0968775020016013 errors (false negative)"
# [1] "Of the true Yes's, we make 0.255297679112008 errors (false positive)"
# Classify with a 0.5 probability threshold: 0 if < 0.5, 1 if >= 0.5
predictTest_0_1 <- ifelse(predictTest < 0.5, 0, 1)
# Confusion matrix: rows = predicted class, columns = observed class, so
# cm[1] = TN, cm[2] = FP (pred 1, obs 0), cm[3] = FN (pred 0, obs 1), cm[4] = TP
cm <- table(predictTest_0_1, testIbk$wet)
cm
print(paste("Misclassification rate = (FP+FN)/Total =", (cm[2]+cm[3])/sum(cm)))
# FIX: labels were swapped here too -- errors among true No's are false
# POSITIVES, errors among true Yes's are false NEGATIVES.
print(paste("Of the true No's, we make", cm[2]/(cm[1]+cm[2]), "errors (false positive)"))
print(paste("Of the true Yes's, we make", cm[3]/(cm[3]+cm[4]), "errors (false negative)"))
# Recorded output from a previous run (pasted as bare text; commented out so the script parses):
# [1] "Misclassification rate = (FP+FN)/Total = 0.167400881057269"
# [1] "Of the true No's, we make 0.102981029810298 errors (false negative)"
# [1] "Of the true Yes's, we make 0.243589743589744 errors (false positive)"
# ROC curves for the single-predictor model, training vs test.
# FIX: pROC's "Setting levels / Setting direction" console messages had been
# pasted between these lines as bare text, breaking the script; they are
# kept below as comments.
rocTrain <- roc(response = trainIbk$wet, predictor = predictTrain)
# Setting levels: control = 0, case = 1
# Setting direction: controls < cases
rocTest <- roc(response = testIbk$wet, predictor = predictTest)
# Setting levels: control = 0, case = 1
# Setting direction: controls < cases
# Overlay both curves; legacy.axes = TRUE puts 1 - specificity (FPR) on x
plot(rocTrain, print.auc = TRUE, xlab = "false positive rate",
     ylab = "true positive rate", legacy.axes = TRUE)
plot(rocTest, print.auc = TRUE, xlab = "false positive rate",
     ylab = "true positive rate", legacy.axes = TRUE, col = "blue",
     add = TRUE, print.auc.y = .45)
legend("bottomright", legend = c("training data", "test data"),
       col = c("black", "blue"), lwd = 2)#, horiz=TRUE)
## commented code is the same as before, not needed:
#ibk$wet <- ifelse(ibk$P>0, 1, 0)
#ibk
#base_rate <- sum(ibk$wet)/length(ibk$wet)
#base_rate
#trainIbk <- window(ibk, start="2011-01-04", end="2017-03-31")
#testIbk <- window(ibk, start="2017-04-01", end="2019-04-03")
# Extended logistic model: add 700 hPa relative humidity and surface solar
# radiation forecasts as predictors.
# FIX: fit on the training window only. The original used data = ibk (full
# record including the test period), leaking test data into the fit.
glm.fit <- glm(wet ~ tp.84 + r700.84 + ssr.84, data = trainIbk, family = binomial)
summary(glm.fit)
# Predicted wet-day probabilities for both windows
predictTrain <- predict(glm.fit, trainIbk, type = "response")
predictTest <- predict(glm.fit, testIbk, type = "response")
# Threshold the training probabilities at 0.5 (>= 0.5 counts as wet)
predictTrain_01 <- ifelse(predictTrain >= 0.5, 1, 0)
# Confusion matrix: rows = predicted class, columns = observed class
# (cm[1] = TN, cm[2] = FP, cm[3] = FN, cm[4] = TP)
cm <- table(predictTrain_01, trainIbk$wet)
cm
print(paste("Misclassification rate = (FP+FN)/Total =", (cm[2]+cm[3])/sum(cm)))
print(paste("Of the true No's, we make", cm[2]/(cm[1]+cm[2]), "errors"))
print(paste("Of the true Yes's, we make", cm[3]/(cm[3]+cm[4]), "errors"))
# Threshold the test probabilities at 0.5: 0 if < 0.5, 1 if >= 0.5
predictTest_01 <- ifelse(predictTest < 0.5, 0, 1)
# Confusion matrix: rows = predicted class, columns = observed class
# (cm[1] = TN, cm[2] = FP, cm[3] = FN, cm[4] = TP)
cm <- table(predictTest_01, testIbk$wet)
cm
# FIX: the recorded TRAINING output below had been pasted as bare text
# between cm and the print statements, breaking the script; kept as comments.
# [1] "Misclassification rate = (FP+FN)/Total = 0.162053571428571"
# [1] "Of the true No's, we make 0.104083266613291 errors"
# [1] "Of the true Yes's, we make 0.235116044399596 errors"
print(paste("Misclassification rate = (FP+FN)/Total =", (cm[2]+cm[3])/sum(cm)))
print(paste("Of the true No's, we make", cm[2]/(cm[1]+cm[2]), "errors"))
print(paste("Of the true Yes's, we make", cm[3]/(cm[3]+cm[4]), "errors"))
# ROC curves for the extended model, training vs test, on one plot
rocTrain <- roc(response = trainIbk$wet, predictor = predictTrain)
rocTest <- roc(response = testIbk$wet, predictor = predictTest)
# Shared axis labels; legacy.axes = TRUE puts 1 - specificity (FPR) on x
fpr_lab <- "false positive rate"
tpr_lab <- "true positive rate"
plot(rocTrain, print.auc = TRUE, xlab = fpr_lab, ylab = tpr_lab,
     legacy.axes = TRUE)
plot(rocTest, print.auc = TRUE, xlab = fpr_lab, ylab = tpr_lab,
     legacy.axes = TRUE, col = "blue", add = TRUE, print.auc.y = .45)
legend("bottomright", legend = c("training data", "test data"),
       col = c("black", "blue"), lwd = 2)#, horiz=TRUE)
# Recorded output from a previous run (pasted as bare text; commented out so the script parses):
# [1] "Misclassification rate = (FP+FN)/Total = 0.168869309838473"
# [1] "Of the true No's, we make 0.116531165311653 errors"
# [1] "Of the true Yes's, we make 0.230769230769231 errors"
# Setting levels: control = 0, case = 1
# Setting direction: controls < cases
# Setting levels: control = 0, case = 1
# Setting direction: controls < cases