# Load the red-wine quality data set and preview its first rows.
winequality <- read.csv("data/winequality-red.csv")
head(winequality)

# Drop the response column (`quality`), centre and scale the remaining
# columns (scale() defaults), then compare them in a single boxplot.
# las = 2 draws the axis labels perpendicular to the axis.
winequality.predictors <- scale(
  subset(winequality, select = -quality)
)
boxplot(winequality.predictors, las = 2)
# Histogram of every column of `winequality`, drawn on a 3 x 4 grid.
# par() returns the previous settings; they are restored after the loop so
# that later base-graphics plots (e.g. the lm diagnostic plots) are not
# squeezed into a cell of the same 3 x 4 layout.
old_par <- par(mfrow = c(3, 4))
cols <- colnames(winequality)
for (col in cols) {
  # Each panel is titled with the column it shows.
  hist(winequality[, col], main = col)
}
par(old_par)
library(ggplot2)
library(reshape2)
# Pairwise correlations between all columns of `winequality`,
# rounded to two decimal places; preview the first rows.
cormat <- round(cor(winequality), digits = 2)
head(cormat)
# Keep the diagonal and lower triangle of a matrix.
#
# cormat: a matrix (used here with a correlation matrix).
# Returns a copy of `cormat` in which every entry strictly above the
# diagonal has been replaced by NA.
get_lower_tri <- function(cormat) {
  replace(cormat, upper.tri(cormat), NA)
}
# Keep the diagonal and upper triangle of a matrix.
#
# cormat: a matrix (used here with a correlation matrix).
# Returns a copy of `cormat` in which every entry strictly below the
# diagonal has been replaced by NA.
get_upper_tri <- function(cormat) {
  replace(cormat, lower.tri(cormat), NA)
}
# Blank the upper triangle of the correlation matrix, then melt the rest
# into long form (columns Var1, Var2, value), dropping the NA cells.
lower_tri <- get_lower_tri(cormat)
melted_cormat <- melt(lower_tri, na.rm = TRUE)

# Base heatmap: one tile per remaining variable pair, filled by its
# correlation value; x-axis labels are tilted by 20 degrees.
ggheatmap <- ggplot(
  data = melted_cormat,
  mapping = aes(x = Var1, y = Var2, fill = value)
) +
  geom_tile() +
  theme(axis.text.x = element_text(angle = 20))

# Display the heatmap with the correlation printed on each tile, the axis
# titles, ticks, grid and panel decoration removed, and a horizontal
# colour bar placed inside the plotting area.
ggheatmap +
  geom_text(
    mapping = aes(x = Var1, y = Var2, label = value),
    color = "black",
    size = 4
  ) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks = element_blank(),
    panel.grid.major = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    legend.justification = c(1, 0),
    legend.position = c(0.6, 0.7),
    legend.direction = "horizontal"
  ) +
  guides(
    fill = guide_colorbar(
      barwidth = 7,
      barheight = 1,
      title.position = "top",
      title.hjust = 0.5
    )
  )
# Model 1: quality regressed on every other column (main effects only).
lm1 <- lm(formula = quality ~ ., data = winequality)
summary(lm1)

# Linearity check: residuals vs fitted values.
plot(lm1, which = 1)
# Normality check: normal Q-Q plot of the residuals.
plot(lm1, which = 2)
# Model 2: all main effects plus the interaction of pH with each of the
# other predictors.
lm2 <- lm(formula = quality ~ . + pH * (. - pH), data = winequality)
summary(lm2)

# Linearity check: residuals vs fitted values.
plot(lm2, which = 1)
# Normality check: normal Q-Q plot of the residuals.
plot(lm2, which = 2)
# Model 3: log-transformed predictors; density and pH enter untransformed.
# citric.acid is shifted by 1 before taking the log -- presumably because
# it can be zero in the data; TODO confirm.
lm3 <- lm(
  formula = quality ~ log(fixed.acidity) + log(volatile.acidity) +
    log(citric.acid + 1) + log(residual.sugar) + log(chlorides) +
    log(free.sulfur.dioxide) + log(total.sulfur.dioxide) + density + pH +
    log(sulphates) + log(alcohol),
  data = winequality
)
summary(lm3)

# Linearity check: residuals vs fitted values.
plot(lm3, which = 1)
# Normality check: normal Q-Q plot of the residuals.
plot(lm3, which = 2)