library(factoextra)
library(FactoMineR)
library(corrplot)
library(dplyr)
library(ggcorrplot)
library(ggplot2)
library(reshape)
library(stats)
library(ellipsis)
library(gridExtra)
library(pillar)
library(cluster)
# Load the dataset. With sep = "" read.table() splits fields on whitespace;
# the first line of the file supplies the column names.
# NOTE(review): the file is named .csv -- confirm it really is
# whitespace-delimited rather than comma-separated.
data <- read.table(file = "data.csv", header = TRUE, sep = "")

# Quick overview: first six rows, dimensions, column types, per-column
# summaries and the number of missing values in each column.
head(data)
dim(data)
str(data)
summary(data)
colSums(is.na(data))

# Keep the observations of year 2014, then remove the categorical variables
# (Year, Country, Status) from that subset.
data_2014 <- data[which(data$Year == 2014), ] %>%
  select(-c(Year, Country, Status))
# Histograms of every column of data_2014 before transformation, laid out on
# a 4 x 4 grid.
# "main" is not a graphical parameter understood by par() (it only raises a
# warning and draws nothing), so the page title is written into the outer
# margin with mtext() instead; oma reserves two lines at the top for it.
old_par <- par(mfrow = c(4, 4), oma = c(0, 0, 2, 0))
for (col in seq_len(ncol(data_2014))) {
  var_name <- colnames(data_2014)[col]
  hist(data_2014[, col], main = var_name, xlab = var_name)
}
# mtext() needs an open plot, so skip the title when nothing was drawn.
if (ncol(data_2014) > 0) {
  mtext("Histograms", side = 3, outer = TRUE, font = 2)
}
# Restore the previous layout and margins so that later base-graphics plots
# are not affected by the grid set up here.
par(old_par)
# Log-transform the columns of data_2014 in place. Columns are addressed by
# position, so this depends on the column order left after the categorical
# variables were removed.

# Columns flagged as containing zeros (presumably -- confirm against the
# data): shift by 1 so that log10 is defined at 0.
data_2014[c(1, 2, 3, 4, 6, 7, 8)] <- lapply(
  data_2014[c(1, 2, 3, 4, 6, 7, 8)],
  function(x) log10(x + 1)
)

# Columns flagged as containing no zeros: plain log10.
data_2014[c(10, 12, 13, 14, 15, 16)] <- lapply(
  data_2014[c(10, 12, 13, 14, 15, 16)],
  log10
)

# Remaining columns: reflect around 100 before taking the log, then negate.
# NOTE(review): a value of exactly 100 yields Inf here -- confirm that it
# cannot occur in these columns.
data_2014[c(5, 9, 11)] <- lapply(
  data_2014[c(5, 9, 11)],
  function(x) -log10(100 - x)
)
# Histograms of every column of data_2014 after the log transformations,
# laid out on a 4 x 4 grid.
old_par <- par(mfrow = c(4, 4))
for (col in seq_len(ncol(data_2014))) {
  var_name <- colnames(data_2014)[col]
  hist(data_2014[, col], main = var_name, xlab = var_name)
}
# Restore the previous layout: otherwise every later base-graphics plot
# (e.g. the regsubsets plot further down) is drawn in one cell of this grid.
par(old_par)
# Correlation matrix of the transformed variables, with the variables
# reordered by hierarchical clustering and the coefficients printed in the
# cells.
ggcorrplot(
  corr = cor(data_2014),
  hc.order = TRUE,
  lab = TRUE
)

# Shared theme tweak: tilt the x-axis labels so the variable names stay
# readable.
rotated_x_labels <- theme(axis.text.x = element_text(angle = 60, hjust = 1))

# Box plots of the transformed variables (melt() reshapes to long format).
data_long <- melt(data_2014)
ggplot(data_long, aes(x = variable, y = value)) +
  geom_boxplot() +
  rotated_x_labels

# Same box plots after centring and scaling each column. scale() returns a
# matrix, and melting a matrix stores the column names in X2.
data_trans <- scale(data_2014)
data_long2 <- melt(data_trans)
ggplot(data_long2, aes(x = X2, y = value)) +
  geom_boxplot() +
  rotated_x_labels
library(factoextra)

# PCA on all standardised variables. The argument is spelled "scale." in
# full: prcomp() has no "scale" argument, so "scale = TRUE" only worked
# through partial argument matching.
res.pca <- prcomp(data_trans, scale. = TRUE)

# Scree plot of the variance carried by each component.
fviz_eig(res.pca)

# Individuals, coloured by their quality of representation (cos2).
fviz_pca_ind(
  res.pca,
  col.ind = "cos2",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # avoid overlapping text labels
)

# Variables, coloured by their contribution to the components.
fviz_pca_var(
  res.pca,
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # avoid overlapping text labels
)

# Biplot of individuals and variables together.
fviz_pca_biplot(
  res.pca,
  repel = TRUE,
  col.var = "#2E9FDF", # variables colour
  col.ind = "#696969"  # individuals colour
)

# Eigenvalues table of the PCA, stored and printed.
eig.val <- get_eigenvalue(res.pca)
eig.val
# Turn the scaled matrix into a data frame so columns can be picked by name,
# then build one small data frame per group of related variables.
data_trans <- as.data.frame(data_trans)

# Vaccination variables.
data_vaccin <- data_trans %>%
  select(Diphtheria, Polio, Hepatitis.B)

# Thinness variables.
data_thin <- data_trans %>%
  select(thinness.5.9.years, thinness..1.19.years)

# Child-death variables.
data_death <- data_trans %>%
  select(under.five.deaths, infant.deaths)
# PCA restricted to the vaccination variables. res.pca is overwritten here,
# so the full-data PCA result is no longer available afterwards.
# "scale." is spelled out instead of relying on partial argument matching.
res.pca <- prcomp(data_vaccin, scale. = TRUE)
fviz_eig(res.pca)

# Variables plot coloured by contribution. The comma after the first
# argument was missing, which made the whole script fail to parse.
fviz_pca_var(
  res.pca,
  col.var = "contrib",
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # avoid overlapping text labels
)

# Print the princomp() fit of the same variables alongside the prcomp() one.
princomp(data_vaccin)
# PCA restricted to the thinness variables. res.pca is overwritten each
# time. "scale." is spelled out instead of relying on partial argument
# matching of "scale".
res.pca <- prcomp(data_thin, scale. = TRUE)
fviz_eig(res.pca)
fviz_pca_var(
  res.pca,
  col.var = "contrib", # colour by contribution to the components
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # avoid overlapping text labels
)

# PCA restricted to the child-death variables.
res.pca <- prcomp(data_death, scale. = TRUE)
fviz_eig(res.pca)
fviz_pca_var(
  res.pca,
  col.var = "contrib", # colour by contribution to the components
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # avoid overlapping text labels
)
# Print the variable names available for modelling.
colnames(data_2014)

# Linear model of life expectancy on all fifteen explanatory variables,
# listed explicitly.
reg.simple69 <- lm(
  Life.expectancy ~ Adult.Mortality + infant.deaths + Alcohol +
    Hepatitis.B + Measles + BMI + under.five.deaths + Polio +
    Total.expenditure + Diphtheria + GDP + Population +
    thinness..1.19.years + thinness.5.9.years + Schooling,
  data = data_2014
)

# Keep the summary object for later use and print it.
resume.simple69 <- summary(reg.simple69)
resume.simple69
library(leaps)

# Backward subset selection on all explanatory variables: the single best
# model of each size, up to ten variables.
choixb <- regsubsets(
  Life.expectancy ~ .,
  data = data_2014,
  nbest = 1,
  nvmax = 10,
  method = "backward"
)
summary(choixb)

# Plot the selected models ranked by BIC.
plot(choixb, scale = "bic")
# Life expectancy against infant deaths: scatter plot, then simple linear
# regression.
ggplot(data_2014, aes(x = infant.deaths, y = Life.expectancy)) +
  geom_point()
reg.simple <- lm(Life.expectancy ~ infant.deaths, data = data_2014)
resume.simple <- summary(reg.simple)
resume.simple

# Life expectancy against adult mortality. reg.simple and resume.simple are
# reused, so the previous fit is overwritten.
ggplot(data_2014, aes(x = Adult.Mortality, y = Life.expectancy)) +
  geom_point()
reg.simple <- lm(Life.expectancy ~ Adult.Mortality, data = data_2014)
resume.simple <- summary(reg.simple)
resume.simple

# Both predictors together in one multiple regression.
reg.mul <- lm(
  Life.expectancy ~ infant.deaths + Adult.Mortality,
  data = data_2014
)
resume.mul <- summary(reg.mul)
resume.mul
# Status was removed from data_2014 together with the other categorical
# variables, and data_2014_all was never created, so both statements below
# used to fail. Rebuild the complete 2014 subset from the raw data; it keeps
# Status and the untransformed values read from the file.
data_2014_all <- data[which(data$Year == 2014), ]

# Distribution of life expectancy for each Status level.
ggplot(data_2014_all, aes(x = Status, y = Life.expectancy)) +
  geom_boxplot()

# One-way analysis of variance of life expectancy by Status, fitted as a
# linear model. The object keeps its original name; it does not prevent
# calls to the stats::anova() function.
anova <- lm(Life.expectancy ~ Status, data = data_2014_all)
summary(anova)
# Rebuild data_2014 from the raw data: all columns of the 2014 rows, which
# replaces the transformed version used above.
data_2014 <- data[data$Year %in% 2014, ]

# NOTE(review): c() concatenates its arguments into one vector, not a
# two-column table, and no cov_vaccin column is created in the code visible
# here (a missing column yields NULL and contributes nothing) -- confirm the
# intended construction.
data_study <- c(
  data_2014$Life.expectancy,
  data_2014$cov_vaccin
)