# k-fold cross validation
# Estimate out-of-sample fit of a logistic regression of `LabStat Desktop`
# on Year, College and `Aruba Laptop` using k-fold cross-validation.
#
# Reads `df` (defined elsewhere) and leaves `k`, `folds_i`, `accuracy` and the
# last fold's `train`, `test`, `model`, `predictions` and `abs_err` in the
# calling environment. The final expression is the mean score over all folds.
set.seed(2017) # Fixed seed so the fold assignment is reproducible
k <- 10 # Number of folds

# Assign every row to one of k folds of (near-)equal size, in random order
folds_i <- sample(rep(seq_len(k), length.out = nrow(df)))

# One score per fold; preallocated instead of grown inside the loop
accuracy <- numeric(k)

for (i in seq_len(k)) {
  test_i <- which(folds_i == i)
  train <- df[-test_i, ]
  test <- df[test_i, ]

  # Train model on the other k-1 folds
  model <- glm(
    formula = `LabStat Desktop` ~ Year + College + `Aruba Laptop`,
    family = binomial(),
    data = train
  )

  # Predicted probabilities for the held-out fold
  predictions <- predict(
    model,
    newdata = test[, c("Year", "College", "Aruba Laptop")],
    type = "response"
  )

  # Compare predictions with the observed response. The model's target is
  # `LabStat Desktop`, so that is the column the error is measured against
  # (not the `Aruba Laptop` predictor).
  # NOTE(review): assumes `LabStat Desktop` is stored as numeric 0/1 -- confirm.
  abs_err <- sum(abs(test$`LabStat Desktop` - predictions))

  # Score = 1 - mean absolute error of the predicted probabilities. This is
  # not a thresholded classification accuracy.
  accuracy[i] <- 1 - abs_err / length(predictions)
}

# Accuracy of cross-validation
mean(accuracy)