beatrice-m
9/13/2017 - 2:02 AM

k-fold cross validation

k-fold cross validation

# k-fold cross-validation of a binomial GLM predicting `LabStat Desktop`
# from Year, College and `Aruba Laptop`.
#
# Expects a data frame `df` (defined before this script runs) containing
# those four columns. For each fold the model is fitted on the other k - 1
# folds and scored on the held-out fold. The per-fold score is
# 1 - mean absolute error between the observed response and the predicted
# probability (not a thresholded classification accuracy).
set.seed(2017) # Fixed seed so the fold assignment is reproducible
k <- 10 # Number of folds

# Assign every row of df to one of the k folds, in shuffled order
folds_i <- sample(rep(seq_len(k), length.out = nrow(df)))

accuracy <- numeric(k) # Preallocated: one score per fold
for (i in seq_len(k)) {
  test_i <- which(folds_i == i)
  # Logical mask rather than df[-test_i, ]: negative indexing with an empty
  # index vector would select zero rows instead of all rows.
  train <- df[folds_i != i, ]
  test <- df[test_i, ]

  # Train model on k-1 folds
  model <- glm(
    formula = `LabStat Desktop` ~ Year + College + `Aruba Laptop`,
    family = binomial(),
    data = train
  )

  # Predicted probabilities for the held-out fold
  predictions <- predict(
    model,
    test[, c("Year", "College", "Aruba Laptop")],
    type = "response"
  )

  # Score against the model's response variable (`LabStat Desktop`), not
  # against the `Aruba Laptop` predictor.
  observed <- test$`LabStat Desktop`
  if (is.factor(observed)) {
    # glm() treats the first factor level as failure and all others as
    # success; mirror that coding so observed is on the 0/1 scale.
    observed <- as.integer(observed != levels(observed)[1])
  }
  abs_err <- sum(abs(observed - predictions))
  accuracy[i] <- 1 - abs_err / length(predictions)
}
# Accuracy of cross-validation
mean(accuracy)