ofrendo
6/6/2017 - 3:01 PM

Adding a custom evaluation metric (Matthews correlation coefficient, MCC) for evaluating caret models

Adding a custom evaluation metric (Matthews correlation coefficient, MCC) for evaluating caret models

# Model formula: is_pnio is the response, every name in feat_names a predictor.
myFormula <- as.formula(paste0("is_pnio~", paste0(feat_names, collapse = "+")))

# NOTE: caretMCCFunction must already be defined when this call is evaluated,
# because it is passed by value to trainControl().
test_rs <- train(
  myFormula,
  data = subTrain,
  method = "rf",
  # Name of the entry returned by the summary function ("Kappa" would be the
  # built-in alternative when using caret's default summary).
  metric = "mcc",
  # The "rf" model exposes a single tuning parameter, mtry. A grid with any
  # other column (e.g. maxdepth, which belongs to "rpart2") is rejected by
  # train(). Alternatively drop tuneGrid and use tuneLength = 3.
  tuneGrid = expand.grid(mtry = c(2, 5)),
  trControl = trainControl(
    method = "cv",
    number = 5,
    summaryFunction = caretMCCFunction
  )
)

#' Matthews correlation coefficient (MCC) summary function for caret
#'
#' Meant to be passed as `summaryFunction` to `trainControl()`; select it in
#' `train()` with `metric = "mcc"`.
#'
#' @param data A data frame with columns `obs` (observed class) and `pred`
#'   (predicted class). It is not modified.
#' @param lev Optional character vector with the two class labels. When it is
#'   `NULL` or does not have exactly two entries, the labels `"yes"` and
#'   `"no"` are assumed.
#' @param model Unused; kept so the signature has the three arguments of a
#'   caret summary function.
#' @return A named numeric vector of length one, `c(mcc = <value>)`. The value
#'   is 0 when any row or column of the confusion matrix is empty (the MCC
#'   denominator is zero).
caretMCCFunction <- function(data, lev = NULL, model = NULL) {
  # MCC is symmetric in the two classes, so it does not matter which label is
  # treated as the "positive" one.
  if (!is.null(lev) && length(lev) == 2L) {
    positive <- lev[1L]
    negative <- lev[2L]
  } else {
    positive <- "yes"
    negative <- "no"
  }

  obs <- as.character(data[["obs"]])
  pred <- as.character(data[["pred"]])

  # Counts are converted to double: with integer counts the four-way product
  # in the denominator overflows on large resamples and becomes NA.
  # Rows where obs or pred is missing are not counted in any cell.
  count_cell <- function(obs_label, pred_label) {
    as.numeric(sum(obs == obs_label & pred == pred_label, na.rm = TRUE))
  }

  tp <- count_cell(positive, positive)
  tn <- count_cell(negative, negative)
  fp <- count_cell(negative, positive)
  fn <- count_cell(positive, negative)

  denom <- sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))

  # The factors are whole numbers, so denom is exactly 0 iff one of the
  # marginal totals is 0; return 0 by convention instead of NaN (0 / 0).
  mcc <- if (denom == 0) 0 else (tp * tn - fp * fn) / denom

  c(mcc = mcc)
}