jorgehcb
12/19/2017 - 11:38 AM

[Python] Cross Validation Sample

Cross Validation Sample

# Estimate out-of-sample error with 10-fold stratified cross-validation.
# For every fold: pick the two features most correlated with 'target' (using
# the training rows only), fit a logistic regression on those two features,
# and record the mean squared error of its predictions on the held-out rows.
#
# NOTE(review): this is the old-style scikit-learn call (labels passed to the
# constructor, `n_folds`, splitter iterated directly). Newer releases expect
# StratifiedKFold(n_splits=10).split(X, y) from sklearn.model_selection --
# confirm against the import this file actually uses before changing it.
kf = StratifiedKFold(df['target'], n_folds=10)
mse = []
for fold_count, (train, test) in enumerate(kf):
    print("Processing fold %s" % fold_count)

    # The splitter yields row *positions*, so select positionally; label-based
    # lookup would pick the wrong rows whenever df's index is not 0..n-1.
    train_fold = df.iloc[train]
    test_fold = df.iloc[test]

    # find best features
    # Compute the correlation matrix once. Drop the target's own entry
    # explicitly (instead of relying on a `< 1` comparison that is sensitive
    # to floating-point round-off) and ignore undefined correlations.
    corr = train_fold.corr()['target'].drop('target').dropna().abs()
    features = corr.sort_values(ascending=False).index[:2].values

    # Get training examples
    train_fold_input = train_fold[features].values
    train_fold_output = train_fold['target']

    # Fit logistic regression
    logreg = LogisticRegression()
    logreg.fit(train_fold_input, train_fold_output)

    # Check MSE on test set
    # Pass a plain array here as well so fit and predict see the same input type.
    pred = logreg.predict(test_fold[features].values)
    mse.append(mean_squared_error(test_fold['target'], pred))

# Average the per-fold errors.
print(DataFrame(mse).mean())

# Processing fold 0
# Processing fold 1
# Processing fold 2
# Processing fold 3
# Processing fold 4
# Processing fold 5
# Processing fold 6
# Processing fold 7
# Processing fold 8
# Processing fold 9

# Mean of the per-fold MSE values collected in `mse`. As a bare expression
# this only displays a value in an interactive session; the commented lines
# below appear to be the output recorded from such a run.
DataFrame(mse).mean()
# 0 0.441212
# dtype: float64