freephys
10/18/2019 - 3:52 PM

K-fold split and XGBoost classifier

import os

# list every file available under the Kaggle input directory
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
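
The rest of the snippet assumes train, test and y_train already exist as pandas objects. A minimal loading sketch, where the file paths, index column and target column are assumptions to be swapped for the files printed above:

import pandas as pd

# assumed paths/columns -- replace with the actual competition files
train = pd.read_csv('/kaggle/input/train.csv', index_col='Id')
test = pd.read_csv('/kaggle/input/test.csv', index_col='Id')
y_train = train.pop('Target')  # assumed integer target with classes 1..7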
        
        
import numpy as np
from xgboost import XGBClassifier
from sklearn.model_selection import KFold


n_splits = 3

kf = KFold(n_splits=n_splits, shuffle=True, random_state=2019)
yoof = np.zeros(len(train))                      # out-of-fold predictions on the training set
yhat = np.zeros((len(test), y_train.nunique()))  # summed test-set class probabilities

fold = 0
n_est = 2000
lr = 0.2
for in_index, oof_index in kf.split(train, y_train):
    fold += 1
    print(f'fold {fold} of {n_splits}')
    X_in, X_oof = train.values[in_index], train.values[oof_index]
    y_in, y_oof = y_train.values[in_index], y_train.values[oof_index]

    # 'gpu_exact' requires a GPU runtime; 'hist' works on CPU
    model = XGBClassifier(n_estimators=n_est, learning_rate=lr, random_state=2019, tree_method='gpu_exact')

    model.fit(X_in, y_in, early_stopping_rounds=20, eval_set=[(X_oof, y_oof)], verbose=100, eval_metric=['merror', 'mlogloss'])

    print('## lr:', lr, 'n_est:', n_est)
    print('Best iteration:', model.best_iteration, 'Best ntree_limit:', model.best_ntree_limit, 'Best score:', model.best_score)

    yoof[oof_index] = model.predict(X_oof)
    # accumulate class probabilities across folds (soft voting)
    yhat += model.predict_proba(test.values)

# average the fold probabilities and map each test row back to a class label
yhat /= n_splits
test_pred = model.classes_[np.argmax(yhat, axis=1)]
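
Since the target is multi-class, a stratified split keeps the class proportions roughly equal in every fold. A drop-in alternative to the KFold above (the loop already passes y_train to kf.split, which StratifiedKFold uses):

from sklearn.model_selection import StratifiedKFold

# same interface as KFold; class proportions are preserved per fold
kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=2019)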


import seaborn as sns
from sklearn.metrics import confusion_matrix, accuracy_score

cm = confusion_matrix(y_train, yoof)
sns.heatmap(cm, annot=True, fmt="d", xticklabels=range(1, 8), yticklabels=range(1, 8))
print('Accuracy:', accuracy_score(y_train, yoof))
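
If a submission file is needed, a minimal sketch; the id/target column names here are placeholders for whatever the competition's sample submission uses:

import pandas as pd

# placeholder column names -- match the competition's sample_submission.csv
sub = pd.DataFrame({'Id': test.index, 'Target': test_pred})
sub.to_csv('submission.csv', index=False)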