import os

# List the files available in the Kaggle input directory
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from xgboost import XGBClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix, accuracy_score
# 3-fold CV: out-of-fold predictions on train, averaged predictions on test
n_splits = 3
kf = KFold(n_splits=n_splits, shuffle=True, random_state=2019)

n_classes = y_train.nunique()
yoof = np.zeros(len(train))               # out-of-fold class predictions
yhat = np.zeros((len(test), n_classes))   # accumulated test class probabilities
fold = 0
n_est = 2000
lr = 0.2
for in_index, oof_index in kf.split(train, y_train):
    fold += 1
    print(f'fold {fold} of {n_splits}')
    X_in, X_oof = train.values[in_index], train.values[oof_index]
    y_in, y_oof = y_train.values[in_index], y_train.values[oof_index]

    # 'gpu_exact' needs a GPU build of XGBoost; fall back to 'hist' otherwise
    model = XGBClassifier(n_estimators=n_est, learning_rate=lr,
                          random_state=2019, tree_method='gpu_exact')
    model.fit(X_in, y_in, early_stopping_rounds=20,
              eval_set=[(X_oof, y_oof)], verbose=100,
              eval_metric=['merror', 'mlogloss'])
    print('## lr:', lr, 'n_est:', n_est)
    print('Best iteration:', model.best_iteration,
          'Best ntree_limit:', model.best_ntree_limit,
          'Best score:', model.best_score)

    # Out-of-fold labels for this fold; accumulate test probabilities for blending
    yoof[oof_index] = model.predict(X_oof)
    yhat += model.predict_proba(test.values)

# Average the fold probabilities and convert them to class labels
yhat /= n_splits
yhat = model.classes_[np.argmax(yhat, axis=1)]
# Confusion matrix and accuracy of the out-of-fold predictions
cm = confusion_matrix(y_train, yoof)
sns.heatmap(cm, annot=True, fmt="d", xticklabels=range(1, 8), yticklabels=range(1, 8))
plt.show()
print('Accuracy:', accuracy_score(y_train, yoof))
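
# A minimal sketch of how the averaged test predictions could be written out as a
# Kaggle submission file. The 'Id' and 'Target' column names and the use of
# test.index as the row identifier are assumptions for illustration, not taken
# from the code above; adjust them to the competition's sample_submission format.
import pandas as pd

submission = pd.DataFrame({'Id': test.index, 'Target': yhat})
submission.to_csv('submission.csv', index=False)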