# GridSearchCV: cross-validated hyper-parameter search for a random forest.
# NOTE(review): an earlier note here said n_estimators = 500 gives better
# results, but the estimator below is built with 1000 trees -- confirm which
# value is intended.
clf = RandomForestClassifier(n_estimators=1000, random_state=100, n_jobs=-1)
param_grid = {
    "max_depth": [3, 8, None],
    "class_weight": [{0: .90, 1: .10}, {0: .93, 1: .07}, {0: .99, 1: .01}],
    "min_samples_split": [2, 3, 5, 10, 15],
    # "auto" is intentionally not listed: for RandomForestClassifier it was an
    # alias of "sqrt" (so it only duplicated fits), and recent scikit-learn
    # releases reject it outright.
    "max_features": ["sqrt", "log2", None],
    "criterion": ["gini", "entropy"],
}

# The search is exhaustive, so the number of candidate settings is the size of
# the Cartesian product of the option lists. Computing it from param_grid
# avoids the `grid_scores_` attribute, which no longer exists in current
# scikit-learn (its replacement is `cv_results_`).
n_candidates = 1
for candidate_values in param_grid.values():
    n_candidates *= len(candidate_values)

# run grid search
grid_search = GridSearchCV(clf, param_grid=param_grid, scoring=mcc_scorer)
start = time()
grid_search.fit(X_train, y_train)
print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
      % (time() - start, n_candidates))