04pallav
9/11/2017 - 10:22 PM

GridSearchCV

GridSearchCV

# Cross-validated hyper-parameter search for a random forest classifier.

# NOTE(review): the original comment here said "n_estimators = 500 gives
# better results", but the code uses 1000 -- confirm which value is intended.
clf = RandomForestClassifier(n_estimators=1000, random_state=100, n_jobs=-1)

# Every combination of the values below is evaluated by the grid search.
param_grid = {
    "max_depth": [3, 8, None],
    # Candidate per-class weights, keyed by class label (0 and 1).
    "class_weight": [
        {0: .90, 1: .10},
        {0: .93, 1: .07},
        {0: .99, 1: .01},
    ],
    "min_samples_split": [2, 3, 5, 10, 15],
    # "auto" was dropped from this list: for classifiers it is an alias of
    # "sqrt", so it only duplicated fits, and newer scikit-learn releases
    # reject it.
    "max_features": ["sqrt", "log2", None],
    "criterion": ["gini", "entropy"],
}

# run grid search
# mcc_scorer is defined elsewhere -- presumably a Matthews correlation
# coefficient scorer; verify against its definition.
grid_search = GridSearchCV(clf, param_grid=param_grid, scoring=mcc_scorer)
start = time()
grid_search.fit(X_train, y_train)
elapsed = time() - start

# cv_results_["params"] holds one entry per candidate setting; it replaces
# the grid_scores_ attribute, which no longer exists in current scikit-learn.
print("GridSearchCV took %.2f seconds for %d candidate parameter settings."
      % (elapsed, len(grid_search.cv_results_["params"])))