9/11/2017 - 10:30 PM

GridSearchCV scikit

GridSearchCV scikit

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Example: tune a RandomForestClassifier with an exhaustive, cross-validated
# grid search, once per scoring metric, and report every candidate's score.
#
# NOTE: X_train and y_train are not defined in this snippet; they must
# already exist (training features and labels) before this code runs.

# One grid per estimator family. They are kept under separate names so the
# logistic-regression grid cannot overwrite the random-forest one that is
# actually passed to GridSearchCV below.
rf_parameters = [{'n_estimators': [100, 1000, 2000]}]  # for RandomForestClassifier
logistic_parameters = [
    {'penalty': ['l2'], 'C': np.logspace(-3, 3, 10)},
    {'penalty': ['l1'], 'C': np.logspace(-3, 3, 10)},
]  # for LogisticRegression (pair with LogisticRegression(), not the forest)

# The grid must match the estimator handed to GridSearchCV.
tuned_parameters = rf_parameters

scores = ['precision', 'recall']
for score in scores:
    print("# Tuning hyper-parameters for %s" % score)

    # '<metric>_weighted' averages the per-class metric weighted by support.
    clf = GridSearchCV(RandomForestClassifier(), tuned_parameters, cv=5,
                       scoring='%s_weighted' % score)
    clf.fit(X_train, y_train)

    print("Best parameters set found on development set:")
    print(clf.best_params_)

    print("Grid scores on development set:")
    # cv_results_ replaces the removed grid_scores_ attribute; the three
    # arrays are aligned, one entry per parameter combination.
    results = clf.cv_results_
    for mean_score, std_score, params in zip(results['mean_test_score'],
                                             results['std_test_score'],
                                             results['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean_score, std_score * 2, params))

# GridSearchCV exhaustively evaluates every parameter combination in the grid.
# Here the grid has a single axis: each candidate value of `alpha` in `alphas`
# is tried on `model` (both names are defined elsewhere).
clf2 = GridSearchCV(
    estimator=model,
    param_grid={'alpha': alphas},
)

# Grid search means you have a set of candidate models that differ from each other only in their parameter values, which lie on a grid. You train each of the models and evaluate it using cross-validation, then select the one that performed best.