Plot a validation curve over a range of values for any estimator hyperparameter.
## scorer
from sklearn.metrics import make_scorer
def RMSE(y_true,y_pred):
    """
    Return the root mean squared error between targets and predictions.
    y_true -- observed target values (array-like).
    y_pred -- predicted values (array-like), broadcastable against y_true.
    return -- square root of the mean of the squared element-wise errors.
    """
    # Imported locally, like the other helpers in this file, so the function
    # does not depend on a module-level ``np`` being defined.
    import numpy as np
    # Coerce to float arrays: plain Python lists do not support ``-``, and
    # unsigned integer arrays would wrap around on subtraction.
    error = np.asarray(y_pred, dtype=float) - np.asarray(y_true, dtype=float)
    mse = np.mean(error**2)
    return np.sqrt(mse)
def scorer():
    """
    Build a scikit-learn scorer around RMSE.
    return -- the object produced by make_scorer(RMSE, greater_is_better=False).
    """
    # greater_is_better=False declares RMSE as a loss (lower is better); per
    # the scikit-learn documentation the resulting scorer sign-flips the value.
    rmse_scorer = make_scorer(RMSE, greater_is_better=False)
    return rmse_scorer
## PLOT THE VALIDATION CURVE
def _resolve_scoring(scorer):
    """Turn the ``scorer`` argument into a value usable as ``scoring=``."""
    import inspect

    # Strings (named metrics), None and non-callables are passed through as-is.
    if scorer is None or isinstance(scorer, str) or not callable(scorer):
        return scorer
    try:
        signature = inspect.signature(scorer)
    except (TypeError, ValueError):
        # Signature cannot be introspected: assume it is already a scorer.
        return scorer

    def accepts(*args):
        try:
            signature.bind(*args)
        except TypeError:
            return False
        return True

    # A scorer is invoked as scorer(estimator, X, y); a factory takes nothing.
    # Only a callable that works with zero arguments and cannot take the three
    # scorer arguments is treated as a factory and called.
    if accepts() and not accepts(None, None, None):
        return scorer()
    return scorer


def plot_validation_curve(X:'array',y:'array',estimator:'scikit clf',param_name:str,param_values:list,scorer:'scikit scorer',
                          njobs:int=2,ncv:int=10,issemilog:bool=True)->tuple:
    """
    Plot the Validation Curve.
    X,y -- features / target arrays.
    estimator -- Scikit Learn estimator.
    param_name -- name of the hyperparameter to be evaluated.
    param_values -- array of candidate values of the hyperparameter.
    scorer -- Scikit Learn scoring: a scoring string, a scorer callable
              (estimator, X, y), or a zero-argument function returning one of
              those (e.g. a factory wrapping make_scorer).
    njobs -- number of CPUs to be used (default 2).
    ncv -- number of cv folds to be used (default 10).
    issemilog -- use a logarithmic x axis (True) or a linear one (False).
    return -- tuple: (training scores, test (cv) scores) exactly as returned
              by sklearn's validation_curve (not averaged).
    """
    import matplotlib.pyplot as plt
    import numpy as np
    from sklearn.model_selection import validation_curve

    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_values,
        scoring=_resolve_scoring(scorer), n_jobs=njobs, cv=ncv)

    plt.title("Validation Curve")
    plt.xlabel(str(param_name))
    plt.ylabel("Score")

    # Same drawing code for both curves; only the x-axis scaling differs.
    draw_line = plt.semilogx if issemilog else plt.plot
    lw = 2
    curves = (
        (train_scores, "Training score", "darkorange"),
        (test_scores, "Cross-validation score", "navy"),
    )
    for scores, label, color in curves:
        # Aggregate along axis 1 to get one mean/std per parameter value.
        scores_mean = np.mean(scores, axis=1)
        scores_std = np.std(scores, axis=1)
        draw_line(param_values, scores_mean, label=label, color=color, lw=lw)
        # Shaded band of one standard deviation around the mean.
        plt.fill_between(param_values, scores_mean - scores_std,
                         scores_mean + scores_std, alpha=0.2,
                         color=color, lw=lw)

    plt.legend(loc="best")
    plt.show()
    return (train_scores, test_scores)