dfply tools (functions) to calculate data scores.
## usage
df >> group_by(X.col1) >> summarize(newcol=new_functions(X.col2))
## useful functions
@make_symbolic
def fR2(y_true, y_pred):
    """Return the R^2 score of ``y_pred`` against ``y_true``.

    Thin wrapper that forwards both arguments, in this order, to
    ``sklearn.metrics.r2_score`` and returns its result unchanged.
    """
    # Imported lazily so scikit-learn is only required when this is called.
    from sklearn import metrics

    score = metrics.r2_score(y_true, y_pred)
    return score
@make_symbolic
def fPEARSON(y_true, y_pred):
    """Return the first element of ``scipy.stats.pearsonr(y_true, y_pred)``.

    Per the SciPy documentation the first element is the Pearson
    correlation coefficient; the remaining part of the result (the
    p-value) is discarded.
    """
    # Imported lazily so SciPy is only required when this is called.
    from scipy import stats

    result = stats.pearsonr(y_true, y_pred)
    return result[0]
@make_symbolic
def fBIAS(obs, fcst):
    """Return the mean error (bias), i.e. the mean of ``fcst - obs``.

    A positive value means the forecast is, on average, larger than the
    observation. The inputs must support ``fcst - obs`` directly
    (presumably pandas Series or NumPy arrays -- confirm against callers).
    """
    import numpy as np

    return np.mean(fcst - obs)
@make_symbolic
def fMAE(obs, fcst):
    """Return the mean absolute error: the mean of ``|fcst - obs|``.

    The inputs must support ``fcst - obs`` directly (presumably pandas
    Series or NumPy arrays -- confirm against callers).
    """
    import numpy as np

    absolute_error = np.abs(fcst - obs)
    return np.mean(absolute_error)
@make_symbolic
def fRMSE(obs, fcst):
    """Return the root-mean-square error between ``fcst`` and ``obs``.

    Computed as ``sqrt(mean((fcst - obs) ** 2))``. The inputs must support
    ``fcst - obs`` directly (presumably pandas Series or NumPy arrays --
    confirm against callers).
    """
    import numpy as np

    residual = fcst - obs
    return np.sqrt(np.mean(residual ** 2))
@make_symbolic
def fcorr(x, y):
    """Return the complete result of ``scipy.stats.pearsonr(x, y)``.

    Unlike ``fPEARSON``, nothing is extracted from the result: the caller
    receives whatever ``pearsonr`` returns (per the SciPy documentation,
    the correlation coefficient together with its p-value).
    """
    # Imported lazily so SciPy is only required when this is called.
    from scipy import stats

    result = stats.pearsonr(x, y)
    return result
@make_symbolic
def fvariance(obs):
    """Return the variance of ``obs`` as computed by ``numpy.var``.

    ``numpy.var`` is called with its default arguments only.
    """
    import numpy

    variance = numpy.var(obs)
    return variance
# noise: fvariance(y_test)
# variance prediction: fvariance(y_prediction)