jmquintana79
1/12/2018 - 7:05 AM

dfply scores

dfply tools (functions) to calculate data scores.

## usage
df >> group_by(X.col1) >> summarize(newcol=new_functions(X.col2))

## useful functions
@make_symbolic
def fR2(y_true, y_pred):
    """Return the R^2 (coefficient of determination) score.

    Both arguments are forwarded unchanged to
    ``sklearn.metrics.r2_score``.

    Args:
        y_true: Observed (reference) values.
        y_pred: Predicted values to score against ``y_true``.

    Returns:
        The value produced by ``r2_score(y_true, y_pred)``.
    """
    # Function-scope import: scikit-learn is only loaded when this score runs.
    from sklearn import metrics

    score = metrics.r2_score(y_true, y_pred)
    return score
    
@make_symbolic
def fPEARSON(y_true, y_pred):
    """Return the Pearson correlation coefficient of two series.

    Args:
        y_true: Observed (reference) values.
        y_pred: Predicted values to correlate with ``y_true``.

    Returns:
        The first element of the ``scipy.stats.pearsonr`` result, i.e. the
        correlation coefficient; the p-value is discarded.
    """
    # Function-scope import: SciPy is only loaded when this score runs.
    from scipy import stats

    result = stats.pearsonr(y_true, y_pred)
    return result[0]
    
@make_symbolic
def fBIAS(obs, fcst):
    """Return the mean error (bias) of a forecast: ``mean(fcst - obs)``.

    A positive result means the forecast is, on average, above the
    observations.

    Args:
        obs: Observed values (array, Series, or plain sequence of numbers).
        fcst: Forecast values, same length as ``obs``.

    Returns:
        The mean of the element-wise differences ``fcst - obs``.
    """
    import numpy as np

    # np.subtract (rather than the ``-`` operator) also accepts plain lists
    # and tuples, which would otherwise raise TypeError.
    error = np.subtract(fcst, obs)
    return np.mean(error)
    
@make_symbolic
def fMAE(obs, fcst):
    """Return the mean absolute error: ``mean(|fcst - obs|)``.

    Args:
        obs: Observed values (array, Series, or plain sequence of numbers).
        fcst: Forecast values, same length as ``obs``.

    Returns:
        The mean of the absolute element-wise differences.
    """
    import numpy as np

    # np.subtract (rather than the ``-`` operator) also accepts plain lists
    # and tuples, which would otherwise raise TypeError.
    error = np.subtract(fcst, obs)
    return np.mean(np.abs(error))
    
@make_symbolic
def fRMSE(obs, fcst):
    """Return the root mean squared error: ``sqrt(mean((fcst - obs)**2))``.

    Args:
        obs: Observed values (array, Series, or plain sequence of numbers).
        fcst: Forecast values, same length as ``obs``.

    Returns:
        The square root of the mean of the squared element-wise differences.
    """
    import numpy as np

    # np.subtract (rather than the ``-`` operator) also accepts plain lists
    # and tuples, which would otherwise raise TypeError.
    error = np.subtract(fcst, obs)
    mse = np.mean(error ** 2)
    return np.sqrt(mse)
    
@make_symbolic
def fcorr(x, y):
    """Return the full ``scipy.stats.pearsonr`` result for ``x`` and ``y``.

    The result is returned as-is, without extracting the coefficient, so
    the caller receives both the correlation coefficient (index 0) and the
    p-value (index 1).

    Args:
        x: First series of values.
        y: Second series of values, same length as ``x``.

    Returns:
        The object returned by ``pearsonr(x, y)``.
    """
    # Function-scope import: SciPy is only loaded when this score runs.
    from scipy import stats

    result = stats.pearsonr(x, y)
    return result
    
@make_symbolic
def fvariance(obs):
    """Return the variance of ``obs`` as computed by ``numpy.var``.

    Args:
        obs: Values whose variance is wanted.

    Returns:
        The result of ``numpy.var(obs)`` with its default arguments.
    """
    import numpy

    variance = numpy.var(obs)
    return variance
# noise: fvariance(y_test)
# variance prediction: fvariance(y_prediction)