jmquintana79
12/28/2017 - 2:48 AM

Linear Regression (degree 1) + Hist

Linear regression plot with optional marginal histograms of the x and y data. Its appearance is customizable through keyword arguments.

## plot customized linear regression
@valida
def plot_lregression(x:'array',y:'array',ishist:bool=True,**kwargs) -> dict:
    '''
    Fit a degree-1 linear regression of y on x, plot it and return its main statistics.

    The figure is displayed with plt.show() and closed afterwards; the seaborn
    style set at the beginning is reverted with sns.reset_orig() before returning.

    x -- x data (reshaped into a single-feature column for the fit).
    y -- y data.
    ishist -- if True, draw a seaborn joint plot (regression + marginal plots of x and y);
              if False, draw only the regression plot.
    kwargs -- optional display settings:
        xlim, ylim -- axis limits as (min, max). Default: range of the data.
        xlabel, ylabel -- axis labels. Default: 'X', 'Y'.
        xlabel_fontsize, ylabel_fontsize -- axis label font sizes. Default: 12.
        stitle -- title. Without histograms a custom title is drawn as the figure
                  title; otherwise it is used as the header of the statistics text.
                  Default: 'Linear Regression'.
        stitle_fontsize -- font size of the statistics text. Default: 10.
        stitle_main_fontsize -- font size of the figure title. Default: 20.
        stitle_fontstyle -- font style of the statistics text. Default: 'italic'.
        loc -- matplotlib location code of the statistics text; None hides it. Default: 4.
        line_color -- color of the regression line. Default: 'red'.
        scatter_color -- color of the scatter points. Default: 'blue'.
        scatter_size -- size of the scatter points. Default: 20.
        scatter_linewidth -- edge line width of the scatter points. Default: 1.
    return -- information of the linear regression.
              Format: {'polynomial': [slope, intercept], 'R2', 'pearsonr', 'pearsonp'}
    '''
    # manage keyword arguments
    # (data-range defaults are only computed when the caller did not give limits)
    xlim = kwargs["xlim"] if "xlim" in kwargs else (min(x), max(x))
    ylim = kwargs["ylim"] if "ylim" in kwargs else (min(y), max(y))
    snamex = kwargs.get("xlabel", 'X')
    snamey = kwargs.get("ylabel", 'Y')
    snamex_fontsize = kwargs.get("xlabel_fontsize", 12)
    snamey_fontsize = kwargs.get("ylabel_fontsize", 12)
    stitle = kwargs.get("stitle", 'Linear Regression')
    stitle_fontsize = kwargs.get("stitle_fontsize", 10)
    stitle_main_fontsize = kwargs.get("stitle_main_fontsize", 20)
    stitle_fontstyle = kwargs.get("stitle_fontstyle", 'italic')
    iloc = kwargs.get("loc", 4)
    line_color = kwargs.get("line_color", 'red')
    scatter_color = kwargs.get("scatter_color", 'blue')
    scatter_size = kwargs.get("scatter_size", 20)
    scatter_linewidth = kwargs.get("scatter_linewidth", 1)

    # import libraries
    import numpy as np  # was used below without being imported
    import pandas as pd
    from sklearn import linear_model
    import scipy.stats  # plain "import scipy" does not guarantee the stats submodule is loaded
    import seaborn as sns
    import matplotlib.pyplot as plt
    # same class the deprecated mpl_toolkits.axes_grid.anchored_artists re-exported
    from matplotlib.offsetbox import AnchoredText
    sns.set(style="white", color_codes=True)

    ## CALCULATE STATISTICS
    # data preparation: scikit-learn expects a 2D (n_samples, n_features) matrix
    X = np.asarray(x).reshape(len(x), 1)
    # create linear regression object and train it with the whole data set
    regr = linear_model.LinearRegression()
    regr.fit(X, y)
    # calculate Pearson Correlation
    c, p = scipy.stats.pearsonr(x, y)
    # store regression info
    results = {
        'polynomial': [regr.coef_[0], regr.intercept_],  # [slope, intercept]
        'R2': regr.score(X, y),  # R**2
        'pearsonr': c,
        'pearsonp': p,
    }

    ## PLOT
    scatter_kws = {"s": scatter_size,
                   'edgecolor': 'black',
                   'linewidth': scatter_linewidth,
                   'facecolor': scatter_color}
    if ishist:
        # NOTE(review): `size` and `stat_func` are arguments of older seaborn
        # releases; newer releases appear to have renamed/removed them --
        # confirm against the installed seaborn version.
        g = sns.jointplot(x="x", y="y", data=pd.DataFrame({'x': x, 'y': y}),
                          kind="reg",
                          color=scatter_color, size=7,
                          joint_kws={'line_kws': {'color': line_color}},
                          scatter_kws=scatter_kws,
                          stat_func=None)
        # the regression lives in the central axes of the joint grid
        ax = g.ax_joint
    else:
        sns.mpl.rc("figure", figsize=(8, 8))
        ax = sns.regplot(x=x, y=y, color=scatter_color,
                         line_kws={'color': line_color},
                         scatter_kws=scatter_kws)

    # main title: only without histograms; the statistics text then keeps
    # the generic header instead of repeating the custom title
    if stitle != 'Linear Regression' and not ishist:
        ax.set_title(stitle, fontsize=stitle_main_fontsize)
        stitle = 'Linear Regression'
    # legend
    textstr = '%s:\nY = %.5f * X + %.5f \nR2 = %.5f \npearsonr = %.5f; p = %.2e \nN = %s'%(stitle,
                                       results['polynomial'][0],
                                       results['polynomial'][1],
                                       results['R2'],
                                       results['pearsonr'], results['pearsonp'],
                                       len(x)
                                      )

    # function legend only text: frameless text box anchored inside the axes
    def textonly(ax, txt, fontsize=14, fontstyle='normal', loc=2):
        at = AnchoredText(txt, prop=dict(size=fontsize, style=fontstyle), frameon=False, loc=loc)
        at.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
        ax.add_artist(at)
        return at
    if iloc is not None:
        textonly(ax, textstr, loc=iloc, fontsize=stitle_fontsize, fontstyle=stitle_fontstyle)

    # limits of values
    ax.set_xlim(xlim[0], xlim[1])
    ax.set_ylim(ylim[0], ylim[1])
    # axis labels
    ax.set_xlabel(snamex, fontsize=snamex_fontsize)
    ax.set_ylabel(snamey, fontsize=snamey_fontsize)
    # plot
    plt.show()
    # clean: restore the rc parameters changed by seaborn and release the figure
    sns.reset_orig()
    plt.close()

    # return
    return results