Linear regression plot with optional marginal histograms of x and y. It is customizable through keyword arguments.
## plot customized linear regression
@valida
def plot_lregression(x:'array',y:'array',ishist:bool=True,**kwargs) -> dict:
    """
    Plot a customized linear regression of y on x and return its main statistics.

    A straight line is fitted with sklearn's LinearRegression, the Pearson
    correlation is computed with scipy, and the data are drawn with seaborn
    (scatter + regression line). A text box summarising the fit is anchored
    inside the axes. The figure is shown and then closed.

    x -- x data (1-D sequence of numbers).
    y -- y data (1-D sequence of numbers, same length as x).
    ishist -- if True, draw a seaborn joint plot with small marginal histograms
              of x and y; if False, draw a plain regression plot on an 8x8 figure.
    kwargs -- optional display settings:
        xlim, ylim -- axis limits as (low, high). Default: (min, max) of the data.
        xlabel, ylabel -- axis labels. Default: 'X', 'Y'.
        xlabel_fontsize, ylabel_fontsize -- axis label font sizes. Default: 12.
        stitle -- title. With ishist=False a non-default title is drawn as the
                  figure title and the text box header stays 'Linear Regression';
                  with ishist=True it is only used as the text box header.
        stitle_fontsize -- font size of the text box. Default: 10.
        stitle_main_fontsize -- font size of the figure title. Default: 20.
        stitle_fontstyle -- font style of the text box. Default: 'italic'.
        loc -- matplotlib location code of the text box (4 = lower right).
               None disables the text box.
        line_color -- color of the regression line. Default: 'red'.
        scatter_color -- face color of the points. Default: 'blue'.
        scatter_size -- marker size of the points. Default: 20.
        scatter_linewidth -- edge line width of the points. Default: 1.
    return -- information of the linear regression. Format:
              {'polynomial': [coef. A (slope), coef. B (intercept)],
               'R2': ..., 'pearsonr': ..., 'pearsonp': ...}
    """
    # import libraries (kept function-local, as in the rest of this function)
    import inspect
    import numpy as np
    import pandas as pd
    import scipy.stats  # import the submodule explicitly; `import scipy` alone may not load it
    import seaborn as sns
    import matplotlib.pyplot as plt
    # AnchoredText lives in matplotlib.offsetbox; the old
    # mpl_toolkits.axes_grid package is gone from current matplotlib
    from matplotlib.offsetbox import AnchoredText
    from sklearn import linear_model

    default_title = 'Linear Regression'

    # manage keyword arguments
    xlim = kwargs.get("xlim", (min(x), max(x)))
    ylim = kwargs.get("ylim", (min(y), max(y)))
    snamex = kwargs.get("xlabel", 'X')
    snamey = kwargs.get("ylabel", 'Y')
    snamex_fontsize = kwargs.get("xlabel_fontsize", 12)
    snamey_fontsize = kwargs.get("ylabel_fontsize", 12)
    stitle = kwargs.get("stitle", default_title)
    stitle_fontsize = kwargs.get("stitle_fontsize", 10)
    stitle_main_fontsize = kwargs.get("stitle_main_fontsize", 20)
    stitle_fontstyle = kwargs.get("stitle_fontstyle", 'italic')
    iloc = kwargs.get("loc", 4)
    line_color = kwargs.get("line_color", 'red')
    scatter_color = kwargs.get("scatter_color", 'blue')
    scatter_size = kwargs.get("scatter_size", 20)
    scatter_linewidth = kwargs.get("scatter_linewidth", 1)

    ## CALCULATE STATISTICS
    # sklearn expects a 2-D feature matrix: one column, one row per sample
    X = np.reshape(x, (len(x), 1))
    regr = linear_model.LinearRegression()
    regr.fit(X, y)
    corr, pvalue = scipy.stats.pearsonr(x, y)
    results = {
        'polynomial': [regr.coef_[0], regr.intercept_],  # [slope, intercept]
        'R2': regr.score(X, y),                          # R**2
        'pearsonr': corr,
        'pearsonp': pvalue,
    }

    # function legend only text
    def textonly(ax, txt, fontsize=14, fontstyle='normal', loc=2):
        at = AnchoredText(txt, prop=dict(size=fontsize, style=fontstyle),
                          frameon=False, loc=loc)
        at.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
        ax.add_artist(at)
        return at

    ## PLOT
    line_kws = {'color': line_color}
    scatter_kws = {"s": scatter_size,
                   'edgecolor': 'black',
                   'linewidth': scatter_linewidth,
                   'facecolor': scatter_color}
    sns.set(style="white", color_codes=True)
    try:
        if ishist:
            data = pd.DataFrame({'x': x, 'y': y})
            # seaborn renamed `size` to `height` and dropped `stat_func`
            # across releases; pick whatever the installed version accepts
            params = inspect.signature(sns.jointplot).parameters
            legacy_size = 'size' in params and 'height' not in params
            extra = {'size' if legacy_size else 'height': 7}
            if 'stat_func' in params:
                # suppress seaborn's own statistic annotation where it exists
                extra['stat_func'] = None
            grid = sns.jointplot(x="x", y="y", data=data, kind="reg",
                                 color=scatter_color,
                                 joint_kws={'line_kws': line_kws,
                                            'scatter_kws': scatter_kws},
                                 **extra)
            # work on the central (joint) axes explicitly instead of relying
            # on pyplot's notion of the current axes
            ax = grid.ax_joint
        else:
            plt.rc("figure", figsize=(8, 8))
            ax = sns.regplot(x=x, y=y, color=scatter_color,
                             line_kws=line_kws,
                             scatter_kws=scatter_kws)

        # main title: a custom title becomes the figure title (plain plot
        # only) and the text box falls back to the default header
        if stitle != default_title and not ishist:
            ax.set_title(stitle, fontsize=stitle_main_fontsize)
            stitle = default_title

        # legend
        textstr = '%s:\nY = %.5f * X + %.5f \nR2 = %.5f \npearsonr = %.5f; p = %.2e \nN = %s' % (
            stitle,
            results['polynomial'][0],
            results['polynomial'][1],
            results['R2'],
            results['pearsonr'], results['pearsonp'],
            len(x),
        )
        if iloc is not None:
            textonly(ax, textstr, loc=iloc,
                     fontsize=stitle_fontsize, fontstyle=stitle_fontstyle)

        # limits of values
        ax.set_xlim(xlim[0], xlim[1])
        ax.set_ylim(ylim[0], ylim[1])
        # axis labels
        ax.set_xlabel(snamex, fontsize=snamex_fontsize)
        ax.set_ylabel(snamey, fontsize=snamey_fontsize)
        # plot
        plt.show()
    finally:
        # clean: always restore the global style and close the figure,
        # even if plotting failed part-way
        sns.reset_orig()
        plt.close()

    # return
    return results