jmquintana79
10/21/2016 - 4:26 AM

Correlation matrix between several variables stored in a pandas DataFrame

Correlation matrix between several variables stored in a pandas DataFrame

## WAY 1

def correlation_matrix(df, title):
    """Show the pairwise correlation matrix of a DataFrame as a heatmap.

    Parameters
    ----------
    df : pandas.DataFrame
        Data whose column-to-column correlations are computed with
        ``df.corr()``.
    title : str
        Text appended to the ``MATRIX CORRELATION:`` figure title.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    from matplotlib import pyplot as plt

    # Compute the matrix once and take the tick labels from the matrix
    # itself, so labels always line up with the cells actually drawn even
    # if corr() returns fewer columns than the input frame has.
    corr = df.corr()
    labels = corr.columns.values
    positions = range(len(labels))

    # create chart objects
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    # pyplot.get_cmap is used instead of matplotlib.cm.get_cmap, which
    # recent matplotlib releases no longer provide; 30 = number of
    # discrete colour levels.
    cmap = plt.get_cmap('jet', 30)

    # plot
    cax = ax1.imshow(corr, interpolation="nearest", cmap=cmap)
    # set grid
    ax1.grid(True)
    # set title (tuple-wrapped so a tuple-valued title is formatted as-is)
    plt.title('MATRIX CORRELATION: %s' % (title,))
    # set axis ticks: one tick per variable, labelled with its column name
    plt.xticks(positions, labels, fontsize=10, rotation='vertical')
    plt.yticks(positions, labels, fontsize=10)
    # build color bar
    fig.colorbar(cax)
    # display
    plt.show()
    
    
## WAY 2
import matplotlib.pyplot as plt
from pandas.plotting import scatter_matrix
# Draw a grid of pairwise scatter plots for the columns of DF, with a
# kernel density estimate on the diagonal.
# NOTE: DF is not defined in this snippet; it must be bound beforehand
# (presumably to a pandas DataFrame).
scatter_matrix(
    DF,
    diagonal='kde',
    figsize=(6, 6),
    alpha=0.2,
)
# Display the figure.
plt.show()