vgrabovets
3/20/2017 - 4:14 PM

get statistics

Get value counts and shares for one DataFrame column, with an optional bar chart of the shares.

def get_stats(df, col_name, limit=None, print_pic=True):
    """Summarise how often each value occurs in one column of a DataFrame.

    Missing values are replaced by the label ``'!not specified'`` before
    counting, so they show up as their own row in the result. The number of
    distinct values in the column (missing counted as one value) is printed.

    Args:
        df: DataFrame holding the column to analyse.
        col_name: Label of the column in ``df``.
        limit: Maximum number of rows (most frequent values first) to plot
            and return. ``None`` keeps every row.
        print_pic: When true, draw a bar chart of the ``share`` column for
            the selected rows. Relies on ``plt`` (matplotlib.pyplot) being
            available in the enclosing module.

    Returns:
        DataFrame indexed by the column's values with two columns:
        ``count`` (occurrences) and ``share`` (fraction of all rows,
        rounded to 3 decimals), truncated to the first ``limit`` rows.
    """
    counts = df[col_name].fillna('!not specified').value_counts()
    stat = counts.to_frame(name='count')
    stat['share'] = (stat['count'] / stat['count'].sum()).round(3)

    # dropna=False keeps NaN as a distinct entry, matching len(unique()).
    unique = df[col_name].nunique(dropna=False)
    print('Unique entries: ', unique)

    # Purely positional slice: `.ix` no longer exists in pandas >= 1.0 and
    # used to switch to label-based slicing when the index held integers,
    # which made the plotted rows disagree with the returned ones.
    top = stat.iloc[:limit]

    # Nothing to draw for an empty selection; skip the chart in that case.
    if print_pic and not top.empty:
        top[['share']].plot(kind='bar')
        plt.title(col_name)
        plt.ylabel('Share')
        plt.show()
    return top