jmquintana79
5/11/2016 - 8:01 AM

Basic statistics

Basic statistics

from collections import Counter

import numpy as np
import scipy
from scipy import stats

# Basic descriptive statistics for a single column of a DataFrame.
# Expects `datadf` (a pandas DataFrame) and `var` (a column label) to be
# defined before this point.

# dataframe column to numpy array
# Selecting with a list keeps the 2-D (n_rows, 1) shape that the removed
# DataFrame.as_matrix(columns=[var]) call used to return.
datanp = datadf[[var]].to_numpy()

# statistics
mean = datadf[var].mean()
median = np.median(datanp)  # scipy.median was only an alias of numpy.median
variance = datadf[var].var()
# NOTE: pandas' var() defaults to ddof=1 while np.std defaults to ddof=0,
# so std ** 2 is not equal to variance; both are kept as originally computed.
std = np.std(datanp)  # scipy.std was only an alias of numpy.std
sem = stats.sem(datadf[var])

p90 = np.percentile(datanp, 90)
p50 = np.percentile(datanp, 50)  # p50 = median
p10 = np.percentile(datanp, 10)

minv = datanp.min()
maxv = datanp.max()

# Depending on the SciPy version, the mode comes back as an array or as a
# scalar; np.ravel normalises both so the first element can be taken safely.
mode = np.ravel(stats.mode(datanp, axis=None).mode)[0]
def most_common(lst):
    """Return the element that occurs most often in *lst*.

    Counts every element in a single pass, so the cost is linear in the
    number of elements. Any iterable of hashable items is accepted, not
    only lists. When several elements share the highest count, the one
    encountered first is returned.

    Args:
        lst: Iterable of hashable items.

    Returns:
        The most frequent item.

    Raises:
        ValueError: If *lst* is empty.
    """
    counts = Counter(lst)
    if not counts:
        # Keep the exception type that max() raised for empty input.
        raise ValueError("most_common() arg is an empty sequence")
    return counts.most_common(1)[0][0]