Basic statistics
import scipy
# Descriptive statistics for column `var` of the DataFrame `datadf`.

# Column as a NumPy array. DataFrame.as_matrix() was removed in pandas 1.0;
# selecting with a one-element list keeps the same 2-D (n, 1) shape that
# as_matrix(columns=[var]) used to return.
datanp = datadf[[var]].to_numpy()

# Central tendency and spread.
# NOTE(review): mean/variance come from pandas (NaN skipped, sample variance
# with ddof=1) while std comes from NumPy (population std, ddof=0), so
# std ** 2 != variance. Kept as in the original calculation; confirm which
# convention is intended.
mean = datadf[var].mean()
# scipy.median / scipy.std were plain aliases of the NumPy functions and are
# no longer exposed by recent SciPy releases; call NumPy directly.
median = np.median(datanp)
variance = datadf[var].var()
std = np.std(datanp)
sem = stats.sem(datadf[var])  # standard error of the mean

# Percentiles and range.
p90 = np.percentile(datanp, 90)
p50 = np.percentile(datanp, 50)  # p50 == median
p10 = np.percentile(datanp, 10)
minv = datanp.min()
maxv = datanp.max()

# Mode over all values. Older SciPy returns a 1-element array for axis=None,
# newer SciPy returns a scalar (so the old `[0][0]` indexing raises);
# np.ravel() normalises both shapes before taking the single value.
mode = np.ravel(scipy.stats.mode(datanp, axis=None).mode)[0]
def most_common(lst):
    """Return the element that occurs most often in *lst*.

    Args:
        lst: Any iterable of hashable elements (list, tuple, string, ...).

    Returns:
        The element with the highest number of occurrences. When several
        elements share the highest count, the one that appears first in
        *lst* is returned.

    Raises:
        ValueError: If *lst* is empty.
        TypeError: If an element is not hashable.
    """
    # Imported locally so the function does not rely on a file-level import.
    from collections import Counter

    # One pass to count everything, instead of one full `lst.count` scan
    # per distinct element.
    counts = Counter(lst)
    if not counts:
        raise ValueError("most_common() arg is an empty sequence")
    # Counter keeps first-seen order and max() returns the first maximal
    # key, which makes tie-breaking deterministic.
    return max(counts, key=counts.get)