# Basic statistics
from collections import Counter

import numpy as np
import scipy
import scipy.stats
# dataframe column to numpy array
# DataFrame.as_matrix() was removed in pandas 1.0. Selecting with a list of
# columns and calling to_numpy() yields the same 2-D (n, 1) array.
v = datadf[[var]].to_numpy()
# mean (two alternatives: pandas, then NaN-ignoring NumPy)
mean = datadf[var].mean()  # pandas skips NaN by default
mean = np.nanmean(v)
# median
# scipy.median was only a deprecated alias of numpy.median; call NumPy directly.
median = np.median(v)  # returns NaN if any value is NaN
median = np.nanmedian(v)
# mode
# NOTE(review): the original referenced an undefined `datanp`; `v` is assumed
# to be the intended array -- confirm.
# Depending on the SciPy version the mode comes back as a length-1 array or as
# a scalar; np.ravel(...)[0] extracts the value in both cases.
mode = np.ravel(scipy.stats.mode(v, axis=None)[0])[0]
def most_common(lst):
    """Return the most frequent element of ``lst``.

    Args:
        lst: Iterable of hashable items (a list, tuple, generator, ...).

    Returns:
        The item that occurs most often. When several items are tied, the
        one encountered first in ``lst`` is returned.

    Raises:
        ValueError: If ``lst`` is empty.
    """
    # Counter tallies in a single pass instead of calling lst.count() once
    # per distinct element (which is quadratic in the worst case).
    counts = Counter(lst)
    if not counts:
        raise ValueError("most_common() arg is an empty sequence")
    return counts.most_common(1)[0][0]
# mode without SciPy: a NumPy array has no .count(), so flatten it to a plain
# Python list and tally the values (this also works when v already is a list).
mode = Counter(np.ravel(v).tolist()).most_common(1)[0][0]
# variance
# NOTE: the two alternatives use different defaults -- pandas .var() is the
# sample variance (ddof=1), np.nanvar() the population variance (ddof=0).
variance = datadf[var].var()
variance = np.nanvar(v)
# standard deviation
# scipy.std was only a deprecated alias of numpy.std; call NumPy directly.
std = np.std(v)  # returns NaN if any value is NaN
std = np.nanstd(v)
# standard error of the mean
sem = scipy.stats.sem(datadf[var])
# percentiles (np.percentile propagates NaN; np.nanpercentile ignores it)
p90 = np.percentile(v, 90)
p50 = np.percentile(v, 50)  # p50 = median
p10 = np.percentile(v, 10)
# min / max
# NOTE(review): the original referenced an undefined `datanp`; `v` is assumed
# to be the intended array -- confirm.
min_v = v.min()
min_v = np.nanmin(v)
max_v = v.max()
max_v = np.nanmax(v)
# kurtosis
# axis=None flattens the (n, 1) array so a scalar is returned.
# NOTE: SciPy's default is the biased estimator, pandas returns the unbiased
# one (both use Fisher's definition, normal == 0), so the values differ.
kurtosis = scipy.stats.kurtosis(v, axis=None)
kurtosis = datadf[var].kurtosis()
# skewness (same biased-vs-unbiased difference as for kurtosis)
skewness = scipy.stats.skew(v, axis=None)
skewness = datadf[var].skew()