# Source: Applied Data Science with Python - 02 Plotting
from scipy import stats
# Per-column summary statistics of `df` (`df` is defined outside this excerpt).
# axis = 0 reduces over the rows, so each result holds one value per column.
# NOTE(review): the names suggest each column is one year -- confirm against
# the code that builds `df`.
year_avg = df.mean(axis = 0)
year_std = df.std(axis = 0)
# Disabled debug output of the two results above:
# print('Year Average\n', year_avg,'\n\n', 'Year Standard Deviation\n', year_std)
# 95% Student-t confidence interval for the mean of the first column, via scipy.
# The slice 0:1 (instead of the integer 0) keeps df0 a one-column DataFrame.
# The result of the bare expression is not stored; it is only visible when
# run interactively.
df0 = df.iloc[:, 0:1]
stats.t.interval(0.95, len(df0)-1, loc=np.mean(df0), scale=stats.sem(df0)) # output: two arrays
# The same kind of interval via statsmodels; tconfint_mean() is called with its
# defaults here (presumably alpha=0.05, i.e. 95% two-sided -- should match the
# scipy result above, TODO confirm).
import statsmodels.stats.api as sms
sms.DescrStatsW(df0).tconfint_mean() # output: two arrays
# Labels for the four values produced by the user-defined confidence-interval
# helper, in the order they are returned. They become the column names of the
# summary DataFrame, so extend this list whenever the helper returns more values.
output_list = [
    "mean",     # sample mean
    "h",        # half-width of the confidence interval
    "CI_low",   # lower bound (mean - h)
    "CI_high",  # upper bound (mean + h)
]
def mean_confidence_interval(data, confidence=0.95):
    """Return the sample mean and its two-sided Student-t confidence interval.

    All values in ``data`` are flattened and treated as a single sample, so a
    plain list, a 1-D array/Series and a single-column DataFrame all give the
    same result.

    Parameters
    ----------
    data : array-like
        Numeric observations (anything ``np.asarray`` can convert to float).
    confidence : float, default 0.95
        Two-sided coverage probability of the interval.

    Returns
    -------
    tuple
        ``(mean, h, mean - h, mean + h)`` as scalars, where ``h`` is the
        half-width of the interval: the standard error of the mean times the
        t quantile at ``(1 + confidence) / 2`` with ``n - 1`` degrees of
        freedom.

    Notes
    -----
    With fewer than two observations the standard error is undefined, so the
    half-width and both bounds come out as NaN.
    """
    # Flatten so that mean, sample size and standard error all describe the
    # same set of values, whatever the dimensionality of the input.
    sample = np.asarray(data, dtype=float).ravel()
    n = sample.size
    m = np.mean(sample)
    se = stats.sem(sample)
    # Use the public ``ppf`` rather than the private ``_ppf``; on the 1-D
    # sample every term is a scalar, so no ``[0]`` indexing is needed.
    h = se * stats.t.ppf((1 + confidence) / 2.0, n - 1)
    return m, h, m - h, m + h
# One (mean, h, CI_low, CI_high) tuple per column of df.
# df[[col]] (double brackets) passes a one-column DataFrame to the helper
# rather than the Series that df[col] would give.
lookup_vec = [mean_confidence_interval(df[[col]]) for col in df]

# Rows of the intermediate frame are df's columns; transposing puts the
# statistics named in output_list on the rows and df's columns on the columns.
lookup = pd.DataFrame(lookup_vec, index=df.columns, columns=output_list).T
lookup