moralmar
1/2/2018 - 4:30 PM

Confidence Interval (CI)

  • CI of the mean with scipy and statsmodels
  • user-defined function that also returns the mean and the margin of error

source: Applied Data Science with Python - 02 Plotting

from scipy import stats

# Average & Standard Deviation (axis=0: one value per column of df)
year_avg = df.mean(axis = 0)
year_std = df.std(axis = 0)
# print('Year Average\n', year_avg,'\n\n', 'Year Standard Deviation\n', year_std)

# CI with "scipy"
# df0 is the first column of df, kept two-dimensional (a one-column DataFrame)
# because of the 0:1 slice.
df0 = df.iloc[:, 0:1]
# 95 % Student-t interval around the column mean with len(df0)-1 degrees of
# freedom. The mean is taken explicitly per column with df0.mean(axis=0):
# np.mean(df0) depends on the pandas version (per-column Series before 2.0,
# a single scalar over both axes from 2.0 on).
stats.t.interval(0.95, len(df0)-1, loc=df0.mean(axis=0), scale=stats.sem(df0)) # output: two arrays


# CI with "statsmodels"
import statsmodels.stats.api as sms
# tconfint_mean() is called with its defaults (alpha=0.05, two-sided).
sms.DescrStatsW(df0).tconfint_mean() # output: two arrays

# User defined function 
#     GOOD
output_list = ['mean', 'h', 'CI_low', 'CI_high']    # for Data Frame
                                                    # change if more variables are put in output
def mean_confidence_interval(data, confidence=0.95):
    """Return the sample mean and its two-sided Student-t confidence interval.

    Parameters
    ----------
    data : array-like
        Numeric sample. A 1-D sequence and a single-column 2-D object such as
        ``df[[col]]`` are both accepted; the values are flattened and treated
        as one sample.
    confidence : float, default 0.95
        Confidence level of the interval.

    Returns
    -------
    tuple
        ``(mean, h, mean - h, mean + h)`` where ``h`` is the half-width
        (margin of error) of the interval. The order matches ``output_list``.
    """
    # Flatten so that lists, Series and one-column DataFrames all yield scalar
    # statistics (no need to index into a one-element array afterwards).
    a = np.asarray(data, dtype=float).ravel()
    n = a.size
    m, se = np.mean(a), stats.sem(a)
    # Public ppf (not the private _ppf) so the arguments are validated.
    h = se * stats.t.ppf((1 + confidence) / 2., n - 1)
    return m, h, m - h, m + h

# One (mean, h, CI_low, CI_high) tuple per column of df.
# (!!) remember: the double brackets in df[[...]] are necessary so that each
# column is passed as a one-column DataFrame.
lookup_vec = [mean_confidence_interval(df[[name]]) for name in df]

# Transposed so that the statistics named in output_list become the rows and
# the columns of df stay the columns.
lookup = pd.DataFrame(lookup_vec, index=df.columns, columns=output_list).T
lookup