# Function for comparing NPS survey results between two time windows (periods)
import pandas as pd
import datetime as dt
from statsmodels.stats.proportion import proportions_ztest
def analyse_nps(df, a_start_date, a_end_date, b_start_date, b_end_date, question_list, period_a_label='pre', period_b_label='post'):
    """Compare NPS-style survey ratings between two time windows.

    Args:
        df: DataFrame with the columns ``user_id``, ``completed_at`` and
            every column named in ``question_list``. It is not modified.
        a_start_date: Lower bound of period A (exclusive).
        a_end_date: Upper bound of period A (exclusive).
        b_start_date: Lower bound of period B (exclusive).
        b_end_date: Upper bound of period B (exclusive).
        question_list: Names of the rating columns to analyse, e.g.
            ``['rating_nps', 'rating_delivery']``.
        period_a_label: Label used for rows that fall into period A.
        period_b_label: Label used for rows that fall into period B.

    Returns:
        A dict with two entries:

        * ``'nps_1'``: mean rating per period and question, rounded to 2
          decimals.
        * ``'nps_2'``: per question, a dict with ``'overview'`` (per-period
          counts of detractors/passives/promoters, ``n_rating``, the
          ``<question>_score`` column and the ``*_pct`` shares) and
          ``'p_values'`` (one two-sided p-value per level).

    Notes:
        ``completed_at`` is truncated to the calendar date before it is
        compared, and both bounds of each window are exclusive, so a survey
        completed on a start or end date itself belongs to neither period.
        If the windows overlap, period A takes precedence.

        Ratings are bucketed as 0-6 detractor, 7-8 passive, 9-10 promoter.
        Ratings outside these buckets (including missing values) are left
        out of the level counts for that question.

        P-values come from a two-sample z-test of proportions and are NaN
        unless both periods have ratings for the question.
    """
    levels = ['detractor', 'passive', 'promoter']
    nps_bins = {range(0, 7): 'detractor', range(7, 9): 'passive', range(9, 11): 'promoter'}

    def rating_level(rating):
        # No matching bucket (NaN, out of range, non-integer) -> None, so the
        # row is simply dropped by the groupby below instead of raising.
        return next((level for bucket, level in nps_bins.items() if rating in bucket), None)

    df = df.copy()

    # Keep only the calendar date of the completion timestamp (as midnight).
    completed = pd.to_datetime(df['completed_at'])
    if completed.dt.tz is not None:
        # Drop the timezone but keep the local wall-clock date.
        completed = completed.dt.tz_localize(None)
    completed = completed.dt.normalize()
    df['completed_at'] = completed

    # Both bounds are exclusive; period A wins when the windows overlap.
    in_a = (completed > a_start_date) & (completed < a_end_date)
    in_b = (completed > b_start_date) & (completed < b_end_date)
    df['period'] = in_a.map({True: period_a_label, False: period_b_label})
    # Explicit copy so the column assignments below do not write to a slice.
    df = df[in_a | in_b].copy()

    for question in question_list:
        df[question + '_level'] = df[question].map(rating_level)

    # nps result 1 - mean rating per period
    nps_1 = df.groupby('period')[question_list].mean().round(2)

    # nps result 2 - level groups
    nps_2_dict = {}
    for question in question_list:
        col_level = question + '_level'
        score_col = question + '_score'

        nps_2 = df.groupby(['period', col_level])['user_id'].count().unstack(level=1, fill_value=0)
        # Guarantee all three level columns, even if a level never occurs.
        nps_2 = nps_2.reindex(columns=levels, fill_value=0)
        nps_2['n_rating'] = nps_2[levels].sum(axis=1)
        nps_2[score_col] = ((nps_2['promoter'] - nps_2['detractor']) / nps_2['n_rating'] * 100).round(1)

        # The z-test below compares two samples, so it needs both periods.
        two_samples = len(nps_2) == 2 and bool((nps_2['n_rating'] > 0).all())
        nobs = nps_2['n_rating'].tolist()

        p_values = {}
        for level in levels:
            nps_2[level + '_pct'] = (nps_2[level] / nps_2['n_rating']).round(2)
            if two_samples:
                # two-sample z-test of proportions
                _, pval = proportions_ztest(nps_2[level].tolist(), nobs, alternative='two-sided')
                p_values[level] = round(pval, 3)
            else:
                p_values[level] = float('nan')

        nps_2_dict[question] = {'overview': nps_2, 'p_values': p_values}

    return {'nps_1': nps_1, 'nps_2': nps_2_dict}
# Example run: compare survey results from before and after mid-November 2019.
# `df4` is not defined in this section; it is expected to exist already.
df = df4.copy()

# Period A ("pre") and period B ("post"); period B runs up to today's date.
a_start_date = dt.datetime(2018, 1, 1)
a_end_date = dt.datetime(2019, 11, 14)
b_start_date = dt.datetime(2019, 11, 21)
b_end_date = pd.to_datetime(dt.date.today())

question_list = ['rating_nps', 'rating_delivery', 'rating_expectations']

nps_result = analyse_nps(
    df=df,
    a_start_date=a_start_date,
    a_end_date=a_end_date,
    b_start_date=b_start_date,
    b_end_date=b_end_date,
    question_list=question_list,
    period_a_label='pre',
    period_b_label='post',
)

# Result 1: mean rating per period (a bare expression, shown by a notebook
# when it is the last statement of a cell).
nps_result['nps_1']

# Result 2: per-question level breakdown followed by its p-values.
from IPython.display import display, HTML

nps2_dict = nps_result['nps_2']
for q, q_result in nps2_dict.items():
    print(q)
    display(q_result['overview'])
    print('p-value:', q_result['p_values'], '\n \n \n')