Vivian
3/11/2020 - 10:11 AM

Function to compare NPS between two periods

# A function for comparing NPS between two time windows (periods)
import pandas as pd
import datetime as dt
from statsmodels.stats.proportion import proportions_ztest

# NPS rating bands: 0-6 detractor, 7-8 passive, 9-10 promoter.
_NPS_BINS = (
  (range(0, 7), 'detractor'),
  (range(7, 9), 'passive'),
  (range(9, 11), 'promoter'),
)
_NPS_LEVELS = tuple(level for _, level in _NPS_BINS)


def _nps_level(rating):
  '''Return the NPS level of a rating, or None when it falls in no band (e.g. NaN).'''
  return next((level for band, level in _NPS_BINS if rating in band), None)


def analyse_nps(df, a_start_date, a_end_date, b_start_date, b_end_date, question_list, period_a_label='pre', period_b_label='post'):
  '''
  Compare survey ratings between two time windows (period a vs. period b).

  df: DataFrame with columns user_id, completed_at and every column named in
    question_list. The input is not modified; a copy is used.
  a_start_date, a_end_date: bounds of time window a. Both bounds are EXCLUSIVE:
    completed_at is truncated to the calendar day and a survey belongs to the
    window only if a_start_date < day < a_end_date.
    NOTE(review): surveys completed on the start/end day itself are therefore
    left out - confirm this is intended.
  b_start_date, b_end_date: same for time window b. A survey that falls in both
    windows is assigned to period a.
  question_list: names of the rating columns (e.g. rating_nps, rating_delivery).
    Ratings are mapped to detractor (0-6), passive (7-8) and promoter (9-10);
    ratings outside those bands (including NaN) are ignored in the level counts.
  period_a_label, period_b_label: labels used for the two periods in the output.

  Returns a dict with
    'nps_1': mean rating per period and question, rounded to 2 decimals;
    'nps_2': {question: {'overview': DataFrame, 'p_values': dict}} where
      overview holds, per period, the count of each level, n_rating,
      <question>_score (100 * (promoters - detractors) / n_rating, 1 decimal)
      and the <level>_pct shares (2 decimals), and p_values holds the two-sided
      two-proportion z-test p-value per level (3 decimals). A p-value is NaN
      when the test cannot be run because fewer than two periods have ratings.
  '''
  a_start, a_end = pd.Timestamp(a_start_date), pd.Timestamp(a_end_date)
  b_start, b_end = pd.Timestamp(b_start_date), pd.Timestamp(b_end_date)

  df = df.copy()
  # Truncate to the calendar day, then convert back to datetime64 so the
  # values stay comparable with the Timestamp bounds.
  df['completed_at'] = pd.to_datetime(pd.to_datetime(df['completed_at']).dt.date)

  in_a = (df['completed_at'] > a_start) & (df['completed_at'] < a_end)
  in_b = (df['completed_at'] > b_start) & (df['completed_at'] < b_end)
  # Period a wins when the windows overlap; rows in neither window are dropped.
  df['period'] = in_a.map({True: period_a_label, False: period_b_label})
  df = df[in_a | in_b].copy()  # explicit copy: new columns are added below

  for question in question_list:
    df[question + '_level'] = df[question].apply(_nps_level)

  # nps result 1 - mean rating per period
  nps_1 = df.groupby(['period'])[question_list].mean().round(2)

  # nps result 2 - level groups
  nps_2_dict = {}
  levels = list(_NPS_LEVELS)
  for question in question_list:
    col_level = question + '_level'
    nps_2 = df.groupby(['period', col_level])['user_id'].count().unstack(1, fill_value=0)
    # Guarantee all three level columns exist, with 0 for levels never observed.
    nps_2 = nps_2.reindex(columns=levels, fill_value=0)
    nps_2['n_rating'] = nps_2[levels].sum(axis=1)

    score_col = question + '_score'
    nps_2[score_col] = (100 * (nps_2['promoter'] - nps_2['detractor']) / nps_2['n_rating']).round(1)

    # The two-sample test needs exactly two periods with data.
    can_test = len(nps_2) == 2
    p_values = {}
    for level in levels:
      nps_2[level + '_pct'] = (nps_2[level] / nps_2['n_rating']).round(2)

      # two-proportion z-test of this level's share between the two periods
      if can_test:
        count = nps_2[level].tolist()
        nobs = nps_2['n_rating'].tolist()
        _, pval = proportions_ztest(count, nobs, alternative='two-sided')
        p_values[level] = round(pval, 3)
      else:
        p_values[level] = float('nan')

    nps_2_dict[question] = {'overview': nps_2, 'p_values': p_values}

  return {'nps_1': nps_1, 'nps_2': nps_2_dict}

# Run the comparison on a copy of df4: period a ('pre') vs. period b ('post').
df = df4.copy()
question_list = ['rating_nps', 'rating_delivery', 'rating_expectations']

# Period a runs from 2018-01-01 to 2019-11-14, period b from 2019-11-21 to today.
a_start_date, a_end_date = dt.datetime(2018, 1, 1), dt.datetime(2019, 11, 14)
b_start_date = dt.datetime(2019, 11, 21)
b_end_date = pd.to_datetime(dt.date.today())

nps_result = analyse_nps(
  df,
  a_start_date,
  a_end_date,
  b_start_date,
  b_end_date,
  question_list,
  period_a_label='pre',
  period_b_label='post',
)

# Result 1: mean rating per period (a bare expression, so it is only rendered
# when it is the last statement of a notebook cell).
nps_result['nps_1']

# Result 2: for every question, show the level overview table and its p-values.
from IPython.display import display, HTML
nps2_dict = nps_result['nps_2']
for q, q_result in nps2_dict.items():
  print(q)
  display(q_result['overview'])
  print('p-value:', q_result['p_values'], '\n \n \n')