# Location of the retail_db dataset. This is the local copy; when running
# against the shared location use path = "/public/retail_db" instead.
path = "/Users/itversity/Research/data/retail_db"

# Pair RDD built from the order_items text file: the key is CSV field 1 parsed
# as int and the value is CSV field 4 parsed as float (order id and order-item
# subtotal, going by the names used in the computations that follow).
# Each line is split once and both fields are read from that single result.
# `sc` is expected to be the SparkContext provided by the enclosing session.
orderItems = (
    sc.textFile(path + "/order_items")
    .map(lambda orderItem: orderItem.split(","))
    .map(lambda fields: (int(fields[1]), float(fields[4])))
)
# Compute revenue for each order: sum the values of orderItems per key and
# print the first 100 (key, total) pairs returned to the driver.
for i in (
    orderItems
    .reduceByKey(lambda total, orderItemSubtotal: total + orderItemSubtotal)
    .take(100)
):
    print(i)
# Compute revenue and number of items for each order using aggregateByKey.
# The accumulator is a (running total, item count) tuple starting at (0.0, 0).
for i in (
    orderItems
    .aggregateByKey(
        (0.0, 0),
        # Fold one subtotal into an accumulator: add it and count one item.
        lambda iTotal, oisubtotal: (iTotal[0] + oisubtotal, iTotal[1] + 1),
        # Merge two accumulators element-wise.
        lambda fTotal, iTotal: (fTotal[0] + iTotal[0], fTotal[1] + iTotal[1]),
    )
    .take(100)
):
    print(i)
# Compute revenue and number of items for each order using reduceByKey.
# Each value is first turned into a (subtotal, 1) tuple so that a plain
# element-wise sum yields (total, item count) per key. This reuses the
# already-parsed orderItems pair RDD rather than reading and splitting the
# order_items file a second time.
for i in (
    orderItems
    .mapValues(lambda subtotal: (subtotal, 1))
    .reduceByKey(lambda total, element: (total[0] + element[0], total[1] + element[1]))
    .take(100)
):
    print(i)