#dgadiraju
#11/4/2017 - 5:57 PM

#core-spark-key-value-pairs.py

#Check out our lab for practice: https://labs.itversity.com

#Converting data into key value pairs using map
#ordersFiltered is picked up from the previous topic with accumulators;
#its elements are used here as comma separated record strings
#Each record is split once and the resulting field list is reused, instead of
#re-splitting the same line for every field that is extracted
#Resulting pair: (int(field 0), field 1)
#NOTE(review): fields 0 and 1 are presumably the order id and the order date
#of retail_db orders - confirm against the data set schema
ordersMap = ordersFiltered.\
map(lambda order: order.split(",")).\
map(lambda fields: (int(fields[0]), fields[1]))

#Read the order_items data as an RDD of text lines
orderItems = sc.textFile("/public/retail_db/order_items")
#Each record is split once and the resulting field list is reused, instead of
#re-splitting the same line for every field that is extracted
#Resulting pair: (int(field 1), (int(field 2), float(field 4)))
#NOTE(review): fields 1, 2 and 4 are presumably the order id, product id and
#subtotal of retail_db order_items - confirm against the data set schema
orderItemsMap = orderItems.\
map(lambda orderItem: orderItem.split(",")).\
map(lambda fields: (int(fields[1]), (int(fields[2]), float(fields[4]))))

#Print the first 10 elements of each key value RDD to verify the conversion
for order in ordersMap.take(10):
    print(order)

for orderItem in orderItemsMap.take(10):
    print(orderItem)

#Raise any issues on https://discuss.itversity.com - make sure to categorize properly