# Base directory of the retail_db data set; the "/orders" and "/order_items"
# sub-directories are read from underneath it.
# Exactly one assignment may be active: joining two assignments with `or`
# is not valid Python, so the alternative location is kept as a comment.
path = "/public/retail_db"
# Alternative location (swap with the line above to use it):
# path = "/Users/itversity/Research/data/retail_db"
# Orders whose second CSV field contains "2013-12", as pairs of
# (int(first field), second field).
# Presumably (order_id, order_date) -- confirm against the orders schema.
orders201312 = (
    sc.textFile(path + "/orders")
    .map(lambda line: line.split(","))
    .filter(lambda fields: "2013-12" in fields[1])
    .map(lambda fields: (int(fields[0]), fields[1]))
)
# Order items as pairs of (int(second field), int(third field)).
# Presumably (order_id, product_id) -- confirm against the order_items schema.
# The first element is the key used when joining with the orders RDDs.
orderItems = (
    sc.textFile(path + "/order_items")
    .map(lambda line: line.split(","))
    .map(lambda fields: (int(fields[1]), int(fields[2])))
)
# De-duplicated orderItems values for the "2013-12" orders.
# The join produces (key, (orders value, orderItems value)); only the
# orderItems side (presumably the product id) is kept before distinct().
distinctProducts201312 = (
    orders201312
    .join(orderItems)
    .values()
    .map(lambda orderAndItem: orderAndItem[1])
    .distinct()
)
# Orders whose second CSV field contains "2014-01", as pairs of
# (int(first field), second field).
# Presumably (order_id, order_date) -- confirm against the orders schema.
orders201401 = (
    sc.textFile(path + "/orders")
    .map(lambda line: line.split(","))
    .filter(lambda fields: "2014-01" in fields[1])
    .map(lambda fields: (int(fields[0]), fields[1]))
)
# orderItems values (presumably product ids) for the "2013-12" orders.
# No distinct() here, so a value appears once per matching order item.
products201312 = (
    orders201312
    .join(orderItems)
    .values()
    .map(lambda orderAndItem: orderAndItem[1])
)
# orderItems values (presumably product ids) for the "2014-01" orders.
# No distinct() here, so a value appears once per matching order item.
products201401 = (
    orders201401
    .join(orderItems)
    .values()
    .map(lambda orderAndItem: orderAndItem[1])
)
# The three statements below are bare expressions: their values are only
# shown when run interactively (e.g. in the pyspark shell).

# Count of all elements from both months combined. Per the Spark RDD API,
# union() does not remove duplicates, so repeated values are counted
# every time they occur.
products201312.union(products201401).count()
# Same union, de-duplicated first: number of different values present in
# either month.
products201312.union(products201401).distinct().count()
# Number of values present in both months. Per the Spark RDD API,
# intersection() returns each common element once, even if the inputs
# contain duplicates.
products201312.intersection(products201401).count()