# Base directory of the retail_db data set. The original line listed two
# candidate locations joined by "or", which is not valid Python; the first
# one is used here. To read from the other location instead, set
# path = "/Users/itversity/Research/data/retail_db".
path = "/public/retail_db"
# Turn each text line of the "orders" data into a (key, line) pair, where the
# key is the first comma-separated field parsed as an int
# (presumably the order id -- confirm against the data layout).
orders = (
    sc.textFile(path + "/orders")
    .map(lambda line: (int(line.split(",")[0]), line))
)

# Same for "order_items", but keyed on the second comma-separated field
# (presumably the order id the item belongs to -- confirm).
orderItems = (
    sc.textFile(path + "/order_items")
    .map(lambda line: (int(line.split(",")[1]), line))
)

# Inner join on the integer key, then print a 10-element sample.
ordersJoin = orders.join(orderItems)
for joined in ordersJoin.take(10):
    print(joined)
# Left outer join keeps every element of `orders`; per the PySpark
# leftOuterJoin contract the right-hand value is None when no element of
# `orderItems` shares the key.
ordersLeftOuter = orders.leftOuterJoin(orderItems)

# Joined elements are indexed as rec[1][0] (left value) and rec[1][1]
# (right value or None). Keep only those with no right-hand match; use an
# identity test because None is a singleton.
ordersWithoutItems = ordersLeftOuter.filter(lambda rec: rec[1][1] is None)

# Preview the full joined tuples for the unmatched keys.
for i in ordersWithoutItems.take(10):
    print(i)

# Preview only the left-hand (orders) value of those unmatched tuples.
for i in ordersWithoutItems.map(lambda rec: rec[1][0]).take(10):
    print(i)
# Group both RDDs by key in one pass, then print a 10-element sample of the
# cogrouped result.
ordersCogroup = orders.cogroup(orderItems)
for grouped in ordersCogroup.take(10):
    print(grouped)
# Two small in-memory RDDs: the integers 1..9 and two words.
a = sc.parallelize(range(1, 10))
b = sc.parallelize(["Hello", "World"])

# An RDD cannot be iterated directly on the driver, so bring the cartesian
# product back with collect() first; it is tiny here (9 * 2 = 18 pairs).
for i in a.cartesian(b).collect():
    print(i)