dgadiraju
7/30/2017 - 10:21 AM

pyspark-actions-preview-data.py

# Preview the contents of the retail_db "orders" data set using RDD actions.
#
# Base directory of the retail_db data set. The original note listed two
# candidate locations; only one can be assigned at a time. To use the other
# location, set path to "/Users/itversity/Research/data/retail_db" instead.
path = "/public/retail_db"

# NOTE(review): `sc` is not defined in this snippet — presumably the
# SparkContext that the pyspark shell creates; confirm before running this
# as a standalone script.
rdd = sc.textFile(path + "/orders")

# Actions that return data to the caller: the first record, the first ten
# records, and every record.
rdd.first()
rdd.take(10)
# Caution: collect() returns the whole RDD as a list, so it is only
# appropriate when the data set is small.
rdd.collect()

# Print the first ten records, one per line.
for i in rdd.take(10):
    print(i)

# Print only the first two comma-separated fields of each record, separated
# by a tab. Each record is split once and the result reused.
for i in rdd.take(10):
    fields = i.split(",")
    print(fields[0] + "\t" + fields[1])