#Check out our lab for practice: https://labs.itversity.com
# Create an RDD from a file in HDFS.
# NOTE: assumes `sc` is an already-initialised SparkContext (e.g. the one the
# pyspark shell provides) -- it is not defined in this snippet.
orders = sc.textFile("/public/retail_db/orders")

# Create an RDD from a local file (data from file -> collection -> RDD).
# The whole file is read locally into a list of lines first; the `with` block
# guarantees the file handle is closed as soon as the read finishes instead of
# staying open until the garbage collector gets to it.
with open("/data/retail_db/products/part-00000") as productsFile:
    productsList = productsFile.read().splitlines()

# Turn the in-memory list of lines into an RDD.
productsRDD = sc.parallelize(productsList)
#Raise any issues on https://discuss.itversity.com - make sure to categorize properly