// hadoop fs -copyFromLocal /data/nyse /user/dgadiraju/nyse
/*
spark-shell --master yarn \
--conf spark.ui.port=12345 \
--num-executors 4
*/
// Read the raw NYSE text data, reduce it to 4 partitions, and parse every
// comma-separated line into a typed 7-field tuple:
//   (ticker, date, open, high, low, close, volume)
// NOTE: a line with fewer than 7 fields or a non-numeric price/volume makes the
// map throw (ArrayIndexOutOfBounds / NumberFormatException) and fails the job.
// Trailing dots keep the chain pasteable line-by-line into spark-shell.
val nyse = sc.textFile("/user/dgadiraju/nyse").
  coalesce(4).
  map(stock => {
    val s = stock.split(",")
    (s(0), s(1), s(2).toFloat, s(3).toFloat, s(4).toFloat, s(5).toFloat, s(6).toInt)
  }).
  toDF("stockticker", "transactiondate", "openprice", "highprice", "lowprice", "closeprice", "volume")

// Use 4 shuffle partitions for Spark SQL queries run in this session.
sqlContext.setConf("spark.sql.shuffle.partitions", "4")

// Persist the DataFrame as Parquet. DataFrame.save(path, source) is deprecated
// (Spark 1.4) and removed in 2.0; the DataFrameWriter API is the supported
// replacement and keeps the same default behavior (fail if the path exists).
// Shorthand equivalent: nyse.write.parquet("/user/dgadiraju/nyse_parquet")
nyse.write.format("parquet").save("/user/dgadiraju/nyse_parquet")