
// spark-scala-convert-nyse-to-parquet.scala

// Copy the NYSE data set from the local file system into HDFS
// hadoop fs -copyFromLocal /data/nyse /user/dgadiraju/nyse

/*
Launch spark-shell on YARN with a custom UI port and 4 executors:

spark-shell --master yarn \
  --conf spark.ui.port=12345 \
  --num-executors 4
*/

// toDF on an RDD of tuples relies on sqlContext.implicits._ (imported automatically by spark-shell)
val nyse = sc.textFile("/user/dgadiraju/nyse").
  coalesce(4).  // reduce to 4 partitions so the job writes 4 output files
  map(stock => {
    // each record: ticker,date,open,high,low,close,volume
    val s = stock.split(",")
    (s(0), s(1), s(2).toFloat, s(3).toFloat, s(4).toFloat, s(5).toFloat, s(6).toInt)
  }).
  toDF("stockticker", "transactiondate", "openprice", "highprice", "lowprice", "closeprice", "volume")
  
// Keep shuffle partitions low for this small data set
sqlContext.setConf("spark.sql.shuffle.partitions", "4")

// DataFrame.save(path, source) is deprecated since Spark 1.4; use the DataFrameWriter API instead
nyse.write.parquet("/user/dgadiraju/nyse_parquet")
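
// Verification sketch (an addition, assuming the same spark-shell session and that the write
// above succeeded): read the Parquet files back and compare record counts with the source.
val nyseParquet = sqlContext.read.parquet("/user/dgadiraju/nyse_parquet")
nyseParquet.printSchema()
println(s"source: ${nyse.count()}, parquet: ${nyseParquet.count()}")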