// dgadiraju
// 6/4/2017 - 1:44 PM
//
// spark-scala-cardcountbysuit-numtasks.scala


// Counts the records per key (second pipe-delimited field, i.e. the suit
// according to the script name) and writes the totals to outputPath.
//
// The output directory must not exist before running this script.
// Remove it first if necessary:
// hadoop fs -rm -R /user/dgadiraju/cardcountbysuit
val inputPath = "/public/cards/largedeck.txt"
val outputPath = "/user/dgadiraju/cardcountbysuit"

// Lines that do not contain a second field (blank or malformed records) are
// skipped; indexing them directly would throw ArrayIndexOutOfBoundsException
// and fail the whole job.
// The trailing-dot chaining is kept so the script can be pasted into spark-shell.
sc.textFile(inputPath).
  map(card => card.split("\\|")).
  filter(fields => fields.length > 1).
  map(fields => (fields(1), 1)).
  reduceByKey((total, count) => total + count, 1). // numPartitions = 1: one task in the second stage, so only one output file is created.
  saveAsTextFile(outputPath)