dgadiraju
6/1/2017 - 8:09 AM

spark-scala-wordcount-mapPartitions.scala

// Input file for the word count. Exactly one definition of `path` may be active;
// swap the comment marker to read the alternative input instead.
val path = "/Users/itversity/Research/data/wordcount.txt"
// Alternative input (presumably a shared/cluster location -- confirm before use):
// val path = "/public/randomtextwriter/part-m-00000"

// Word count that tokenizes each partition with plain Scala Iterator operations
// inside mapPartitions, then sums the per-word counts with reduceByKey.
// Prints at most 100 (word, count) pairs; no ordering is applied, so this is a
// sample of the result rather than the 100 most frequent words.
sc.textFile(path).
  mapPartitions(lines => {
    // `lines` is the Iterator handed to this function for one partition, so the
    // calls below are Scala Iterator operations rather than RDD transformations.
    // Splitting on runs of whitespace handles tabs and repeated spaces; the
    // nonEmpty filter drops the "" token that split still produces for an empty
    // line or a line that starts with whitespace.
    lines.
      flatMap(_.split("\\s+")).
      filter(_.nonEmpty).
      map(word => (word, 1))
  }).
  reduceByKey(_ + _).
  take(100).
  foreach(println)