dgadiraju
11/13/2017 - 1:04 AM

spark-scala-word-count.scala

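/*
Launch spark-shell with the command below before pasting the script.
The --packages option pulls in spark-avro, which the Avro write at the end needs.

spark-shell --master yarn \
  --conf spark.ui.port=12456 \
  --num-executors 10 \
  --executor-memory 3G \
  --executor-cores 2 \
  --packages com.databricks:spark-avro_2.10:2.0.1
*/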

// Word count over the text under /public/randomtextwriter, saved as Avro.
// Meant to be pasted into spark-shell: `sc` is the SparkContext the shell provides.

// Brings the `.avro(...)` method used on the DataFrame writer below into scope.
import com.databricks.spark.avro._

val lines = sc.textFile("/public/randomtextwriter")

// Splitting on a single space produces empty tokens for blank lines and for
// leading or repeated spaces; drop them so "" is never counted as a word.
val words = lines.flatMap(_.split(" ")).filter(_.nonEmpty)

// Pair every word with 1 so the counts can be summed per key.
val tuples = words.map(word => (word, 1))

// Sum the ones per word; the second argument requests 8 partitions for the result.
val wordCount = tuples.reduceByKey(_ + _, 8)

// NOTE(review): toDF needs the SQL implicits in scope — presumably supplied by
// spark-shell; confirm if this is ever run outside the shell.
val wordCountDF = wordCount.toDF("word", "count")

// NOTE(review): no save mode is set, so this presumably fails if the target
// directory already exists — confirm before re-running.
wordCountDF.write.avro("/user/dgadiraju/solutions/solution05/wordcount")