// dgadiraju
// 3/17/2017 - 6:31 AM
//
// SparkStreamingWordCount.scala

/**
  * Created by itversity on 17/03/17.
  * Counts the words in each 10-second batch of text lines read from a socket,
  * for example one served by `nc -lk 19999`.
  * Make sure build.sbt is updated with the dependency - 
  * libraryDependencies += "org.apache.spark" % "spark-streaming_2.10" % "1.6.2"
  * Create jar, ship the jar, start nc, and then use spark-submit
  * spark-submit --class SparkStreamingWordCount --master yarn --conf spark.ui.port=14562 retail_2.10-1.0.jar
  */
import org.apache.spark.SparkConf
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming._

/**
  * Spark Streaming job that prints, for every 10-second batch, the number of
  * occurrences of each word received as text lines over a TCP socket.
  *
  * Usage: `SparkStreamingWordCount [host] [port]`
  *
  * Both arguments are optional; when omitted the job connects to
  * `gw01.itversity.com:19999`.
  */
object SparkStreamingWordCount {
  /** Socket host used when no host argument is supplied. */
  private val DefaultHost = "gw01.itversity.com"

  /** Socket port used when no port argument is supplied. */
  private val DefaultPort = 19999

  /**
    * Entry point.
    *
    * @param args optional positional arguments: `args(0)` is the socket host,
    *             `args(1)` is the socket port (must be an integer)
    */
  def main(args: Array[String]): Unit = {
    val host = args.headOption.getOrElse(DefaultHost)
    val port = args.lift(1).fold(DefaultPort) { raw =>
      // Fail with a readable message instead of a bare NumberFormatException.
      scala.util.Try(raw.trim.toInt).getOrElse {
        System.err.println(
          s"Invalid port '$raw'. Usage: SparkStreamingWordCount [host] [port]")
        sys.exit(1)
      }
    }

    // Only fall back to yarn-client when no master was configured externally
    // (e.g. via `spark-submit --master ...`); an unconditional setMaster here
    // would take precedence over whatever the launcher supplied.
    val conf = new SparkConf()
      .setAppName("Testing Streaming")
      .setIfMissing("spark.master", "yarn-client")
    val ssc = new StreamingContext(conf, Seconds(10))

    val wordCounts = ssc
      .socketTextStream(host, port)
      // Split on runs of whitespace so repeated spaces/tabs do not create tokens.
      .flatMap(_.split("\\s+"))
      // A line with leading whitespace still yields one empty token; drop it so
      // the empty string is never reported as a word.
      .filter(_.nonEmpty)
      .map((_, 1))
      .reduceByKey(_ + _)

    wordCounts.print()

    ssc.start()
    // Block the driver until the streaming context is stopped or fails.
    ssc.awaitTermination()
  }

}