// dgadiraju
// 5/31/2017 - 5:00 AM
//
// spark-scala-wordcount-ide.scala

package wordcount

import org.apache.spark.{SparkConf,SparkContext}
import org.apache.hadoop.fs._

/**
  * Created by itversity on 31/05/17.
  */
object WordCount {
  /**
    * Word-count driver: reads text files under args(0), counts
    * space-separated words, and writes "word\tcount" lines to args(1).
    *
    * @param args args(0) = input path, args(1) = output path
    */
  def main(args: Array[String]): Unit = {
    // Fail fast with a usage message instead of an
    // ArrayIndexOutOfBoundsException when arguments are missing.
    if (args.length < 2) {
      println("Usage: WordCount <inputPath> <outputPath>")
      sys.exit(1)
    }

    val conf = new SparkConf().setAppName("Word Count").setMaster("local")
    val sc = new SparkContext(conf)

    try {
      val fs = FileSystem.get(sc.hadoopConfiguration)

      val inputPath = args(0)
      val outputPath = args(1)

      if (!fs.exists(new Path(inputPath))) {
        println("Input path does not exist")
      } else {
        // saveAsTextFile fails if the output directory already exists,
        // so remove it (recursively) before running the job.
        if (fs.exists(new Path(outputPath)))
          fs.delete(new Path(outputPath), true)

        sc.textFile(inputPath).
          flatMap(_.split(" ")).
          map((_, 1)).
          reduceByKey(_ + _).
          map(rec => s"${rec._1}\t${rec._2}").
          saveAsTextFile(outputPath)
      }
    } finally {
      // Always release the SparkContext, even if the job throws,
      // so executors and the UI are shut down cleanly.
      sc.stop()
    }
  }
}