repo2kanagaraj
7/4/2018 - 2:27 AM

Spark

Morgan Stanley questions

// Load the raw input file as an RDD with one element per line of text.
// `sc` is presumably the SparkContext that spark-shell provides — confirm when running elsewhere.
val docData = sc.textFile(
  "/home/kanagaraj/dev/applications/spark/interview_workspace/morgan_standly/data.txt"
)

// Project every comma-separated line of docData to a triple:
//   (field at index 2, field at index 3, 1 if the field at index 1 equals "yes", otherwise 0).
// `split` returns an Array[String], so the fields are read by position with cols(i);
// tuple accessors such as cols._3 do not exist on an array and would not compile.
// A line with fewer than four fields fails with ArrayIndexOutOfBoundsException when evaluated.
val CountDocData = docData.map(line => line.split(",")).map { cols =>
  val yesFlag = if (cols(1) == "yes") 1 else 0
  (cols(2), cols(3), yesFlag)
}

// Print every projected record on the driver.
// `map` is a lazy transformation: mapping println over the RDD builds a new RDD but never
// runs it, so nothing is printed. collect() is an action; it brings the records to the driver,
// where foreach prints them. For a large input, prefer take(n) over collect().
CountDocData.collect().foreach(println)

// Load the input for the SumByDestContury aggregation as an RDD with one element per line.
// NOTE(review): this is the same path that docData reads — confirm the file holds the rows expected here.
val flightData = sc.textFile("/home/kanagaraj/dev/applications/spark/interview_workspace/morgan_standly/data.txt")

// For each value of field 0, compute the smallest per-(field 0, field 1) total of field 2.
//   1. Split each line on "," and key it by (field 0, field 1) with field 2 parsed as an Int.
//   2. Sum the values for every (field 0, field 1) pair.
//   3. Re-key the totals by field 0 alone and keep the minimum total per key.
// `toInt` throws NumberFormatException when field 2 is not an integer literal.
// NOTE(review): the meaning of the columns is not visible here; the name suggests
// field 1 is a destination country — confirm against the data file.
// A later statement in this file defines SumByDestContury again; in the REPL the later one wins.
val SumByDestContury = flightData.map(_.split(",")).map { cols =>
  ((cols(0), cols(1)), cols(2).toInt)
}.reduceByKey(_ + _).map { case ((first, _), total) =>
  (first, total)
}.reduceByKey((a, b) => math.min(a, b))

// For each value of field 1, compute the largest per-(field 0, field 1) total of field 2.
//   1. Key every line by the whole (field 0, field 1, field 2 as Int) triple and reduce by key.
//      The resulting count is discarded in the next step, so the net effect is that exact
//      duplicate rows collapse to one.
//      NOTE(review): confirm that dropping duplicate rows is intended.
//   2. Re-key by (field 0, field 1) and sum the remaining field 2 values.
//   3. Re-key the totals by field 1 alone and keep the maximum total per key.
val SumByDestContury = flightData.map(_.split(",")).map { cols =>
  ((cols(0), cols(1), cols(2).toInt), 1)
}.reduceByKey(_ + _).map { case ((first, second, amount), _) =>
  ((first, second), amount)
}.reduceByKey(_ + _).map { case ((_, second), total) =>
  (second, total)
}.reduceByKey((a, b) => math.max(a, b))


// Sort the (key, value) results by key in ascending order, bring at most the first 100
// to the driver, and print one pair per line.
SumByDestContury.sortByKey().take(100).foreach { pair =>
  println(pair)
}