beatrice-m
11/4/2016 - 6:04 PM

Calculate cumulative sum

package main.scala

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._


object Test {

    def main(args: Array[String]): Unit = {

        val conf = new SparkConf().setAppName("Test")
        val sc = new SparkContext(conf)
        // Reuse a single SQLContext (see SQLContextSingleton below)
        val sqlContext = SQLContextSingleton.getInstance(sc)
        import sqlContext.implicits._

        // Sample data: (id, param1, param2, t), where t is the ordering column
        val data = Seq(("A",1,1,1),("A",3,1,3),("A",0,0,2),("A",4,0,4),("A",0,0,6),("A",2,1,5),
                       ("B",0,1,3),("B",0,0,4),("B",2,0,1),("B",2,1,2),("B",0,0,6),("B",1,1,5))
            .toDF("id","param1","param2","t")
        data.show()

        // Running sums per id, ordered by t: with an ORDER BY and no explicit frame,
        // the window defaults to "unbounded preceding to current row", i.e. a cumulative sum
        data.withColumn("cumulativeSum1", sum("param1").over(Window.partitionBy("id").orderBy("t")))
            .withColumn("cumulativeSum2", sum("param2").over(Window.partitionBy("id").orderBy("t")))
            .show()
    }
}
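
SQLContextSingleton is called above but not defined in this snippet. Below is a minimal sketch of such a helper, assuming the lazily initialized singleton pattern from the Spark Streaming programming guide; if your project already ships its own helper, use that instead. Placed in the same file, it is covered by the imports at the top.

object SQLContextSingleton {

    @transient private var instance: SQLContext = _

    def getInstance(sparkContext: SparkContext): SQLContext = {
        if (instance == null) {
            // Create the SQLContext once and reuse it on later calls
            instance = new SQLContext(sparkContext)
        }
        instance
    }
}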