// Calculate cumulative sum per id, ordered by t, using Spark window functions.
package main.scala
import java.io.File
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._
import org.apache.spark.sql.SQLContext
object Test {

  /** Demonstrates per-id cumulative sums with Spark SQL window functions.
   *
   *  Builds a small in-memory DataFrame of (id, param1, param2, t) rows,
   *  then adds two running-total columns computed over a window partitioned
   *  by "id" and ordered by "t", printing the frame before and after.
   *
   *  @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("Test")
    val sc = new SparkContext(conf)
    try {
      val sqlContext = SQLContextSingleton.getInstance(sc)
      import sqlContext.implicits._

      // Sample rows: (id, param1, param2, t) where t is the ordering key.
      val data = Seq(
        ("A", 1, 1, 1), ("A", 3, 1, 3), ("A", 0, 0, 2),
        ("A", 4, 0, 4), ("A", 0, 0, 6), ("A", 2, 1, 5),
        ("B", 0, 1, 3), ("B", 0, 0, 4), ("B", 2, 0, 1),
        ("B", 2, 1, 2), ("B", 0, 0, 6), ("B", 1, 1, 5)
      ).toDF("id", "param1", "param2", "t")
      data.show()

      // One partition per id, rows ordered by t. With orderBy and no explicit
      // frame, Spark defaults to a frame of (unboundedPreceding, currentRow),
      // so sum(...).over(...) yields a running (cumulative) sum.
      val byIdOverTime = Window.partitionBy("id").orderBy("t")

      data
        .withColumn("cumulativeSum1", sum("param1").over(byIdOverTime))
        .withColumn("cumulativeSum2", sum("param2").over(byIdOverTime))
        .show()
    } finally {
      // Always release cluster resources, even if the job above fails.
      sc.stop()
    }
  }
}