beatrice-m
2/3/2017 - 6:07 PM

Multiple agg on same column

Apply several aggregation functions to the same column in a single agg() call

   import org.apache.spark.sql.Column
   import org.apache.spark.sql.functions._

   
   // Per-user statistics in a single agg() call: for every column listed in
   // `aggregate`, compute each operation listed in `op_avg_std`. The resulting
   // mean and standard deviation are then cast to integers and the number of
   // users sharing each (mean, std) pair is counted and displayed.

   // Aggregation functions that can be requested by short name.
   val map_avg_std: Map[String, Column => Column] = Map(
     "std" -> stddev_pop, "mean" -> avg)
   // Grouping key(s).
   val groupBy = Seq("Username")
   // Column(s) to aggregate.
   val aggregate = Seq("DurationInMn")
   // Operations (keys of `map_avg_std`) applied to every aggregated column.
   val op_avg_std = Seq("std", "mean")
   // One aggregate expression per (column, operation) pair.
   val exprs = for {
     c <- aggregate
     f <- op_avg_std
   } yield map_avg_std(f)(col(c))

   // Names Spark generates for the un-aliased aggregates built above. They are
   // declared once here so select() and groupBy() below cannot drift apart.
   val meanColName = "avg(DurationInMn)"
   val stdColName = "stddev_pop(DurationInMn)"

   // col(...) and cast("int") are used instead of $"..." and IntegerType so the
   // snippet only needs the two imports at the top (no spark.implicits._ or
   // org.apache.spark.sql.types._ required).
   val df2 = df1.groupBy(groupBy.map(col): _*).agg(exprs.head, exprs.tail: _*)
     // Keep only the two statistics (the grouping key is dropped here).
     .select(col(meanColName).cast("int"), col(stdColName).cast("int"))
     // Count how many rows (one per user) fall on each (mean, std) pair.
     .groupBy(meanColName, stdColName).count()
   df2.show()