spark_best_of
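
# Minimal setup sketch so the snippets below can run locally; the SparkSession,
# schema, and sample rows are assumptions, not part of the original notebook.
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("spark_best_of").getOrCreate()
ratings = spark.createDataFrame(
    [(1, 10, 4.0, "a"), (1, 20, 3.5, "a"), (2, 10, 5.0, "b")],
    ["userId", "movieId", "rating", "community"],
)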


# Count ratings per community, largest first (display() is Databricks-specific; use .show() elsewhere)
from pyspark.sql.functions import col

display(ratings.groupBy("community").count().orderBy(col("count").desc()))
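
# A minimal sketch of how the `numerator` and `denominator` used below might be
# computed; the "userId" and "movieId" column names are assumptions.
numerator = ratings.count()                                 # ratings actually present
num_users = ratings.select("userId").distinct().count()    # distinct users
num_movies = ratings.select("movieId").distinct().count()  # distinct items
denominator = num_users * num_movies                        # cells in a full user-item matrix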

# Sparsity: percentage of the user-item matrix that has no rating
sparsity = (1.0 - (numerator * 1.0) / denominator) * 100
print("The ratings dataframe is %.2f%% empty." % sparsity)