dgadiraju
8/2/2017 - 6:33 AM

pyspark-cardcountbysuit.py

# Make sure the output directory does not already exist before running;
# saveAsTextFile fails if the target path is already present. To remove it:
# hadoop fs -rm -R /user/dgadiraju/cardcountbysuit
# Source file of "|"-delimited card records and the directory the
# per-key counts are written to.
inputPath = "/public/cards/largedeck.txt"
outputPath = "/user/dgadiraju/cardcountbysuit"

# NOTE(review): `sc` is not defined in this script; presumably it is the
# SparkContext provided by the pyspark shell -- confirm how this is launched.
cardLines = sc.textFile(inputPath)

# Key every record by its second "|"-separated field (presumably the suit,
# going by the output path name) and pair it with a count of 1.
suitOnes = cardLines.map(lambda line: (line.split("|")[1], 1))

# Sum the 1s for each key to get the number of records per key.
suitCounts = suitOnes.reduceByKey(lambda running, count: running + count)

# Write the (key, count) pairs out as text.
suitCounts.saveAsTextFile(outputPath)