import sys

try:
    import configparser as cp  # Python 3 module name
except ImportError:
    import ConfigParser as cp  # Python 2 module name
# Properties file that maps a config section to its Spark settings.
_PROPERTIES_PATH = "src/main/resources/application.properties"


def _open_hadoop_fs(sc):
    """Return ``(fs, Path)`` obtained through the JVM gateway of *sc*.

    ``fs`` is the Hadoop ``FileSystem`` built from a fresh Hadoop
    ``Configuration``; ``Path`` is the JVM ``org.apache.hadoop.fs.Path``
    class used to wrap path strings before handing them to ``fs``.
    """
    jvm = sc._gateway.jvm
    hadoop_fs = jvm.org.apache.hadoop.fs
    fs = hadoop_fs.FileSystem.get(jvm.org.apache.hadoop.conf.Configuration())
    return fs, hadoop_fs.Path


def main(argv=None):
    """Run the word-count job and return a process exit status.

    Args:
        argv: Command line in ``sys.argv`` layout:
            ``[script, inputPath, outputPath, configSection]``.
            ``configSection`` names the section of the properties file whose
            ``executionMode`` option is used as the Spark master.
            Defaults to ``sys.argv``.

    Returns:
        0 on success, 1 when Spark cannot be imported, the configuration
        cannot be resolved or the input path does not exist, and 2 when too
        few command-line arguments were given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 4:
        prog = argv[0] if argv else "<script>"
        sys.stderr.write(
            "Usage: %s <inputPath> <outputPath> <configSection>\n" % prog
        )
        return 2
    input_path, output_path, section = argv[1:4]

    # Only the import itself is guarded, so an unrelated ImportError raised
    # later by the job is not misreported as a missing Spark installation.
    try:
        from pyspark import SparkConf, SparkContext
    except ImportError as e:
        print("Can not import Spark Modules %s" % e)
        return 1
    print("Successfully imported Spark Modules")

    props = cp.RawConfigParser()
    # read() silently skips unreadable files and returns the ones it parsed.
    if not props.read(_PROPERTIES_PATH):
        sys.stderr.write("Can not read %s\n" % _PROPERTIES_PATH)
        return 1
    try:
        master = props.get(section, "executionMode")
    except (cp.NoSectionError, cp.NoOptionError) as e:
        sys.stderr.write("Invalid configuration: %s\n" % e)
        return 1

    conf = SparkConf().setAppName("Word Count").setMaster(master)
    sc = SparkContext(conf=conf)
    try:
        fs, Path = _open_hadoop_fs(sc)
        if not fs.exists(Path(input_path)):
            print("Input path does not exists")
            return 1
        # saveAsTextFile refuses to overwrite, so clear any previous output
        # (second argument True = recursive delete).
        if fs.exists(Path(output_path)):
            fs.delete(Path(output_path), True)

        # NOTE: splitting on a single space yields empty-string "words" for
        # consecutive spaces; kept as-is so the output stays compatible.
        (sc.textFile(input_path)
           .flatMap(lambda line: line.split(" "))
           .map(lambda word: (word, 1))
           .reduceByKey(lambda total, count: total + count)
           .saveAsTextFile(output_path))
    finally:
        # Always release the SparkContext, even if the job fails.
        sc.stop()
    return 0


if __name__ == "__main__":
    sys.exit(main())