# allomov
# 1/18/2016 - 12:59 PM
#
# coursera.sh

# Word-count streaming job. No -numReduceTasks is passed, so Hadoop's default
# reducer count applies. Reads /user/cloudera/input and writes the result to
# /user/cloudera/output_new.
streaming_jar=/usr/lib/hadoop-mapreduce/hadoop-streaming.jar
wordcount_dir=/home/cloudera/coursera/map-reduce-assigment
hadoop jar "$streaming_jar" \
  -input /user/cloudera/input \
  -output /user/cloudera/output_new \
  -mapper "$wordcount_dir/wordcount_mapper.py" \
  -reducer "$wordcount_dir/wordcount_reducer.py"


# Same word-count streaming job, but with the reducer count pinned to 2 via
# -numReduceTasks. Reads /user/cloudera/input and writes the result to
# /user/cloudera/output_new_1.
streaming_jar=/usr/lib/hadoop-mapreduce/hadoop-streaming.jar
wordcount_dir=/home/cloudera/coursera/map-reduce-assigment
hadoop jar "$streaming_jar" \
  -input /user/cloudera/input \
  -output /user/cloudera/output_new_1 \
  -mapper "$wordcount_dir/wordcount_mapper.py" \
  -reducer "$wordcount_dir/wordcount_reducer.py" \
  -numReduceTasks 2

# Concatenate the files of each job output directory into one local file in
# the current working directory.
#
# The glob patterns are quoted so that `hdfs dfs` expands them, rather than the
# local shell expanding (or, under nullglob/failglob, dropping or rejecting)
# them against the local filesystem first.
#
# NOTE(review): no job visible in this file writes /user/cloudera/output_new_0
# (the first word-count job writes /user/cloudera/output_new) -- confirm which
# directory is intended before relying on wordcount_num0_output.txt.
hdfs dfs -getmerge '/user/cloudera/output_new_0/*' wordcount_num0_output.txt

hdfs dfs -getmerge '/user/cloudera/output_new_1/*' wordcount_num1_output.txt


# Join streaming job. The mapper and reducer scripts are resolved relative to
# the current working directory, so run this from the directory that contains
# join2_mapper.py and join2_reducer.py. Reads /user/cloudera/input and writes
# the result to /user/cloudera/output_join2_1.
#
# "$PWD/..." is quoted so a working directory containing whitespace or glob
# characters is still passed to hadoop as a single argument.
#
# NOTE(review): the scripts are referenced by local path and are not shipped
# with the job; presumably this targets a single-node setup -- confirm.
hadoop jar /usr/lib/hadoop-mapreduce/hadoop-streaming.jar \
  -input /user/cloudera/input \
  -mapper "$PWD/join2_mapper.py" \
  -reducer "$PWD/join2_reducer.py" \
  -output /user/cloudera/output_join2_1