// 6/10/2015 - 3:28 PM


import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class wcMap extends Mapper<LongWritable, Text, Text, IntWritable> {

	public void map(LongWritable ikey, Text ivalue, Context context)
			throws IOException, InterruptedException {
		// ikey is the offset of the line
		// ivalue is the line itself
		Text word = new Text(); // we use the H class
		String line = ivalue.toString(); // we convert the input to string so we can use tokenizer
		StringTokenizer tokenizer = new StringTokenizer(line);
		while ( tokenizer.hasMoreElements())
			context.write(word, new IntWritable(1)); // emit Text, IntWritable, this are the (key,value) so they are serializable and comparable in H