wcMap

6/10/2015 - 3:28 PM

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;


/**
 * Word-count mapper: splits every input line into tokens and emits a
 * {@code (token, 1)} pair for each one.
 *
 * <p>The input key is the offset of the line and the input value is the line
 * itself. The output key is the token as {@link Text} and the output value is
 * an {@link IntWritable} holding the count 1.
 */
public class wcMap extends Mapper<LongWritable, Text, Text, IntWritable> {

	/** Count emitted with every token; shared so no IntWritable is allocated per token. */
	private static final IntWritable ONE = new IntWritable(1);

	/**
	 * Reusable output key, overwritten before each write. The previous version
	 * already reused a single Text across all writes of one call; holding it in
	 * a field merely avoids re-allocating it for every input line.
	 */
	private final Text word = new Text();

	/**
	 * Tokenizes one input line and writes {@code (token, 1)} for each token.
	 *
	 * @param ikey    offset of the line (not used)
	 * @param ivalue  the line to tokenize
	 * @param context sink that receives the emitted (key, value) pairs
	 * @throws IOException          if writing to the context fails
	 * @throws InterruptedException if writing to the context is interrupted
	 */
	public void map(LongWritable ikey, Text ivalue, Context context)
			throws IOException, InterruptedException {
		// StringTokenizer works on String, so the Text value is converted first.
		// The default delimiters (space, tab, newline, carriage return, form feed) apply.
		StringTokenizer tokenizer = new StringTokenizer(ivalue.toString());

		while (tokenizer.hasMoreTokens()) {
			word.set(tokenizer.nextToken());
			context.write(word, ONE);
		}
	}
}

Cacher is the code snippet organizer for pro developers

We empower you and your team to get more done, faster

wcMap