import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
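
/**
 * WordCount mapper: receives one line of input text at a time and emits a
 * (word, 1) pair for every whitespace-separated token in that line.
 */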
public class wcMap extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    public void map(LongWritable ikey, Text ivalue, Context context)
            throws IOException, InterruptedException {
        // ikey is the byte offset of the line within the input split;
        // ivalue is the line itself
        Text word = new Text(); // reusable Hadoop Text object for the output key
        String line = ivalue.toString(); // convert the input to a String so we can tokenize it
        StringTokenizer tokenizer = new StringTokenizer(line);
        while (tokenizer.hasMoreTokens()) {
            word.set(tokenizer.nextToken());
            // emit (word, 1); Text and IntWritable are the (key, value) types,
            // which Hadoop requires to be serializable and comparable
            context.write(word, new IntWritable(1));
        }
    }
}
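
// A minimal sketch of the matching WordCount reducer, assuming the standard
// (word, 1) pairs emitted by wcMap above. The class name wcReduce and its
// placement in this file are illustrative; in practice it usually lives in
// its own file. It sums the 1s for each word and emits the total count.
class wcReduce extends org.apache.hadoop.mapreduce.Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get(); // one count per occurrence of the word
        }
        context.write(key, new IntWritable(sum)); // emit (word, total count)
    }
}
// In a driver class these would be wired together (hypothetical snippet):
// job.setMapperClass(wcMap.class); job.setReducerClass(wcReduce.class);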