package org.apache.hadoop.examples;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

  public static class TokenizerMapper
       extends Mapper<Object, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(Object key, Text value, Context context
                    ) throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        // For unit testing with a mocked Context, create a new Text per token
        // instead of reusing word.set(itr.nextToken()), so each verified write
        // holds its own value.
        word = new Text(itr.nextToken());
        context.write(word, new IntWritable(1));
      }
    }
  }

  public static class IntSumReducer
       extends Reducer<Text, IntWritable, Text, IntWritable> {

    private IntWritable result = new IntWritable();

    public void reduce(Text key, Iterable<IntWritable> values, Context context
                       ) throws IOException, InterruptedException {
      int sum = 0;
      for (IntWritable val : values) {
        sum += val.get();
      }
      result.set(sum);
      context.write(key, result);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
      System.err.println("Usage: wordcount <in> <out>");
      System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}
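Note that the mapper above departs slightly from the stock Hadoop WordCount example so that the object-by-object Mockito verifications in the test below can match. The stock version reuses the mutable word and one fields on every write; a minimal sketch of that variant is shown here for comparison (the class name ReusingTokenizerMapper is made up for illustration, and it assumes the same imports and enclosing WordCount class as above):

  // Stock-style mapper: reuses the Writable instances across writes.
  // With a mocked Context, exact verify(...) calls like the ones in the test
  // below would no longer work, because every recorded invocation holds the
  // same Text object, which ends up equal to the last token emitted.
  public static class ReusingTokenizerMapper
       extends Mapper<Object, Text, Text, IntWritable> {

    private final static IntWritable one = new IntWritable(1);
    private Text word = new Text();

    public void map(Object key, Text value, Context context
                    ) throws IOException, InterruptedException {
      StringTokenizer itr = new StringTokenizer(value.toString());
      while (itr.hasMoreTokens()) {
        word.set(itr.nextToken());   // mutate the shared Text in place
        context.write(word, one);    // always passes the same object references
      }
    }
  }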
package org.apache.hadoop.examples;

/*
 * author: zhouhh
 * date: 2012.8.7
 */

import static org.mockito.Mockito.*;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.*;
import org.junit.*;

public class WordCountTest {

  @Test
  public void testWordCountMap() throws IOException, InterruptedException {
    WordCount.TokenizerMapper mapper = new WordCount.TokenizerMapper();
    Text value = new Text("a b c b a a");
    @SuppressWarnings("unchecked")
    WordCount.TokenizerMapper.Context context =
        mock(WordCount.TokenizerMapper.Context.class);

    mapper.map(null, value, context);

    // "a" occurs three times and "c" once in the input line
    verify(context, times(3)).write(new Text("a"), new IntWritable(1));
    verify(context).write(new Text("c"), new IntWritable(1));
    //verify(context).write(new Text("cc"), new IntWritable(1));
  }

  @Test
  public void testWordCountReduce() throws IOException, InterruptedException {
    WordCount.IntSumReducer reducer = new WordCount.IntSumReducer();
    @SuppressWarnings("unchecked")
    WordCount.IntSumReducer.Context context =
        mock(WordCount.IntSumReducer.Context.class);

    Text key = new Text("a");
    List<IntWritable> values = new ArrayList<IntWritable>();
    values.add(new IntWritable(1));
    values.add(new IntWritable(1));

    reducer.reduce(key, values, context);

    verify(context).write(new Text("a"), new IntWritable(2));
  }

  public static void main(String[] args) {
    // The tests are intended to be run by JUnit; the commented-out code below
    // invoked them directly for debugging.
    // try {
    //   WordCountTest t = new WordCountTest();
    //   //t.testWordCountMap();
    //   t.testWordCountReduce();
    // } catch (IOException e) {
    //   e.printStackTrace();
    // } catch (InterruptedException e) {
    //   e.printStackTrace();
    // }
  }
}
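Besides one verify() call per expected key/value pair, another option is to capture every write and assert on the whole set of outputs. The sketch below is a hypothetical extra test method for WordCountTest (the method name is made up); it additionally assumes import org.mockito.ArgumentCaptor, and org.junit.Assert is already covered by import org.junit.*:

  @Test
  public void testWordCountMapCapturesAllWrites() throws IOException, InterruptedException {
    WordCount.TokenizerMapper mapper = new WordCount.TokenizerMapper();
    @SuppressWarnings("unchecked")
    WordCount.TokenizerMapper.Context context =
        mock(WordCount.TokenizerMapper.Context.class);

    mapper.map(null, new Text("a b c b a a"), context);

    ArgumentCaptor<Text> keys = ArgumentCaptor.forClass(Text.class);
    ArgumentCaptor<IntWritable> vals = ArgumentCaptor.forClass(IntWritable.class);
    // six tokens in the input line, so six writes in total
    verify(context, times(6)).write(keys.capture(), vals.capture());

    // Count how many of the captured keys equal "a".
    // Note: this only works because the mapper creates a new Text per token;
    // if it reused one mutable Text, all captured references would reflect
    // the final value.
    int aCount = 0;
    for (Text k : keys.getAllValues()) {
      if (k.equals(new Text("a"))) {
        aCount++;
      }
    }
    Assert.assertEquals(3, aCount);
  }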
Original article: http://www.blogjava.net/qileilove/archive/2014/11/26/420618.html