Analysis of the Map and Reduce phases in the WordCount program (Hadoop & Big Data study notes, NetEase Cloud Classroom)




import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {
/**
* @param args
* @author nwpulisz
* @date:2016.3.29
*/
static final String INPUT_PATH="hdfs://192.168.255.132:9000/INPUT";
static final String OUTPUT_PATH="hdfs://192.168.255.132:9000/OUTPUT";
public static void main(String[] args) throws Throwable {
// TODO Auto-generated method stub
Configuration conf = new Configuration();
Path outPut_path= new Path(OUTPUT_PATH);
Job job = new Job(conf, "WordCount");
//如果输出路径是存在的,则提前删除输出路径
FileSystem fileSystem = FileSystem.get(new URI(OUTPUT_PATH), conf);
if(fileSystem.exists(outPut_path))
{
fileSystem.delete(outPut_path,true);
}
FileInputFormat.setInputPaths(job, INPUT_PATH);
FileOutputFormat.setOutputPath(job, outPut_path);
job.setMapperClass(MyMapper.class);
job.setReducerClass(MyReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
job.waitForCompletion(true);
}
static class MyMapper extends Mapper<LongWritable, Text, Text, LongWritable>{
protected void map(LongWritable key, Text value,
Context context) throws IOException, InterruptedException {
String[] splits = value.toString().split("\\W+");
for (String word : splits) {
context.write(new Text(word), new LongWritable(1));
}
}
}
static class MyReducer extends Reducer<Text, LongWritable, Text, LongWritable>{
protected void reduce(Text key, Iterable<LongWritable> values, Context context
) throws IOException, InterruptedException {
long times = 0L;
for(LongWritable value: values) {
times+=value.get();
}
context.write(new Text(key),new LongWritable(times));
}
}
}




Original article: http://www.cnblogs.com/nwpulisz/p/5340401.html