hadoop —— MapReduce例子（求平均值）

时间：2014-03-10 06:52:39 阅读：647 评论：0 收藏：0 [点我收藏+]

参考：http://eric-gcm.iteye.com/blog/1807468

math.txt:

张三    88
李四    99
王五    66
赵六    77

china.txt:

张三    78
李四    89
王五    96
赵六    67

english.txt:

张三    80
李四    82
王五    84
赵六    86

JAVA代码:

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * MapReduce job that computes the average score of each student.
 *
 * <p>Every input line is expected to hold a student name followed by an
 * integer score, separated by whitespace. The job emits one line per student
 * containing the name and the average of all that student's scores, truncated
 * to an integer.
 */
public class Score {

    /**
     * Mapper that parses "name score" lines and emits (name, score) pairs.
     */
    public static class Map extends
        Mapper<LongWritable, Text, Text, IntWritable> {

        /**
         * Parses the record and writes one (name, score) pair per non-blank line.
         *
         * @param key     input key supplied by the input format; not used here
         * @param value   text of the record; normally a single line when read
         *                through TextInputFormat, as configured in main
         * @param context sink for the emitted (name, score) pairs
         * @throws java.util.NoSuchElementException if a non-blank line has a
         *         name but no score
         * @throws NumberFormatException if the score is not a valid integer
         */
        public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {

            String line = value.toString();

            // Split on newlines first so a value holding several lines is
            // still processed line by line.
            StringTokenizer records = new StringTokenizer(line, "\n");

            while (records.hasMoreTokens()) {

                // Fields within a line are separated by whitespace.
                StringTokenizer fields = new StringTokenizer(
                        records.nextToken());

                // A whitespace-only line carries no data; skip it instead of
                // failing the task with NoSuchElementException.
                if (!fields.hasMoreTokens()) {
                    continue;
                }

                String strName = fields.nextToken();  // student name
                String strScore = fields.nextToken(); // score as text
                int scoreInt = Integer.parseInt(strScore);

                context.write(new Text(strName), new IntWritable(scoreInt));
            }
        }
    }

    /**
     * Reducer that averages all scores received for one student.
     *
     * <p>This class must not be used as a combiner: its output is an average,
     * and averaging partial averages does not in general give the overall
     * average.
     */
    public static class Reduce extends
        Reducer<Text, IntWritable, Text, IntWritable> {

        /**
         * Writes the truncated integer average of {@code values} for {@code key}.
         *
         * @param key     student name
         * @param values  all scores emitted for this student
         * @param context sink for the (name, average) pair
         */
        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {

            // Accumulate in a long so that a large group cannot overflow int.
            long sum = 0;
            int count = 0;

            for (IntWritable score : values) {
                sum += score.get(); // running total
                count++;            // number of scores seen
            }

            // Integer division: the fractional part is intentionally dropped
            // because the output value type is IntWritable.
            int average = (int) (sum / count);
            context.write(key, new IntWritable(average));
        }
    }

    /**
     * Configures and submits the job, then exits with 0 on success or 1 on
     * failure.
     *
     * @param args optional command-line arguments: generic Hadoop options
     *             followed by {@code <in> <out>}; when none are given the
     *             paths default to {@code score_in} and {@code score_out}
     * @throws Exception if job setup or execution fails
     */
    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();

        // Address of the job tracker the job is submitted to; the original
        // author marks this setting as essential for their cluster.
        conf.set("mapred.job.tracker", "172.16.11.74:9001");

        // Honour command-line arguments when supplied; otherwise fall back to
        // the default input and output directories.
        String[] ioArgs = (args != null && args.length > 0)
                ? args
                : new String[] { "score_in", "score_out" };
        String[] otherArgs = new GenericOptionsParser(conf, ioArgs)
                .getRemainingArgs();

        if (otherArgs.length != 2) {

            System.err.println("Usage: Score Average <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "Score Average");
        job.setJarByClass(Score.class);

        // Mapper and reducer classes. No combiner is registered: Reduce emits
        // averages, so running it on partial map output could make the final
        // result an average of averages.
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        // Output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Input format: splits the input and supplies the record reader.
        job.setInputFormatClass(TextInputFormat.class);

        // Output format: supplies the record writer for the results.
        job.setOutputFormatClass(TextOutputFormat.class);

        // Input and output directories.
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Score

运行结果（平均分按整数除法取整，小数部分被舍去，例如赵六 230/3 ≈ 76.67 输出为 76）:

张三    82
李四    90
王五    82
赵六    76

具体打包运行步骤：

参考博文：http://www.cnblogs.com/-wangjiannan/p/3590324.html

hadoop —— MapReduce例子（求平均值）,布布扣,bubuko.com

hadoop —— MapReduce例子（求平均值）

原文：http://www.cnblogs.com/-wangjiannan/p/3590438.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)

hadoop —— MapReduce例子 （求平均值）

hadoop —— MapReduce例子（求平均值）