首页 > Web开发 > 详细

七、统计网站中不同省份用户的访问数

时间:2016-05-09 23:40:34      阅读:426      评论:0      收藏:0      [点我收藏+]

一、需求

针对log日志中给定的信息,统计网站中不同省份用户的访问数

二、编程代码

 

package org.apache.hadoop.studyhdfs.mapreduce;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.jboss.netty.util.internal.StringUtil;

/**
 * MapReduce job that counts log records per province id.
 *
 * <p>Each input line is split on tab characters. The mapper emits
 * {@code (provinceId, 1)} for every line that has enough fields and a
 * parseable province id; the reducer sums the ones per province id.
 */
public class ProvinceCountMapReduce extends Configured implements Tool {

    /** Exit code returned by {@link #run(String[])} when the arguments are unusable. */
    private static final int EXIT_USAGE = 2;

    //1.map
    /**
     * Mapper: {@code <LongWritable, Text>} in, {@code <IntWritable, IntWritable>} out.
     * Emits the province id as key and the constant 1 as value.
     */
    public static class WordCountMapper extends Mapper<LongWritable,Text,IntWritable,IntWritable>{
        /** Lines with fewer tab-separated fields than this are discarded. */
        private static final int MIN_FIELD_COUNT = 30;
        /** Index of the url field in the tab-separated line. */
        private static final int URL_INDEX = 1;
        /** Index of the province id field in the tab-separated line. */
        private static final int PROVINCE_ID_INDEX = 23;

        // Writables are reused across map() calls to avoid per-record allocation.
        private final IntWritable mapOutputKey = new IntWritable();
        private final IntWritable mapOutputValue = new IntWritable(1);

        /**
         * Parses one log line and emits {@code (provinceId, 1)} if the line is valid.
         *
         * <p>A line is silently skipped when it has fewer than
         * {@link #MIN_FIELD_COUNT} fields, when the url or province id field is
         * blank, or when the province id is not a valid integer.
         *
         * @param key     input key supplied by the input format (unused)
         * @param value   one line of the log
         * @param context task context used to emit the output pair
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split("\t");

            // Check the field count BEFORE indexing; short lines would otherwise
            // throw ArrayIndexOutOfBoundsException and fail the whole task.
            if (fields.length < MIN_FIELD_COUNT) {
                return;
            }

            String url = fields[URL_INDEX];
            String provinceIdValue = fields[PROVINCE_ID_INDEX];
            if (StringUtils.isBlank(provinceIdValue) || StringUtils.isBlank(url)) {
                return;
            }

            int provinceId;
            try {
                provinceId = Integer.parseInt(provinceIdValue);
            } catch (NumberFormatException ignored) {
                // Malformed province id: treat the record as dirty data and drop it.
                return;
            }
            // Integer.MAX_VALUE was the "unset" sentinel in the earlier version of
            // this mapper and was never emitted; keep skipping it so output is unchanged.
            if (provinceId == Integer.MAX_VALUE) {
                return;
            }

            mapOutputKey.set(provinceId);
            context.write(mapOutputKey, mapOutputValue);
        }

    }

    //2.reduce
    /**
     * Reducer: sums the counts for each province id. The operation is a plain
     * integer sum, so the same class is also registered as the combiner.
     */
    public static class WordCountReduce extends Reducer<IntWritable,IntWritable,IntWritable,IntWritable>{
        // Reused across reduce() calls to avoid per-key allocation.
        private final IntWritable outputValue = new IntWritable();

        /**
         * Sums all values for one province id and emits {@code (provinceId, sum)}.
         *
         * @param key     province id
         * @param values  partial counts for this province id
         * @param context task context used to emit the output pair
         */
        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values,Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            outputValue.set(sum);
            context.write(key, outputValue);
        }

    }

    /**
     * Configures and submits the job, then waits for it to finish.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @return 0 if the job succeeded, 1 if it failed, 2 if fewer than two paths were given
     * @throws Exception if job setup or submission fails
     */
    @Override
    public int run(String[] args) throws Exception{
        if (args == null || args.length < 2) {
            System.err.println("Usage: " + this.getClass().getSimpleName()
                    + " <input path> <output path>");
            return EXIT_USAGE;
        }

        //1.get Configuration
        Configuration conf = super.getConf();
        //2.create job
        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        job.setJarByClass(ProvinceCountMapReduce.class);

        //3.set job
        //3.1 set input
        FileInputFormat.addInputPath(job, new Path(args[0]));

        //3.2 set mapper
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        //3.3 set combiner: pre-aggregate map output to cut shuffle volume;
        // valid because the reducer's input and output types are identical and
        // it only adds integers.
        job.setCombinerClass(WordCountReduce.class);

        //3.4 set reducer
        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        //3.5 set output
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        //4.submit and block until the job completes
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    /**
     * Entry point. Uses the paths given on the command line; when no arguments
     * are supplied, falls back to the built-in sample input and output paths.
     *
     * @param args optional {@code <input path> <output path>}
     * @throws Exception if the job cannot be run
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length == 0) {
            // Defaults preserved from the original tutorial so a bare launch still works.
            args = new String[]{
                    "hdfs://Hadoop-senior02.beifeng.com:8020/input/2015082818",
                    "hdfs://Hadoop-senior02.beifeng.com:8020/output15/"
            };
        }
        Configuration conf = new Configuration();
        // Sets the map-output compression property to "true" for this job.
        conf.set("mapreduce.map.output.compress", "true");
        int status = ToolRunner.run(conf, new ProvinceCountMapReduce(), args);
        System.exit(status);
    }

}

 

三、运行结果

1)运行代码:bin/hdfs dfs -text /output15/par*

2)运行结果:

1 3527
2 1672
3 511
4 325
5 776
6 661
7 95
8 80
9 183
10 93
11 135
12 289
13 264
14 374
15 163
16 419
17 306
18 272
19 226
20 2861
21 124
22 38
23 96
24 100
25 20
26 157
27 49
28 21
29 85
30 42
32 173

七、统计网站中不同省份用户的访问数

原文:http://www.cnblogs.com/really0612/p/5475966.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!