首页 > 其他 > 详细

spark wordcount

时间:2020-04-06 21:50:16      阅读:78      评论:0      收藏:0      [点我收藏+]

spark wordcount

 maven

<!-- Spark core dependency; the "_2.11" artifact suffix and version 2.3.0 must be kept consistent
     with the Spark/Scala build the job is submitted to (NOTE(review): confirm against the target cluster). -->
<dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.3.0</version>
        </dependency>

 

package cn.easyinfo.spark;

import java.util.Arrays;
import java.util.Iterator;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;

import scala.Tuple2;

/**
 * Spark word-count job.
 *
 * <p>Reads a text input, splits every line on runs of whitespace, counts how
 * many times each word occurs and writes the resulting (word, count) pairs
 * to the output path as text.
 *
 * <p>Usage: {@code MyJavaWordCount input output}
 */
public class MyJavaWordCount {

    /**
     * Job entry point.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the
     *             output path; the process exits with status 1 when fewer
     *             than two arguments are supplied
     */
    public static void main(String[] args) {
        // Argument check: both the input and the output path are required.
        if (args.length < 2) {
            System.err.println("Usage:MyJavaWordCount input output");
            System.exit(1);
        }

        String inputPath = args[0];
        String outputPath = args[1];

        // No master is set here; presumably it is supplied at submit time
        // (e.g. by spark-submit) -- verify against the deployment.
        SparkConf conf = new SparkConf().setAppName("MyJavaWordCount");

        // try-with-resources guarantees the context is closed even when one
        // of the stages below throws (previously it leaked on failure).
        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {
            // Read the input as an RDD of lines.
            JavaRDD<String> inputRDD = jsc.textFile(inputPath);

            // flatMap: one line -> zero or more words.
            JavaRDD<String> words = inputRDD.flatMap(new FlatMapFunction<String, String>() {

                private static final long serialVersionUID = 1L;

                @Override
                public Iterator<String> call(String line) throws Exception {
                    return tokenize(line);
                }
            });

            // mapToPair: word -> (word, 1).
            JavaPairRDD<String, Integer> pairRDD =
                    words.mapToPair(new PairFunction<String, String, Integer>() {

                        private static final long serialVersionUID = 1L;

                        @Override
                        public Tuple2<String, Integer> call(String word) throws Exception {
                            return new Tuple2<String, Integer>(word, 1);
                        }
                    });

            // reduceByKey: sum the 1s of identical words.
            JavaPairRDD<String, Integer> result =
                    pairRDD.reduceByKey(new Function2<Integer, Integer, Integer>() {

                        private static final long serialVersionUID = 1L;

                        @Override
                        public Integer call(Integer x, Integer y) throws Exception {
                            return x + y;
                        }
                    });

            // Persist the counts.
            result.saveAsTextFile(outputPath);
        }
    }

    /**
     * Splits a line into words on runs of whitespace, never yielding an
     * empty word.
     *
     * <p>{@code String.split} returns an empty first element when the line
     * starts with whitespace, and {@code [""]} for an empty line; trailing
     * empty elements are already removed by {@code split} itself. Only the
     * first element can therefore be empty, and it is dropped here so that
     * the empty string is not counted as a word.
     *
     * @param line one line of input text
     * @return an iterator over the non-empty words of {@code line}
     */
    private static Iterator<String> tokenize(String line) {
        String[] tokens = line.split("\\s+");
        if (tokens.length > 0 && tokens[0].isEmpty()) {
            return Arrays.asList(tokens).subList(1, tokens.length).iterator();
        }
        return Arrays.asList(tokens).iterator();
    }

}

打包

技术分享图片

 

 运行结果:技术分享图片

 

spark wordcount

原文:https://www.cnblogs.com/ptbx1t/p/12636545.html

(1)
(1)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!