package com.xiangmu_bushu

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object Stream_WordCount {
  def main(args: Array[String]): Unit = {
    // Run locally with two threads; despite the "Stream_" prefix, this is a batch RDD job
    val conf = new SparkConf().setAppName("Stream_WordCount").setMaster("local[2]")
    // Create the SparkContext
    val sc = new SparkContext(conf)

    // 1. Read the input file into an RDD of lines
    val dataRdd = sc.textFile("D:\\IDEA_Maven\\monthlycheck\\src\\main\\resources\\a.txt")
    // Split each line into words
    val flatRdd = dataRdd.flatMap(_.split(" "))
    // Tag each word with an initial count of 1
    val mapRdd: RDD[(String, Int)] = flatRdd.map((_, 1))
    // Aggregate the counts by word
    val reduceRdd = mapRdd.reduceByKey(_ + _)
    // Write the results out (throws if the ./result directory already exists)
    reduceRdd.saveAsTextFile("./result")

    sc.stop()
  }
}
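If, say, a.txt contained the line "hello world hello spark" (a hypothetical input, not from the original post), the part files under ./result would hold the tuples' toString output: (hello,2), (world,1), (spark,1).

To inspect the counts directly in the console instead of writing a ./result directory, a minimal sketch reusing the reduceRdd from above; sortBy and collect are standard RDD operations, and collect is only safe here because a word-count result is small enough to fit on the driver:

// Sort by descending count, pull the results to the driver, and print them
reduceRdd.sortBy(_._2, ascending = false)
  .collect()
  .foreach { case (word, count) => println(s"$word\t$count") }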
Original: https://www.cnblogs.com/xjqi/p/12862402.html