Hive version of WordCount: https://www.cnblogs.com/Coeus-P/p/13356392.html
Spark version (Scala):
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

object WordCount {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setAppName("wordCount")
    conf.setMaster("local")
    val sc = new SparkContext(conf)

    // Read the input file; each element is one line, e.g. "A B"
    val lines: RDD[String] = sc.textFile("./data/words")

    // Split each line on spaces and flatten into individual words: "A", "B"
    val words: RDD[String] = lines.flatMap(line => line.split(" "))

    // Pair each word with an initial count of 1: ("A", 1), ("B", 1)
    val pairWords: RDD[(String, Int)] = words.map(word => (word, 1))

    // Sum the counts per key: ("A", 1), ("B", 2)
    val result: RDD[(String, Int)] = pairWords.reduceByKey((v1, v2) => v1 + v2)

    // Sort by word count (ascending by default)
    val sorted: RDD[(String, Int)] = result.sortBy(tp => tp._2)

    sorted.foreach(println)

    sc.stop()
  }
}
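As a quick sanity check, here is a sketch of one run. The sample file contents below are hypothetical, chosen only to match the counts traced in the inline comments:

// hypothetical contents of ./data/words:
//   A B
//   B
//
// console output (ascending by count):
//   (A,1)
//   (B,2)

Two notes on the last steps: sortBy is ascending by default, so result.sortBy(tp => tp._2, ascending = false) would list the most frequent words first. Also, foreach runs on the executors; this prints to the driver console only in local mode, so on a cluster you would bring the results back first, e.g. sorted.collect().foreach(println).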
Original post: https://www.cnblogs.com/Coeus-P/p/13769702.html