在 pom.xml 文件中添加以下依赖:
<!-- Flink Scala API artifact. NOTE(review): the _2.11 suffix is presumably the Scala binary version; confirm it matches the Scala version used by the project. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-scala_2.11</artifactId>
<!-- Resolved from the flink.version Maven property, which is not defined in this snippet and must be declared elsewhere in the pom. -->
<version>${flink.version}</version>
</dependency>
<!-- Flink streaming Scala API artifact; shares the same flink.version property as the dependency above. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>${flink.version}</version>
</dependency>
创建一个 Scala 类文件
创建一个 Scala 对象(object),代码如下:
package com.stuscala.batch
import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.flink.api.scala._
/**
 * Batch word-count example using the Flink Scala API.
 *
 * Counts how often each lower-cased word occurs in a small, hard-coded set
 * of sentences and prints the resulting (word, count) pairs.
 */
object WordCount {

  /**
   * Program entry point: builds the word-count pipeline and prints its result.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // A batch program needs an ExecutionEnvironment.
    val env = ExecutionEnvironment.getExecutionEnvironment

    // fromElements(elements: _*) creates a data set from the given sequence
    // of objects; all objects must be of the same type.
    val text = env.fromElements(
      "Who‘s there?",
      "I think I hear them. Stand, ho! Who‘s there?", "hah")

    val counts = text
      // Lower-case each line, split it on runs of non-word characters and
      // drop the empty tokens that a leading separator would produce.
      .flatMap { line => line.toLowerCase.split("\\W+").filter(_.nonEmpty) }
      // Pair every word with an initial count of 1.
      .map { word => (word, 1) }
      // Group by the first tuple field (the word) ...
      .groupBy(0)
      // ... and sum the second tuple field (the count) within each group.
      .sum(1)

    // Print the (word, count) pairs.
    counts.print()
  }
}
原文:https://www.cnblogs.com/braveym/p/13686141.html