首页 > 其他 > 详细

dataframe sparksession

时间:2020-07-19 21:00:36      阅读:82      评论:0      收藏:0      [点我收藏+]

 使用 SparkSession 通过 JDBC 读取 MySQL 表

import org.apache.spark.SparkConf
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Demo: load a relational table into a [[DataFrame]] through Spark's JDBC
 * data source and print it.
 *
 * The example reads the `student` table of the `test` database on
 * `hadoop01:3306` and displays it with `show()`.
 */
object DataFrameOperate {

  /**
   * Entry point. Builds a local Spark session, reads the table over JDBC,
   * prints it, and always releases the session afterwards.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("dataframe")
    val sparkSession = SparkSession.builder().config(sparkConf).getOrCreate()

    // The session is closed in `finally` so that a failing JDBC connection
    // or query does not leave the local Spark context running.
    try {
      // NOTE(review): connection URL and credentials are hard-coded for the
      // demo; externalize them before using this anywhere but a sandbox.
      // Presumably the MySQL JDBC driver must be on the classpath — confirm.
      val dataFrame: DataFrame = sparkSession.read.format("jdbc")
        .option("url", "jdbc:mysql://hadoop01:3306/test")
        .option("dbtable", "student")
        .option("user", "root")
        .option("password", "root")
        .load()
      dataFrame.show()
    } finally {
      sparkSession.close()
    }
  }

}

  

RDD 转换为 DataFrame(RDD => DataFrame)

import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, Row, SparkSession}

/**
 * Demo: convert an `RDD` of tuples into a [[DataFrame]] with an explicit
 * schema, then pivot the per-subject scores into columns.
 */
object DataFrameOperate {

  /**
   * Entry point. Builds a local Spark session, creates a DataFrame of
   * `(id, subject, score)` rows from an in-memory RDD, prints it, then prints
   * the average score per `id` with one column per `subject`. The session is
   * always released afterwards.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("dataframe")
    val sparkSession = SparkSession.builder().config(sparkConf).getOrCreate()

    // The session is closed in `finally` so that a failure in any Spark job
    // below does not leave the local Spark context running.
    try {
      val rdd: RDD[(Int, String, Int)] = sparkSession.sparkContext.parallelize(
        List(
          (1, "chinese", 86),
          (1, "english", 77),
          (1, "math", 99),
          (2, "chinese", 76),
          (2, "english", 86),
          (2, "math", 99),
          (3, "chinese", 77),
          (3, "english", 77),
          (3, "math", 99)
        ))

      // createDataFrame(RDD[Row], StructType) needs untyped rows, so each
      // tuple is converted to a Row first.
      val rowRDD: RDD[Row] = rdd.map(a => Row.fromTuple(a))

      // Schema matching the tuple layout; every column is declared nullable.
      val id: StructField = StructField("id", IntegerType, true)
      val subject: StructField = StructField("subject", StringType, true)
      val score: StructField = StructField("score", IntegerType, true)
      val structType: StructType = StructType(Array(id, subject, score))

      val dataFrame: DataFrame = sparkSession.createDataFrame(rowRDD, structType)
      dataFrame.show()

      // One output row per id, one column per distinct subject, each cell
      // holding the average score for that (id, subject) pair.
      dataFrame.groupBy("id").pivot("subject").avg("score").show()
    } finally {
      sparkSession.close()
    }
  }
}

输出:

+---+-------+-----+
| id|subject|score|
+---+-------+-----+
|  1|chinese|   86|
|  1|english|   77|
|  1|   math|   99|
|  2|chinese|   76|
|  2|english|   86|
|  2|   math|   99|
|  3|chinese|   77|
|  3|english|   77|
|  3|   math|   99|
+---+-------+-----+

+---+-------+-------+----+
| id|chinese|english|math|
+---+-------+-------+----+
|  1|   86.0|   77.0|99.0|
|  3|   77.0|   77.0|99.0|
|  2|   76.0|   86.0|99.0|
+---+-------+-------+----+

  

 

dataframe sparksession

原文:https://www.cnblogs.com/ls-oyang/p/13340698.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!