首页 > 其他 > 详细

Spark RDD类源码学习(未完)

时间:2016-05-06 21:56:45      阅读:356      评论:0      收藏:0      [点我收藏+]

每天进步一点点~开搞~

 

abstract class RDD[T: ClassTag](
  //@transient 注解表示将字段标记为瞬态的
    @transient private var _sc: SparkContext,
  // Seq是序列,元素有插入的先后顺序,可以有重复的元素。
    @transient private var deps: Seq[Dependency[_]]
  ) extends Serializable with Logging {

  
  // Constructor-time check: detect an RDD whose element type is itself an RDD (a nested RDD).
  if (classOf[RDD[_]].isAssignableFrom(elementClassTag.runtimeClass)) {
    // This is a warning instead of an exception in order to avoid breaking user programs that
    // might have defined nested RDDs without running jobs with them.
    logWarning("Spark does not support nested RDDs (see SPARK-5063)")
  }

//_sc 被标记为 @transient;当它为 null 时(例如在其他 transformation 内部调用 RDD 操作)会抛出 SparkException,因此 RDD 的 transformation 和 action 只能在 driver 端调用
  /**
   * Null-checked accessor for the `_sc` constructor field.
   *
   * `_sc` is declared `@transient`; when it is null this accessor throws instead of
   * handing a null context back to the caller.
   *
   * @return the non-null `_sc`
   * @throws SparkException if `_sc` is null
   */
  private def sc: SparkContext = Option(_sc).getOrElse {
    throw new SparkException(
      "RDD transformations and actions can only be invoked by the driver, not inside of other " +
      "transformations; for example, rdd1.map(x => rdd2.values.count() * x) is invalid because " +
      "the values transformation and count action cannot be performed inside of the rdd1.map " +
      "transformation. For more information, see SPARK-5063.")
  }

//构建一个RDD应该是一对一的关系,比如子RDD对应唯一的父RDD
  /**
   * Auxiliary constructor for an RDD with exactly one parent.
   *
   * Delegates to the primary constructor with `oneParent.context` and a single-element
   * dependency list holding a `OneToOneDependency` on `oneParent`.
   *
   * @param oneParent the single parent RDD
   */
  def this(@transient oneParent: RDD[_]) = {
    this(oneParent.context, List(new OneToOneDependency(oneParent)))
  }



  /** Returns the `_conf` field as-is (no copy is made here); visible only inside the `spark` package. */
  private[spark] def conf: SparkConf = _conf


//sparkconf的设置
/**
 * Public accessor for the configuration: returns the result of `conf.clone()` rather than
 * `conf` itself.
 */
def getConf: SparkConf = conf.clone()

//获取相应的配置信息
/** Returns the `_jars` field. */
def jars: Seq[String] = _jars
/** Returns the `_files` field. */
def files: Seq[String] = _files
/** Looks up the "spark.master" key in `_conf`. */
def master: String = _conf.get("spark.master")
/** Looks up the "spark.app.name" key in `_conf`. */
def appName: String = _conf.get("spark.app.name")


/** Reads the boolean "spark.eventLog.enabled" from `_conf`, passing `false` as the default. */
private[spark] def isEventLogEnabled: Boolean = _conf.getBoolean("spark.eventLog.enabled", false)
  /** Returns the `_eventLogDir` field (an optional URI). */
  private[spark] def eventLogDir: Option[URI] = _eventLogDir
  /** Returns the `_eventLogCodec` field (an optional codec name). */
  private[spark] def eventLogCodec: Option[String] = _eventLogCodec

//临时文件夹的名称为 "spark-" 加上随机生成的 UUID
  /** Folder name built once at initialization: the prefix "spark-" followed by a random UUID string. */
  val externalBlockStoreFolderName = s"spark-${randomUUID.toString()}"

//判断是否为local模式
/**
 * Whether the `master` string denotes local mode: true when it is exactly "local" or
 * starts with "local[".
 */
def isLocal: Boolean =
  if (master == "local") true else master.startsWith("local[")

 

Spark RDD类源码学习(未完)

原文:http://www.cnblogs.com/yangsy0915/p/5467152.html

(0)
(0)
   
举报
评论 一句话评论(0
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!