每天进步一点点~开搞~
/**
 * Excerpt of the `RDD` base class as quoted in this post.
 *
 * The primary constructor takes the owning `SparkContext` and the list of
 * dependencies; both are held in `@transient` private vars.
 *
 * NOTE(review): the snippet is an excerpt. It does not contain the closing
 * brace of the class, and the members after the auxiliary constructor read
 * fields (`_conf`, `_jars`, `_files`, `_eventLogDir`, `_eventLogCodec`) that are
 * not declared anywhere in the visible code -- they look like they were pasted
 * from `SparkContext`; confirm against the upstream source.
 *
 * @tparam T element type; a `ClassTag` for it must be available
 * @param _sc  the SparkContext this RDD was created with
 * @param deps the dependencies of this RDD
 */
abstract class RDD[T: ClassTag](
    // The @transient annotation marks the field as transient.
    @transient private var _sc: SparkContext,
    // Seq is a sequence: elements keep their insertion order and duplicates are allowed.
    @transient private var deps: Seq[Dependency[_]]
  ) extends Serializable with Logging {

  if (classOf[RDD[_]].isAssignableFrom(elementClassTag.runtimeClass)) {
    // NOTE(review): the original snippet only kept the comment fragment
    // "user programs that" here. The body below is reconstructed from the
    // upstream Spark source -- confirm against the Spark version being discussed.
    //
    // This is a warning instead of an exception in order to avoid breaking user programs that
    // might have defined nested RDDs without running jobs with them.
    logWarning("Spark does not support nested RDDs (see SPARK-5063)")
  }

  // Presumably RDD operations can only be used once a SparkContext object is available:
  // the accessor throws a SparkException when `_sc` is null, otherwise it returns `_sc`.
  private def sc: SparkContext = {
    if (_sc == null) {
      throw new SparkException(
        "RDD transformations and actions can only be invoked by the driver, not inside of other " +
        "transformations; for example, rdd1.map(x => rdd2.values.count() * x) is invalid because " +
        "the values transformation and count action cannot be performed inside of the rdd1.map " +
        "transformation. For more information, see SPARK-5063.")
    }
    _sc
  }

  // Builds an RDD with a one-to-one dependency on a single parent,
  // i.e. the child RDD corresponds to exactly one parent RDD.
  def this(@transient oneParent: RDD[_]) =
    this(oneParent.context, List(new OneToOneDependency(oneParent)))

  // NOTE(review): everything below reads `_conf`, `_jars`, `_files`, `_eventLogDir` and
  // `_eventLogCodec`, which are not declared in this excerpt -- presumably these members
  // belong to SparkContext rather than RDD; verify before relying on this snippet.

  // The SparkConf settings (returned as-is, without copying).
  private[spark] def conf: SparkConf = _conf

  // Returns the configuration; callers receive a clone of `conf`.
  def getConf: SparkConf = conf.clone()

  def jars: Seq[String] = _jars
  def files: Seq[String] = _files
  def master: String = _conf.get("spark.master")
  def appName: String = _conf.get("spark.app.name")

  private[spark] def isEventLogEnabled: Boolean = _conf.getBoolean("spark.eventLog.enabled", false)
  private[spark] def eventLogDir: Option[URI] = _eventLogDir
  private[spark] def eventLogCodec: Option[String] = _eventLogCodec

  // Name of the temporary folder: the prefix "spark-" followed by a random UUID.
  val externalBlockStoreFolderName: String = "spark-" + randomUUID.toString()

  // Whether the master is local mode: exactly "local", or starting with "local[".
  def isLocal: Boolean = master == "local" || master.startsWith("local[")

  // (The quoted excerpt ends here; the class's closing brace is not part of the snippet.)
原文:http://www.cnblogs.com/yangsy0915/p/5467152.html