import org.apache.spark.mllib.tree.DecisionTree
import org.apache.spark.mllib.util.MLUtils

// Trains a decision-tree classifier on the sample LIBSVM data set and reports
// the error rate measured on that same training data.
//
// NOTE(review): `sc` is not defined in this snippet; presumably it is the
// SparkContext that spark-shell provides -- confirm before running elsewhere.

// Load the data once and cache it, since it is used for training, for
// prediction, and for the final count.
val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").cache()

// Training parameters.
val numClasses = 2
// Left empty: per the MLlib decision-tree guide, an empty map means every
// feature is treated as continuous.
val categoricalFeaturesInfo = Map[Int, Int]()
val impurity = "gini"
val maxDepth = 5
val maxBins = 100

val model = DecisionTree.trainClassifier(
  data,
  numClasses,
  categoricalFeaturesInfo,
  impurity,
  maxDepth,
  maxBins
)

// Pair each point's true label with the model's prediction for it.
val labelAndPreds = data.map { point =>
  val prediction = model.predict(point.features)
  (point.label, prediction)
}

// Training error = misclassified points / total points.
val misclassified = labelAndPreds.filter { case (label, prediction) => label != prediction }.count()
val trainErr = misclassified.toDouble / data.count()

println("Training Error = " + trainErr)
println("Learned classification tree model:\n" + model)
// Original article (原文): http://my.oschina.net/u/1426212/blog/374966