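# Gradient boosting, like random forests, is built on decision trees, but the
# trees are built sequentially and kept very shallow (max_depth < 5).
# The key parameters are n_estimators and learning_rate: tuning them controls
# how much the model overfits and therefore improves its generalization.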
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# Load the breast-cancer dataset and hold out a test split; random_state=0
# keeps the split reproducible between runs.
cancer = load_breast_cancer()
x_train, x_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, random_state=0
)

# Baseline: gradient boosting with no hyperparameter tuning.
gbrt = GradientBoostingClassifier()
gbrt.fit(x_train, y_train)
print(gbrt.score(x_train, y_train))  # score on the training split
print(gbrt.score(x_test, y_test))  # score on the held-out test split

# Regularise the model (the original note describes this step as pre-pruning)
# by lowering the learning rate.
# The keyword must be `n_estimators`; the earlier `n_estimate` spelling is not
# a constructor parameter and raised TypeError.
# NOTE: this model is only constructed here; call fit() before scoring it.
gbrt = GradientBoostingClassifier(n_estimators=100, learning_rate=0.01)
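# n_estimators controls the number of trees; learning_rate controls how
# strongly each new tree corrects the errors of the previous ones. A larger
# learning_rate allows stronger corrections and a more complex model; a
# smaller one gives a simpler model.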
# Original article: https://www.cnblogs.com/thechain/p/9310689.html