from sklearn.linear_model import LogisticRegression as LR
from sklearn.datasets import load_breast_cancer
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the breast-cancer dataset; X is (n_samples, 30 features), y is binary.
data = load_breast_cancer()
X = data.data
y = data.target
# The original had bare expressions (e.g. `data.data.shape`) that only echo
# output in a notebook; in a script they are dead code, so print explicitly.
print(X.shape)

# Two logistic regressions that differ only in the penalty; the liblinear
# solver supports both L1 and L2 regularisation.
lrl1 = LR(penalty="l1", solver="liblinear", C=0.5, max_iter=1000)
lrl2 = LR(penalty="l2", solver="liblinear", C=0.5, max_iter=1000)

# coef_ is the key fitted attribute: one weight per feature.
# L1 drives many coefficients exactly to zero (implicit feature selection).
lrl1 = lrl1.fit(X, y)
print(lrl1.coef_)
print((lrl1.coef_ != 0).sum(axis=1))  # how many features L1 kept non-zero

# L2 shrinks the weights but keeps every feature non-zero.
lrl2 = lrl2.fit(X, y)
print(lrl2.coef_)
# Compare train/test accuracy of L1- vs L2-penalised logistic regression
# across 19 values of the inverse regularisation strength C in [0.05, 1].
l1 = []
l2 = []
l1test = []
l2test = []

Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, y, test_size=0.3, random_state=420)

C_range = np.linspace(0.05, 1, 19)
for c in C_range:
    lrl1 = LR(penalty="l1", solver="liblinear", C=c, max_iter=1000)
    lrl2 = LR(penalty="l2", solver="liblinear", C=c, max_iter=1000)

    lrl1 = lrl1.fit(Xtrain, Ytrain)
    # accuracy_score's documented signature is (y_true, y_pred); the original
    # passed them swapped — harmless for accuracy (it is symmetric) but
    # misleading, so use the correct order.
    l1.append(accuracy_score(Ytrain, lrl1.predict(Xtrain)))
    l1test.append(accuracy_score(Ytest, lrl1.predict(Xtest)))

    lrl2 = lrl2.fit(Xtrain, Ytrain)
    l2.append(accuracy_score(Ytrain, lrl2.predict(Xtrain)))
    l2test.append(accuracy_score(Ytest, lrl2.predict(Xtest)))

graph = [l1, l2, l1test, l2test]
color = ["green", "black", "lightgreen", "gray"]
label = ["L1", "L2", "L1test", "L2test"]

plt.figure(figsize=(6, 6))
# zip the parallel lists instead of indexing with range(len(...)).
for curve, col, lab in zip(graph, color, label):
    plt.plot(C_range, curve, col, label=lab)
plt.legend(loc=4)  # loc=4 places the legend in the lower-right corner
plt.show()
from sklearn.linear_model import LogisticRegression as LR
from sklearn.datasets import load_breast_cancer
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score
from sklearn.feature_selection import SelectFromModel

data = load_breast_cancer()
# Bare expressions from the notebook original printed nothing in a script;
# make the outputs explicit.
print(data.data.shape)

LR_ = LR(solver="liblinear", C=0.9, random_state=420)
# Baseline: 10-fold CV accuracy on all 30 features.
print(cross_val_score(LR_, data.data, data.target, cv=10).mean())

# Embedded feature selection: keep features whose coefficient (measured with
# the L1 norm, norm_order=1) clears SelectFromModel's default threshold.
X_embedded = SelectFromModel(LR_, norm_order=1).fit_transform(data.data, data.target)
print(X_embedded.shape)
# CV accuracy on the reduced feature set, for comparison with the baseline.
print(cross_val_score(LR_, X_embedded, data.target, cv=10).mean())
# Sweep the SelectFromModel threshold from 0 up to the largest |coefficient|
# and compare CV accuracy on the full vs. the selected feature set.
fullx = []
fsx = []

threshold = np.linspace(0, abs(LR_.fit(data.data, data.target).coef_).max(), 20)

# The full-feature score does not depend on the threshold, and with a fixed
# random_state the CV result is deterministic — compute it once instead of
# re-running the 5-fold CV on every one of the 20 iterations as the original did.
full_score = cross_val_score(LR_, data.data, data.target, cv=5).mean()

# enumerate replaces the original's manual `k = 0; ...; k += 1` counter.
for k, t in enumerate(threshold):
    X_embedded = SelectFromModel(LR_, threshold=t).fit_transform(data.data, data.target)
    fullx.append(full_score)
    fsx.append(cross_val_score(LR_, X_embedded, data.target, cv=5).mean())
    # Report (threshold, number of surviving features) per step.
    print((threshold[k], X_embedded.shape[1]))

plt.figure(figsize=(20, 5))
plt.plot(threshold, fullx, label="full")
plt.plot(threshold, fsx, label="feature selection")
plt.xticks(threshold)
plt.legend()
plt.show()
# Coarse scan over C in [0.01, 10.01) with step 0.5: 10-fold CV accuracy on
# all features versus accuracy after L1-norm embedded feature selection.
fullx = []
fsx = []

C = np.arange(0.01, 10.01, 0.5)

for c in C:
    LR_ = LR(solver="liblinear", C=c, random_state=420)
    fullx.append(cross_val_score(LR_, data.data, data.target, cv=10).mean())
    X_embedded = SelectFromModel(LR_, norm_order=1).fit_transform(data.data, data.target)
    fsx.append(cross_val_score(LR_, X_embedded, data.target, cv=10).mean())

# Best feature-selection score and the C value that produced it.
best_score = max(fsx)
print(best_score, C[fsx.index(best_score)])

plt.figure(figsize=(20, 5))
plt.plot(C, fullx, label="full")
plt.plot(C, fsx, label="feature selection")
plt.xticks(C)
plt.legend()
plt.show()
# Fine scan of C around the coarse optimum: [6.05, 7.05) in steps of 0.005.
fullx = []
fsx = []

C = np.arange(6.05, 7.05, 0.005)

for c in C:
    LR_ = LR(solver="liblinear", C=c, random_state=420)
    fullx.append(cross_val_score(LR_, data.data, data.target, cv=10).mean())
    X_embedded = SelectFromModel(LR_, norm_order=1).fit_transform(data.data, data.target)
    fsx.append(cross_val_score(LR_, X_embedded, data.target, cv=10).mean())

print(max(fsx), C[fsx.index(max(fsx))])

plt.figure(figsize=(20, 5))
plt.plot(C, fullx, label="full")
plt.plot(C, fsx, label="feature selection")
plt.xticks(C)
plt.legend()
plt.show()

# The winning C from the scan. The literal 6.069999999999999 is np.arange
# float drift for "6.07"; name it once instead of repeating the magic value.
best_C = 6.069999999999999

# Validate model quality before dimensionality reduction (all 30 features).
# The original left these results as bare expressions (no-ops in a script),
# so print them explicitly.
LR_ = LR(solver="liblinear", C=best_C, random_state=420)
print(cross_val_score(LR_, data.data, data.target, cv=10).mean())

# Validate model quality after dimensionality reduction.
LR_ = LR(solver="liblinear", C=best_C, random_state=420)
X_embedded = SelectFromModel(LR_, norm_order=1).fit_transform(data.data, data.target)
print(cross_val_score(LR_, X_embedded, data.target, cv=10).mean())
print(X_embedded.shape)
机器学习sklearn(57):算法实例(十四)分类(七)逻辑回归(二)linear_model.LogisticRegression(一) 重要参数
原文:https://www.cnblogs.com/qiu-hua/p/14940780.html