1.逻辑回归是怎么防止过拟合的?为什么正则化可以防止过拟合?(大家用自己的话介绍下)
逻辑回归是通过正则化来防止过拟合的,即在损失函数中加入参数范数的惩罚项(例如 L1 或 L2 范数)。
正则化是通过约束参数的范数使其不要太大,从而限制模型的复杂度、降低模型对训练数据中噪声的敏感程度,所以可以在一定程度上减少过拟合情况。
2.用logistic回归来进行实践操作,数据不限
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report#分类报告
import pandas as pd
import numpy as np
# Load the data set, fit a one-feature logistic regression and report accuracy.
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.linear_model import LogisticRegression

# NOTE: the original used typographic quotes here, which is a SyntaxError.
df = pd.read_csv('./data/178.csv', encoding='GB2312')
labels = list(df.columns.values)

# Column 5 is used as the single input feature, column 13 as the class label.
x = np.array(df.iloc[:, 5])
y = np.array(df.iloc[:, 13])

# Show the data shape and the class distribution. The script counts three
# label values (1, 2, 3), so all three are reported; the previous format
# string had only three placeholders and silently dropped the last count.
print("Shape of x: {0}; class 1: {1}; class 2: {2}; class 3: {3}".format(
    x.shape, y[y == 1].shape[0], y[y == 2].shape[0], y[y == 3].shape[0]))
print("Cancer df labels name: ", labels)  # column names of the data set

# Hold out 20% of the samples for testing (the split is random on every run).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# scikit-learn estimators expect a 2-D feature matrix, so the single feature
# is reshaped into one column. Labels are converted to strings once and reused.
X_train = x_train.reshape(-1, 1)
X = x_test.reshape(-1, 1)
y_train_str = y_train.astype(str)
y_test_str = y_test.astype(str)

model = LogisticRegression()
model.fit(X_train, y_train_str)  # train the model

train_score = model.score(X_train, y_train_str)
test_score = model.score(X, y_test_str)
print("train score: {train_score:.6f}; test score: {test_score:.6f}".format(train_score=train_score, test_score=test_score))

# Number of correct predictions on the test set. The comparison result must
# be summed: taking .shape[0] of it only yields the number of test samples,
# which made the old output always read N/N.
y_pred = model.predict(X)
n_matches = int(np.sum(y_pred == y_test_str))
print("matchs: {0}/{1}".format(n_matches, y_test.shape[0]))
原文:https://www.cnblogs.com/xwc520/p/13125047.html