编程实现对率回归,并给出西瓜数据集3.0\(\alpha\)上的结果。
对数几率回归最小化损失函数(西瓜书公式3.27)如下:
证明:
更新方式为
\(\alpha\) 为步长。
Accuracy = 0.71
import numpy as np
import matplotlib.pyplot as plt
def loss(X, y, beta): # 根据损失公式计算当前参数 beta 时打损失
loss1 = np.sum(-y * (X.dot(beta)))
loss2 = np.sum(np.log(1 + np.exp(X.dot(beta))))
return loss1 + loss2
def grad(X, y, beta): # 根据梯度公式计算当前参数 beta 时的一阶梯度
g = np.exp(X.dot(beta))
grad1 = -y.dot(X)
grad2 = (g / (1.0 + g)).dot(X)
return grad1 + grad2
def get_data(data): # 根据原始数据得到增加1截距项X,y,这里就不化分训练和测试集
X = [(item.split(‘,‘)) for item in data.split()]
X = np.array(X, dtype=np.float)
y = X[:, 3]
X = X[:,[1,2]]
X = np.hstack([np.ones((len(X),1)), X])
return X, y
def get_acc(X, y_true, beta): # 计算准确率
y_pred = 1. / (1 + np.exp(-X.dot(beta)))
y_pred = np.array(y_pred >= 0.5, dtype=np.float)
acc = (y_pred == y_true).sum() / len(y_pred)
return acc
def run(X, y):
episilon = 1e-5 # 如果两次更新损失小于次,停止更新
max_epoch = 1000 # 最大迭代次数
alpha = 0.1 # 学习率
beta = np.random.random(X.shape[1]) # 待学习参数
losses = [] # 记录损失
for i in range(max_epoch):
last_beta = beta.copy()
grad_beta = grad(X, y, beta) # 计算梯度
beta -= alpha * grad_beta # 梯度下降法
if (abs(loss(X,y,beta) - loss(X,y,grad_beta))<episilon):
break
losses.append(loss(X, y, beta)) # 记录损失
acc = get_acc(X, y, beta)x-special/nautilus-clipboard
copy

return acc, losses
# 编号,密度,含糖率,好1(坏0)瓜
data = """
1,0.697,0.46,1
2,0.774,0.376,1
3,0.634,0.264,1
4,0.608,0.318,1
5,0.556,0.215,1
6,0.403,0.237,1
7,0.481,0.149,1
8,0.437,0.211,1
9,0.666,0.091,0
10,0.243,0.267,0
11,0.245,0.057,0
12,0.343,0.099,0
13,0.639,0.161,0
14,0.657,0.198,0
15,0.36,0.37,0
16,0.593,0.042,0
17,0.719,0.103,0
"""
X, y = get_data(data)
acc, losses = run(X, y) # 获取准确率和损失list
print(f"Accuracy = {acc:.2}")
# 可视化
# ax1 显示原数据
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(12,4))
ax1.scatter(X[y==0,1], X[y==0,2], c=‘r‘, label="bad")
ax1.scatter(X[y==1,1], X[y==1,2], c=‘g‘, label="good")
ax1.set_xlabel("density", fontsize=15)
ax1.set_ylabel("sugar", fontsize=15)
ax1.legend()
# ax2 显示损失变化
ax2.plot(range(len(losses)), losses, ‘b‘)
ax2.set_xlabel("epoch", fontsize=15)
ax2.set_ylabel("loss", fontsize=15)
plt.show()
原文:https://www.cnblogs.com/forcekeng/p/12830612.html