\[ min_{w,b,\xi}{1\over{2}}{||w||}^2 + C\sum_{i=1}^m\xi^{(i)} \]
\[ s.t. \ \ y^{(i)}(w^{T}x^{(i)} + b), i=1,2,...,m \\xi^{(i)} \ge 0, i=1,2,...m \]
\[ min_{\alpha}{1\over{2}}\sum_{i=1}^m\sum_{j=1}^{m}\alpha^{(i)}\alpha^{(j)}y^{(i)}y^{(j)}K(x^{(i)},x^{(j)})-\sum_{i=1}^m\alpha^{(i)} \]
\[ s.t. \ \ \sum_{i=1}^m\alpha^{(i)}y^{(i)}=0 \0 \le \alpha^{(i)} \le C, i=1,2,...,m \]
\[ w^* = \sum_{i=1}^m\alpha^*_{i}y^{(i)}x^{(i)} \b^* = y^{(j)} - \sum_{i=1}^my^{(i)}\alpha^{(i)}K(x^{(i)}, x^{(j)}) \]
calc_E
函数中iter
函数中\[ \alpha^{new,unc}_2 = \alpha^{old}_2 + {{y_2}(E^{(i)} - E^{(j)})\over{\eta}} \ 其中\eta = K_{11} + K_{22} - 2K_{12} \ 注意: K_{11}指的是在使用核函数映射之后的输入样本中的第i行与第j行样本, \ 同理K_{22}指的是在使用核函数映射之后的输入样本中的第i行与第j行样本... \ 注意: \eta不能为小于等于0, 如果出现这种情况, 则在迭代函数中直接返回0 \]
clip_alpha
函数中
定义函数clip_alpha(alpha, L, H)
```py
计算\(\alpha^{new}_1\)
\[ \alpha^{new}_1=\alpha^{old}_1+y^{(1)}y^{(2)}(\alpha^{old}_2-\alpha^{new}_2) \]
选择第2个\(\alpha_2\), 选择依据就是让\(abs(^{(i)}-E^{(j)})\)最大, 那个\(j\)就是我们期望的值, 从而\(\alpha_2=\alpha_j\), 定义函数select_j(Ei, i, model)
py # model封装了整个SVM公式中的参数(C, xi)与数据(X, y) # 其中model还有一个E属性, E为一个mx2的矩阵, 初始情况下都为0, 如果E对应的alpha被访问处理过了, 就会在赋予model.E[i, :] = [1, Ei] def select_j(Ei, i, model): j = 0 max_delta_E = 0 Ej = 0 # 查找所有已经被访问过了样本 nonzeros_indice = nonzeros(model.E[:, 0])[0] if len(nonzeros_indice) > 1: # 在for循环中查找使得abs(Ei-Ej)最大的Ej和j for index in nonzeros_indice: # 选择的两个alpha不能来自同一个样本 if index == i: continue E_temp = calc_E(i, model) delta_E = abs(E_temp - Ei) if delta_E > max_delta_E: delta_E = max_delta_E j = index Ej = E_temp else: j = i while j = i: Ej = int(random.uniform(0, model.m)) return j, Ej
\(\alpha_1\)是否违反了KKT条件
if (yi * Ei > toler and alphai > 0) or (yi * Ei < -toler and alphaj < 0):
# 违反了
pass
else:
# 没有违反KKT, 直接返回0
pass
#!/usr/bin/env python
from numpy import *
class Model(object):
def __init__(self, X, y, C, toler, kernel_param):
self.X = X
self.y = y
self.C = C
self.toler = toler
self.kernel_param = kernel_param
self.m = shape(X)[0]
self.mapped_data = mat(zeros((self.m, self.m)))
for i in range(self.m):
self.mapped_data[:, i] = gaussian_kernel(self.X, X[i, :], self.kernel_param)
self.E = mat(zeros((self.m, 2)))
self.alphas = mat(zeros((self.m, 1)))
self.b = 0
def load_data(filename):
X = []
y = []
with open(filename, 'r') as fd:
for line in fd.readlines():
nums = line.strip().split(',')
X_temp = []
for i in range(len(nums)):
if i == len(nums) - 1:
y.append(float(nums[i]))
else:
X_temp.append(float(nums[i]))
X.append(X_temp)
return mat(X), mat(y)
def gaussian_kernel(X, l, kernel_param):
sigma = kernel_param
m = shape(X)[0]
mapped_data = mat(zeros((m, 1)))
for i in range(m):
mapped_data[i] = exp(-sum((X[i, :] - l).T * (X[i, :] - l) / (2 * sigma ** 2)))
return mapped_data
def clip_alpha(L, H, alpha):
if alpha > H:
alpha = H
elif alpha < L:
alpha = L
return alpha
def calc_b(b1, b2):
return (b1 + b2) / 2
def calc_E(i, model):
yi = float(model.y[i])
gxi = float(multiply(model.alphas, model.y).T * model.mapped_data[:, i] + model.b)
Ei = gxi - yi
return Ei
def select_j(Ei, i, model):
nonzero_indices = nonzero(model.E[:, 0].A)[0]
Ej = 0
j = 0
max_delta = 0
if len(nonzero_indices) > 1:
for index in nonzero_indices:
if index == i:
continue
E_temp = calc_E(index, model)
delta = abs(E_temp - Ei)
if delta > max_delta:
max_delta = delta
Ej = E_temp
j = index
else:
j = i
while j == i:
j = int(random.uniform(0, model.m))
Ej = calc_E(j, model)
return j, Ej
def iterate(i, model):
yi = model.y[i]
Ei = calc_E(i, model)
model.E[i] = [1, Ei]
# 如果alpahi不满足KKT条件, 则进行之后的操作, 选择alphaj, 更新alphai与alphaj, 还有b
if (yi * Ei > model.toler and model.alphas[i] > 0) or (yi * Ei < -model.toler and model.alphas[i] < model.C):
# alphai不满足KKT条件
# 选择alphaj
j, Ej = select_j(Ei, i, model)
yj = model.y[j]
alpha1old = model.alphas[i].copy()
alpha2old = model.alphas[j].copy()
eta = model.mapped_data[i, i] + model.mapped_data[j, j] - 2 * model.mapped_data[i, j]
if eta <= 0:
return 0
alpha2new_unclip = alpha2old + yj * (Ei - Ej) / eta
if yi == yj:
L = max(0, alpha2old + alpha1old - model.C)
H = min(model.C, alpha1old + alpha2old)
else:
L = max(0, alpha2old - alpha1old)
H = min(model.C, model.C - alpha1old + alpha2old)
if L == H:
return 0
alpha2new = clip_alpha(L, H, alpha2new_unclip)
if abs(alpha2new - alpha2old) < 0.00001:
return 0
alpha1new = alpha1old + yi * yj * (alpha2old - alpha2new)
b1new = -Ei - yi * model.mapped_data[i, i] * (alpha1new - alpha1old) - yj * model.mapped_data[j, i] * (alpha2new - alpha2old) + model.b
b2new = -Ej - yi * model.mapped_data[i, j] * (alpha1new - alpha1old) - yj * model.mapped_data[j, j] * (alpha2new - alpha2old) + model.b
model.b = calc_b(b1new, b2new)
model.alphas[i] = alpha1new
model.alphas[j] = alpha2new
model.E[i] = [1, calc_E(i, model)]
model.E[j] = [1, calc_E(j, model)]
return 1
return 0
def smo(X, y, C, toler, iter_num, kernel_param):
model = Model(X, y.T, C, toler, kernel_param)
changed_alphas = 0
current_iter = 0
for i in range(model.m):
changed_alphas += iterate(i, model)
print("iter:%d i:%d,pairs changed %d"
%(current_iter, i, changed_alphas))
current_iter += 1
print('start...')
while current_iter < iter_num and changed_alphas > 0:
changed_alphas = 0
# 处理支持向量
alphas_indice = nonzero((model.alphas.A > 0) * (model.alphas.A < C))[0]
for i in alphas_indice:
changed_alphas += iterate(i, model)
print("iter:%d i:%d,pairs changed %d"
%(current_iter, i, changed_alphas))
current_iter += 1
return model.alphas, model.b
原文:https://www.cnblogs.com/megachen/p/10491461.html