1 # Kernal Princlple Component Analysis之实现 2 3 import numpy 4 from matplotlib import pyplot as plt 5 6 numpy.random.seed(0) 7 8 9 def getOriData(type1Size=100, type2Size=100): 10 ‘‘‘ 11 生成特征提取数据集 12 ‘‘‘ 13 theta1 = numpy.random.uniform(0, 2 * numpy.pi, type1Size) 14 theta2 = numpy.random.uniform(0, 2 * numpy.pi, type2Size) 15 16 R1 = numpy.random.uniform(0, 0.3, type1Size).reshape((-1, 1)) 17 R2 = numpy.random.uniform(0.7, 1, type2Size).reshape((-1, 1)) 18 X1 = R1 * numpy.hstack((numpy.cos(theta1).reshape((-1, 1)), numpy.sin(theta1).reshape((-1, 1)))) 19 X2 = R2 * numpy.hstack((numpy.cos(theta1).reshape((-1, 1)), numpy.sin(theta1).reshape((-1, 1)))) 20 return X1, X2 21 22 23 class KPCA(object): 24 25 def __init__(self, X, mu=1): 26 ‘‘‘ 27 X: 特征提取数据集, 1行代表1个样本 28 ‘‘‘ 29 self.__X = X # 特征提取数据集 30 self.__mu = mu # gaussian kernel之超参数 31 32 self.__mean = numpy.mean(X, axis=0) 33 self.__A = (X - self.__mean).T 34 35 36 def get_XTra(self, featureCnt=1): 37 ‘‘‘ 38 获取所有样本特征提取结果 39 featureCnt: 待提取特征数 40 ‘‘‘ 41 XTra = list() 42 for xOri in self.__X: 43 xTra = self.get_xTra(xOri, featureCnt) 44 XTra.append(xTra) 45 XTra = numpy.array(XTra) 46 return XTra 47 48 49 def get_xTra(self, xOri, featureCnt=1): 50 ‘‘‘ 51 获取指定样本特征提取结果 52 ‘‘‘ 53 A = self.__A 54 mu = self.__mu 55 mean = self.__mean.reshape((-1, 1)) 56 if not hasattr(self, "eigVals"): 57 KAA = self.__get_KAA(A, mu) 58 self.__calc_eigs(KAA) 59 eigVecs = numpy.real(self.eigVecs) 60 61 x = numpy.array(xOri).reshape((-1, 1)) 62 xTra = list() 63 for i in range(featureCnt): 64 alpha = eigVecs[:, i:i+1] 65 hVal = self.__get_hVal(x, mean, alpha, A, mu) 66 xTra.append(hVal) 67 xTra = numpy.array(xTra) 68 return xTra 69 70 71 def __get_hVal(self, x, mean, alpha, A, mu): 72 KAx = self.__get_KAx(A, x - mean, mu) 73 hVal = numpy.matmul(alpha.T, KAx)[0, 0] 74 return hVal 75 76 77 def __calc_eigs(self, KAA): 78 oriEigVals, oriEigVecs = numpy.linalg.eig(KAA) 79 seqArr = numpy.argsort(-oriEigVals) 80 self.eigVals = oriEigVals[seqArr] 81 self.eigVecs = oriEigVecs[:, seqArr] 82 83 84 def __get_KAx(self, A, x, mu): 85 KAx = numpy.zeros((A.shape[1], 1)) 86 for rowIdx in range(KAx.shape[0]): 87 x1 = A[:, rowIdx:rowIdx+1] 88 val = self.__calc_gaussian(x1, x, mu) 89 KAx[rowIdx, 0] = val 90 return KAx 91 92 93 def __get_KAA(self, A, mu): 94 KAA = numpy.zeros((A.shape[1], A.shape[1])) 95 for rowIdx in range(KAA.shape[0]): 96 for colIdx in range(rowIdx + 1): 97 x1 = A[:, rowIdx:rowIdx+1] 98 x2 = A[:, colIdx:colIdx+1] 99 val = self.__calc_gaussian(x1, x2, mu) 100 KAA[rowIdx, colIdx] = KAA[colIdx, rowIdx] = val 101 return KAA 102 103 104 def __calc_gaussian(self, x1, x2, mu): 105 val = numpy.math.exp(-mu * numpy.linalg.norm(x1 - x2) ** 2) 106 # val = (numpy.sum(x1 * x2) + 1) 107 return val 108 109 110 class KPCAPlot(object): 111 112 @classmethod 113 def show_XTra(cls, X1Ori, X2Ori, mu=1): 114 XOri = numpy.vstack((X1Ori, X2Ori)) 115 kpcaObj = KPCA(XOri, mu) 116 XTra = kpcaObj.get_XTra(2) 117 118 X1Tra = XTra[:X1Ori.shape[0], :] 119 X2Tra = XTra[X1Ori.shape[0]:, :] 120 121 fig = plt.figure(figsize=(10, 3)) 122 ax1 = plt.subplot(1, 2, 1) 123 ax2 = plt.subplot(1, 2, 2) 124 125 ax1.scatter(X1Ori[:, 0], X1Ori[:, 1], c="red", s=5, label="cluster 0") 126 ax1.scatter(X2Ori[:, 0], X2Ori[:, 1], c="green", s=5, label="cluster 1") 127 ax1.set(title="Ori Data", xlabel="$x_1$", ylabel="$x_2$") 128 129 ax2.scatter(X1Tra[:, 0], X1Tra[:, 1], c="red", s=5, label="cluster 0") 130 ax2.scatter(X2Tra[:, 0], X2Tra[:, 1], c="green", s=5, label="cluster 1") 131 ax2.set(title="Tra Data", xlabel="$c_1$", ylabel="$c_2$") 132 133 ax1.legend() 134 ax2.legend() 135 fig.tight_layout() 136 fig.savefig("./show_XTra.png", dpi=100) 137 138 139 140 if __name__ == "__main__": 141 X1, X2 = getOriData(100, 100) 142 KPCAPlot.show_XTra(X1, X2, 1)
Kernel Principle Component Analysis
原文:https://www.cnblogs.com/xxhbdk/p/13463888.html