先存个码
# -- coding: utf-8 --
import numpy as np
import operator
def createDataSet():
# 创建训练集
group = np.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
labels = [‘A‘, ‘A‘, ‘B‘, ‘B‘]
return group, labels
def classify0(inX, dataSet, labels, k):
dataSetSize = dataSet.shape[0]
# 根据欧式距离计算训练集中每个样本到测试点的距离
diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet
sqDiffMat = diffMat ** 2
sqDistances = sqDiffMat.sum(axis=1)
distances = sqDistances ** 0.5
# 计算完所有点的距离后,对数据按照从小到大的次序排序
sortedDistIndicies = distances.argsort()
# 确定前k个距离最小的元素所在的主要分类,最后返回发生频率最高的元素类别
classCount = {}
for i in range(k):
voteIlabel = labels[sortedDistIndicies[i]]
classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
sortedClassCount = sorted(classCount.items(), key=operator.itemgetter(1), reverse=True)
return sortedClassCount[0][0]
if __name__ == ‘__main__‘:
group ,labels = createDataSet()
# 输入[0,0]点计算分类
result = classify0([0,0], group, labels, 3)
print(result)
原文:https://www.cnblogs.com/haimingv/p/12634163.html