聚类分析 k均值算法

时间：2019-11-01 00:54:31 阅读：111 评论：0 收藏：0 [点我收藏+]

dataSet = [          #数据集
    # 1
    [0.697, 0.460],
    # 2
    [0.774, 0.376],
    # 3
    [0.634, 0.264],
    # 4
    [0.608, 0.318],
    # 5
    [0.556, 0.215],
    # 6
    [0.403, 0.237],
    # 7
    [0.481, 0.149],
    # 8
    [0.437, 0.211],
    # 9
    [0.666, 0.091],
    # 10
    [0.243, 0.267],
    # 11
    [0.245, 0.057],
    # 12
    [0.343, 0.099],
    # 13
    [0.639, 0.161],
    # 14
    [0.657, 0.198],
    # 15
    [0.360, 0.370],
    # 16
    [0.593, 0.042],
    # 17
    [0.719, 0.103],
    # 18
    [0.359, 0.188],
    # 19
    [0.339, 0.241],
    # 20
    [0.282, 0.257],
    # 21
    [0.748, 0.232],
    # 22
    [0.714, 0.346],
    # 23
    [0.483, 0.312],
    # 24
    [0.478, 0.437],
    # 25
    [0.525, 0.369],
    # 26
    [0.751, 0.489],
    # 27
    [0.532, 0.472],
    # 28
    [0.473, 0.376],
    # 29
    [0.725, 0.445],
    # 30
    [0.446, 0.459]
    ]
# print(len(dataSet))
# print(dataSet)

# ‘‘‘
m = len(dataSet)           #存储dataSet的长度
# print(m)
k = int(input("请输入簇数："))
miu = []                   #用于存储均值向量
choice = sample(list(range(m)),k)  #从列表中随机抽样k个元素
miu = [dataSet[i] for i in choice] #初始化均值向量
# print("μ：\n{}".format(miu))

times = 0
while True:
    times = times+1
    if times>100:
        print("循环次数过多")
        break
    a = [[] for i in range(k)]  # 用于存储样本与各均值向量的距离
    # print(a)
    for j in range(m):          # 计算xj与各均值向量μi的距离
        bar = 1000000000000000000000   #聚类比较值
        biaoji = -1             # 簇标记
        for i in range(k):
            dis = np.hypot(dataSet[j][0]-miu[i][0],dataSet[j][1]-miu[i][1])
            if dis<bar:
                bar = dis; biaoji = i
        a[biaoji].append(dataSet[j])  #根据距离最近的均值向量确定xj的簇标记
    # print("簇0\n{}".format(a[0]))
    # print("簇1\n{}".format(a[1]))
    # print("簇2\n{}".format(a[2]))

    miu1 = [[] for i in range(k)]     #重新计算均值向量
    for i in range(k):
        miu1[i].append(np.sum([a[i][j][0] for j in range(len(a[i]))])/len(a[i]))
        miu1[i].append(np.sum([a[i][j][1] for j in range(len(a[i]))])/len(a[i]))
    if miu==miu1: #如果前后均值向量相等，跳出
        break
    else:
        miu = miu1
    # break

print("循环次数：\n{}".format(times))
print("簇0\n{}".format(a[0]))
print("簇1\n{}".format(a[1]))
print("簇2\n{}".format(a[2]))

其中得到的一组结果：

x1 = [[0.634, 0.264], [0.556, 0.215], [0.481, 0.149], [0.666, 0.091], [0.639, 0.161], [0.657, 0.198], [0.593, 0.042], [0.719, 0.103], [0.748, 0.232]]

x2 = [[0.403, 0.237], [0.437, 0.211], [0.243, 0.267], [0.245, 0.057], [0.343, 0.099], [0.36, 0.37], [0.359, 0.188], [0.339, 0.241], [0.282, 0.257]]

x3 = [[0.697, 0.46], [0.774, 0.376], [0.608, 0.318], [0.714, 0.346], [0.483, 0.312], [0.478, 0.437], [0.525, 0.369], [0.751, 0.489], [0.532, 0.472], [0.473, 0.376], [0.725, 0.445], [0.446, 0.459]]
[0.46, 0.376, 0.318, 0.346, 0.437, 0.369, 0.489, 0.472, 0.376, 0.445, 0.459]

用R作图：

a1_x = c(0.634, 0.556, 0.481, 0.666, 0.639, 0.657, 0.593, 0.719, 0.748)
a1_y = c(0.264, 0.215, 0.149, 0.091, 0.161, 0.198, 0.042, 0.103, 0.232)
a2_x = c(0.403, 0.437, 0.243, 0.245, 0.343, 0.36, 0.359, 0.339, 0.282)
a2_y = c(0.237, 0.211, 0.267, 0.057, 0.099, 0.37, 0.188, 0.241, 0.257)
a3_x = c(0.697, 0.774, 0.608, 0.714, 0.483, 0.478, 0.525, 0.751, 0.532, 0.473, 0.725, 0.446)
a3_y = c(0.46, 0.376, 0.318, 0.346, 0.312, 0.437, 0.369, 0.489, 0.472, 0.376, 0.445, 0.459)

plot(a1_x,a1_y,pch = 15,xlim = c(0,0.9),ylim = c(0,0.8),col = "red")
points(a2_x,a2_y,pch = 16,col = "blue")
points(a3_x,a3_y,pch = 17,col = "green")

技术分享图片

聚类分析 k均值算法

原文：https://www.cnblogs.com/jiaxinwei/p/11774565.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)