# coding: utf-8
"""K-means clustering demo on synthetic 2-D data.

Generates ``k`` noisy groups of points whose centres lie on a circle of
radius 5, plots the raw groups, clusters all points with a plain k-means
implementation and plots the clusters together with their centroids.
"""
import numpy as np

# Matplotlib format strings used for cluster members and for centroids.
# Both lists are indexed modulo their length, so more than ten clusters
# simply reuse styles instead of raising IndexError.
_POINT_MARKERS = ['or', 'ob', 'og', 'ok', '^r', '+r', 'sr', 'dr', '<r', 'pr']
_CENTROID_MARKERS = ['Dr', 'Db', 'Dg', 'Dk', '^b', '+b', 'sb', 'db', '<b', 'pb']


def dis(x, y):
    """Return the squared Euclidean distance between points ``x`` and ``y``.

    No square root is taken; the value is only used to rank candidates.
    """
    return np.sum(np.power(y - x, 2))


def dataN(length, k):
    """Generate ``k`` groups of ``length`` noisy points each.

    Group ``j`` is centred at ``(5*sin(2*pi*j/k), 5*cos(2*pi*j/k))`` and every
    coordinate gets independent uniform noise drawn from ``[0, 5)`` using the
    global NumPy random state.

    Args:
        length: Number of points per group.
        k: Number of groups.

    Returns:
        A tuple ``(x, y, z)``. ``x`` and ``y`` are nested lists of shape
        ``(k, length, 1)`` with the coordinates; ``z`` is the list of group
        indices ``[0, ..., k - 1]``.
    """
    z = list(range(k))
    radius = 5
    sines = [np.sin(i * 2 * np.pi / k) for i in z]
    cosines = [np.cos(i * 2 * np.pi / k) for i in z]
    # All x noise is drawn before any y noise (same order as before), so
    # results for a given seed are unchanged.
    x = [[[radius * s + np.random.uniform(0, 5)] for _ in range(length)]
         for s in sines]
    y = [[[radius * c + np.random.uniform(0, 5)] for _ in range(length)]
         for c in cosines]
    return x, y, z


def showP(x, y, z):
    """Plot the generated groups on figure 1, one marker style per group.

    Args:
        x: Per-group x coordinates, as returned by ``dataN``.
        y: Per-group y coordinates, as returned by ``dataN``.
        z: Iterable of group indices to draw.

    The figure is not shown here; ``showCluster`` calls ``plt.show()``.
    """
    # Imported lazily so the numeric functions stay importable on machines
    # without matplotlib.
    import matplotlib.pyplot as plt

    plt.figure(1)
    for j in z:
        # One plot call per group; the group size comes from the data itself
        # rather than from a module-level global.
        plt.plot(np.ravel(x[j]), np.ravel(y[j]),
                 _POINT_MARKERS[j % len(_POINT_MARKERS)])


def initCentroids(dataSet, k):
    """Pick ``k`` rows of ``dataSet`` as initial centroids.

    Rows are drawn without replacement whenever ``k`` does not exceed the
    number of rows, so the same row is never chosen twice. (Identical rows in
    the data can still produce identical centroids.) Uses the global NumPy
    random state.

    Args:
        dataSet: 2-D array-like (``ndarray`` or ``matrix``), one point per row.
        k: Number of centroids.

    Returns:
        A float ``ndarray`` of shape ``(k, d)``.

    Raises:
        ValueError: If ``dataSet`` has no rows.
    """
    data = np.asarray(dataSet)
    n = data.shape[0]
    if n == 0:
        raise ValueError("cannot initialise centroids from an empty data set")
    # Sample with replacement only when there are fewer rows than centroids.
    chosen = np.random.choice(n, size=k, replace=k > n)
    return data[chosen].astype(float)


def kmeans(dataSet, k):
    """Cluster the rows of ``dataSet`` into ``k`` groups with k-means.

    Alternates between assigning each point to its nearest centroid and
    moving each centroid to the mean of its points, until no assignment
    changes. A cluster that ends up empty keeps its previous centroid
    instead of becoming NaN.

    Args:
        dataSet: 2-D array-like (``ndarray`` or ``matrix``), one point per row.
        k: Number of clusters, at least 1.

    Returns:
        A tuple ``(centroids, clusterAssment)``. ``centroids`` is a float
        ``ndarray`` of shape ``(k, d)``. ``clusterAssment`` is an ``(n, 2)``
        ``np.matrix`` (kept for backward compatibility) whose first column is
        the cluster index of each point and whose second column is the
        squared distance from that point to its final centroid.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``dataSet`` has no rows.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    data = np.asarray(dataSet, dtype=float)
    n = data.shape[0]
    centroids = initCentroids(data, k)

    # -1 marks "not assigned yet", so the first pass always counts as a change
    # even for points whose nearest centroid is cluster 0.
    labels = np.full(n, -1)
    while True:
        # Squared distance from every point to every centroid, shape (n, k).
        diff = data[:, np.newaxis, :] - centroids[np.newaxis, :, :]
        sq_dist = np.sum(diff ** 2, axis=2)
        nearest = np.argmin(sq_dist, axis=1)  # first minimum wins on ties
        if np.array_equal(nearest, labels):
            break
        labels = nearest
        for j in range(k):
            members = data[labels == j]
            if len(members):
                centroids[j] = members.mean(axis=0)

    # sq_dist was computed against the final centroids in the last pass, so
    # the reported distances are never stale.
    assignment = np.column_stack((labels, sq_dist[np.arange(n), labels]))
    return centroids, np.asmatrix(assignment)


def showCluster(dataSet, k, centroids, clusterAssment):
    """Plot clustered points and centroids on figure 2, then show all figures.

    Args:
        dataSet: 2-D array-like of points; columns 0 and 1 are plotted.
        k: Number of centroids to draw.
        centroids: Array-like of shape ``(k, d)``.
        clusterAssment: Array-like whose first column holds the cluster index
            of each point, as returned by ``kmeans``.
    """
    # Imported lazily so the numeric functions stay importable on machines
    # without matplotlib.
    import matplotlib.pyplot as plt

    data = np.asarray(dataSet)
    labels = np.asarray(clusterAssment)[:, 0].astype(int)
    centers = np.asarray(centroids)

    plt.figure(2)
    # One plot call per cluster instead of one per point.
    for label in np.unique(labels):
        members = data[labels == label]
        plt.plot(members[:, 0], members[:, 1],
                 _POINT_MARKERS[label % len(_POINT_MARKERS)])
    for i in range(k):
        plt.plot(centers[i, 0], centers[i, 1],
                 _CENTROID_MARKERS[i % len(_CENTROID_MARKERS)], markersize=8)
    plt.show()


length = 200  # points per generated group
k = 8  # number of groups / clusters (original author's note: k <= 8)


def main():
    """Generate data, plot it, run k-means and plot the result."""
    x, y, z = dataN(length, k)
    showP(x, y, z)

    # Flatten the (k, length, 1) nested lists into an (n, 2) array of points.
    dataSet = np.column_stack((np.ravel(x), np.ravel(y)))
    centroids, clusterAssment = kmeans(dataSet, k)
    showCluster(dataSet, k, centroids, clusterAssment)


if __name__ == "__main__":
    main()
# Original article: http://www.cnblogs.com/qw12/p/5686940.html