首页 > 其他 > 详细

k均值算法

时间:2014-05-23 23:48:47      阅读:774      评论:0      收藏:0      [点我收藏+]
bubuko.com,布布扣
import matplotlib.pyplot as plt
import numpy as np
import time
from django.template.defaultfilters import center
def loadDataSet(fileName):
    """Load a tab-separated text file of numbers.

    Each non-blank line is split on tab characters and every field is
    converted with ``float``.

    Args:
        fileName: path of the text file to read.

    Returns:
        A list with one entry per non-blank line; each entry is a list of
        floats.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a field cannot be parsed as a float.
    """
    dataMat=[]
    # The context manager closes the handle even when float() raises.
    with open(fileName) as fr:
        for line in fr:
            stripped=line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline at end of file) would
                # otherwise crash on float('').
                continue
            dataMat.append([float(field) for field in stripped.split('\t')])
    return dataMat

def distEclud(vecA,vecB):
    """Return the Euclidean distance between vecA and vecB.

    The element-wise difference is squared, summed over all elements and
    square-rooted.
    """
    diff=vecA-vecB
    squaredSum=np.sum(np.power(diff,2))
    return np.sqrt(squaredSum)

def randCent(dataSet,k):
    """Create k random centroids inside the bounding box of dataSet.

    For every column j the centroid coordinates are drawn uniformly from
    [min(column j), max(column j)).

    Args:
        dataSet: 2-D array-like (ndarray, matrix or nested lists) with one
            sample per row.
        k: number of centroids to generate.

    Returns:
        A (k, n) ``numpy.matrix`` where n is the number of columns of
        dataSet.
    """
    # Work on a plain ndarray so column slices are 1-D regardless of whether
    # the caller passed an ndarray, a matrix or nested lists.
    data=np.asarray(dataSet)
    n=data.shape[1]
    # np.mat was removed in NumPy 2.0; np.asmatrix is the supported spelling.
    centroids=np.asmatrix(np.zeros((k,n)))
    for j in range(n):
        column=data[:,j]
        minJ=column.min()
        rangeJ=float(column.max()-minJ)
        # One rand(k,1) draw per column keeps the random-number consumption
        # order stable, so seeded runs stay reproducible.
        centroids[:,j]=minJ+rangeJ*np.random.rand(k,1)
    return centroids

def kMeans(dataSet,k):
    """Cluster the rows of dataSet into k groups with the k-means algorithm.

    Starting from random centroids (``randCent``), every sample is assigned
    to its nearest centroid (``distEclud``) and each centroid is then moved
    to the mean of its samples. This repeats until no assignment changes.

    Args:
        dataSet: 2-D array with one sample per row.
        k: number of clusters.

    Returns:
        A tuple ``(centroids, clusterAssment)``:
        centroids is a (k, n) matrix of cluster centres;
        clusterAssment is an (m, 2) matrix whose first column holds the
        cluster index of each sample and whose second column holds the
        squared distance to that cluster's centroid.
    """
    m=np.shape(dataSet)[0]
    # np.mat was removed in NumPy 2.0; np.asmatrix is the supported spelling.
    clusterAssment=np.asmatrix(np.zeros((m,2)))
    # Mark every sample as unassigned. With a zero-initialised column, a first
    # pass that puts every sample in cluster 0 looks like "nothing changed"
    # and the loop would stop after a single centroid update.
    clusterAssment[:,0]=-1
    centroids=randCent(dataSet, k)
    clusterChanged=True
    while clusterChanged:
        clusterChanged=False
        # Assignment step: attach each sample to its nearest centroid.
        for i in range(m):
            minDist=np.inf
            minIndex=-1
            for j in range(k):
                distJI=distEclud(centroids[j,:], dataSet[i,:])
                if distJI < minDist:
                    minDist=distJI
                    minIndex=j
            if clusterAssment[i,0] != minIndex:
                clusterChanged=True
            clusterAssment[i,:]=minIndex,minDist**2
        # Update step: move each centroid to the mean of its members.
        for cent in range(k):
            ptsInClust=dataSet[np.nonzero(clusterAssment[:,0].A == cent)[0]]
            # An empty cluster has no mean; np.mean would return NaN and
            # permanently destroy the centroid, so keep its previous position.
            if ptsInClust.shape[0] > 0:
                centroids[cent,:]=np.mean(ptsInClust, axis=0)
    return centroids,clusterAssment
def showImage(dataSet,center,label):
    """Scatter-plot the clustered samples and their centroids.

    Only the first two columns of dataSet and center are drawn. One scatter
    series is drawn per row of center, and the figure is displayed with
    ``plt.show()``.

    Args:
        dataSet: 2-D array with one sample per row.
        center: (k, n) array or matrix of centroids.
        label: cluster index of every sample (length m; an (m, 1) matrix
            column is accepted).
    """
    # NOTE(review): 'w' (white) is kept from the original palette, but it is
    # invisible on matplotlib's default white axes - consider another colour.
    c=['r','g','w','b']
    points=np.asarray(dataSet)
    # np.asarray accepts both ndarray and matrix centroids (the original .A
    # attribute exists only on matrices).
    center=np.asarray(center)
    labels=np.asarray(label).ravel()
    # One series per centroid instead of a hard-coded 4; colours are cycled
    # so more than len(c) clusters do not raise IndexError.
    for i in range(center.shape[0]):
        members=labels==i
        plt.scatter(points[members,0],points[members,1],s=40,c=c[i % len(c)])
    # Centroids: large magenta pentagons.
    plt.scatter(center[:,0],center[:,1],c='m',marker='p',s=200)
    plt.show()
# Script entry point: load testSet.txt, cluster it into 4 groups, print the
# centroids and the elapsed time, then plot the result.
if __name__ == "__main__":
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended replacement for measuring elapsed time.
    startTime=time.perf_counter()
    dataSet=loadDataSet("testSet.txt")
    dataSet=np.array(dataSet)
    print(dataSet)
    center,cluster=kMeans(dataSet, 4)
    print(center)
    endTime=time.perf_counter()
    print(endTime-startTime)
    # The first column of the assignment matrix holds the cluster labels.
    showImage(dataSet, center, cluster[:,0])
bubuko.com,布布扣

 

bubuko.com,布布扣

k均值算法,布布扣,bubuko.com

k均值算法

原文:http://www.cnblogs.com/sklww/p/3737003.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!