数据集
链接:https://pan.baidu.com/s/1QNkRKcWpLrpIeLmb29WxTg
提取码:pfor
代码
import numpy as np
import pandas as pd  # noqa: F401  (handy for dumping arrays as DataFrames while debugging)
from scipy import io as spio

try:
    # Plotting is optional: the clustering functions themselves only need NumPy.
    from matplotlib import colors  # noqa: F401
    from matplotlib import pyplot as plt
except ImportError:
    colors = None
    plt = None

try:
    # Image reading no longer goes through scipy.misc (misc.imread was removed
    # from SciPy); the name is kept importable only for backward compatibility.
    from scipy import misc  # noqa: F401
except ImportError:
    misc = None


def _require_pyplot():
    """Return ``matplotlib.pyplot`` or raise ImportError if it is unavailable."""
    if plt is None:
        raise ImportError("matplotlib is required for plotting and image loading")
    return plt


def findClosetCentroids(X, initial_centroids):
    """Assign every sample to its nearest centroid.

    Args:
        X: array of shape (m, n), one sample per row.
        initial_centroids: array of shape (K, n), one centroid per row.

    Returns:
        1-D integer array of length m; entry i is the row index (0 .. K-1) of
        the centroid closest to ``X[i]`` in squared Euclidean distance.  Ties
        go to the lowest centroid index.
    """
    X = np.asarray(X)
    # Float centroids keep the subtraction below from wrapping around when X
    # holds unsigned integers (e.g. raw uint8 pixels).
    centroids = np.asarray(initial_centroids, dtype=float)
    K = centroids.shape[0]

    # dis[i, j] = squared distance from sample i to centroid j.
    dis = np.empty((X.shape[0], K))
    for j in range(K):
        diff = X - centroids[j, :]
        dis[:, j] = (diff * diff).sum(axis=1)

    # A NaN centroid (an empty cluster left unresolved) must never win the
    # argmin, which would otherwise treat NaN as the minimum.
    dis[np.isnan(dis)] = np.inf

    # argmin yields exactly one label per sample even when distances tie.
    return np.argmin(dis, axis=1)


def computerCentroids(X, idx, K, previous_centroids=None):
    """Recompute each centroid as the mean of the samples assigned to it.

    Args:
        X: array of shape (m, n).
        idx: cluster label of every sample; any shape that flattens to m.
        K: number of clusters.
        previous_centroids: optional (K, n) array.  A cluster that received no
            samples keeps its row from this array; when omitted, the row for
            an empty cluster is NaN.

    Returns:
        Float array of shape (K, n) holding the new centroids.
    """
    X = np.asarray(X)
    labels = np.ravel(idx)
    centroids = np.zeros((K, X.shape[1]))
    for i in range(K):
        members = X[labels == i, :]
        if members.shape[0] > 0:
            centroids[i, :] = members.mean(axis=0)
        elif previous_centroids is not None:
            centroids[i, :] = previous_centroids[i, :]
        else:
            centroids[i, :] = np.nan
    return centroids


def plotProcessKMeans(X, centroids, previous_centroids):
    """Draw the data and the movement of every centroid for one iteration.

    Only the first two columns of ``X`` and of the centroids are plotted.

    Returns:
        The ``matplotlib.pyplot`` module, so the caller can decide when to
        call ``show()``.
    """
    pyplot = _require_pyplot()
    pyplot.scatter(X[:, 0], X[:, 1])
    pyplot.plot(previous_centroids[:, 0], previous_centroids[:, 1], 'rx')
    # One segment per centroid, from its previous position to its current one.
    for i in range(centroids.shape[0]):
        p1 = centroids[i, :]
        p2 = previous_centroids[i, :]
        pyplot.plot([p1[0], p2[0]], [p1[1], p2[1]], '->')
    return pyplot


def runKMeans(X, initial_centroids, max_iters, plot_progress):
    """Run K-Means for a fixed number of iterations.

    Args:
        X: array of shape (m, n).
        initial_centroids: array of shape (K, n) with the starting centroids.
        max_iters: number of assignment/update rounds to perform.
        plot_progress: when true, draw the centroid trajectory (requires
            matplotlib) and show the figure once all iterations are done.

    Returns:
        ``(centroids, idx)``: the final centroids and the labels computed in
        the last assignment step, i.e. against the centroids as they were
        before the final update.
    """
    K = initial_centroids.shape[0]
    centroids = initial_centroids
    # Same shape/dtype as the labels from findClosetCentroids, so the result
    # is usable as an index even when max_iters is 0.
    idx = np.zeros(X.shape[0], dtype=int)

    for i in range(max_iters):
        print("迭代次数:%d" % (i + 1))
        idx = findClosetCentroids(X, centroids)
        previous_centroids = centroids
        # Passing the previous centroids keeps an empty cluster where it was
        # instead of turning it into NaN.
        centroids = computerCentroids(X, idx, K, previous_centroids)
        if plot_progress:
            # Plot after the update so the last movement is drawn as well.
            plotProcessKMeans(X, centroids, previous_centroids)

    if plot_progress:
        _require_pyplot().show()  # show the accumulated result once
    return centroids, idx


def KMeansInitCentroids(X, K):
    """Pick K distinct rows of ``X`` at random as initial centroids.

    Returns:
        Array of shape (min(K, m), n) with the selected rows.
    """
    m_arr = np.arange(0, X.shape[0])
    np.random.shuffle(m_arr)  # random order of the row indices
    return X[m_arr[0:K], :]   # rows at the first K shuffled indices


def kMeansInitCentroids(X, K):
    """Pick K distinct rows of ``X`` at random as initial centroids.

    Same contract as ``KMeansInitCentroids``; it shuffles row indices rather
    than a full copy of the data, and ``X`` itself is left untouched.
    """
    return KMeansInitCentroids(X, K)


def KMeans(data_path="kmeandata.mat", max_iters=10):
    """Cluster the 2-D demo data set stored in a .mat file and plot progress.

    Args:
        data_path: MATLAB file containing the sample matrix under key ``X``.
        max_iters: number of K-Means iterations.
    """
    data = spio.loadmat(data_path)
    X = data['X']
    initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])
    runKMeans(X, initial_centroids, max_iters, True)


def _quantize_colors(img_data, K, max_iters):
    """Return ``img_data`` with every pixel replaced by its cluster centroid."""
    # One row per pixel; a 2-D (grayscale) image has a single channel.
    channels = 1 if img_data.ndim == 2 else img_data.shape[2]
    X = img_data.reshape(-1, channels)

    initial_centroids = kMeansInitCentroids(X, K)
    centroids, _ = runKMeans(X, initial_centroids, max_iters, False)  # no plotting
    # Re-assign against the final centroids; the labels returned by runKMeans
    # predate the last centroid update.
    idx = findClosetCentroids(X, centroids)
    return centroids[idx, :].reshape(img_data.shape)


def picKMeans(image_path="图片名字.png", K=16, max_iters=10):
    """Compress an image to K colours with K-Means and show before/after.

    Args:
        image_path: image file to load.
        K: number of colours to keep.  Worth tuning: plot the cost against K
            and look for the elbow.
        max_iters: number of K-Means iterations.
    """
    print("K-Means压缩图片")
    pyplot = _require_pyplot()

    img_data = pyplot.imread(image_path)
    # PNG files are returned as floats already in [0, 1]; other formats come
    # back as integer arrays and still need scaling.
    if np.issubdtype(img_data.dtype, np.integer):
        img_data = img_data / 255.0

    X_recovered = _quantize_colors(img_data, K, max_iters)

    cmap = "gray" if img_data.ndim == 2 else None  # ignored for RGB(A) data
    pyplot.figure(figsize=(10, 8))
    for position, image in enumerate((img_data, X_recovered), start=1):
        pyplot.subplot(1, 2, position)
        pyplot.axis("off")
        pyplot.imshow(image, cmap=cmap)
    pyplot.show()


if __name__ == "__main__":
    KMeans()
    picKMeans()
原文:https://www.cnblogs.com/boniface/p/12284970.html