根据上述描述,均值漂移聚类也就是根据密度来聚类的,样本最终会归属于密度最大的那个簇。
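1、基础偏移量:$M_h(x) = \frac{1}{k}\sum_{x_i \in S_h(x)} (x_i - x)$,其中 $S_h(x)$ 是以 $x$ 为中心、半径为 $h$ 的邻域,$k$ 是落入该邻域的样本个数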
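2、高斯偏移量:$M_h(x) = \dfrac{\sum_{x_i \in S_h(x)} G\left(\frac{x_i - x}{h}\right)(x_i - x)}{\sum_{x_i \in S_h(x)} G\left(\frac{x_i - x}{h}\right)}$,其中 $G$ 为高斯核函数,$h$ 为带宽,$S_h(x)$ 是以 $x$ 为中心、半径为 $h$ 的邻域;距离 $x$ 越近的样本权重越大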
TODO:补充这个公式的由来(推导过程)
3、更新新的质心为:$x \leftarrow x + M_h(x)$,其中 $M_h(x)$ 为当前质心 $x$ 处的偏移量;重复该步骤直到偏移量足够小(收敛)为止
# Mean-shift clustering demo: generate synthetic clustered 2-D data, fit
# scikit-learn's MeanShift with an estimated bandwidth, print the discovered
# cluster labels and draw one scatter series per cluster.
from scipy.spatial import distance
from sklearn.neighbors import NearestNeighbors
# DBSCAN / dbscan are imported from the public package path; the private
# ``sklearn.cluster.dbscan_`` module is not importable in current
# scikit-learn releases.
from sklearn.cluster import DBSCAN
from sklearn.cluster import dbscan
import numpy as np
from matplotlib import pyplot as plt
from sklearn.cluster import MeanShift, estimate_bandwidth

# NOTE(review): this helper lives in scikit-learn's test package, so it is
# only available when the installed distribution ships its tests.
from sklearn.cluster.tests.common import generate_clustered_data

# Not used by the code below; presumably left over from a DBSCAN
# experiment (they match DBSCAN's parameter names) -- confirm before removing.
min_samples = 10
eps = 0.0309

X = generate_clustered_data(seed=1, n_samples_per_cluster=1000)

# quantile controls the distance within which points are treated as
# belonging to the same cluster (it drives the estimated bandwidth).
bandwidth = estimate_bandwidth(X, quantile=0.3, n_samples=len(X))
meanshift = MeanShift(bandwidth=bandwidth, bin_seeding=True)  # build the estimator
meanshift.fit(X)
labels = meanshift.labels_

# Compute the distinct labels once and reuse them for printing and plotting.
unique_labels = np.unique(labels)
print(unique_labels)

fig, ax = plt.subplots()
# Number of labels, i.e. the number of clusters found automatically.
cluster_num = len(unique_labels)
for i in range(0, cluster_num):
    # Boolean-mask selection replaces the per-sample Python loop.
    members = X[labels == i]
    ax.scatter(members[:, 0], members[:, 1], s=1)

plt.show()
结果
原文:https://www.cnblogs.com/ylxn/p/11846184.html