记录一点代码,C++写的K-means聚类算法。从Test_data.txt文件中读取数据,k-means分类后给每个样本点打上标签,并存入Resut_data.txt中,最后两行(标签为2的)是聚类中心。
/***************************************** Copyright (c) 2015 Jingshuang Hu @filename:demo.cpp @datetime:2015.11.16 @author:HJS @e-mail:jingshuang_hu@163.com @blog:http://blog.csdn.net/hujingshuang *****************************************/ #include <iostream> #include <math.h> #include "class_point.h" using namespace std; void read_text(Point *point); void write_text(Point *point, Point *center); double point_distance(Point A, Point B); Point *k_means(Point *point); int main() { Point *center; Point point[1000]; read_text(point); center = k_means(point); write_text(point, center); return 0; }//end main //K-means算法 Point *k_means(Point *point) { Point *center = new Point[2]; //两个聚类中心 Point *pre_center = new Point[2]; center[0] = point[200]; //任意选取两个样本点作为聚类中心 center[1] = point[800]; double dis0 = 0, dis1 = 0; //临时距离 double sum0x = 0, sum0y = 0, sum1x = 0, sum1y = 0; int num0 = 0, num1 = 0; //类的个数 double d0 = 0, d1 = 0; do { pre_center[0] = center[0]; pre_center[1] = center[1]; for (int i = 0; i < 1000; ++i) { dis0 = point_distance(center[0], point[i]); dis1 = point_distance(center[1], point[i]); if (dis0 < dis1) { point[i].set_label(0); //打上标签 num0 += 1; //统计个数 sum0x += point[i].pos_x(); sum0y += point[i].pos_y(); } else { point[i].set_label(1); //打上标签 num1 += 1; //统计个数 sum1x += point[i].pos_x(); sum1y += point[i].pos_y(); } }//end for center[0].set_cursor(sum0x / num0, sum0y / num0); center[1].set_cursor(sum1x / num1, sum1y / num1); sum0x = sum0y = sum1x = sum1y = 0; num0 = num1 = 0; d0 = point_distance(center[0], pre_center[0]);//聚类中心不再有太大的变化,认为收敛 d1 = point_distance(center[1], pre_center[1]); }while((d0 > 0.00001) || (d1 > 0.00001));//终止条件 return center; }//end function //计算两点间距离 double point_distance(Point A, Point B) { return sqrt(pow(A.pos_x() - B.pos_x(), 2) + pow(A.pos_y() - B.pos_y(), 2)); } //读取样本点数据 void read_text(Point *point) { FILE *fp = fopen("Test_data.txt","r"); if (!fp) { cout << "打开文件失败!" << endl; return; } int i = 0; double x = 0, y = 0; while(!feof(fp)) { fscanf(fp, "%lf", &x); fscanf(fp, "%lf", &y); point[i].set_cursor(x, y); ++i; } fclose(fp); return; } //保存样本点数据(保留一位小数) void write_text(Point *point, Point *center) { FILE *fp = fopen("Result_data.txt", "w"); for(int i = 0; i < 1000; ++i) { fprintf(fp, "%.1lf\t%.1lf\t%d\n", point[i].pos_x(), point[i].pos_y(), point[i].pos_label()); } fprintf(fp, "%.1lf\t%.1lf\t%d\n", center[0].pos_x(), center[0].pos_y(), 2); fprintf(fp, "%.1lf\t%.1lf\t%d", center[1].pos_x(), center[1].pos_y(), 2); fclose(fp); }
/***************************************** Copyright (c) 2015 Jingshuang Hu @filename:clas_point.h @datetime:2015.11.16 @author:HJS @e-mail:jingshuang_hu@163.com @blog:http://blog.csdn.net/hujingshuang *****************************************/ #ifndef _CLASS_POINT_H_ #define _CLASS_POINT_H_ #include <iostream> using namespace std; class Point { public: Point(); Point(Point &); //拷贝构造函数 double pos_x(void); //获取x坐标 double pos_y(void); //获取y坐标 int pos_label(void); //获取样本点标签 void set_cursor(double, double);//设置坐标 void set_label(int); //设置标签 private: int label; //标签 double x; //横坐标 double y; //纵坐标 }; #endif
/***************************************** Copyright (c) 2015 Jingshuang Hu @filename:class_point.cpp @datetime:2015.11.16 @author:HJS @e-mail:jingshuang_hu@163.com @blog:http://blog.csdn.net/hujingshuang *****************************************/ #include "class_point.h" Point::Point() {} Point::Point(Point &point) //拷贝构造函数 { x = point.x; y = point.y; label = point.label; } double Point::pos_x(void) { return x; } double Point::pos_y(void) { return y; } int Point::pos_label(void) { return label; } void Point::set_cursor(double x = 0, double y = 0) { this->x = x; this->y = y; } void Point::set_label(int label = 0) { this->label = label; }测试数据及分类数据已上传,下载地址:http://download.csdn.net/detail/hujingshuang/9273439
版权声明:本文为博主原创文章,未经博主允许不得转载。
原文:http://blog.csdn.net/hujingshuang/article/details/49867455