记录一点代码,C++写的K-means聚类算法。从Test_data.txt文件中读取数据,k-means分类后给每个样本点打上标签,并存入Resut_data.txt中,最后两行(标签为2的)是聚类中心。
/*****************************************
Copyright (c) 2015 Jingshuang Hu
@filename:demo.cpp
@datetime:2015.11.16
@author:HJS
@e-mail:jingshuang_hu@163.com
@blog:http://blog.csdn.net/hujingshuang
*****************************************/
#include <iostream>
#include <math.h>
#include "class_point.h"
using namespace std;
void read_text(Point *point);
void write_text(Point *point, Point *center);
double point_distance(Point A, Point B);
Point *k_means(Point *point);
int main()
{
Point *center;
Point point[1000];
read_text(point);
center = k_means(point);
write_text(point, center);
return 0;
}//end main
//K-means算法
Point *k_means(Point *point)
{
Point *center = new Point[2]; //两个聚类中心
Point *pre_center = new Point[2];
center[0] = point[200]; //任意选取两个样本点作为聚类中心
center[1] = point[800];
double dis0 = 0, dis1 = 0; //临时距离
double sum0x = 0, sum0y = 0, sum1x = 0, sum1y = 0;
int num0 = 0, num1 = 0; //类的个数
double d0 = 0, d1 = 0;
do
{
pre_center[0] = center[0];
pre_center[1] = center[1];
for (int i = 0; i < 1000; ++i)
{
dis0 = point_distance(center[0], point[i]);
dis1 = point_distance(center[1], point[i]);
if (dis0 < dis1)
{
point[i].set_label(0); //打上标签
num0 += 1; //统计个数
sum0x += point[i].pos_x();
sum0y += point[i].pos_y();
}
else
{
point[i].set_label(1); //打上标签
num1 += 1; //统计个数
sum1x += point[i].pos_x();
sum1y += point[i].pos_y();
}
}//end for
center[0].set_cursor(sum0x / num0, sum0y / num0);
center[1].set_cursor(sum1x / num1, sum1y / num1);
sum0x = sum0y = sum1x = sum1y = 0;
num0 = num1 = 0;
d0 = point_distance(center[0], pre_center[0]);//聚类中心不再有太大的变化,认为收敛
d1 = point_distance(center[1], pre_center[1]);
}while((d0 > 0.00001) || (d1 > 0.00001));//终止条件
return center;
}//end function
//计算两点间距离
double point_distance(Point A, Point B)
{
return sqrt(pow(A.pos_x() - B.pos_x(), 2) + pow(A.pos_y() - B.pos_y(), 2));
}
//读取样本点数据
void read_text(Point *point)
{
FILE *fp = fopen("Test_data.txt","r");
if (!fp)
{
cout << "打开文件失败!" << endl;
return;
}
int i = 0;
double x = 0, y = 0;
while(!feof(fp))
{
fscanf(fp, "%lf", &x);
fscanf(fp, "%lf", &y);
point[i].set_cursor(x, y);
++i;
}
fclose(fp);
return;
}
//保存样本点数据(保留一位小数)
void write_text(Point *point, Point *center)
{
FILE *fp = fopen("Result_data.txt", "w");
for(int i = 0; i < 1000; ++i)
{
fprintf(fp, "%.1lf\t%.1lf\t%d\n", point[i].pos_x(), point[i].pos_y(), point[i].pos_label());
}
fprintf(fp, "%.1lf\t%.1lf\t%d\n", center[0].pos_x(), center[0].pos_y(), 2);
fprintf(fp, "%.1lf\t%.1lf\t%d", center[1].pos_x(), center[1].pos_y(), 2);
fclose(fp);
}
/*****************************************
Copyright (c) 2015 Jingshuang Hu
@filename:clas_point.h
@datetime:2015.11.16
@author:HJS
@e-mail:jingshuang_hu@163.com
@blog:http://blog.csdn.net/hujingshuang
*****************************************/
#ifndef _CLASS_POINT_H_
#define _CLASS_POINT_H_
#include <iostream>
using namespace std;
class Point
{
public:
Point();
Point(Point &); //拷贝构造函数
double pos_x(void); //获取x坐标
double pos_y(void); //获取y坐标
int pos_label(void); //获取样本点标签
void set_cursor(double, double);//设置坐标
void set_label(int); //设置标签
private:
int label; //标签
double x; //横坐标
double y; //纵坐标
};
#endif
/*****************************************
Copyright (c) 2015 Jingshuang Hu
@filename:class_point.cpp
@datetime:2015.11.16
@author:HJS
@e-mail:jingshuang_hu@163.com
@blog:http://blog.csdn.net/hujingshuang
*****************************************/
#include "class_point.h"
Point::Point()
{}
Point::Point(Point &point) //拷贝构造函数
{
x = point.x;
y = point.y;
label = point.label;
}
double Point::pos_x(void)
{
return x;
}
double Point::pos_y(void)
{
return y;
}
int Point::pos_label(void)
{
return label;
}
void Point::set_cursor(double x = 0, double y = 0)
{
this->x = x;
this->y = y;
}
void Point::set_label(int label = 0)
{
this->label = label;
}
测试数据及分类数据已上传,下载地址:http://download.csdn.net/detail/hujingshuang/9273439版权声明:本文为博主原创文章,未经博主允许不得转载。
原文:http://blog.csdn.net/hujingshuang/article/details/49867455