计算RGB图像的直方图
// kernel
/*
 * Accumulates a 96-bin colour histogram of an interleaved 3-bytes-per-pixel
 * image. Every channel is quantised into 32 bins of 8 intensity levels
 * (value / 8). Bin layout of both histogram buffers:
 *   [ 0, 32)  third byte of each pixel  (R)
 *   [32, 64)  second byte of each pixel (G)
 *   [64, 96)  first byte of each pixel  (B)
 *
 * imgdata         : image bytes, 3 per pixel, in B, G, R order.
 * histogram       : 96 counters; this kernel ADDS its counts to whatever the
 *                   buffer already contains, it never clears it.
 * local_histogram : per-work-group scratch space, at least 96 uints.
 * data_size_item  : number of pixels handled by one work-item.
 * all_byte_size   : total number of bytes in imgdata.
 */
__kernel void histogram(__global uchar* imgdata,
                        __global uint *histogram,
                        __local uint *local_histogram,
                        uint data_size_item,
                        uint all_byte_size)
{
    const uint bin_count  = 96;
    const uint local_id   = (uint)get_local_id(0);
    const uint local_size = (uint)get_local_size(0);

    // Clear every bin of the work-group histogram. The work-items share the
    // job with a stride of the group size, so any group size works.
    for (uint bin = local_id; bin < bin_count; bin += local_size)
    {
        local_histogram[bin] = 0;
    }
    barrier(CLK_LOCAL_MEM_FENCE); // all bins are zero before anyone counts

    // Byte range owned by this work-item. 64-bit arithmetic keeps the offset
    // from wrapping for large images, and the range is clamped to the buffer.
    const ulong bytes_per_item = (ulong)data_size_item * 3;
    const ulong first = (ulong)get_global_id(0) * bytes_per_item;
    const ulong last  = min(first + bytes_per_item, (ulong)all_byte_size);

    // "i + 2 < last" guarantees all three bytes of the pixel are in range.
    for (ulong i = first; i + 2 < last; i += 3)
    {
        // B
        atomic_inc(local_histogram + imgdata[i] / 8 + 64);
        // G
        atomic_inc(local_histogram + imgdata[i + 1] / 8 + 32);
        // R
        atomic_inc(local_histogram + imgdata[i + 2] / 8);
    }

    // The counts being merged live in local memory, so a local fence is what
    // is needed here. A barrier only synchronises the work-items of one
    // work-group; it is not a device-wide synchronisation.
    barrier(CLK_LOCAL_MEM_FENCE);

    // Merge this work-group's counts into the global result.
    for (uint bin = local_id; bin < bin_count; bin += local_size)
    {
        atomic_add(histogram + bin, local_histogram[bin]);
    }
}
#include <iostream> #include <opencv2/opencv.hpp> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/stat.h> #ifdef __APPLE__ #include <OpenCL/opencl.h> #else #include <CL/cl.h> #endif const char histogram_cl_kernel_filename[]= "histogram.cl"; /** * 获取设备 * @return cl_device_id */ cl_device_id getdevice() { cl_platform_id platform; cl_device_id dev; int err; // 获取一个平台 err = clGetPlatformIDs(1,&platform,NULL); if(err<0) { perror("获取平台失败!"); exit(1); } // 获取一个GPU设备 err=clGetDeviceIDs(platform,CL_DEVICE_TYPE_GPU,1,&dev,NULL); if(err<0) { perror("获取设备失败!"); exit(1); } return dev; } /** * 创建并编译程序 * cl_context ctx:上下文 * cl_device_id dev : 设备 * filename: 文件名称 * @return cl_program */ cl_program build_program(cl_context ctx, cl_device_id dev,const char* filename) { cl_program program; FILE *program_handle; char *program_buffer ,*program_log; size_t program_size, log_size; int err; // 从文件中读取程序内容 program_handle = fopen(filename,"r"); if(program_handle == NULL) { perror("程序文件无法打开!"); exit(1); } fseek(program_handle,0,SEEK_END); program_size=ftell(program_handle); rewind(program_handle); program_buffer = (char*)malloc(program_size + 1); program_buffer[program_size] = ‘\0‘; fread(program_buffer, sizeof(char), program_size, program_handle); fclose(program_handle); // 创建 cl_program; program = clCreateProgramWithSource(ctx,1,(const char **)&program_buffer,&program_size,&err); if(err<0) { perror("创建cl_program失败!"); exit(1); } free(program_buffer); // 编译 cl_program err = clBuildProgram(program,0,NULL,NULL,NULL,NULL); if(err<0) { // 编译失败获取 失败信息 clGetProgramBuildInfo(program,dev,CL_PROGRAM_BUILD_LOG,0,NULL,&log_size); program_log = (char*)malloc(log_size+1); program_log[log_size]=‘\0‘; clGetProgramBuildInfo(program,dev,CL_PROGRAM_BUILD_LOG,log_size+1,program_log,NULL); std::cout<<"program_log:\n "<<program_log<<std::endl; free(program_log); exit(1); } return program; } void calhistogram() { int err; // 获取设备 cl_device_id device = getdevice(); // 创建上下文 cl_context 
context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); if(err<0) { perror("创建上下文失败!"); exit(1); } // 创建并编译程序 cl_program program = build_program(context,device,histogram_cl_kernel_filename); // 创建内核 cl_kernel kernel = clCreateKernel(program,"histogram",&err); // 创建缓存对象 // 读取图片数据 cv::Mat img = cv::imread("7.jpg"); unsigned char * data = img.data; unsigned int size = img.cols*img.rows*3; std::cout<<"字节数:="<<size<<std::endl; std::cout<<"像素点数:="<<img.cols*img.rows<<std::endl; cl_mem imgdata = clCreateBuffer(context,CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,size,(void *)data,&err); if(err<0) { std::cout<<err<<std::endl; perror("创建图像缓存对象失败!"); exit(1); } unsigned int result[96]={0}; cl_mem result_buffer = clCreateBuffer(context,CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,sizeof(result),result,NULL); // 获取 CU 的个数 unsigned int size_CU =0; clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(size_CU), &size_CU, NULL); std::cout<<"计算单元(CU)的个数为: "<<size_CU<<std::endl; // 获取每个工作组中工作项的 数量限制 size_t item_size_per_group =0; clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_GROUP_SIZE,sizeof(item_size_per_group),&item_size_per_group,NULL); std::cout<<"每个工作组中工作的最大数量为: "<<item_size_per_group<<std::endl; // 工作项的总个数 size_t item_num = size_CU*item_size_per_group; std::cout<<"工作项的总个数为:"<<item_num<<std::endl; // 每个工作项负的 点个数 unsigned int size_per_item = img.cols*img.rows/item_num +1; std::cout<<"每个工作项负责的点数为:"<<size_per_item<<std::endl; // 设置核参数 err = clSetKernelArg(kernel,0,sizeof(imgdata),&imgdata); // 图像数据 err |= clSetKernelArg(kernel,1,sizeof(result_buffer),&result_buffer); // 存储结果的地址 err |= clSetKernelArg(kernel,2,sizeof(result),NULL); // 局部结果 err |= clSetKernelArg(kernel,3,sizeof(size_per_item),&size_per_item); // 每个项处理的数据点数大小 err |= clSetKernelArg(kernel,4,sizeof(size),&size); if(err<0) { perror("设置参数失败!"); exit(1); } // 创建命令队列 cl_command_queue queue = clCreateCommandQueue(context, device , 0 ,&err); if(err<0) { perror("创建命令队列失败!"); exit(1); } size_t offset =0; err = 
clEnqueueNDRangeKernel(queue,kernel,1,&offset,&item_num, &item_size_per_group,0,NULL,NULL); if(err<0) { perror("Enqueue the kernel failed!"); exit(1); } // 读取结果命令 err = clEnqueueReadBuffer(queue,result_buffer,CL_TRUE,0,sizeof(result),&result,0,NULL,NULL); if(err<0) { perror("读取结果失败!"); exit(1); } // 输出结果 // R int temp=0; for(int i=0;i<32;i++) { temp+=result[i]; std::cout<<i*8<<"---"<<(i+1)*8-1<<":"<<result[i]<<std::endl; } std::cout<<temp<<std::endl; } int main() { std::cout << "Hello, World!" << std::endl; calhistogram(); return 0; }
原文:https://www.cnblogs.com/feihu-h/p/12107537.html