CUDA Runtime API提供了用于计时的接口,可以用如下代码对在GPU上运行的CUDA程序进行计时,计时结果以毫秒为单位:
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
//需要计时的在GPU上运行的程序
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
float kernelTime;
cudaEventElapsedTime(&kernelTime, start, stop);
cudaEventDestroy(start);
cudaEventDestroy(stop);
printf("Kernel time: %.2f ms\n", kernelTime);
原文:http://blog.csdn.net/warren912/article/details/19920111