问题

测试CUDA应用的时间

解决方案

引用自stackoverflow：

You could do something along the lines of:

#include<sys/time.h>

// Host-side wall-clock timing of a kernel, reported in milliseconds.
struct timeval t1, t2;

gettimeofday(&t1, 0);

kernel_call<<<dimGrid, dimBlock, 0>>>();

// A kernel launch does not return an error code itself; query it explicitly.
gpuErrchk(cudaPeekAtLastError());

// Kernel launches are asynchronous with respect to the host, so wait for the
// device to finish before taking the end timestamp.
// cudaDeviceSynchronize() replaces the deprecated cudaThreadSynchronize().
gpuErrchk(cudaDeviceSynchronize());

gettimeofday(&t2, 0);

// Convert to milliseconds: whole seconds * 1000 plus microseconds / 1000.
// (Dividing the microsecond total by 1e6 would yield seconds, which does not
// match the "ms" label printed below.)
double time = 1000.0*(t2.tv_sec-t1.tv_sec) + (t2.tv_usec-t1.tv_usec)/1000.0;

printf("Time to generate:  %3.1f ms \n", time);

or:

// Device-side timing of a kernel using CUDA events.
// cudaEventElapsedTime() reports the interval in milliseconds.
float time;
cudaEvent_t start, stop;

gpuErrchk( cudaEventCreate(&start) );
gpuErrchk( cudaEventCreate(&stop) );
gpuErrchk( cudaEventRecord(start, 0) );

kernel_call<<<dimGrid, dimBlock, 0>>>();

// A kernel launch does not return an error code itself; query it explicitly.
gpuErrchk( cudaPeekAtLastError() );

gpuErrchk( cudaEventRecord(stop, 0) );
// Block the host until the stop event has been reached on the device;
// the elapsed time cannot be read before that.
gpuErrchk( cudaEventSynchronize(stop) );
gpuErrchk( cudaEventElapsedTime(&time, start, stop) );

// Release the events once the measurement has been read.
gpuErrchk( cudaEventDestroy(start) );
gpuErrchk( cudaEventDestroy(stop) );

printf("Time to generate:  %3.1f ms \n", time);

其中，在CPU端计时时，由于GPU的kernel与CPU是异步执行的，所以在获取整个CUDA应用的结束时间之前，必须先调用cudaThreadSynchronize（该函数已被弃用，现应替换为cudaDeviceSynchronize），否则测到的结束时刻只是最后一个kernel启动（launch）返回的时刻，而不是它实际执行完成的时刻。