To measure the execution time of a kernel, or of a data transfer from host to device or back, use the following code.
//MEASURES THE GPU TIME SPENT BETWEEN TWO EVENTS RECORDED ON STREAM 0.
//Paste this inside a host function, around the work you want to time.
//INITIALIZE CUDA EVENTS
cudaEvent_t start,stop;
float elapsedTime = 0.0f; //milliseconds; stays 0 if the measurement fails
//CREATING EVENTS
cudaEventCreate(&start);
cudaEventCreate(&stop);
//START RECORDING (second argument is the stream; 0 is the default stream)
cudaEventRecord(start,0);
//Your cuda code goes here
//CUDA KERNEL STUFF HERE...
//FINISH RECORDING
cudaEventRecord(stop,0);
//BLOCK THE HOST UNTIL THE STOP EVENT HAS ACTUALLY BEEN REACHED ON THE GPU
cudaEventSynchronize(stop);
//CALCULATE ELAPSED TIME (reported in milliseconds)
cudaError_t timingStatus = cudaEventElapsedTime(&elapsedTime,start,stop);
//DISPLAY COMPUTATION TIME, OR THE REASON THE MEASUREMENT FAILED
if (timingStatus == cudaSuccess)
    cout<<"\n\nElapsed Time = "<<elapsedTime<<" ms";
else
    cout<<"\n\nCUDA timing failed: "<<cudaGetErrorString(timingStatus);
//RELEASE THE EVENTS SO THEY DO NOT LEAK
cudaEventDestroy(start);
cudaEventDestroy(stop);
Also note that you might need to include <cuda_runtime.h> (and <iostream> for cout) among the included files at the beginning of your program.
Hope this helps. Do contact if assistance is required.
//MEASURES THE GPU TIME SPENT BETWEEN TWO EVENTS RECORDED ON STREAM 0.
//Paste this inside a host function, around the work you want to time.
//INITIALIZE CUDA EVENTS
cudaEvent_t start,stop;
float elapsedTime = 0.0f; //milliseconds; stays 0 if the measurement fails
//CREATING EVENTS
cudaEventCreate(&start);
cudaEventCreate(&stop);
//START RECORDING (second argument is the stream; 0 is the default stream)
cudaEventRecord(start,0);
//Your cuda code goes here
//CUDA KERNEL STUFF HERE...
//FINISH RECORDING
cudaEventRecord(stop,0);
//BLOCK THE HOST UNTIL THE STOP EVENT HAS ACTUALLY BEEN REACHED ON THE GPU
cudaEventSynchronize(stop);
//CALCULATE ELAPSED TIME (reported in milliseconds)
cudaError_t timingStatus = cudaEventElapsedTime(&elapsedTime,start,stop);
//DISPLAY COMPUTATION TIME, OR THE REASON THE MEASUREMENT FAILED
if (timingStatus == cudaSuccess)
    cout<<"\n\nElapsed Time = "<<elapsedTime<<" ms";
else
    cout<<"\n\nCUDA timing failed: "<<cudaGetErrorString(timingStatus);
//RELEASE THE EVENTS SO THEY DO NOT LEAK
cudaEventDestroy(start);
cudaEventDestroy(stop);
Also note that you might need to include <cuda_runtime.h> (and <iostream> for cout) among the included files at the beginning of your program.
Hope this helps. Do contact if assistance is required.