comparison src/test/twice.cc @ 301:609bf62768b9

add -DUSE_CUDA=1 flag to cmake
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sun, 12 Feb 2017 12:35:11 +0900
parents 8bbc0012e1a4
children 1839586f5b41
comparison
equal deleted inserted replaced
300:8bbc0012e1a4 301:609bf62768b9
110 110
111 // Asynchronous launch kernel 111 // Asynchronous launch kernel
112 for (int i=0;i<num_exec;i++,cur++) { 112 for (int i=0;i<num_exec;i++,cur++) {
113 if (num_stream <= cur) 113 if (num_stream <= cur)
114 cur=0; 114 cur=0;
115 B[i] = (float)(i+1); 115 //B[i] = (float)(i+1);
116 //cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]); 116 //cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]);
117 void* args[] = {&devA, &devB[i], &devOut[i]}; 117 void* args[] = {&devA, &devB[i], &devOut[i]};
118 checkCudaErrors(cuLaunchKernel(function, 118 checkCudaErrors(cuLaunchKernel(function,
119 LENGTH, 1, 1, 119 LENGTH, 1, 1,
120 THREAD, 1, 1, 120 THREAD, 1, 1,
121 0, stream[cur], args, NULL)); 121 0, num_stream ? stream[cur] : NULL , args, NULL));
122 //cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]); 122 //cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]);
123 } 123 }
124 124
125 cur = 0; 125 cur = 0;
126 126