Mercurial > hg > GearsTemplate
diff src/parallel_execution/CUDAExecutor.cbc @ 538:c0b6ce2ed820
Add comment
author | Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 13 Feb 2018 04:35:17 +0900 |
parents | b78533641f9b |
children |
line wrap: on
line diff
--- a/src/parallel_execution/CUDAExecutor.cbc Tue Feb 06 05:14:55 2018 +0900
+++ b/src/parallel_execution/CUDAExecutor.cbc Tue Feb 13 04:35:17 2018 +0900
@@ -43,6 +43,7 @@
         executor->maxThreadPerBlockX = 1;
         executor->maxThreadPerBlockY = 1;
         executor->maxThreadPerBlockZ = 1;
+        // maxThreadPerBlockX * maxThreadPerBlockY * maxThreadPerBlockZ <= maxThreadPerBlock
         if (iterator->x > 1 && iterator->y == 1 && iterator->z == 1) {
             executor->maxThreadPerBlockX = executor->maxThreadPerBlock;
             executor->maxThreadPerBlockY = 1;
@@ -63,7 +64,6 @@
 }
 
 __code execCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) {
-    // Asynchronous launch kernel
     task->num_exec = 1;
     if (task->iterate) {
         struct MultiDimIterator* iterator = &task->iterator->iterator->MultiDimIterator;
@@ -71,6 +71,7 @@
         int blockDimX = computeblockDim(iterator->x, executor->maxThreadPerBlockX);
         int blockDimY = computeblockDim(iterator->y, executor->maxThreadPerBlockY);
         int blockDimZ = computeblockDim(iterator->z, executor->maxThreadPerBlockZ);
+        // launch kernel
         checkCudaErrors(cuLaunchKernel(task->function,
                     iterator->x/blockDimX, iterator->y/blockDimY, iterator->z/blockDimZ,
                     blockDimX, blockDimY, blockDimZ,
@@ -87,7 +88,7 @@
 }
 
 __code writeCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) {
-    // wait for stream
+    // Asynchronous launch kernel
     checkCudaErrors(cuCtxSynchronize());
     struct Timer* timer = executor->timer;
     goto timer->end(writeCUDAExecutor1);