view src/cuLaunchKernel.cbc @ 11:a8bc8c6b48bd default tip

fix
author soto@cr.ie.u-ryukyu.ac.jp
date Tue, 15 Sep 2020 07:06:29 +0900
parents 73127e0ab57c
children
line wrap: on
line source

/*
 * Pick the thread-block size for one axis of a kernel launch.
 *
 * count:             number of work items requested along the axis.
 * maxThreadPerBlock: upper bound applied to the block size on that axis.
 *
 * Returns min(count, maxThreadPerBlock), but never less than 1.
 */
int computeblockDim(int count, int maxThreadPerBlock) {
    int dim = count;
    if (maxThreadPerBlock < dim) {
        dim = maxThreadPerBlock;
    }
    // A block dimension below 1 is not a valid launch size, and
    // execCUDAExecutor divides the iterator extent by this value to get the
    // grid size, so a zero result would be a host-side division by zero.
    if (dim < 1) {
        dim = 1;
    }
    return dim;
}

/*
 * Code segment that launches the CUDA kernel attached to a task.
 *
 * executor: provides maxThreadPerBlock (used to cap each block dimension)
 *           and the kernelParams array handed to cuLaunchKernel.
 * task:     provides the kernel handle (task->function), the iterate flag,
 *           and, for iterated tasks, the MultiDimIterator extents x/y/z.
 * next:     continuation; it is not referenced in the part of the segment
 *           shown here.
 *
 * NOTE(review): the `...` after the if-block stands for code that is not
 * shown (presumably the non-iterated launch path and the transfer to the
 * next code segment) — confirm against the full source.
 */
__code execCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) {
    // Only tasks flagged as iterated (data-parallel) take this launch path.
    if (task->iterate) {
        struct MultiDimIterator* iterator = &task->iterator->iterator->MultiDimIterator;

        // Block size per axis: the iterator extent, capped at executor->maxThreadPerBlock.
        // NOTE(review): the cap is applied to each axis separately; if
        // maxThreadPerBlock is the device's total threads-per-block limit, the
        // product of the three block dimensions can exceed it — confirm the
        // extents callers pass.
        int blockDimX = computeblockDim(iterator->x, executor->maxThreadPerBlock);
        int blockDimY = computeblockDim(iterator->y, executor->maxThreadPerBlock);
        int blockDimZ = computeblockDim(iterator->z, executor->maxThreadPerBlock);

        // Grid size per axis is extent / block size; the launch passes 0 bytes
        // of dynamic shared memory, a NULL stream handle, the arguments in
        // executor->kernelParams, and no `extra` options.
        // NOTE(review): assuming the extents are integers, the division
        // truncates, so an extent above the cap that is not a multiple of it
        // leaves the remainder uncovered, and an extent of 0 gives a block
        // size of 0 and a division by zero here — confirm callers never pass
        // such extents.
        checkCudaErrors(cuLaunchKernel(task->function,
                    iterator->x/blockDimX, iterator->y/blockDimY, iterator->z/blockDimZ,
                    blockDimX, blockDimY, blockDimZ,
                    0, NULL, (void**)executor->kernelParams, NULL));
    }
    ...
}