Mercurial > hg > Game > Cerium
changeset 2012:9360e782a431 draft
Optimize data transfer. Not running.
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 01 Jul 2014 03:40:54 +0900 |
parents | faaea4e1ce1c |
children | d43c2b7932ea |
files | TaskManager/Cuda/CudaScheduler.cc TaskManager/Cuda/CudaScheduler.h example/cuda_fft/Makefile.def example/fft/main.cc |
diffstat | 4 files changed, 23 insertions(+), 25 deletions(-) [+] |
line wrap: on
line diff
--- a/TaskManager/Cuda/CudaScheduler.cc Wed Jun 11 17:22:17 2014 +0900 +++ b/TaskManager/Cuda/CudaScheduler.cc Tue Jul 01 03:40:54 2014 +0900 @@ -125,47 +125,38 @@ // parameter is passed as first kernel arg ret = cuMemcpyHtoDAsync(cudabuffer[cur].memin[param], nextTask->param(0), sizeof(memaddr)*nextTask->param_count, cudabuffer[cur].stream); if (ret!=0) { CudaTaskError(cudabuffer, cur, tasklist, ret); continue; } + cudabuffer[cur].kernelParams[param] = &cudabuffer[cur].memin[param]; param++; for(int i=0;i<nextTask->inData_count;i++) { ListElement *input_buf = nextTask->inData(i); if (input_buf->size==0) break; - createBuffer(&cudabuffer[cur], cudabuffer[cur].memin, param, input_buf->size); - if (ret!=0) { CudaTaskError(cudabuffer, cur, tasklist, ret); continue; } - ret = cuMemcpyHtoDAsync(cudabuffer[cur].memin[param], input_buf->addr, input_buf->size, cudabuffer[cur].stream); - if (ret!=0) { CudaTaskError(cudabuffer, cur, tasklist, ret); continue; } - + if (!transmitted[input_buf]) { + createBuffer(&cudabuffer[cur], cudabuffer[cur].memin, param, input_buf->size); + if (ret!=0) { CudaTaskError(cudabuffer, cur, tasklist, ret); continue; } + ret = cuMemcpyHtoDAsync(cudabuffer[cur].memin[param], input_buf->addr, input_buf->size, cudabuffer[cur].stream); + if (ret!=0) { CudaTaskError(cudabuffer, cur, tasklist, ret); continue; } + transmitted.insert(make_pair(input_buf, cudabuffer[cur].memin[param])); + } + cudabuffer[cur].kernelParams[param] = &(transmitted[input_buf]); param++; } + cudabuffer[cur].in_size = param; // +1 means param for(int i = 0; i<nextTask->outData_count;i++) { // set output data ListElement *output_buf = nextTask->outData(i); if (output_buf->size==0) break; - if (!flag[cur].flip) { // flip use memin for output + if (!transmitted[output_buf]) { createBuffer(&cudabuffer[cur], cudabuffer[cur].memout, i, output_buf->size); if (ret!=0) { CudaTaskError(cudabuffer, cur, tasklist, ret); continue; } - // enqueue later + 
transmitted.insert(make_pair(output_buf, cudabuffer[cur].memout[i])); } + cudabuffer[cur].kernelParams[param] = &(transmitted[output_buf]); param++; } cudabuffer[cur].out_size = param - cudabuffer[cur].in_size; // no buffer on flip, but flip use memout event - - if (!flag[cur].flip) { - for (int i = 0; i<cudabuffer[cur].in_size; i++) { - cudabuffer[cur].kernelParams[i] = &cudabuffer[cur].memin[i]; - } - for (int i = 0; i<cudabuffer[cur].out_size; i++) { - cudabuffer[cur].kernelParams[i+cudabuffer[cur].in_size] = &cudabuffer[cur].memout[i]; - } - } else { - for (int i = 0; i<cudabuffer[cur].in_size; i++) { - cudabuffer[cur].kernelParams[i] = &cudabuffer[cur].memin[i]; - } - } - - if (ret!=0) { CudaTaskError(cudabuffer , cur, tasklist, ret); continue; } } return cur; } @@ -201,6 +192,7 @@ // flip use memin buffer and memout event ret = cuMemcpyDtoHAsync(output_buf->addr, mem[i0], output_buf->size, cudabuffer[cur].stream); if (ret!=0) { CudaTaskError(cudabuffer, cur, tasklist, ret); continue; } + transmitted.erase(output_buf); } } return nextTask;
--- a/TaskManager/Cuda/CudaScheduler.h Wed Jun 11 17:22:17 2014 +0900 +++ b/TaskManager/Cuda/CudaScheduler.h Tue Jul 01 03:40:54 2014 +0900 @@ -7,9 +7,12 @@ #include "HTask.h" #include "TaskManager.h" #include <cuda.h> +#include <map> extern TaskObject cuda_task_list[MAX_TASK_OBJECT]; +using namespace std; + #define STAGE 8 class CudaScheduler : public MainScheduler { @@ -33,7 +36,6 @@ // platform は OpenCL が複数のメーカーの GPU に対応してるから必要 // Cuda の場合、NVIDIA だけなので必要ない? CUdevice device; - unsigned int ret_num_platforms; // たぶん要らない int ret_num_devices; CUcontext context; // command_queue command_queue; @@ -43,10 +45,13 @@ memaddr reply; // cl_kernel に相当 // 変数名は function にすべきか kernel にすべきか - // とりあえず、kernel で + // とりあえず、OpenCL に合わせて kernel で CUfunction kernel[STAGE]; CudaBuffer cudabuffer[STAGE]; + // record transmitted data. + map<ListElement*, CUdeviceptr> transmitted; + HTask::htask_flag flag[STAGE]; private:
--- a/example/cuda_fft/Makefile.def Wed Jun 11 17:22:17 2014 +0900 +++ b/example/cuda_fft/Makefile.def Tue Jul 01 03:40:54 2014 +0900 @@ -5,4 +5,4 @@ CC = clang++ NVCC = nvcc CFLAGS = -Wall $(OPT) -NVCCFLAGS = -ptx -arch=sm_20 \ No newline at end of file +NVCCFLAGS = -ptx -arch=sm_20 #-g -G \ No newline at end of file