annotate src/parallel_execution/CUDAExecutor.cbc @ 435:af0ec811b20e

Add CUDAExecutor
author Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
date Sat, 04 Nov 2017 04:14:36 +0900
parents
children 08a93fc2f0d3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
435
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 #include "../context.h"
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
2 #include <stdio.h>
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
3
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 // includes, project
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
5 #include <driver_types.h>
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 #include <cuda_runtime.h>
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
7 #include <cuda.h>
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 #include "helper_cuda.h"
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
9
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 Executor* createCUDAExecutor(struct Context* context) {
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
11 struct Executor* executor = new Executor();
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
12 struct CUDAExecutor* cudaExecutor = new CUDAExecutor();
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
13 executor->executor = (union Data*)cudaExecutor;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 executor->read = C_readCUDAExecutor;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
15 executor->exec = C_execCUDAExecutor;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
16 executor->write = C_writeCUDAExecutor;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
17 return executor;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 }
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
19
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 __code readCUDAExecutor(struct CUDAExecutor* executor, struct Context* task) {
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
21 int paramLen = buffer->inputLen + buffer->outputLen;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 struct CUDABuffer buffer = executor->buffer;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
23 buffer->kernelParams = ALLOCATE_PTR_ARRAY(context, CudevicePtr, paramLen);
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 struct CUDABuffer buffer = executor->buffer;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
25 CUdeviceptr* deviceptrs = ALLOCATE_ARRAY(context, CudevicePtr, paramLen);
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
26 for (int i = 0; i < paramLen; i++) {
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
27 CUdeviceptr deviceptr = deviceptrs[i];
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
28 // memory allocate
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
29 union Data* data = i < inputLen? buffer->inputData[i] : buffer->outputData[i-inputLen];
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
30 checkCUDAErrors(cuMemAlloc(deviceptr, GET_SIZE(data)));
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
31 checkCUDAErrors(cuMemcpyHtoD(deviceptr, data, GET_SIZE(data)));
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
32 // Synchronous data transfer(host to device)
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
33 buffer->kernelParams[paramCount++] = &deviceptr;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
34 }
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
35 }
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
36
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
37 void cudaLoadFunction(struct Context* context, char* filename, char* function) {
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
38 checkCUDAErrors(cuModuleLoad(&context->module, filename));
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
39 checkCUDAErrors(cuModuleGetFunction(&context->function, context->module, function));
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
40 }
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
41
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
42 __code execCUDAExecutor(struct CUDAExecutor* executor, struct Context* task) {
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
43 // Asynchronous launch kernel
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
44 task->num_exec = 1;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
45 struct CUDABuffer buffer = executor->buffer;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
46 if (task->iterate) {
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
47 struct MultiDimIterator* iterator = &task->iterator->iterator->MultiDimIterator;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
48 checkCUDAErrors(cuLaunchKernel(task->function,
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
49 iterator->x, iterator->y, iterator->z,
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
50 1, 1, 1,
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
51 0, NULL, (void**)buffer->kernelParams, NULL));
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
52 } else {
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
53 checkCUDAErrors(cuLaunchKernel(task->function,
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 1, 1, 1,
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
55 1, 1, 1,
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
56 0, NULL, (void**)buffer->kernelParams, NULL));
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
57 }
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
58 }
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
59
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
60 __code writeCUDAExecutor(struct CUDAExecutor* executor, struct Context* task) {
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
61 //結果を取ってくるコマンドを入力する
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
62 //コマンドの終了待ちを行う
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
63 int paramLen = buffer->inputLen + buffer->outputLen;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
64 struct CUDABuffer buffer = executor->buffer;
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
65 for (int i = 0; i < paramLen; i++) {
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
66 CUdeviceptr* deviceptr = buffer->kernelParams[i];
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
67 union Data* data = i < inputLen? buffer->inputData[i] : buffer->outputData[i-inputLen];
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
68 checkCUDAErrors(cuMemcpyDtoH(data, *deviceptr, GET_SIZE(data)));
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
69 cuMemFree(*deviceptr);
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
70 }
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
71 // wait for stream
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
72 checkCUDAErrors(cuCtxSynchronize());
af0ec811b20e Add CUDAExecutor
Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
73 }