annotate src/parallel_execution/CUDAtwice.cbc @ 303:1dbaef86593b

CUDAtwice.cbc
author ikkun
date Mon, 13 Feb 2017 18:23:29 +0900
parents 8e7926f3e271
children ae4f6aa427f5
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
92
851da1107223 implement twice
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 87
diff changeset
1 #include <stdio.h>
851da1107223 implement twice
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 87
diff changeset
2
851da1107223 implement twice
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 87
diff changeset
3 #include "context.h"
851da1107223 implement twice
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 87
diff changeset
4 #include "origin_cs.h"
851da1107223 implement twice
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 87
diff changeset
5
257
cd3486e4ba70 fix make error of twice
mir3636
parents: 248
diff changeset
6 __code twice(struct Context* context, struct LoopCounter* loopCounter, int index, int prefix, int* array, struct Context* workerContext) {
95
3e28ee215c0e modify twice, use OSAtomiceCompareAndSwap
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 92
diff changeset
7 int i = loopCounter->i;
3e28ee215c0e modify twice, use OSAtomiceCompareAndSwap
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 92
diff changeset
8 if (i < prefix) {
3e28ee215c0e modify twice, use OSAtomiceCompareAndSwap
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 92
diff changeset
9 array[i+index*prefix] = array[i+index*prefix]*2;
3e28ee215c0e modify twice, use OSAtomiceCompareAndSwap
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 92
diff changeset
10 loopCounter->i++;
3e28ee215c0e modify twice, use OSAtomiceCompareAndSwap
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 92
diff changeset
11
193
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 189
diff changeset
12 goto meta(context, C_twice);
95
3e28ee215c0e modify twice, use OSAtomiceCompareAndSwap
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 92
diff changeset
13 }
3e28ee215c0e modify twice, use OSAtomiceCompareAndSwap
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 92
diff changeset
14
3e28ee215c0e modify twice, use OSAtomiceCompareAndSwap
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 92
diff changeset
15 loopCounter->i = 0;
248
1ede5390cda2 Fix segmentation fault but not multi thread running
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents: 247
diff changeset
16 goto meta(workerContext, workerContext->next);
92
851da1107223 implement twice
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 87
diff changeset
17 }
851da1107223 implement twice
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 87
diff changeset
18
851da1107223 implement twice
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 87
diff changeset
19 __code twice_stub(struct Context* context) {
257
cd3486e4ba70 fix make error of twice
mir3636
parents: 248
diff changeset
20 struct Context* workerContext = context->worker->worker->CPUWorker.context;
303
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
21
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
22 // memory allocate
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
23 CUdeviceptr devA;
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
24 CUdeviceptr devB[num_exec];
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
25 CUdeviceptr devOut[num_exec];
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
26
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
27 checkCudaErrors(cuMemAlloc(&devA, LENGTH*THREAD*sizeof(float)));
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
28 for (int i=0;i<num_exec;i++) {
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
29 checkCudaErrors(cuMemAlloc(&devB[i], sizeof(float)));
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
30 checkCudaErrors(cuMemAlloc(&devOut[i], LENGTH*THREAD*sizeof(float)));
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
31 }
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
32
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
33 //twiceカーネルが定義されてなければそれをロードする
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
34 checkCudaErrors(cuModuleLoad(&module, "multiply.ptx"));
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
35 checkCudaErrors(cuModuleGetFunction(&function, module, "multiply"));
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
36
275
06dab015a54d GPUWorker
ikkun
parents: 257
diff changeset
37 //入力のDataGearをGPUにbuffer経由で送る
303
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
38 // Synchronous data transfer(host to device)
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
39 checkCudaErrors(cuMemcpyHtoD(devA, A, LENGTH*THREAD*sizeof(float)));
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
40
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
41 // Asynchronous launch kernel
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
42 for (int i=0;i<num_exec;i++,cur++) {
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
43 if (num_stream <= cur)
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
44 cur=0;
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
45 //B[i] = (float)(i+1);
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
46 //cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]);
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
47 void* args[] = {&devA, &devB[i], &devOut[i]};
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
48 checkCudaErrors(cuLaunchKernel(function,
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
49 LENGTH, 1, 1,
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
50 THREAD, 1, 1,
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
51 0, num_stream ? stream[cur] : NULL , args, NULL));
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
52 //cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]);
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
53 }
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
54
275
06dab015a54d GPUWorker
ikkun
parents: 257
diff changeset
55 //結果を取ってくるコマンドを入力する
06dab015a54d GPUWorker
ikkun
parents: 257
diff changeset
56 //コマンドの終了待ちを行う
303
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
57 // Asynchronous data transfer(device to host)
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
58 for (int i=0;i<num_exec;i++,cur++) {
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
59 if (num_stream <= cur)
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
60 cur = 0;
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
61 if (num_stream) {
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
62 checkCudaErrors(cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]));
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
63 } else {
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
64 checkCudaErrors(cuMemcpyDtoH(result[i], devOut[i], LENGTH*THREAD*sizeof(float)));
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
65 }
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
66 }
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
67
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
68 // wait for stream
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
69 for (int i=0;i<num_stream;i++)
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
70 checkCudaErrors(cuStreamSynchronize(stream[i]));
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
71 // Asynchronous data transfer(device to host)
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
72 for (int i=0;i<num_exec;i++,cur++) {
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
73 if (num_stream <= cur)
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
74 cur = 0;
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
75 if (num_stream) {
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
76 checkCudaErrors(cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]));
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
77 } else {
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
78 checkCudaErrors(cuMemcpyDtoH(result[i], devOut[i], LENGTH*THREAD*sizeof(float)));
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
79 }
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
80 }
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
81
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
82 // wait for stream
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
83 for (int i=0;i<num_stream;i++)
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
84 checkCudaErrors(cuStreamSynchronize(stream[i]));
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
85
1dbaef86593b CUDAtwice.cbc
ikkun
parents: 302
diff changeset
86
275
06dab015a54d GPUWorker
ikkun
parents: 257
diff changeset
87 //continationにそってGPUworkerに戻る
248
1ede5390cda2 Fix segmentation fault but not multi thread running
Tatsuki IHA <e125716@ie.u-ryukyu.ac.jp>
parents: 247
diff changeset
88 goto twice(context, Gearef(context, LoopCounter), 0, 0, NULL, workerContext);
92
851da1107223 implement twice
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents: 87
diff changeset
89 }