changeset 438:7679093bdd72

Work CUDAtwice
author Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
date Mon, 06 Nov 2017 00:11:43 +0900
parents 2c1b1d56bf1e
children eab6f8cd2820
files src/parallel_execution/CMakeLists.txt src/parallel_execution/CUDAExecutor.cbc src/parallel_execution/context.h src/parallel_execution/examples/bitonicSort/bitonicSort.cbc src/parallel_execution/examples/twice/CUDAtwice.cu src/parallel_execution/examples/twice/createArray.cbc src/parallel_execution/examples/twice/main.cbc src/parallel_execution/examples/twice/twice.cbc
diffstat 8 files changed, 91 insertions(+), 69 deletions(-) [+]
line wrap: on
line diff
--- a/src/parallel_execution/CMakeLists.txt	Sat Nov 04 08:30:25 2017 +0900
+++ b/src/parallel_execution/CMakeLists.txt	Mon Nov 06 00:11:43 2017 +0900
@@ -61,7 +61,7 @@
   TARGET
       twice
   SOURCES
-  examples/twice/main.cbc examples/twice/createArray.cbc examples/twice/twice.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc TimeImpl.cbc MultiDimIterator.cbc
+      examples/twice/main.cbc examples/twice/createArray.cbc examples/twice/twice.cbc examples/twice/printArray.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc TimeImpl.cbc MultiDimIterator.cbc
 )
 
 GearsCommand(
@@ -83,7 +83,7 @@
       TARGET
           CUDAtwice
       SOURCES 
-        examples/twice/main.cbc examples/twice/twice.cbc examples/twice/CUDAtwice.cu examples/twice/createArray.cbc CPUWorker.cbc TimeImpl.cbc examples/twice/twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc CUDAWorker.cbc cuda.c MultiDimIterator.cbc
+      examples/twice/main.cbc examples/twice/twice.cbc examples/twice/CUDAtwice.cu examples/twice/createArray.cbc examples/twice/printArray.cbc CPUWorker.cbc TimeImpl.cbc examples/twice/twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc CUDAWorker.cbc cuda.c MultiDimIterator.cbc CudaExecutor.cbc
     )
     set_target_properties(CUDAtwice PROPERTIES COMPILE_FLAGS "-Wall -g -DUSE_CUDAWorker=1") # -DUSE_CUDA_MAIN_THREAD
 
--- a/src/parallel_execution/CUDAExecutor.cbc	Sat Nov 04 08:30:25 2017 +0900
+++ b/src/parallel_execution/CUDAExecutor.cbc	Mon Nov 06 00:11:43 2017 +0900
@@ -41,8 +41,8 @@
     if (task->iterate) {
         struct MultiDimIterator* iterator = &task->iterator->iterator->MultiDimIterator;
         checkCudaErrors(cuLaunchKernel(task->function,
-                    iterator->x, iterator->y, iterator->z,
-                    1, 1, 1,
+                    iterator->x/1024, iterator->y, iterator->z,
+                    1024, 1, 1,
                     0, NULL, (void**)executor->kernelParams, NULL));
     } else {
         checkCudaErrors(cuLaunchKernel(task->function,
--- a/src/parallel_execution/context.h	Sat Nov 04 08:30:25 2017 +0900
+++ b/src/parallel_execution/context.h	Mon Nov 06 00:11:43 2017 +0900
@@ -75,7 +75,7 @@
 
 // (SingleLinkedStack *)context->data[D_Stack]->Stack.stack->Stack.stack
 
-#define GearImpl(context, intf, name) (Gearef(context, intf)->name->intf.name) 
+#define GearImpl(context, intf, name) (Gearef(context, intf)->name->intf.name)
 
 #include "c/enumCode.h"
 
@@ -110,7 +110,7 @@
     int num_exec;
     CUmodule module;
     CUfunction function;
-#endif 
+#endif
     union Data **data;
 
     /* multi dimension parameter */
@@ -118,6 +118,10 @@
     struct Iterator* iterator;
 };
 
+typedef int Int;
+#ifndef USE_CUDAWorker
+typedef unsigned long long CUdeviceptr;
+#endif
 union Data {
     struct Meta {
         enum DataType type;
@@ -140,7 +144,7 @@
     } LoopCounter;
     struct TaskManager {
 #ifdef USE_CUDA_MAIN_THREAD
-        volatile 
+        volatile
 #endif
         union Data* taskManager;
         enum Code spawn;      // start NEW context on the worker
@@ -234,7 +238,7 @@
         union Data* stack;
         union Data* data;
         union Data* data1;
-        enum Code whenEmpty; 
+        enum Code whenEmpty;
         enum Code clear;
         enum Code push;
         enum Code pop;
@@ -259,9 +263,8 @@
         struct Element* next;
     } Element;
     struct Array {
-        int size; 
-        int prefix; 
-        int* array;
+        int prefix;
+        Int* array;
     } Array;
     struct Tree {
         union Data* tree;
@@ -278,7 +281,7 @@
         struct Node* previous; // parent of reading node of original tree
         struct Node* newNode; // writing node of new tree
         struct Node* parent;
-        struct Node* grandparent; 
+        struct Node* grandparent;
         struct Stack* nodeStack;
         int result;
     } RedBlackTree;
@@ -359,19 +362,22 @@
         CUdeviceptr** kernelParams;
         struct CUDABuffer* buffer;
     } CUDAExecutor;
-    CUdeviceptr CUdeviceptr;
-#else
-    struct CUDAExecutor {
-    } CUDAExecutor;
-#endif
     struct CUDABuffer {
         int inputLen;
         int outputLen;
         union Data** inputData;
         union Data** outputData;
     } CUDABuffer;
+    CUdeviceptr CUdeviceptr;
+#else
+    struct CUDAExecutor {
+    } CUDAExecutor;
+    struct CUDABuffer {
+    } CUDABuffer;
+    CUdeviceptr CUdeviceptr;
+#endif
+    Int Int;
 }; // union Data end       this is necessary for context generator
-
 typedef union Data Data;
 
 #include "c/typedefData.h"
--- a/src/parallel_execution/examples/bitonicSort/bitonicSort.cbc	Sat Nov 04 08:30:25 2017 +0900
+++ b/src/parallel_execution/examples/bitonicSort/bitonicSort.cbc	Mon Nov 06 00:11:43 2017 +0900
@@ -95,7 +95,7 @@
     goto code2();
 }
 
-__code code2(struct LoopCounter* loopCounter, struct TaskManager* taskManager, struct Time* time) {
+__code code2(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
     goto taskManager->shutdown(exit_code);
 }
 
--- a/src/parallel_execution/examples/twice/CUDAtwice.cu	Sat Nov 04 08:30:25 2017 +0900
+++ b/src/parallel_execution/examples/twice/CUDAtwice.cu	Mon Nov 06 00:11:43 2017 +0900
@@ -1,7 +1,12 @@
 extern "C" {
-    __global__ void twice(int* array, int* prefixPtr) {
+    struct Array {
+        int prefix;
+        int* array;
+    } Array;
+
+    __global__ void twice(int* array, struct Array* inputArray) {
         int i = 0;
-        int prefix = *prefixPtr;
+        int prefix = inputArray->prefix;
 C_twice:
         if (i < prefix) {
             array[i+(blockIdx.x*blockDim.x+threadIdx.x)*prefix] = array[i+(blockIdx.x*blockDim.x+threadIdx.x)*prefix]*2;
--- a/src/parallel_execution/examples/twice/createArray.cbc	Sat Nov 04 08:30:25 2017 +0900
+++ b/src/parallel_execution/examples/twice/createArray.cbc	Mon Nov 06 00:11:43 2017 +0900
@@ -1,24 +1,39 @@
 #include <stdio.h>
-
 #include "../../../context.h"
 
 extern int length;
 extern int split;
-extern int* array_ptr;
 
-__code createArray(__code next(struct Array* output, ...)) {
+__code createArray(__code next(struct Array* output, struct Time* output1, ...), struct LoopCounter* loopCounter) {
     struct Array* output = *O_output;
-    output->prefix = length/split;
-    output->array  = array_ptr;
-    output->size   = length;
+    struct Time* output1 = *O_output1;
+    int i = loopCounter->i;
+    if (i == 0){
+        output->array = (Int*)ALLOCATE_ARRAY(context, Int, length);
+        output->prefix = length/split;
+    }
+    if (i == GET_LEN(output->array)){
+        printf("created Array\n");
+        loopCounter->i = 0;
+        Gearef(context, Time)->time = (union Data*)output1;
+        Gearef(context, Time)->next = context->next;
+        *O_output = output;
+        *O_output1 = output1;
+        goto meta(context, output1->start);
+    }
+    output->array[i] = i;
+    loopCounter->i++;
     *O_output = output;
-    printf("created Array\n");
-    goto meta(context, context->next);
+    *O_output1 = output1;
+    goto meta(context, C_createArray);
 }
 
 __code createArray_stub(struct Context* context) {
     Array** O_output = (struct Array **)&context->data[context->odg];
+    Time** O_output1 = (struct Time**)&context->data[context->odg+1];
     goto createArray(context,
-                     context->next,
-                     O_output);
+            context->next,
+            O_output,
+            O_output1,
+            Gearef(context, LoopCounter));
 }
--- a/src/parallel_execution/examples/twice/main.cbc	Sat Nov 04 08:30:25 2017 +0900
+++ b/src/parallel_execution/examples/twice/main.cbc	Mon Nov 06 00:11:43 2017 +0900
@@ -66,39 +66,23 @@
     /* puts("tree"); */
     /* print_tree(context->data[Tree]->tree.root); */
     /* puts("result"); */
-    time->time = (union Data*)createTimeImpl(context);
-    time->next = C_createTask1;
-    goto meta(context, time->time->Time.start);
-}
-
-__code code2(struct Time* time, struct TaskManager* taskManager) {
-    time->next = C_code3;
-    taskManager->next = time->time->Time.end;
-    goto meta(context, taskManager->taskManager->TaskManager.shutdown);
+    goto meta(context, C_createTask1);
 }
 
-__code code3(struct LoopCounter* loopCounter) {
-    int i = loopCounter->i;
 
-    if (i < length) {
-        //printf("%d\n", array_ptr[i]);
-        if (array_ptr[i] == (i*2)) {
-            loopCounter->i++;
-            goto meta(context, C_code3);
-        } else
-            puts("wrong result");
-    }
+__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
+    Array* array1 = &ALLOCATE_DATA_GEAR(context, Array)->Array;
+    Array* array2 = &ALLOCATE_DATA_GEAR(context, Array)->Array;
+    Time* time = createTimeImpl(context);
 
-    goto meta(context, C_exit_code);
+    par goto createArray(array1, time, __exit);
+    par goto twice(array1, array2, iterate(split), __exit);
+    par goto printArray(array2, time, __exit);
+    goto code2();
 }
 
-__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    Array* array = &ALLOCATE_DATA_GEAR(context, Array)->Array;
-
-    par goto createArray(array, __exit);
-
-    par goto twice(array, iterate(split), __exit);
-    goto code2();
+__code code2(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
+    goto taskManager->shutdown(exit_code);
 }
 
 void init(int argc, char** argv) {
@@ -119,12 +103,6 @@
 
 int main(int argc, char** argv) {
     init(argc, argv);
-
-    array_ptr = NEWN(length, int);
-
-    for(int i=0; i<length; i++)
-        array_ptr[i]=i;
-
     struct Context* main_context = NEW(struct Context);
     initContext(main_context);
     main_context->next = C_initDataGears;
--- a/src/parallel_execution/examples/twice/twice.cbc	Sat Nov 04 08:30:25 2017 +0900
+++ b/src/parallel_execution/examples/twice/twice.cbc	Mon Nov 06 00:11:43 2017 +0900
@@ -3,10 +3,11 @@
 #include "../../../context.h"
 
 #ifdef USE_CUDAWorker
-extern void cudaExec(struct Context* context, Array* array);
+extern void cudaLoadFunction(struct Context* context, char* filename, char* function);
 #endif
 
-__code twice(struct Array* array, struct MultiDim* multiDim, __code next(...), struct LoopCounter* loopCounter) {
+__code twice(struct Array* array, struct MultiDim* multiDim, __code next(struct Array* output, ...), struct LoopCounter* loopCounter) {
+    struct Array* output = *O_output;
     int i = loopCounter->i;
     int index = multiDim->x;
     if (i < array->prefix) {
@@ -17,21 +18,38 @@
     }
 
     loopCounter->i = 0;
+    output->array = array->array;
     goto meta(context, context->next);
 }
 
 __code twice_stub(struct Context* context) {
 #ifdef USE_CUDAWorker
-    struct Array* array = &context->data[context->idg]->Array;
     if (context->gpu) {
-        cudaExec(context, array);
+        Array* inputArray  = &context->data[context->idg]->Array;
+        Array* outputArray = &context->data[context->odg]->Array;
+        CUDABuffer* buffer = &ALLOCATE(context, CUDABuffer)->CUDABuffer;
+        buffer->inputData = (union Data**)ALLOCATE_PTR_ARRAY(context, Array, 2);
+        buffer->inputData[0] = (union Data*)inputArray->array;
+        buffer->inputData[1] = (union Data*)inputArray;
+        buffer->outputData = NULL;
+        buffer->inputLen = 2;
+        buffer->outputLen = 0;
         //continuationにそってGPUworkerに戻る
-        goto meta(context, context->next);
+        outputArray->array = inputArray->array;
+        Executor* executor = context->worker->worker->CUDAWorker.executor;
+        executor->executor->CUDAExecutor.buffer = buffer;
+        cudaLoadFunction(context, "c/examples/twice/CUDAtwice.ptx", "twice");
+        Gearef(context, Executor)->executor = (union Data*)executor;
+        Gearef(context, Executor)->task = context;
+        Gearef(context, Executor)->next = context->next;
+        goto meta(context, executor->read);
     }
 #endif
+    Array** O_output = (struct Array **)&context->data[context->odg];
     goto twice(context,
                &context->data[context->idg]->Array,
                &context->data[context->idg+1]->MultiDim,
                context->next,
+               O_output,
                Gearef(context, LoopCounter));
 }