Mercurial > hg > Gears > GearsAgda
changeset 438:7679093bdd72
Work CUDAtwice
author | Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 06 Nov 2017 00:11:43 +0900 |
parents | 2c1b1d56bf1e |
children | eab6f8cd2820 |
files | src/parallel_execution/CMakeLists.txt src/parallel_execution/CUDAExecutor.cbc src/parallel_execution/context.h src/parallel_execution/examples/bitonicSort/bitonicSort.cbc src/parallel_execution/examples/twice/CUDAtwice.cu src/parallel_execution/examples/twice/createArray.cbc src/parallel_execution/examples/twice/main.cbc src/parallel_execution/examples/twice/twice.cbc |
diffstat | 8 files changed, 91 insertions(+), 69 deletions(-) [+] |
line wrap: on
line diff
--- a/src/parallel_execution/CMakeLists.txt Sat Nov 04 08:30:25 2017 +0900 +++ b/src/parallel_execution/CMakeLists.txt Mon Nov 06 00:11:43 2017 +0900 @@ -61,7 +61,7 @@ TARGET twice SOURCES - examples/twice/main.cbc examples/twice/createArray.cbc examples/twice/twice.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc TimeImpl.cbc MultiDimIterator.cbc + examples/twice/main.cbc examples/twice/createArray.cbc examples/twice/twice.cbc examples/twice/printArray.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc TimeImpl.cbc MultiDimIterator.cbc ) GearsCommand( @@ -83,7 +83,7 @@ TARGET CUDAtwice SOURCES - examples/twice/main.cbc examples/twice/twice.cbc examples/twice/CUDAtwice.cu examples/twice/createArray.cbc CPUWorker.cbc TimeImpl.cbc examples/twice/twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc CUDAWorker.cbc cuda.c MultiDimIterator.cbc + examples/twice/main.cbc examples/twice/twice.cbc examples/twice/CUDAtwice.cu examples/twice/createArray.cbc examples/twice/printArray.cbc CPUWorker.cbc TimeImpl.cbc examples/twice/twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc CUDAWorker.cbc cuda.c MultiDimIterator.cbc CudaExecutor.cbc ) set_target_properties(CUDAtwice PROPERTIES COMPILE_FLAGS "-Wall -g -DUSE_CUDAWorker=1") # -DUSE_CUDA_MAIN_THREAD
--- a/src/parallel_execution/CUDAExecutor.cbc Sat Nov 04 08:30:25 2017 +0900 +++ b/src/parallel_execution/CUDAExecutor.cbc Mon Nov 06 00:11:43 2017 +0900 @@ -41,8 +41,8 @@ if (task->iterate) { struct MultiDimIterator* iterator = &task->iterator->iterator->MultiDimIterator; checkCudaErrors(cuLaunchKernel(task->function, - iterator->x, iterator->y, iterator->z, - 1, 1, 1, + iterator->x/1024, iterator->y, iterator->z, + 1024, 1, 1, 0, NULL, (void**)executor->kernelParams, NULL)); } else { checkCudaErrors(cuLaunchKernel(task->function,
--- a/src/parallel_execution/context.h Sat Nov 04 08:30:25 2017 +0900 +++ b/src/parallel_execution/context.h Mon Nov 06 00:11:43 2017 +0900 @@ -75,7 +75,7 @@ // (SingleLinkedStack *)context->data[D_Stack]->Stack.stack->Stack.stack -#define GearImpl(context, intf, name) (Gearef(context, intf)->name->intf.name) +#define GearImpl(context, intf, name) (Gearef(context, intf)->name->intf.name) #include "c/enumCode.h" @@ -110,7 +110,7 @@ int num_exec; CUmodule module; CUfunction function; -#endif +#endif union Data **data; /* multi dimension parameter */ @@ -118,6 +118,10 @@ struct Iterator* iterator; }; +typedef int Int; +#ifndef USE_CUDAWorker +typedef unsigned long long CUdeviceptr; +#endif union Data { struct Meta { enum DataType type; @@ -140,7 +144,7 @@ } LoopCounter; struct TaskManager { #ifdef USE_CUDA_MAIN_THREAD - volatile + volatile #endif union Data* taskManager; enum Code spawn; // start NEW context on the worker @@ -234,7 +238,7 @@ union Data* stack; union Data* data; union Data* data1; - enum Code whenEmpty; + enum Code whenEmpty; enum Code clear; enum Code push; enum Code pop; @@ -259,9 +263,8 @@ struct Element* next; } Element; struct Array { - int size; - int prefix; - int* array; + int prefix; + Int* array; } Array; struct Tree { union Data* tree; @@ -278,7 +281,7 @@ struct Node* previous; // parent of reading node of original tree struct Node* newNode; // writing node of new tree struct Node* parent; - struct Node* grandparent; + struct Node* grandparent; struct Stack* nodeStack; int result; } RedBlackTree; @@ -359,19 +362,22 @@ CUdeviceptr** kernelParams; struct CUDABuffer* buffer; } CUDAExecutor; - CUdeviceptr CUdeviceptr; -#else - struct CUDAExecutor { - } CUDAExecutor; -#endif struct CUDABuffer { int inputLen; int outputLen; union Data** inputData; union Data** outputData; } CUDABuffer; + CUdeviceptr CUdeviceptr; +#else + struct CUDAExecutor { + } CUDAExecutor; + struct CUDABuffer { + } CUDABuffer; + CUdeviceptr CUdeviceptr; +#endif + Int Int; }; // union Data end this is necessary for context generator - typedef union Data Data; #include "c/typedefData.h"
--- a/src/parallel_execution/examples/bitonicSort/bitonicSort.cbc Sat Nov 04 08:30:25 2017 +0900 +++ b/src/parallel_execution/examples/bitonicSort/bitonicSort.cbc Mon Nov 06 00:11:43 2017 +0900 @@ -95,7 +95,7 @@ goto code2(); } -__code code2(struct LoopCounter* loopCounter, struct TaskManager* taskManager, struct Time* time) { +__code code2(struct LoopCounter* loopCounter, struct TaskManager* taskManager) { goto taskManager->shutdown(exit_code); }
--- a/src/parallel_execution/examples/twice/CUDAtwice.cu Sat Nov 04 08:30:25 2017 +0900 +++ b/src/parallel_execution/examples/twice/CUDAtwice.cu Mon Nov 06 00:11:43 2017 +0900 @@ -1,7 +1,12 @@ extern "C" { - __global__ void twice(int* array, int* prefixPtr) { + struct Array { + int prefix; + int* array; + } Array; + + __global__ void twice(int* array, struct Array* inputArray) { int i = 0; - int prefix = *prefixPtr; + int prefix = inputArray->prefix; C_twice: if (i < prefix) { array[i+(blockIdx.x*blockDim.x+threadIdx.x)*prefix] = array[i+(blockIdx.x*blockDim.x+threadIdx.x)*prefix]*2;
--- a/src/parallel_execution/examples/twice/createArray.cbc Sat Nov 04 08:30:25 2017 +0900 +++ b/src/parallel_execution/examples/twice/createArray.cbc Mon Nov 06 00:11:43 2017 +0900 @@ -1,24 +1,39 @@ #include <stdio.h> - #include "../../../context.h" extern int length; extern int split; -extern int* array_ptr; -__code createArray(__code next(struct Array* output, ...)) { +__code createArray(__code next(struct Array* output, struct Time* output1, ...), struct LoopCounter* loopCounter) { struct Array* output = *O_output; - output->prefix = length/split; - output->array = array_ptr; - output->size = length; + struct Time* output1 = *O_output1; + int i = loopCounter->i; + if (i == 0){ + output->array = (Int*)ALLOCATE_ARRAY(context, Int, length); + output->prefix = length/split; + } + if (i == GET_LEN(output->array)){ + printf("created Array\n"); + loopCounter->i = 0; + Gearef(context, Time)->time = (union Data*)output1; + Gearef(context, Time)->next = context->next; + *O_output = output; + *O_output1 = output1; + goto meta(context, output1->start); + } + output->array[i] = i; + loopCounter->i++; *O_output = output; - printf("created Array\n"); - goto meta(context, context->next); + *O_output1 = output1; + goto meta(context, C_createArray); } __code createArray_stub(struct Context* context) { Array** O_output = (struct Array **)&context->data[context->odg]; + Time** O_output1 = (struct Time**)&context->data[context->odg+1]; goto createArray(context, - context->next, - O_output); + context->next, + O_output, + O_output1, + Gearef(context, LoopCounter)); }
--- a/src/parallel_execution/examples/twice/main.cbc Sat Nov 04 08:30:25 2017 +0900 +++ b/src/parallel_execution/examples/twice/main.cbc Mon Nov 06 00:11:43 2017 +0900 @@ -66,39 +66,23 @@ /* puts("tree"); */ /* print_tree(context->data[Tree]->tree.root); */ /* puts("result"); */ - time->time = (union Data*)createTimeImpl(context); - time->next = C_createTask1; - goto meta(context, time->time->Time.start); -} - -__code code2(struct Time* time, struct TaskManager* taskManager) { - time->next = C_code3; - taskManager->next = time->time->Time.end; - goto meta(context, taskManager->taskManager->TaskManager.shutdown); + goto meta(context, C_createTask1); } -__code code3(struct LoopCounter* loopCounter) { - int i = loopCounter->i; - if (i < length) { - //printf("%d\n", array_ptr[i]); - if (array_ptr[i] == (i*2)) { - loopCounter->i++; - goto meta(context, C_code3); - } else - puts("wrong result"); - } +__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) { + Array* array1 = &ALLOCATE_DATA_GEAR(context, Array)->Array; + Array* array2 = &ALLOCATE_DATA_GEAR(context, Array)->Array; + Time* time = createTimeImpl(context); - goto meta(context, C_exit_code); + par goto createArray(array1, time, __exit); + par goto twice(array1, array2, iterate(split), __exit); + par goto printArray(array2, time, __exit); + goto code2(); } -__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) { - Array* array = &ALLOCATE_DATA_GEAR(context, Array)->Array; - - par goto createArray(array, __exit); - - par goto twice(array, iterate(split), __exit); - goto code2(); +__code code2(struct LoopCounter* loopCounter, struct TaskManager* taskManager) { + goto taskManager->shutdown(exit_code); } void init(int argc, char** argv) { @@ -119,12 +103,6 @@ int main(int argc, char** argv) { init(argc, argv); - - array_ptr = NEWN(length, int); - - for(int i=0; i<length; i++) - array_ptr[i]=i; - struct Context* main_context = NEW(struct Context); initContext(main_context); main_context->next = C_initDataGears;
--- a/src/parallel_execution/examples/twice/twice.cbc Sat Nov 04 08:30:25 2017 +0900 +++ b/src/parallel_execution/examples/twice/twice.cbc Mon Nov 06 00:11:43 2017 +0900 @@ -3,10 +3,11 @@ #include "../../../context.h" #ifdef USE_CUDAWorker -extern void cudaExec(struct Context* context, Array* array); +extern void cudaLoadFunction(struct Context* context, char* filename, char* function); #endif -__code twice(struct Array* array, struct MultiDim* multiDim, __code next(...), struct LoopCounter* loopCounter) { +__code twice(struct Array* array, struct MultiDim* multiDim, __code next(struct Array* output, ...), struct LoopCounter* loopCounter) { + struct Array* output = *O_output; int i = loopCounter->i; int index = multiDim->x; if (i < array->prefix) { @@ -17,21 +18,38 @@ } loopCounter->i = 0; + output->array = array->array; goto meta(context, context->next); } __code twice_stub(struct Context* context) { #ifdef USE_CUDAWorker - struct Array* array = &context->data[context->idg]->Array; if (context->gpu) { - cudaExec(context, array); + Array* inputArray = &context->data[context->idg]->Array; + Array* outputArray = &context->data[context->odg]->Array; + CUDABuffer* buffer = &ALLOCATE(context, CUDABuffer)->CUDABuffer; + buffer->inputData = (union Data**)ALLOCATE_PTR_ARRAY(context, Array, 2); + buffer->inputData[0] = (union Data*)inputArray->array; + buffer->inputData[1] = (union Data*)inputArray; + buffer->outputData = NULL; + buffer->inputLen = 2; + buffer->outputLen = 0; //continuationにそってGPUworkerに戻る - goto meta(context, context->next); + outputArray->array = inputArray->array; + Executor* executor = context->worker->worker->CUDAWorker.executor; + executor->executor->CUDAExecutor.buffer = buffer; + cudaLoadFunction(context, "c/examples/twice/CUDAtwice.ptx", "twice"); + Gearef(context, Executor)->executor = (union Data*)executor; + Gearef(context, Executor)->task = context; + Gearef(context, Executor)->next = context->next; + goto meta(context, executor->read); } #endif + Array** O_output = (struct Array **)&context->data[context->odg]; goto twice(context, &context->data[context->idg]->Array, &context->data[context->idg+1]->MultiDim, context->next, + O_output, Gearef(context, LoopCounter)); }