changeset 317:51aa65676e37
CUDAtwice.cbc is called now
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp>
---|---
date | Wed, 15 Feb 2017 16:28:25 +0900
parents | 54d203daf06b (diff), faa746c449c6 (current diff)
children | 054c47e6ca20
files | src/parallel_execution/CUDAWorker.cbc, src/parallel_execution/TaskManagerImpl.cbc, src/parallel_execution/main.cbc
diffstat | 5 files changed, 65 insertions(+), 21 deletions(-)
--- a/src/parallel_execution/CUDAWorker.cbc Wed Feb 15 15:09:03 2017 +0900
+++ b/src/parallel_execution/CUDAWorker.cbc Wed Feb 15 16:28:25 2017 +0900
@@ -13,37 +13,46 @@
 #include "../context.h"
 
 static void start_CUDAworker(Worker* worker);
-static void cudaInit(struct CUDAWorker *cudaWorker) ;
+static void cudaInit(struct CUDAWorker *cudaWorker,int phase) ;
 
-static int cuda_initialized = 0;
+volatile int cuda_initialized = 0;
 
-Worker* createCUDAWorker(struct Context* context, int id, Queue* queue) {
+Worker* createCUDAWorker(struct Context* context, int id, Queue* queue, TaskManagerImpl *im) {
     struct Worker* worker = ALLOC(context, Worker);
     struct CUDAWorker* cudaWorker = new CUDAWorker();
+
+    cudaInit(cudaWorker,0);
+
     worker->worker = (union Data*)cudaWorker;
     worker->tasks = queue;
     cudaWorker->id = id;
     worker->shutdown = C_shutdownCUDAWorker;
-    pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&start_CUDAworker, worker);
+    // pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&start_CUDAworker, worker);
+    if (im) {
+        im->workers[0] = worker;
+    }
+    cuda_initialized = 1;
+    start_CUDAworker(worker);
     return worker;
 }
 
-static void cudaInit(struct CUDAWorker *cudaWorker) {
+static void cudaInit(struct CUDAWorker *cudaWorker,int phase) {
     // initialize and load kernel
     cudaWorker->num_stream = 1; // number of stream
     //    cudaWorker->stream = NEWN(cudaWorker->num_stream, CUstream );
-printf("cudaInit 1\n");
+    if (phase==0)
         checkCudaErrors(cuInit(0));
+    if (phase==0)
         checkCudaErrors(cuDeviceGet(&cudaWorker->device, 0));
-printf("cudaInit 2\n");
+    if (phase==0)
         checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device));
-printf("cudaInit 3\n");
     //    if (cudaWorker->num_stream) {
     //        for (int i=0;i<cudaWorker->num_stream;i++)
     //            checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0));
     //    }
-    cuda_initialized = 1;
-printf("cudaInit done\n");
+    CUdeviceptr devA;
+    checkCudaErrors(cuMemAlloc(&devA, 16));
+
 }
 
 static void start_CUDAworker(Worker* worker) {
@@ -68,10 +77,10 @@
 
 __code getTaskCUDA(struct Worker* worker, struct Context* task) {
     if (!task) return; // end thread
-    if (cuda_initialized==0) {
-        CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;
-        cudaInit(cudaWorker);
-    }
+//    if (cuda_initialized==0 || 1) {
+//        CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;
+//        cudaInit(cudaWorker,1);
+//    }
     worker->taskReceive = C_taskReceiveCUDAWorker;
     task->worker = worker;
     enum Code taskCg = task->next;
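
This hunk moves the CUDA driver setup onto the caller's thread: cudaInit() now runs in phase 0 inside createCUDAWorker(), the volatile flag cuda_initialized is raised once the context exists, and start_CUDAworker() is called directly instead of being spawned with pthread_create. For readers new to the driver API, a minimal standalone sketch of that phase-0 setup follows; the local check() is an assumption standing in for the repository's checkCudaErrors, and the CUDAWorker fields are omitted.

```c
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>

/* local substitute for the repository's checkCudaErrors() */
static void check(CUresult r, const char *what) {
    if (r != CUDA_SUCCESS) {
        const char *msg = "unknown error";
        cuGetErrorString(r, &msg);
        fprintf(stderr, "%s failed: %s\n", what, msg);
        exit(1);
    }
}

int main(void) {
    CUdevice device;
    CUcontext ctx;
    CUdeviceptr devA;

    /* the phase==0 work of cudaInit(): driver, device, context */
    check(cuInit(0), "cuInit");
    check(cuDeviceGet(&device, 0), "cuDeviceGet");
    check(cuCtxCreate(&ctx, CU_CTX_SCHED_SPIN, device), "cuCtxCreate");

    /* the diff also allocates a small dummy buffer at this point,
       presumably to make sure the context is fully usable before
       the worker loop starts */
    check(cuMemAlloc(&devA, 16), "cuMemAlloc");

    check(cuMemFree(devA), "cuMemFree");
    check(cuCtxDestroy(ctx), "cuCtxDestroy");
    return 0;
}
```

Build with the CUDA driver library (for example `cc init_sketch.c -lcuda`); the context created here plays the role of cudaWorker->cuCtx in the changeset.
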
--- a/src/parallel_execution/CUDAtwice.cbc Wed Feb 15 15:09:03 2017 +0900
+++ b/src/parallel_execution/CUDAtwice.cbc Wed Feb 15 16:28:25 2017 +0900
@@ -18,7 +18,7 @@
     checkCudaErrors(cuMemAlloc(&devLoopCounter, sizeof(LoopCounter)));
 
     // load the twice kernel if it has not been defined yet
-    checkCudaErrors(cuModuleLoad(&context->module, "CUDAtwice.ptx"));
+    checkCudaErrors(cuModuleLoad(&context->module, "c/CUDAtwice.ptx"));
     checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "twice"));
 
     printf("CUdA Exe 2\n");
@@ -38,7 +38,9 @@
 
     // enqueue the command that fetches the result
     // and wait for the command to finish
     checkCudaErrors(cuMemcpyDtoH(array->array, devA, array->size));
+    // wait for stream
+    checkCudaErrors(cuCtxSynchronize());
 }
 
 __code CUDAtwice(struct Context* context, struct LoopCounter* loopCounter, int index, int prefix, int* array, struct Context* workerContext) {
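
Besides pointing cuModuleLoad at c/CUDAtwice.ptx, this hunk adds a cuCtxSynchronize() after the device-to-host copy, so the host does not continue until the kernel and the copy have finished. A self-contained sketch of the same load/launch/copy/synchronize sequence follows; the single int* kernel argument is an assumption for illustration (the kernel in the repository takes more parameters), and CHK stands in for checkCudaErrors.

```c
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>

#define CHK(call) do { CUresult r_ = (call); \
    if (r_ != CUDA_SUCCESS) { fprintf(stderr, #call " failed (%d)\n", r_); exit(1); } } while (0)

int main(void) {
    CUdevice dev; CUcontext ctx;
    CUmodule module; CUfunction function;
    CUdeviceptr devA;
    int array[256];
    size_t bytes = sizeof(array);

    for (int i = 0; i < 256; i++) array[i] = i;

    CHK(cuInit(0));                                  /* minimal context setup */
    CHK(cuDeviceGet(&dev, 0));
    CHK(cuCtxCreate(&ctx, 0, dev));

    CHK(cuModuleLoad(&module, "c/CUDAtwice.ptx"));   /* PTX path as in the diff */
    CHK(cuModuleGetFunction(&function, module, "twice"));

    CHK(cuMemAlloc(&devA, bytes));
    CHK(cuMemcpyHtoD(devA, array, bytes));

    void *args[] = { &devA };                        /* one block of 256 threads */
    CHK(cuLaunchKernel(function, 1, 1, 1, 256, 1, 1, 0, NULL, args, NULL));

    CHK(cuMemcpyDtoH(array, devA, bytes));
    CHK(cuCtxSynchronize());                         /* wait for stream, as the diff adds */

    printf("array[1] = %d\n", array[1]);
    CHK(cuMemFree(devA));
    CHK(cuCtxDestroy(ctx));
    return 0;
}
```
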
--- a/src/parallel_execution/TaskManagerImpl.cbc Wed Feb 15 15:09:03 2017 +0900
+++ b/src/parallel_execution/TaskManagerImpl.cbc Wed Feb 15 16:28:25 2017 +0900
@@ -17,11 +17,11 @@
     taskManager->spawn = C_spawnTaskManager;
     taskManager->shutdown = C_shutdownTaskManager;
     struct TaskManagerImpl* taskManagerImpl = new TaskManagerImpl();
-    taskManager->taskManager = (union Data*)taskManagerImpl;
     taskManagerImpl -> activeQueue = createSingleLinkedQueue(context);
     taskManagerImpl -> taskQueue = createSingleLinkedQueue(context);
     taskManagerImpl -> numWorker = taskManager->maxCPU;
     createWorkers(context, taskManager, taskManagerImpl);
+    taskManager->taskManager = (union Data*)taskManagerImpl;
     return taskManager;
 }
 
@@ -35,7 +35,8 @@
     for (;i<taskManager->cpu;i++) {
 #ifdef USE_CUDAWorker
         Queue* queue = createSynchronizedQueue(context);
-        taskManagerImpl->workers[i] = (Worker*)createCUDAWorker(context, i, queue);
+        //    taskManagerImpl->workers[i] = (Worker*)createCUDAWorker(context, i, queue,0);
+        taskManagerImpl->workers[i] = (Worker*)queue;
 #else
         Queue* queue = createSynchronizedQueue(context);
         taskManagerImpl->workers[i] = (Worker*)createCPUWorker(context, i, queue);
@@ -48,9 +49,13 @@
 }
 
 __code createTask(struct TaskManager* taskManager) {
+    TaskManager *t = (TaskManager *)taskManager->taskManager;
+    TaskManagerImpl *im = (TaskManagerImpl *)t->taskManager;
+
     taskManager->context = NEW(struct Context);
     initContext(taskManager->context);
     taskManager->context->taskManager = taskManager;
+    struct Queue* tasks = im->workers[0]->tasks;
     goto meta(context, C_setWorker);
 }
 
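
The ordering change in createTaskManagerImpl matters because another thread now spins on taskManager->taskManager: the pointer is published only after the queues and worker slots exist, and under USE_CUDAWorker the workers[0] slot temporarily holds the raw Queue* so that main.cbc can attach the real CUDA worker later. Below is a minimal sketch of that publish-after-init idiom; the names are illustrative, not from the repository, and the changeset's reliance on volatile plus busy-waiting is reproduced as-is, where portable code would use atomics or a condition variable.

```c
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct Shared { int ready_value; };

static volatile struct Shared *published = NULL;   /* plays the role of taskManager->taskManager */

static void *setup(void *arg) {
    (void)arg;
    struct Shared *s = malloc(sizeof(*s));
    s->ready_value = 42;          /* do all of the initialization first          */
    published = s;                /* publish the pointer last, as the diff does  */
    return NULL;
}

int main(void) {
    pthread_t t;
    pthread_create(&t, NULL, setup, NULL);
    while (published == NULL) {}  /* same spin as while(taskManager->taskManager == 0) */
    printf("worker published %d\n", published->ready_value);
    pthread_join(t, NULL);
    return 0;
}
```
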
--- a/src/parallel_execution/context.h Wed Feb 15 15:09:03 2017 +0900
+++ b/src/parallel_execution/context.h Wed Feb 15 16:28:25 2017 +0900
@@ -102,7 +102,7 @@
     int i;
 } LoopCounter;
 struct TaskManager {
-    union Data* taskManager;
+    volatile union Data* taskManager;
     enum Code createTask;  // create NEW contexts for execution & argument
     enum Code spawn;       // start NEW context on the worker
     enum Code shutdown;
@@ -141,14 +141,14 @@
 } CPUWorker;
 #ifdef USE_CUDAWorker
 struct CUDAWorker {
+    CUdevice device;
+    CUcontext cuCtx;
     pthread_t thread;
     struct Context* context;
     int id;
     struct Queue* tasks;
     int runFlag;
     enum Code next;
-    CUdevice device;
-    CUcontext cuCtx;
     int num_stream;
     CUstream *stream;
 } CUDAWorker;
--- a/src/parallel_execution/main.cbc Wed Feb 15 15:09:03 2017 +0900
+++ b/src/parallel_execution/main.cbc Wed Feb 15 16:28:25 2017 +0900
@@ -27,15 +27,43 @@
     }
 }
 
+void *start_taskManager(struct Context *context) {
+    goto initDataGears(context, Gearef(context, LoopCounter), Gearef(context, TaskManager));
+    return 0;
+}
+
+#ifdef USE_CUDAWorker
+extern volatile int cuda_initialized;
+#endif
+
 __code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
     // loopCounter->tree = createRedBlackTree(context);
     loopCounter->i = 0;
     taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0);
+#ifdef USE_CUDAWorker
+    while(! cuda_initialized) {};
+#endif
     goto meta(context, C_createTask1);
 }
 
 __code initDataGears_stub(struct Context* context) {
-    goto initDataGears(context, Gearef(context, LoopCounter), Gearef(context, TaskManager));
+    struct TaskManager* taskManager = Gearef(context, TaskManager);
+    taskManager->taskManager = 0;
+#ifndef USE_CUDAWorker
+    struct LoopCounter* loopCounter = Gearef(context, LoopCounter);
+    goto initDataGears(context, loopCounter, taskManager);
+#else
+    cuda_initialized = 0;
+    pthread_t thread;
+    pthread_create(&thread, NULL, (void*)&start_taskManager, context);
+    while (taskManager->taskManager == 0);
+    TaskManager *t = (TaskManager*)taskManager->taskManager;
+    TaskManagerImpl *im = (TaskManagerImpl*)t->taskManager;
+    struct Queue *q = (Queue *)im->workers[0];
+    createCUDAWorker(context,0,q, im);
+    pthread_join(thread,0);
+    exit(0);
+#endif
 }
 
 __code code1(struct Time* time) {
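
Putting the pieces together, initDataGears_stub now runs the task manager on a new thread while keeping the CUDA worker on the original thread, and the two sides synchronize through the published taskManager->taskManager pointer and the cuda_initialized flag. The sketch below shows that two-flag choreography, extending the publish-then-spin idiom above; the function bodies are placeholders, not repository code.

```c
#include <pthread.h>
#include <stdio.h>

static volatile void *task_manager_impl = NULL;  /* plays the role of taskManager->taskManager */
static volatile int   cuda_initialized  = 0;     /* same name as the flag in CUDAWorker.cbc    */

static int impl_storage;                         /* stands in for the TaskManagerImpl          */

static void *start_taskManager(void *arg) {
    (void)arg;
    /* build queues and worker slots here ...                                   */
    task_manager_impl = &impl_storage;           /* publish once initialization is complete    */
    while (!cuda_initialized) {}                 /* the wait added to initDataGears            */
    /* ... then create and spawn tasks                                           */
    return NULL;
}

int main(void) {
    pthread_t thread;
    pthread_create(&thread, NULL, start_taskManager, NULL);

    while (task_manager_impl == NULL) {}         /* wait until the manager has published       */
    /* cuInit/cuDeviceGet/cuCtxCreate would run here, on the main thread          */
    cuda_initialized = 1;                        /* unblock the manager thread                 */
    /* start_CUDAworker(worker) would loop here, consuming the task queue         */

    pthread_join(thread, NULL);
    puts("both threads finished");
    return 0;
}
```

Keeping the CUDA context on the main thread while the task manager runs elsewhere is the point of the commit title: CUDAtwice.cbc is now actually reached by the worker that owns the context.
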