Members/Moririn: src/parallel_execution/cuda.c comparison

comparison src/parallel_execution/cuda.c @ 410:85b0ddbf458e

Fix CudaWorker

author	Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
date	Thu, 14 Sep 2017 02:35:20 +0900
parents	c5cd9888bf2a
children	0eba9a04633f

comparison

equal deleted inserted replaced

-:4d1e3697a6b8
+:85b0ddbf458e
 #include <cuda_runtime.h>
 #include <cuda.h>
 #include "helper_cuda.h"
 #include "pthread.h"
-// #include "context.h"
+#include "context.h"
+/*
 struct Context {
 int next;
 struct Worker* worker;
 struct TaskManager* taskManager;
 int codeNum;
 void* heapStart;
 void* heap;
 long heapLimit;
 int dataNum;
 int idgCount; //number of waiting dataGear
+int idg;
+int maxIdg;
 int odg;
 int maxOdg;
 int workerId;
+struct Context* task;
+struct Queue* tasks;
 int num_exec;
 CUmodule module;
 CUfunction function;
 union Data **data;
+// multi dimension parameter
+int iterate;
+struct Iterator* iterator;
 };
 struct CUDAWorker {
 CUdevice device;
 CUcontext cuCtx;
 pthread_t thread;
 struct Context* context;
 int id;
 struct Queue* tasks;
 int runFlag;
 int next;
 int num_stream;
 CUstream *stream;
 } CUDAWorker;
 struct LoopCounter {
 int i;
 } LoopCounter;
 struct Array {
 int size;
 int index;
 int prefix;
 int* array;
 } Array;
+*/
 /*
  * Prepare the CUDA driver API for a worker.
  *
  * cudaWorker: worker whose num_stream, device and cuCtx fields are filled in.
  * phase:      when 0, the driver is initialized, device 0 is looked up and a
  *             context is created with CU_CTX_SCHED_SPIN; for any other value
  *             those three driver calls are skipped.
  *
  * num_stream is set to 1 on every call. Stream allocation and creation are
  * still disabled; the original code is kept below as a comment.
  */
 void cudaInit(struct CUDAWorker *cudaWorker,int phase) {
     cudaWorker->num_stream = 1; // number of stream
     //    cudaWorker->stream = NEWN(cudaWorker->num_stream, CUstream );

     if (phase == 0) {
         // Driver start-up, device lookup and context creation all belong to
         // phase 0, so they share a single guard.
         checkCudaErrors(cuInit(0));
         checkCudaErrors(cuDeviceGet(&cudaWorker->device, 0));
         checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device));
     }

     //    if (cudaWorker->num_stream) {
     //        for (int i=0;i<cudaWorker->num_stream;i++)
     //            checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0));
     //    }
     printf("cuda Init: Done\n");
 }
-void CUDAExec(struct Context* context, struct Array* array, struct LoopCounter *loopCounter) {
+void CUDAExec(struct Context* context, struct Array* array) {
+printf("cuda exec start\n");
 // Worker *worker = context->worker;
 // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;
 // memory allocate
 CUdeviceptr devA;
-CUdeviceptr devLoopCounter;
-checkCudaErrors(cuMemAlloc(&devA, array->size));
+checkCudaErrors(cuMemAlloc(&devA, sizeof(int)*array->size));
-checkCudaErrors(cuMemAlloc(&devLoopCounter, sizeof(LoopCounter)));
 //twiceカーネルが定義されてなければそれをロードする
-checkCudaErrors(cuModuleLoad(&context->module, "c/CUDAtwice.ptx"));
+checkCudaErrors(cuModuleLoad(&context->module, "c/examples/twice/CUDAtwice.ptx"));
 checkCudaErrors(cuModuleGetFunction(&context->function, context->module, "twice"));
 //入力のDataGearをGPUにbuffer経由で送る
 // Synchronous data transfer(host to device)
-checkCudaErrors(cuMemcpyHtoD(devLoopCounter, loopCounter, sizeof(LoopCounter)));
 checkCudaErrors(cuMemcpyHtoD(devA, array->array, array->size));
 // Asynchronous launch kernel
 context->num_exec = 1;
-void* args[] = {&devLoopCounter,&array->index,&array->prefix,&devA};
+void* args[] = {&devA};
-checkCudaErrors(cuLaunchKernel(context->function,
+if (context->iterate) {
-1, 1, 1,
+struct MultiDimIterator* iterator = &context->iterator->iterator->MultiDimIterator;
-1, 1, 1,
+checkCudaErrors(cuLaunchKernel(context->function,
-0, NULL, args, NULL));
+iterator->x, iterator->y, iterator->z,
+1, 1, 1,
+0, NULL, args, NULL));
+} else {
+checkCudaErrors(cuLaunchKernel(context->function,
+1, 1, 1,
+1, 1, 1,
+0, NULL, args, NULL));
+}
 //結果を取ってくるコマンドを入力する
 //コマンドの終了待ちを行う
 checkCudaErrors(cuMemcpyDtoH(array->array, devA, array->size));
 // wait for stream
 checkCudaErrors(cuCtxSynchronize());
 }
 /*
  * Tear down the CUDA state held by a worker.
  *
  * worker: its cuCtx context is destroyed with cuCtxDestroy; the call is
  *         wrapped in checkCudaErrors.
  *
  * Per-stream destruction is currently disabled; the loop is kept below as a
  * comment.
  */
 void cudaShutdown( struct CUDAWorker *worker) {
 //    for (int i=0;i<worker->num_stream;i++)
 //        checkCudaErrors(cuStreamDestroy(worker->stream[i]));
 checkCudaErrors(cuCtxDestroy(worker->cuCtx));
 }

Mercurial > hg > Members > Moririn

comparison src/parallel_execution/cuda.c @ 410:85b0ddbf458e