Mercurial > hg > Game > Cerium
changeset 2067:0e2389a5ac4e draft
fix Cuda example
author | ikkun |
---|---|
date | Fri, 03 Feb 2017 19:09:16 +0900 |
parents | 53643db3f0f9 |
children | 45c230f52257 |
files | example/Cuda/Makefile example/Cuda/main.cc example/Cuda/multiply.cu |
diffstat | 3 files changed, 44 insertions(+), 26 deletions(-) [+] |
line wrap: on
line diff
--- a/example/Cuda/Makefile Wed Feb 17 17:49:13 2016 +0900 +++ b/example/Cuda/Makefile Fri Feb 03 19:09:16 2017 +0900 @@ -30,7 +30,7 @@ $(CC) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(LIBS) debug: $(TARGET) - sudo gdb ./$(TARGET) + sudo lldb ./$(TARGET) clean: rm -f $(TARGET) $(OBJS) $(TASK_OBJS) $(CUDA_OBJS)
--- a/example/Cuda/main.cc Wed Feb 17 17:49:13 2016 +0900 +++ b/example/Cuda/main.cc Fri Feb 03 19:09:16 2017 +0900 @@ -2,11 +2,28 @@ #include <sys/time.h> #include <string.h> #include <cuda.h> +#include <cuda_runtime.h> +#include <stdlib.h> -#define LENGTH 10 -#define THREAD 10 + +#define LENGTH (10) +#define THREAD (10) + +void +report_error(cudaError_t err, const char* file, int lineNo) { + fprintf(stderr, "[cudaError] %s (error code: %d) at %s line %d\n", cudaGetErrorString(err), err, file, lineNo); +} -static double +#define CUDA_CALL(func) \ + do { \ + if ((func) != CUDA_SUCCESS) { \ + cudaError_t err = cudaGetLastError(); \ + report_error(err, __FILE__, __LINE__); \ + exit(err); \ + } \ + } while(0) + +double getTime() { struct timeval tv; gettimeofday(&tv, NULL); @@ -47,23 +64,23 @@ CUfunction function; CUstream stream[num_stream]; - cuInit(0); - cuDeviceGet(&device, 0); - cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device); - cuModuleLoad(&module, "multiply.ptx"); - cuModuleGetFunction(&function, module, "multiply"); + CUDA_CALL(cuInit(0)); + CUDA_CALL(cuDeviceGet(&device, 0)); + CUDA_CALL(cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device)); + CUDA_CALL(cuModuleLoad(&module, "multiply.ptx")); + CUDA_CALL(cuModuleGetFunction(&function, module, "multiply")); for (int i=0;i<num_stream;i++) - cuStreamCreate(&stream[i],0); + CUDA_CALL(cuStreamCreate(&stream[i],0)); // memory allocate CUdeviceptr devA; CUdeviceptr devB[num_exec]; CUdeviceptr devOut[num_exec]; - cuMemAlloc(&devA, LENGTH*THREAD*sizeof(float)); + CUDA_CALL(cuMemAlloc(&devA, LENGTH*THREAD*sizeof(float))); for (int i=0;i<num_exec;i++) { - cuMemAlloc(&devB[i], sizeof(float)); - cuMemAlloc(&devOut[i], LENGTH*THREAD*sizeof(float)); + CUDA_CALL(cuMemAlloc(&devB[i], sizeof(float))); + CUDA_CALL(cuMemAlloc(&devOut[i], LENGTH*THREAD*sizeof(float))); } // input buffer @@ -80,7 +97,7 @@ result[i] = new float[LENGTH*THREAD]; // Synchronous data transfer(host to device) - cuMemcpyHtoD(devA, A, 
LENGTH*THREAD*sizeof(float)); + CUDA_CALL(cuMemcpyHtoD(devA, A, LENGTH*THREAD*sizeof(float))); // Asynchronous data transfer(host to device) int cur = 0; @@ -89,7 +106,7 @@ if (num_stream <= cur) cur = 0; B[i] = (float)(i+1); - cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]); + CUDA_CALL(cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur])); } cur = 0; @@ -101,10 +118,10 @@ B[i] = (float)(i+1); //cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]); void* args[] = {&devA, &devB[i], &devOut[i]}; - cuLaunchKernel(function, + CUDA_CALL(cuLaunchKernel(function, LENGTH, 1, 1, THREAD, 1, 1, - 0, stream[cur], args, NULL); + 0, stream[cur], args, NULL)); //cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]); } @@ -115,12 +132,12 @@ for (int i=0;i<num_exec;i++,cur++) { if (num_stream <= cur) cur = 0; - cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]); + CUDA_CALL(cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur])); } // wait for stream for (int i=0;i<num_stream;i++) - cuStreamSynchronize(stream[i]); + CUDA_CALL(cuStreamSynchronize(stream[i])); //printf("%0.6f\n",getTime()-start); @@ -128,15 +145,15 @@ check_data(A,(float)(i+1),result[i]); // memory release - cuMemFree(devA); + CUDA_CALL(cuMemFree(devA)); for (int i=0;i<num_exec;i++) { - cuMemFree(devB[i]); - cuMemFree(devOut[i]); + CUDA_CALL(cuMemFree(devB[i])); + CUDA_CALL(cuMemFree(devOut[i])); } for (int i=0;i<num_stream;i++) - cuStreamDestroy(stream[i]); - cuModuleUnload(module); - cuCtxDestroy(context); + CUDA_CALL(cuStreamDestroy(stream[i])); + CUDA_CALL(cuModuleUnload(module)); + CUDA_CALL(cuCtxDestroy(context)); delete[] A; delete[] B; @@ -146,3 +163,4 @@ return 0; } +//