Mercurial > hg > GearsTemplate
changeset 301:609bf62768b9
add -DUSE_CUDA=1 flag to cmake
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 12 Feb 2017 12:35:11 +0900 (2017-02-12) |
parents | 8bbc0012e1a4 |
children | 8e7926f3e271 |
files | src/parallel_execution/CMakeLists.txt src/parallel_execution/GPUtwice.cu src/test/helper_cuda.h src/test/multiply.cu src/test/twice.cc |
diffstat | 5 files changed, 37 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/src/parallel_execution/CMakeLists.txt Sun Feb 12 09:12:21 2017 +0900 +++ b/src/parallel_execution/CMakeLists.txt Sun Feb 12 12:35:11 2017 +0900 @@ -1,11 +1,19 @@ cmake_minimum_required(VERSION 2.8) +set(USE_CUDA,0) + # -DUSE_CUDA # add_definitions("-Wall -g -O") add_definitions("-Wall -g") set(CMAKE_C_COMPILER $ENV{CBC_COMPILER}) - + +if (${USE_CUDA}) + set(NVCCFLAG "-std=c++11" "-g" "-O0" ) + set(CUDA_LINK_FLAGS "-framework CUDA -lc++ -Wl,-search_paths_first -Wl,-headerpad_max_install_names /Developer/NVIDIA/CUDA-8.0/lib/libcudart_static.a -Wl,-rpath,/usr/local/cuda/lib") + find_package(CUDA REQUIRED) +endif() + macro( GearsCommand ) set( _OPTIONS_ARGS ) set( _ONE_VALUE_ARGS TARGET ) @@ -21,6 +29,13 @@ DEPENDS ${i} COMMAND "perl" "generate_stub.pl" "-o" ${j} ${i} ) + elseif (${i} MATCHES "\\.cu") + string(REGEX REPLACE "(.*).cbc" "c/\\1.ptx" j ${i}) + add_custom_command ( + OUTPUT ${j} + DEPENDS ${i} + COMMAND nvcc ${NVCCFLAG} -c -ptx -o ${j} ${i} + ) else() set(j ${i}) endif() @@ -43,6 +58,15 @@ main.cbc RedBlackTree.cbc compare.c SingleLinkedStack.cbc CPUWorker.cbc time.cbc twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc SemaphoreImpl.cbc ) +if (${USE_CUDA}) + GearsCommand( + TARGET + GPUtwice + SOURCES + main.cbc RedBlackTree.cbc compare.c SingleLinkedStack.cbc CPUWorker.cbc time.cbc twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc SemaphoreImpl.cbc GPUWorker.cbc GPUtwice.cu + ) +endif() + GearsCommand( TARGET queue_test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parallel_execution/GPUtwice.cu Sun Feb 12 12:35:11 2017 +0900
@@ -0,0 +1,8 @@
+extern "C" {
+ __global__ void multiply(float* A, float* B, float* C) {
+// printf("%d %d\n",i[0],i[1]);
+ int index = blockIdx.x * blockDim.x + threadIdx.x;
+ C[index] = A[index] * B[0];
+ }
+
+}
--- a/src/test/helper_cuda.h Sun Feb 12 09:12:21 2017 +0900 +++ b/src/test/helper_cuda.h Sun Feb 12 12:35:11 2017 +0900 @@ -292,7 +292,7 @@ #ifdef __cuda_cuda_h__ // CUDA Driver API errors -static const char *_cudaGetErrorEnum(CUresult error) +const char *_cudaGetErrorEnum(CUresult error) { switch (error) {
--- a/src/test/multiply.cu Sun Feb 12 09:12:21 2017 +0900
+++ b/src/test/multiply.cu Sun Feb 12 12:35:11 2017 +0900
@@ -1,5 +1,5 @@
 extern "C" {
- __global__ void multiply(float* A, float* B, float* C,int* i) {
+ __global__ void multiply(float* A, float* B, float* C) {
 // printf("%d %d\n",i[0],i[1]);
  int index = blockIdx.x * blockDim.x + threadIdx.x;
  C[index] = A[index] * B[0];
--- a/src/test/twice.cc Sun Feb 12 09:12:21 2017 +0900 +++ b/src/test/twice.cc Sun Feb 12 12:35:11 2017 +0900 @@ -112,13 +112,13 @@ for (int i=0;i<num_exec;i++,cur++) { if (num_stream <= cur) cur=0; - B[i] = (float)(i+1); + //B[i] = (float)(i+1); //cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]); void* args[] = {&devA, &devB[i], &devOut[i]}; checkCudaErrors(cuLaunchKernel(function, LENGTH, 1, 1, THREAD, 1, 1, - 0, stream[cur], args, NULL)); + 0, num_stream ? stream[cur] : NULL , args, NULL)); //cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]); }