changeset 301:609bf62768b9

add -DUSE_CUDA=1 flag to cmake
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sun, 12 Feb 2017 12:35:11 +0900 (2017-02-12)
parents 8bbc0012e1a4
children 8e7926f3e271
files src/parallel_execution/CMakeLists.txt src/parallel_execution/GPUtwice.cu src/test/helper_cuda.h src/test/multiply.cu src/test/twice.cc
diffstat 5 files changed, 37 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/src/parallel_execution/CMakeLists.txt	Sun Feb 12 09:12:21 2017 +0900
+++ b/src/parallel_execution/CMakeLists.txt	Sun Feb 12 12:35:11 2017 +0900
@@ -1,11 +1,19 @@
 cmake_minimum_required(VERSION 2.8)
 
+# USE_CUDA: cache option, OFF by default; configure with -DUSE_CUDA=1 to enable the CUDA-only parts of this file.
+option(USE_CUDA "Build the CUDA (GPU) targets" OFF)
 # -DUSE_CUDA
 #  add_definitions("-Wall -g -O")
 add_definitions("-Wall -g")
 
 set(CMAKE_C_COMPILER $ENV{CBC_COMPILER})
- 
+
+if (USE_CUDA)  # tested by name, so the condition stays well-formed when USE_CUDA is unset or empty
+    set(NVCCFLAG "-std=c++11" "-g" "-O0" )  # list of flags handed to nvcc when compiling .cu sources
+    set(CUDA_LINK_FLAGS "-framework CUDA -lc++ -Wl,-search_paths_first -Wl,-headerpad_max_install_names /Developer/NVIDIA/CUDA-8.0/lib/libcudart_static.a -Wl,-rpath,/usr/local/cuda/lib")  # NOTE(review): macOS-style flags with a hard-coded CUDA 8.0 path
+    find_package(CUDA REQUIRED)  # configuration fails here if no CUDA toolkit is found
+endif()
+
 macro( GearsCommand )
     set( _OPTIONS_ARGS )
     set( _ONE_VALUE_ARGS TARGET )
@@ -21,6 +29,13 @@
                 DEPENDS   ${i} 
                 COMMAND  "perl" "generate_stub.pl" "-o" ${j} ${i}
             )
+        elseif (${i} MATCHES "\\.cu$")  # CUDA source: translate it to PTX with nvcc
+            string(REGEX REPLACE "(.*)\\.cu$" "c/\\1.ptx" j ${i})  # foo.cu -> c/foo.ptx, so the output never aliases the source
+            add_custom_command (
+                OUTPUT    ${j}
+                DEPENDS   ${i}
+                COMMAND  nvcc ${NVCCFLAG} -c -ptx -o ${j} ${i}
+            )
         else()
             set(j ${i})
         endif()
@@ -43,6 +58,15 @@
       main.cbc RedBlackTree.cbc compare.c SingleLinkedStack.cbc CPUWorker.cbc time.cbc twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc SemaphoreImpl.cbc
 )
 
+if (USE_CUDA)  # GPU example target, generated only when USE_CUDA is enabled
+    GearsCommand(
+      TARGET
+          GPUtwice
+      SOURCES
+          main.cbc RedBlackTree.cbc compare.c SingleLinkedStack.cbc CPUWorker.cbc time.cbc twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc SemaphoreImpl.cbc  GPUWorker.cbc GPUtwice.cu
+    )
+endif()
+
 GearsCommand(
   TARGET
       queue_test
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parallel_execution/GPUtwice.cu	Sun Feb 12 12:35:11 2017 +0900
@@ -0,0 +1,8 @@
+extern "C" {
+    __global__ void multiply(float* A, float* B, float* C) {
+        // Each thread writes C[index] = A[index] * B[0] for its own global index; there is no bounds check on index.
+        int index = blockIdx.x * blockDim.x + threadIdx.x;
+        C[index] = A[index] * B[0];
+    }
+
+}
--- a/src/test/helper_cuda.h	Sun Feb 12 09:12:21 2017 +0900
+++ b/src/test/helper_cuda.h	Sun Feb 12 12:35:11 2017 +0900
@@ -292,7 +292,7 @@
 
 #ifdef __cuda_cuda_h__
 // CUDA Driver API errors
-static const char *_cudaGetErrorEnum(CUresult error)
+const char *_cudaGetErrorEnum(CUresult error)
 {
     switch (error)
     {
--- a/src/test/multiply.cu	Sun Feb 12 09:12:21 2017 +0900
+++ b/src/test/multiply.cu	Sun Feb 12 12:35:11 2017 +0900
@@ -1,5 +1,5 @@
 extern "C" {
-    __global__ void multiply(float* A, float* B, float* C,int* i) {
+    __global__ void multiply(float* A, float* B, float* C) {
 //        printf("%d %d\n",i[0],i[1]);
         int index = blockIdx.x * blockDim.x + threadIdx.x;
         C[index] = A[index] * B[0];
--- a/src/test/twice.cc	Sun Feb 12 09:12:21 2017 +0900
+++ b/src/test/twice.cc	Sun Feb 12 12:35:11 2017 +0900
@@ -112,13 +112,13 @@
     for (int i=0;i<num_exec;i++,cur++) {
         if (num_stream <= cur)
             cur=0;
-        B[i] = (float)(i+1);
+        //B[i] = (float)(i+1);
         //cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]);
         void* args[] = {&devA, &devB[i], &devOut[i]};
         checkCudaErrors(cuLaunchKernel(function,
                        LENGTH, 1, 1,
                        THREAD, 1, 1,
-                                 0, stream[cur], args, NULL));
+                                 0, num_stream ? stream[cur] : NULL , args, NULL));
         //cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]);
     }