# HG changeset patch # User Tatsuki IHA # Date 1507270236 -32400 # Node ID 08fc3e5c8b81e31a44e5dd26b7ea944f53561c6e # Parent 764c92c3b1817c3251422b191b239a37ba5a25b6# Parent 3789144f972e4b9906231a9b965ffc1746532627 Merge diff -r 3789144f972e -r 08fc3e5c8b81 src/parallel_execution/CMakeLists.txt --- a/src/parallel_execution/CMakeLists.txt Fri Oct 06 14:42:32 2017 +0900 +++ b/src/parallel_execution/CMakeLists.txt Fri Oct 06 15:10:36 2017 +0900 @@ -85,7 +85,7 @@ SOURCES examples/twice/main.cbc examples/twice/twice.cbc examples/twice/CUDAtwice.cu examples/twice/createArray.cbc CPUWorker.cbc TimeImpl.cbc examples/twice/twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc CUDAWorker.cbc cuda.c MultiDimIterator.cbc ) - set_target_properties(CUDAtwice PROPERTIES COMPILE_FLAGS "-Wall -g -DUSE_CUDAWorker=1") + set_target_properties(CUDAtwice PROPERTIES COMPILE_FLAGS "-Wall -g -DUSE_CUDAWorker=1") # -DUSE_CUDA_MAIN_THREAD GearsCommand( TARGET diff -r 3789144f972e -r 08fc3e5c8b81 src/parallel_execution/examples/bitonicSort/CUDAbitonicSwap.cu --- a/src/parallel_execution/examples/bitonicSort/CUDAbitonicSwap.cu Fri Oct 06 14:42:32 2017 +0900 +++ b/src/parallel_execution/examples/bitonicSort/CUDAbitonicSwap.cu Fri Oct 06 15:10:36 2017 +0900 @@ -9,7 +9,7 @@ int i = 0; C_bitonicSwap: if (i < prefix) { - int index = i + blockIdx.x * prefix; + int index = i + (blockIdx.x * blockDim.x + threadIdx.x) * prefix; int position = index/block; int index1 = index+block*position; int index2 = (first == 1)? ((block<<1)*(position+1))-(index1%block)-1 : index1+block; diff -r 3789144f972e -r 08fc3e5c8b81 src/parallel_execution/examples/twice/CUDAtwice.cu --- a/src/parallel_execution/examples/twice/CUDAtwice.cu Fri Oct 06 14:42:32 2017 +0900 +++ b/src/parallel_execution/examples/twice/CUDAtwice.cu Fri Oct 06 15:10:36 2017 +0900 @@ -4,7 +4,7 @@ int prefix = *prefixPtr; C_twice: if (i < prefix) { - array[i+blockIdx.x*prefix] = array[i+blockIdx.x*prefix]*2; + array[i+(blockIdx.x*blockDim.x+threadIdx.x)*prefix] = array[i+(blockIdx.x*blockDim.x+threadIdx.x)*prefix]*2; i++; goto C_twice; }