Mercurial > hg > Game > Cerium
changeset 1919:d6e033734c12 draft
running cuda sample
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 28 Jan 2014 18:33:19 +0900 |
parents | 15e8c50ed570 |
children | 273638411ebf |
files | example/Cuda/Makefile example/Cuda/Makefile.def example/Cuda/main.cc example/Cuda/multiply.cu |
diffstat | 4 files changed, 37 insertions(+), 21 deletions(-) [+] |
line wrap: on
line diff
--- a/example/Cuda/Makefile Fri Jan 24 07:16:26 2014 +0900 +++ b/example/Cuda/Makefile Tue Jan 28 18:33:19 2014 +0900 @@ -1,7 +1,8 @@ include ./Makefile.def SRCS_TMP = $(wildcard *.cc) -SRCS_EXCLUDE = # 除外するファイルを書く +SRCS_EXCLUDE = # 除外するファイルを書く + SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP)) OBJS = $(SRCS:.cc=.o) @@ -13,7 +14,17 @@ LIBS = -I/Developer/NVIDIA/CUDA-5.5/include -F/Library/Frameworks -framework CUDA -.SUFFIXES: .cc .o +CUDA_SRCS_TMP = $(wildcard *.cu) +CUDA_SRCS_EXCLUDE = # 除外するファイルを書く + +CUDA_SRCS = $(filter-out $(CUDA_SRCS_EXCLUDE),$(CUDA_SRCS_TMP)) +CUDA_OBJS = $(CUDA_SRCS:.cu) + +CUDA_TASK_SRCS_TMP = $(wildcard $(TASK_DIR2)/*.cu $(TASK_DIR1)/*.cu) +CUDA_TASK_SRCS = $(filter-out $(TASK_DIR1)/$(TASK_SRCS_EXCLUDE),$(CUDA_TASK_SRCS_TMP)) +CUDA_TASK_OBJS = $(CUDA_TASK_SRCS:.cu) + +.SUFFIXES: .cc .o .cu .cc.o: $(CC) $(CFLAGS) $(LIBS) $(INCLUDE) -c $< -o $@
--- a/example/Cuda/Makefile.def Fri Jan 24 07:16:26 2014 +0900 +++ b/example/Cuda/Makefile.def Tue Jan 28 18:33:19 2014 +0900 @@ -5,4 +5,6 @@ OPT = -g -O0 CC = clang++ -CFLAGS = -Wall $(OPT) \ No newline at end of file +NVCC = nvcc +CFLAGS = -Wall $(OPT) +NVCCFLAGS = -ptx \ No newline at end of file
--- a/example/Cuda/main.cc Fri Jan 24 07:16:26 2014 +0900 +++ b/example/Cuda/main.cc Tue Jan 28 18:33:19 2014 +0900 @@ -15,25 +15,29 @@ return; } - +void print_result(float* C) { + for (int i=0; i<LENGTH; i++) { + printf("%f\n",C[i]); + } +} int main() { CUdevice device; CUcontext context; CUmodule module; CUfunction function; - // CUresult result; + CUresult result; cuInit(0); cuDeviceGet(&device, 0); cuCtxCreate(&context, 0, device); - cuModuleLoad(&module, "multiply.cu"); + cuModuleLoad(&module, "multiply.ptx"); cuModuleGetFunction(&function, module, "multiply"); - + float* A = new float[LENGTH]; float* B = new float[LENGTH]; float* C = new float[LENGTH]; - + for (int i=0; i<LENGTH; i++) { A[i] = (float)(i+1000); B[i] = (float)(i+1)/10.f; @@ -49,28 +53,26 @@ cuMemcpyHtoD(devB, B, LENGTH*sizeof(float)); cuMemcpyHtoD(devC, C, LENGTH*sizeof(float)); - cuParamSetv(function, 0, A, LENGTH*sizeof(float)); - cuParamSetv(function, 0, B, LENGTH*sizeof(float)); - cuParamSetv(function, 0, C, LENGTH*sizeof(float)); - + void* args[] = {&devA, &devB, &devC}; + cuLaunchKernel(function, LENGTH, 1, 1, 1, 1, 1, - 0, NULL, NULL, NULL); - + 0, NULL, args, NULL); + cuMemcpyDtoH(C, devC, LENGTH*sizeof(float)); + // print_result(C); check_data(A, B, C); delete[] A; delete[] B; delete[] C; - cuModuleUnload(module); cuMemFree(devA); cuMemFree(devB); cuMemFree(devC); - + cuModuleUnload(module); + cuCtxDestroy(context); return 0; } -
--- a/example/Cuda/multiply.cu Fri Jan 24 07:16:26 2014 +0900
+++ b/example/Cuda/multiply.cu Tue Jan 28 18:33:19 2014 +0900
@@ -1,5 +1,6 @@
-__global__ void multiply(int* A, int* B, int* C) {
-    int index = blockIdx.x * blockDim.x + threadIdx.x;
-
-    C[index] = A[index] * B[index];
+extern "C" {
+    __global__ void multiply(float* A, float* B, float* C) {
+        int index = blockIdx.x * blockDim.x + threadIdx.x;
+        C[index] = A[index] * B[index];
+    }
 }