Mercurial > hg > Game > Cerium
changeset 1918:15e8c50ed570 draft
add cuda sample, not running
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 24 Jan 2014 07:16:26 +0900 |
parents | 64bd56aed386 |
children | d6e033734c12 |
files | example/Cuda/Makefile example/Cuda/Makefile.def example/Cuda/main.cc example/Cuda/multiply.cu |
diffstat | 4 files changed, 123 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# example/Cuda/Makefile
#
# Builds the host program $(TARGET) from the .cc files in this directory and
# compiles every .cu kernel source to a PTX module next to it.
include ./Makefile.def

# Host sources: every .cc here except the ones listed in SRCS_EXCLUDE.
SRCS_TMP = $(wildcard *.cc)
SRCS_EXCLUDE = # list files to exclude here
SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
OBJS = $(SRCS:.cc=.o)

# Optional task sources.
# NOTE(review): TASK_DIR1, TASK_DIR2 and TASK_SRCS_EXCLUDE are not defined in
# the visible Makefile.def, so these lists are presumably empty -- confirm.
TASK_SRCS_TMP = $(wildcard $(TASK_DIR2)/*.cc $(TASK_DIR1)/*.cc)
TASK_SRCS = $(filter-out $(TASK_DIR1)/$(TASK_SRCS_EXCLUDE),$(TASK_SRCS_TMP))
TASK_OBJS = $(TASK_SRCS:.cc=.o)

CC += $(ABI)

# CUDA toolkit location; override on the command line for other installs.
CUDA_HOME ?= /Developer/NVIDIA/CUDA-5.5
NVCC ?= $(CUDA_HOME)/bin/nvcc

LIBS = -I$(CUDA_HOME)/include -F/Library/Frameworks -framework CUDA

# Kernel sources. The Driver API's cuModuleLoad accepts PTX, cubin or fatbin
# images, not .cu source, so each kernel file is compiled to PTX.
KERNEL_SRCS = $(wildcard *.cu)
KERNEL_PTX = $(KERNEL_SRCS:.cu=.ptx)

.SUFFIXES: .cc .o .cu .ptx

.PHONY: all link debug clean

.cc.o:
	$(CC) $(CFLAGS) $(LIBS) $(INCLUDE) -c $< -o $@

.cu.ptx:
	$(NVCC) -ptx $< -o $@

all: $(TARGET) $(KERNEL_PTX)

$(TARGET): $(OBJS) $(TASK_OBJS)
	$(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS)

# Relink from whatever objects already exist, without rebuilding them.
link:
	$(CC) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(LIBS)

debug: $(TARGET)
	sudo gdb ./$(TARGET)

clean:
	rm -f $(TARGET) $(OBJS) $(TASK_OBJS) $(KERNEL_PTX)
	rm -f *~ \#*
# example/Cuda/Makefile.def
#
# Shared build settings, included by the Makefile in this directory.

# Name of the executable to build.
TARGET = multiply

# Target word size. ABI turns it into the compiler flag that the Makefile
# appends to CC; without this definition $(ABI) expands to nothing.
ABIBIT = 64
ABI = -m$(ABIBIT)

# Debug-friendly build: symbols on, optimisation off.
OPT = -g -O0

CC = clang++
CFLAGS = -Wall $(OPT)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/Cuda/main.cc Fri Jan 24 07:16:26 2014 +0900 @@ -0,0 +1,76 @@ +#include <stdio.h> + +#include <cuda.h> + +#define LENGTH 1000 + +void check_data(float* A,float* B,float* C) { + for (int i=0; i<LENGTH; i++) { + if (A[i]*B[i]!=C[i]) { + puts("failure."); + return; + } + } + puts("success."); + return; +} + + + +int main() { + CUdevice device; + CUcontext context; + CUmodule module; + CUfunction function; + // CUresult result; + + cuInit(0); + cuDeviceGet(&device, 0); + cuCtxCreate(&context, 0, device); + cuModuleLoad(&module, "multiply.cu"); + cuModuleGetFunction(&function, module, "multiply"); + + float* A = new float[LENGTH]; + float* B = new float[LENGTH]; + float* C = new float[LENGTH]; + + for (int i=0; i<LENGTH; i++) { + A[i] = (float)(i+1000); + B[i] = (float)(i+1)/10.f; + } + + CUdeviceptr devA,devB,devC; + + cuMemAlloc(&devA, LENGTH*sizeof(float)); + cuMemAlloc(&devB, LENGTH*sizeof(float)); + cuMemAlloc(&devC, LENGTH*sizeof(float)); + + cuMemcpyHtoD(devA, A, LENGTH*sizeof(float)); + cuMemcpyHtoD(devB, B, LENGTH*sizeof(float)); + cuMemcpyHtoD(devC, C, LENGTH*sizeof(float)); + + cuParamSetv(function, 0, A, LENGTH*sizeof(float)); + cuParamSetv(function, 0, B, LENGTH*sizeof(float)); + cuParamSetv(function, 0, C, LENGTH*sizeof(float)); + + cuLaunchKernel(function, + LENGTH, 1, 1, + 1, 1, 1, + 0, NULL, NULL, NULL); + + cuMemcpyDtoH(C, devC, LENGTH*sizeof(float)); + + check_data(A, B, C); + + delete[] A; + delete[] B; + delete[] C; + cuModuleUnload(module); + cuMemFree(devA); + cuMemFree(devB); + cuMemFree(devC); + + + return 0; +} +