Mercurial > hg > Game > Cerium
changeset 1520:031f26b15ae6 draft
add many_task/gpu
author | Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 10 Nov 2012 19:42:22 +0900 |
parents | 9a5f87f4b60f |
children | 9ae6eedd3ee3 |
files | example/many_task/Makefile.gpu example/many_task/gpu/Makefile example/many_task/gpu/QuickSort.cl example/many_task/gpu/gpu_task_init.cc example/many_task/gpu/sort_test.cc example/many_task/gpu/sort_test.h |
diffstat | 6 files changed, 306 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/example/many_task/Makefile.gpu Sat Nov 10 18:21:16 2012 +0900 +++ b/example/many_task/Makefile.gpu Sat Nov 10 19:42:22 2012 +0900 @@ -1,26 +1,29 @@ include ./Makefile.def + SRCS_TMP = $(wildcard *.cc) -SRCS_EXCLUDE = # 除外するファイルを書く +SRCS_EXCLUDE = sort_test.cc task_init.cc # 除外するファイルを書く SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP)) OBJS = $(SRCS:.cc=.o) -TASK_DIR = ppe +TASK_DIR = gpu TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc) -TASK_SRCS_EXCLUDE = sort_test.cc +TASK_SRCS_EXCLUDE = sort_test.cc TASK_SRCS = $(filter-out $(TASK_DIR)/$(TASK_SRCS_EXCLUDE),$(TASK_SRCS_TMP)) TASK_OBJS = $(TASK_SRCS:.cc=.o) -CFLAGS += -D__CERIUM_GPU__ +CC += $(ABI) +# CFLAGS = -g -Wall# -O9 #-DDEBUG -LIBS += -lGpuManager -framework opencl `sdl-config --libs` +INCLUDE = -I${CERIUM}/include/TaskManager -I. -I.. +LIBS = -L${CERIUM}/TaskManager -DUSE_SIMPLE_TASK -lGpuManager -framework opencl `sdl-config --libs` .SUFFIXES: .cc .o .cc.o: $(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@ -all: $(TARGET) +all: $(TARGET) $(TARGET): $(OBJS) $(TASK_OBJS) $(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS) @@ -28,8 +31,12 @@ link: $(CC) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(LIBS) +debug: $(TARGET) + sudo gdb ./$(TARGET) clean: rm -f $(TARGET) $(OBJS) $(TASK_OBJS) rm -f *~ \#* rm -f ppe/*~ ppe/\#* + rm -f spe/*~ spe/\#* + rm -f gpu/*~ gpu/\#*
# example/many_task/gpu/Makefile
#
# Builds the stand-alone OpenCL sort test (sort_test) from the .cc files in
# this directory.
TARGET = sort_test
CERIUM = ../../../../Cerium

CC     = g++
CFLAGS = -g -Wall

INCLUDE = -I${CERIUM}/include/TaskManager -I. -I../
LIBS    = -L${CERIUM}/TaskManager

SRCS_TMP     = $(wildcard *.cc)
# Files to leave out of the build.
SRCS_EXCLUDE = gpu_task_init.cc
SRCS         = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
OBJS         = $(SRCS:.cc=.o)

# The Apple framework is named "OpenCL"; the lowercase spelling only resolves
# on case-insensitive file systems.
LIBS += -framework OpenCL

# None of these targets produce a file of the same name.
.PHONY: all gpu link debug clean

.SUFFIXES: .cc .o

.cc.o:
	$(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@

all: $(TARGET)
gpu: all

# Only $(OBJS) is linked: this Makefile defines no separate task objects.
$(TARGET): $(OBJS)
	$(CC) -o $@ $(OBJS) $(LIBS)

# Relink without checking whether the objects are up to date.
link:
	$(CC) -o $(TARGET) $(OBJS) $(LIBS)

debug: $(TARGET)
	sudo gdb ./$(TARGET)

clean:
	rm -f $(TARGET) $(OBJS)
	rm -f *~ \#*
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/many_task/gpu/QuickSort.cl Sat Nov 10 19:42:22 2012 +0900 @@ -0,0 +1,50 @@ +typedef struct Data { + int index; + int ptr; + int pad[2]; +} Data, *DataPtr; + +inline void +swap(__global Data *data, int left, int right ) +{ + Data tmp = data[left]; + data[left] = data[right]; + data[right] = tmp; +} + +__kernel void +quick_sort(__constant int *count, + __global Data *data) +{ + int begin = 0; + int end = count[0]; + + int stack[1024]; + int sp = 0; + int p; + while (1) { + while (begin < end) { + int where = (begin + end) / 2; + int pivot = data[where].index; + data[where].index = data[begin].index; + int i; + p = begin; + for (i=begin+1; i<=end; i++) { + if (data[i].index < pivot) { + p++; + swap(data, p, i); + } + } + data[begin].index = data[p].index; + data[p].index = pivot; + + stack[sp++] = p + 1; + stack[sp++] = end; + end = p - 1; + } + if (sp == 0) return; + end = stack[--sp]; + begin = stack[--sp]; + begin = p + 1; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/many_task/gpu/gpu_task_init.cc Sat Nov 10 19:42:22 2012 +0900 @@ -0,0 +1,12 @@ +#include "Func.h" +#include "GpuScheduler.h" +#include "Scheduler.h" + +SchedExternTask(SortSimple); + +void +task_init(void) +{ + GpuSchedRegister(QUICK_SORT, "gpu/QuickSort.cl", "quick_sort"); + SchedRegister(SortSimple); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/many_task/gpu/sort_test.cc Sat Nov 10 19:42:22 2012 +0900 @@ -0,0 +1,183 @@ +#include <stdlib.h> +#include <OpenCL/opencl.h> +#include <stdio.h> +#include <fcntl.h> +#include <string.h> +#include <sys/time.h> +#include <sys/stat.h> +#include "sort.h" +#include "sort_test.h" +#define DEFAULT 432 + +extern int data_length; +extern DataPtr data; + +// 計測用 +static double st_time; +static double ed_time; +static int length = DEFAULT; + +int +init(int argc, char **argv) +{ + for (int i = 1; argv[i]; ++i) { + if (strcmp(argv[i], "--length") == 0 || strcmp(argv[i], "-l") == 0) { + length = atoi(argv[++i]); + } + } + + return 0; +} + +int +get_split_num(int len, int num) +{ + if (len / num < MAX_BLOCK_SIZE) { + return num; + } else { + // 切り上げ + return (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE; + } +} + + +static double +getTime() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec + (double)tv.tv_usec*1e-6; +} + +void +show( Data *data, int size ) +{ + puts("-----------------------------------------------"); + for(int i=0; i<=size; i++) printf("data[%02d].index = %d\n", i, data[i].index); + puts("-----------------------------------------------"); + return; +} + +Sort sorter; + +static void +check_data() +{ + for(int i=0; i< sorter.data_length-1;i++) { + if (sorter.data[i].index>sorter.data[i+1].index) { + printf("Data are not sorted at %d. 
%d > %d \n",i, sorter.data[i].index,sorter.data[i+1].index); + return; + } + } + printf("Data are sorted\n"); +} + +void +gpu_init() +{ + clGetPlatformIDs(1, &platform_id, &ret_num_platforms); + clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, + &ret_num_devices); + + context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); + command_queue = clCreateCommandQueue(context, device_id, 0, &ret); + + //ファイルオープン + + const char* filename = "QuickSort.cl"; + const char* functionname = "quick_sort"; + + int fp = open(filename, O_RDONLY); + + if (!fp) { + fprintf(stderr, "Failed to load kernel.\n"); + exit(1); + } + + struct stat stats; + fstat(fp,&stats); + off_t size = stats.st_size; + + if (!size) { + fprintf(stderr, "Failed to load kernel.\n"); + exit(1); + } + + char *kernel_src_str = new char[size]; + size_t kernel_code_size = read(fp, kernel_src_str, size); + close(fp); + + program = clCreateProgramWithSource(context, 1, (const char **)&kernel_src_str, + (const size_t *)&kernel_code_size, &ret); + clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); + kernel = clCreateKernel(program,functionname, &ret); +} + +void +sort_start(Sort s){ + + Sort sorter = s; + int length = sorter.data_length; + + //メモリバッファの作成 + cl_mem mem_count = clCreateBuffer(context, CL_MEM_READ_ONLY,sizeof(int),NULL, &ret); + cl_mem mem_data = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(Data)*length, NULL, &ret); + + st_time = getTime(); + + //メモリバッファに入力データを書き込み + ret = clEnqueueWriteBuffer(command_queue, mem_count, CL_TRUE, 0, + sizeof(int), &length, 0, NULL, NULL); + ret = clEnqueueWriteBuffer(command_queue, mem_data, CL_TRUE, 0, + sizeof(Data)*length, sorter.data, 0, NULL, NULL); + + //print_data(data, count, "before"); + clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&mem_count); + clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&mem_data); + + ev = NULL; + + ret = clEnqueueTask(command_queue, kernel, 0, NULL, &ev); + + //メモリバッファから結果を取得 + ret = 
clEnqueueReadBuffer(command_queue, mem_data, CL_TRUE, 0,sizeof(Data)*length, sorter.data, 1, &ev, NULL); + clFlush(command_queue); + ed_time = getTime(); + show(sorter.data, length-1); + check_data(); + printf("Time: %0.6f\n",ed_time-st_time); + + clReleaseKernel(kernel); + clReleaseProgram(program); + clReleaseMemObject(mem_data); + clReleaseEvent(ev); + clReleaseCommandQueue(command_queue); + clReleaseContext(context); + +} + +int main(int argc, char *argv[]) { + + // 無効な引数ならデフォルトの値として432を設定 + + + if (argc>1) { + if (init(argc,argv) < 0) { + return -1; + } + } + + gpu_init(); + + sorter.data = new Data[length]; + sorter.data_length = length; + sorter.split_num = get_split_num(sorter.data_length, 1); // (length, cpu_num) + + for (int i = 0; i < length; i++) { + sorter.data[i].index = rand()%10000; + sorter.data[i].ptr = i; + } + + sort_start(sorter); + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/many_task/gpu/sort_test.h Sat Nov 10 19:42:22 2012 +0900 @@ -0,0 +1,11 @@ + cl_platform_id platform_id; + cl_uint ret_num_platforms; + cl_device_id device_id; + cl_uint ret_num_devices; + cl_int ret; + + cl_context context; + cl_command_queue command_queue; + cl_program program; + cl_kernel kernel; + cl_event ev;