Mercurial > hg > Game > Cerium
changeset 1515:b3644b73d2cf draft
add flip flag test
author | Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 23 Oct 2012 14:41:27 +0900 |
parents | 99ea7b932470 |
children | e544f9747169 |
files | TaskManager/Gpu/GpuScheduler.cc TaskManager/Gpu/GpuScheduler.h TaskManager/test/GpuRunTest/GpuRunTest.cc TaskManager/test/GpuRunTest/Makefile example/basic/Makefile.gpu example/basic/main.cc example/flip/GpuFunc.h example/flip/Makefile example/flip/Makefile.def example/flip/main.cc example/flip/task_init.cc example/flip/twice.cl example/many_task/Makefile example/many_task/Makefile.def example/many_task/Makefile.macosx example/many_task/main.cc example/many_task/ppe/sort_test.cc example/many_task/sort.cc |
diffstat | 18 files changed, 305 insertions(+), 96 deletions(-) [+] |
line wrap: on
line diff
--- a/TaskManager/Gpu/GpuScheduler.cc Sun Oct 14 02:40:05 2012 +0900 +++ b/TaskManager/Gpu/GpuScheduler.cc Tue Oct 23 14:41:27 2012 +0900 @@ -82,15 +82,14 @@ load_kernel(nextTask->command); cl_kernel& kernel = *task_list[nextTask->command].gputask->kernel; - int err = CL_SUCCESS; int param = 0; cl_mem memparam = clCreateBuffer(context, CL_MEM_READ_ONLY, - sizeof(memaddr)*nextTask->param_count, NULL, NULL); - err |= clEnqueueWriteBuffer(command_queue[cur], memparam, CL_TRUE, 0, sizeof(memaddr)*nextTask->param_count, - nextTask->param(0), 0, NULL, NULL); - err |= clSetKernelArg(kernel, param, sizeof(memaddr),(void *)&memparam); + sizeof(memaddr)*nextTask->param_count, NULL, NULL); + ret = clEnqueueWriteBuffer(command_queue[cur], memparam, CL_TRUE, 0, + sizeof(memaddr)*nextTask->param_count,nextTask->param(0), 0, NULL, NULL); + ret = clSetKernelArg(kernel, param, sizeof(memaddr),(void *)&memparam); param++; @@ -100,9 +99,9 @@ for(int i=0;i<nextTask->inData_count;i++) { memin[i] = clCreateBuffer(context, mem_flag, nextTask->inData(i)->size, NULL, NULL); - err |= clEnqueueWriteBuffer(command_queue[cur], memin[i], CL_TRUE, 0, + ret = clEnqueueWriteBuffer(command_queue[cur], memin[i], CL_TRUE, 0, nextTask->inData(i)->size, nextTask->inData(i)->addr, 0, NULL, NULL); - err |= clSetKernelArg(kernel, param, sizeof(memaddr), (void *)&memin[i]); + ret = clSetKernelArg(kernel, param, sizeof(memaddr), (void *)&memin[i]); param++; } @@ -111,24 +110,26 @@ cl_mem *memout = new cl_mem[nextTask->outData_count]; cl_mem_flags out_mem_flag = flag.flip? CL_MEM_READ_WRITE : CL_MEM_WRITE_ONLY; + for(int i=0;i<nextTask->outData_count;i++) { - memout[i] = clCreateBuffer(context, out_mem_flag, nextTask->outData(i)->size, NULL, NULL); + memout[i] = clCreateBuffer(context, out_mem_flag, nextTask->outData(i)->size, NULL, &ret); if (flag.flip) { // use output buffer as input buffer - err |= clEnqueueWriteBuffer(command_queue[cur], memout[i], CL_TRUE, 0, - nextTask->inData(i)->size, nextTask->inData(i)->addr, 0, NULL, NULL); } - err |= clSetKernelArg(kernel, param, sizeof(memaddr), (void *)&memout[i]); + ret = clEnqueueWriteBuffer(command_queue[cur], memout[i], CL_TRUE, 0, + nextTask->inData(i)->size, nextTask->inData(i)->addr, 0, NULL, NULL); + } + ret = clSetKernelArg(kernel, param, sizeof(memaddr), (void *)&memout[i]); param++; } + cl_event ev = NULL; - clEnqueueTask(command_queue[cur], kernel, 0, NULL, &ev); - + ret = clEnqueueTask(command_queue[cur], kernel, 0, NULL, &ev); // ndrange flagが0ならdim,global_work_size[0],local_work_size[0] = 1で固定に // clEnqueueNDRange // (command_queue[cur], kernel, dim, NULL,global_work_size[0],local_work_size[0],NULL&ev); for(int i=0;i<nextTask->outData_count;i++) { - err |= clEnqueueReadBuffer(command_queue[cur], memout[i], CL_TRUE, 0, - nextTask->outData(i)->size, nextTask->outData(i)->addr, 1, &ev, NULL); + ret = clEnqueueReadBuffer(command_queue[cur], memout[i], CL_TRUE, 0, + nextTask->outData(i)->size, nextTask->outData(i)->addr, 1, &ev, NULL); } }
--- a/TaskManager/Gpu/GpuScheduler.h Sun Oct 14 02:40:05 2012 +0900 +++ b/TaskManager/Gpu/GpuScheduler.h Tue Oct 23 14:41:27 2012 +0900 @@ -55,3 +55,4 @@ extern void gpu_register_task(int cmd,const char* filename,const char* functionname); extern void gpu_register_ndrange(int, int, size_t*); +
--- a/TaskManager/test/GpuRunTest/GpuRunTest.cc Sun Oct 14 02:40:05 2012 +0900 +++ b/TaskManager/test/GpuRunTest/GpuRunTest.cc Tue Oct 23 14:41:27 2012 +0900 @@ -82,7 +82,7 @@ HTaskPtr twice = manager->create_task(Twice); - twice->set_param(0, (memaddr)length); + twice->set_param(0, (memaddr)&length); twice->set_inData(0, indata, sizeof (int)*length); twice->set_outData(0, outdata, sizeof (int)*length); twice->set_cpu(GPU_0);
--- a/TaskManager/test/GpuRunTest/Makefile Sun Oct 14 02:40:05 2012 +0900 +++ b/TaskManager/test/GpuRunTest/Makefile Tue Oct 23 14:41:27 2012 +0900 @@ -5,7 +5,7 @@ SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP)) OBJS = $(SRCS:.cc=.o) -LIBS += -lGpuManager -framework opencl `sdl-config --libs` +LIBS += -lGpuManager -framework opencl `sdl-config --libs` .SUFFIXES: .cc .o
--- a/example/basic/Makefile.gpu Sun Oct 14 02:40:05 2012 +0900 +++ b/example/basic/Makefile.gpu Tue Oct 23 14:41:27 2012 +0900 @@ -5,7 +5,7 @@ SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP)) OBJS = $(SRCS:.cc=.o) -TASK_DIR = ppe +TASK_DIR = gpu TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc) TASK_SRCS_EXCLUDE = TASK_SRCS = $(filter-out $(TASK_DIR)/$(TASK_SRCS_EXCLUDE),$(TASK_SRCS_TMP))
--- a/example/basic/main.cc Sun Oct 14 02:40:05 2012 +0900 +++ b/example/basic/main.cc Tue Oct 23 14:41:27 2012 +0900 @@ -19,7 +19,7 @@ { printf("%s ---\n", title); for (int i = 0; i < size; i++) { - printf("%2d ", data[i]); + printf("%2d ", data[i]); } printf("\n"); } @@ -57,7 +57,7 @@ int *data = (int*)manager->allocate(sizeof(int)*length); for (int i = 0; i < length; i++) { - data[i] = i; + data[i] = i; } print_data(data, length, "before"); @@ -65,7 +65,7 @@ /** * Create Task * create_task(Task ID); - */ + */ twice = manager->create_task(TWICE_TASK); twice->set_cpu(SPE_ANY); @@ -73,6 +73,7 @@ * Set of Input Data * add_inData(address of input data, size of input data); */ + // twice->set_param(0,(memaddr)&length); twice->set_inData(0,data, sizeof(int)*length); /** @@ -93,14 +94,14 @@ twice->set_post(twice_result, (void*)data, 0); // add Active Queue - twice->spawn(); + twice->spawn(); } int TMmain(TaskManager *manager,int argc, char *argv[]) { if (init(argc, argv) < 0) { - return -1; + return -1; } // Task Register @@ -108,7 +109,7 @@ task_init(); for (int i = 0; i < task; ++i) { - twice_init(manager); + twice_init(manager); } return 0;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/flip/GpuFunc.h Tue Oct 23 14:41:27 2012 +0900 @@ -0,0 +1,6 @@ + +enum { +#include "SysTasks.h" + Twice, + // Func1, +};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/flip/Makefile Tue Oct 23 14:41:27 2012 +0900 @@ -0,0 +1,29 @@ +include ./Makefile.def + +SRCS_TMP = $(wildcard *.cc) +SRCS_EXCLUDE = # 除外するファイルを書く +SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP)) +OBJS = $(SRCS:.cc=.o) + +LIBS += -lGpuManager -framework opencl `sdl-config --libs` + +.SUFFIXES: .cc .o + +.cc.o: + $(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@ + +all: $(TARGET) +gpu: all + +$(TARGET): $(OBJS) + $(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS) + +link: + $(CC) -o $(TARGET) $(OBJS) $(LIBS) + +debug: $(TARGET) + sudo gdb ./$(TARGET) + +clean: + rm -f $(TARGET) $(OBJS) + rm -f *~ \#*
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/flip/Makefile.def Tue Oct 23 14:41:27 2012 +0900 @@ -0,0 +1,9 @@ +TARGET = fliptest + +CERIUM = ../../../Cerium + +CC = g++ +CFLAGS = -g -Wall + +INCLUDE = -I${CERIUM}/include/TaskManager -I. -I../.. +LIBS = -L${CERIUM}/TaskManager
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/flip/main.cc Tue Oct 23 14:41:27 2012 +0900 @@ -0,0 +1,114 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <sys/stat.h> +#include "types.h" +#include "TaskManager.h" +#include "GpuFunc.h" + +#define DEFAULT 5 +static long int length = DEFAULT; +static int task = 1; +int *indata; + +extern void task_init(void); + +char usr_help_str[] = "GpuRun [length]\n"; + +void +print_data(int *data, int size, const char *title) +{ + printf("%s ---\n", title); + for ( int i = 0; i < size; i++) { + printf("%2d ", data[i]); + } + printf("\n"); +} + +/** + * タスク終了後の data1, data2 の確認 + */ +void +twice_result(SchedTask *s, void *a, void *b) +{ + int* data = (int*)a; + long length = (long)b; + print_data(data, length, "after"); +} + + +int +init(int argc, char **argv) +{ + for (int i = 1; argv[i]; ++i) { + if (strcmp(argv[i], "-length") == 0) { + length = atoi(argv[++i]); + } else if (strcmp(argv[i], "-count") == 0) { + task = atoi(argv[++i]); + } + } + + return 0; +} + + +void +tester(int *indata, int *outdata, int num) { + + //チェック + int check = 0; + for (int c=0; c<num; c++) { + if(outdata[c] == indata[c]*2) { + check++; + } + } + + printf("Computed '%d/%d' correct values\n",check,num); + +} + + +void +test(TaskManager *manager) { + indata = new int[length]; + + for (int c=0; c < length ;c++) { + indata[c] = c; + } + + print_data(indata, length, "before"); + + HTaskPtr twice = manager->create_task(Twice); + + twice->set_param(0, (memaddr)length); + twice->set_inData(0, indata, sizeof (int)*length); + twice->set_outData(0, indata, sizeof (int)*length); + twice->set_cpu(GPU_0); + twice->flip(); + + /* + * set_post() で ppe task を渡せるようにしたい + */ + twice->set_post(twice_result, (void*)indata, (void*)length); + + twice->spawn(); +} + +int +TMmain(TaskManager *manager, int argc, char* argv[]) +{ + if (init(argc, argv) < 0) { + return -1; + } + + task_init(); + + for (int i = 0; i < task; ++i) { + test(manager); + } + + return 0; +} + +/* end */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/flip/task_init.cc Tue Oct 23 14:41:27 2012 +0900 @@ -0,0 +1,12 @@ +#include "GpuFunc.h" +#include "GpuScheduler.h" + +void +task_init(void) +{ + int cmd = Twice; + int dim = 2; + size_t *l_work_size = new size_t(dim); + GpuNDRangeRegister(cmd, dim, l_work_size); + GpuSchedRegister(cmd, "twice.cl", "twice"); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/flip/twice.cl Tue Oct 23 14:41:27 2012 +0900 @@ -0,0 +1,12 @@ +__kernel void +twice(__constant int *data_count, + __global int *input_data) + //__global int *output_data) +{ + long count = (long)data_count[0]; + for (int i = 0; i<count; i++) { + // output_data[i] = 2*input_data[i]; + input_data[i] *= 2; + } + +}
--- a/example/many_task/Makefile Sun Oct 14 02:40:05 2012 +0900 +++ b/example/many_task/Makefile Tue Oct 23 14:41:27 2012 +0900 @@ -16,6 +16,11 @@ @echo "Make for PS3 (Cell)" @$(MAKE) -f Makefile.cell +gpu: FORCE + @echo "Make for OpenCL" + @$(MAKE) -f Makefile.gpu + + FORCE: clean:
--- a/example/many_task/Makefile.def Sun Oct 14 02:40:05 2012 +0900 +++ b/example/many_task/Makefile.def Tue Oct 23 14:41:27 2012 +0900 @@ -10,7 +10,7 @@ # OPT = -g -O9 # OPT = -g CC = g++ -CFLAGS = -DUSE_SIMPLE_TASK -Wall $(OPT) +CFLAGS = -Wall $(OPT) -DUSE_SIMPLE_TASK # CFLAGS = -Wall $(OPT) INCLUDE = -I${CERIUM}/include/TaskManager -I. -I..
--- a/example/many_task/Makefile.macosx Sun Oct 14 02:40:05 2012 +0900 +++ b/example/many_task/Makefile.macosx Tue Oct 23 14:41:27 2012 +0900 @@ -2,7 +2,7 @@ SRCS_TMP = $(wildcard *.cc) -SRCS_EXCLUDE = sort_test.cc ppe/task_init.cc # 除外するファイルを書く +SRCS_EXCLUDE = sort_test.cc # 除外するファイルを書く SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP)) OBJS = $(SRCS:.cc=.o)
--- a/example/many_task/main.cc Sun Oct 14 02:40:05 2012 +0900 +++ b/example/many_task/main.cc Tue Oct 23 14:41:27 2012 +0900 @@ -37,18 +37,6 @@ return tv.tv_sec + (double)tv.tv_usec*1e-6; } -/* -static void -show_data(void) -{ - puts("-----------------------------------------------"); - for(int i = 0; i < data_length; i++) { - printf("data[%02d].index = %d\n", i, data[i].index); - } - puts("-----------------------------------------------"); -} -*/ - const char *usr_help_str = "Usage: ./sort [option]\n \ options\n\ -cpu Number of SPE used (default 1)\n\ @@ -80,6 +68,16 @@ Sort sorter; static void +show_data(void) +{ + puts("-----------------------------------------------"); + for(int i = 0; i < sorter.data_length; i++) { + printf("data[%02d].index = %d\n", i, sorter.data[i].index); + } + puts("-----------------------------------------------"); +} + +static void check_data() { for(int i=0; i< sorter.data_length-1;i++) { @@ -112,11 +110,11 @@ sorter.data[i].ptr = i; } + // show_data(); HTaskPtr restart = manager->create_task(sort_task,0,0,0,0); + // default ではSortSimpleがsetされている。SortSimpleはsort.ccに + restart->set_param(0,(memaddr)&sorter); - //restart->set_inData(0,sorter.data,sizeof(Data)*length); - //restart->set_param(0,(memaddr)&length); - // restart->set_outData(0,sorter.data,sizeof(Data)*(int)length); // set flip flag restart->spawn(); } @@ -133,7 +131,6 @@ task_init(); int cpu = manager->get_cpuNum(); - // in case of -cpu 0 if (cpu==0) cpu = 1; if (1) { @@ -156,7 +153,7 @@ TMend(TaskManager *manager) { ed_time = getTime(); - //show_data(); + // show_data(); check_data(); printf("Time: %0.6f\n",ed_time-st_time); }
--- a/example/many_task/ppe/sort_test.cc Sun Oct 14 02:40:05 2012 +0900 +++ b/example/many_task/ppe/sort_test.cc Tue Oct 23 14:41:27 2012 +0900 @@ -3,13 +3,21 @@ #include <stdlib.h> //#include "sort.h" #include "QuickSort.h" +#include <sys/time.h> // sort.cc extern int data_length; extern DataPtr data; -extern void quick_sort(DataPtr, int, int); static int length = 1200; extern void qsort_test(Data*, int, int); +static double +getTime() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec + (double)tv.tv_usec*1e-6; +} + void show( Data *data, int size ) { @@ -63,8 +71,14 @@ int begin = 0; int end = length; + double st_time; + double ed_time; //show(sorter.data, end-1); + st_time = getTime(); qsort_test(sorter.data, begin, end); + ed_time = getTime(); + printf("Time: %0.6f\n",ed_time-st_time); + //show(sorter.data, end-1); check_data(); return 0;
--- a/example/many_task/sort.cc Sun Oct 14 02:40:05 2012 +0900 +++ b/example/many_task/sort.cc Tue Oct 23 14:41:27 2012 +0900 @@ -23,12 +23,12 @@ get_split_num(int len, int num) { if (len / num < MAX_BLOCK_SIZE) { - return num; + return num; } else { - // 切り上げ - return (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE; + // 切り上げ + return (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE; } -} +} /** @@ -53,77 +53,85 @@ int last_half_block_num = half_block_num+(last_block_num/2); if (--sort_count < 0) { - return 0; + return 0; } for (int i = 0; i < s->split_num-1; i++) { - s->fsort[i] = manager->create_task(QUICK_SORT, - (memaddr)&s->data[i*block_num], sizeof(Data)*block_num, - (memaddr)&s->data[i*block_num], sizeof(Data)*block_num); - if (i>0 && s->bsort[i-1]) { - s->fsort[i]->wait_for(s->bsort[i-1]); - } - if (i<s->split_num-2 && s->bsort[i]) { - s->fsort[i]->wait_for(s->bsort[i]); - } - s->fsort[i]->set_cpu(SPE_ANY); + s->fsort[i] = manager->create_task(QUICK_SORT, + (memaddr)&s->data[i*block_num], sizeof(Data)*block_num, + (memaddr)&s->data[i*block_num], sizeof(Data)*block_num); + s->fsort[i]->flip(); + if (i>0 && s->bsort[i-1]) { + s->fsort[i]->wait_for(s->bsort[i-1]); + } + if (i<s->split_num-2 && s->bsort[i]) { + s->fsort[i]->wait_for(s->bsort[i]); + } + s->fsort[i]->set_cpu(SPE_ANY); + s->fsort[i]->set_param(0,(memaddr)block_num); } // 最後の block は端数なので last_block_num を使う { - int i = s->split_num-1; + int i = s->split_num-1; - s->fsort[i] = manager->create_task(QUICK_SORT, - (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num, - (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num); - if (i>0 && s->bsort[i-1]) { - s->fsort[i]->wait_for(s->bsort[i-1]); - } - s->fsort[i]->set_cpu(SPE_ANY); + s->fsort[i] = manager->create_task(QUICK_SORT, + (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num, + (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num); + s->fsort[i]->flip(); + if (i>0 && s->bsort[i-1]) { + s->fsort[i]->wait_for(s->bsort[i-1]); + } + s->fsort[i]->set_cpu(SPE_ANY); + s->fsort[i]->set_param(0,(memaddr)last_block_num); } if (s->split_num > 1) { - for (int i = 0; i < half_num-1; i++) { - if (s->bsort[i]) manager->free_htask(s->bsort[i]); - s->bsort[i] = manager->create_task(QUICK_SORT, - (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num, - (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num); - s->bsort[i]->set_cpu(SPE_ANY); - } + for (int i = 0; i < half_num-1; i++) { + if (s->bsort[i]) manager->free_htask(s->bsort[i]); + s->bsort[i] = manager->create_task(QUICK_SORT, + (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num, + (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num); + s->bsort[i]->flip(); + s->bsort[i]->set_cpu(SPE_ANY); + s->bsort[i]->set_param(0,(memaddr)block_num); + } - { - int i = half_num-1; + { + int i = half_num-1; - if (s->bsort[i]) manager->free_htask(s->bsort[i]); - s->bsort[i] = manager->create_task(QUICK_SORT, - (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num, - (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num); - s->bsort[i]->set_cpu(SPE_ANY); - } - - for (int i = 0; i < half_num; i++) { - s->bsort[i]->wait_for(s->fsort[i]); - s->bsort[i]->wait_for(s->fsort[i+1]); - s->bsort[i]->no_auto_free(); - s->bsort[i]->spawn(); - } + if (s->bsort[i]) manager->free_htask(s->bsort[i]); + s->bsort[i] = manager->create_task(QUICK_SORT, + (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num, + (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num); + s->bsort[i]->flip(); + s->bsort[i]->set_cpu(SPE_ANY); + s->bsort[i]->set_param(0,(memaddr)last_half_block_num); + } + + for (int i = 0; i < half_num; i++) { + s->bsort[i]->wait_for(s->fsort[i]); + s->bsort[i]->wait_for(s->fsort[i+1]); + s->bsort[i]->no_auto_free(); + s->bsort[i]->spawn(); + } } HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0); restart->set_param(0,(memaddr)s); if (!all) restart->wait_for(s->fsort[0]); for (int i = 0; i < s->split_num; i++) { - s->fsort[i]->spawn(); + s->fsort[i]->spawn(); } if (sort_count == 1) { - // last loop wait for all task - // we should not need this? - for (int i = 0; i < half_num; i++) { - restart->wait_for(s->bsort[i]); - s->bsort[i]->auto_free(); - } + // last loop wait for all task + // we should not need this? + for (int i = 0; i < half_num; i++) { + restart->wait_for(s->bsort[i]); + s->bsort[i]->auto_free(); + } } restart->spawn(); return 0;