Mercurial > hg > Game > Cerium
changeset 1524:32305a19a380 draft cell
merge branch
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 14 Nov 2012 09:45:28 +0900 |
parents | 30145272ff0b (current diff) d232231e1425 (diff) |
children | 23f8034d8100 67a2da98d95c |
files | TaskManager/kernel/ppe/CpuThreads.cc example/many_task/ppe/QuickSort.cc.loop example/many_task/ppe/task_init.cc |
diffstat | 44 files changed, 814 insertions(+), 313 deletions(-) [+] |
line wrap: on
line diff
--- a/TaskManager/Fifo/PreRefDmaManager.cc Tue Oct 02 18:17:34 2012 +0900 +++ b/TaskManager/Fifo/PreRefDmaManager.cc Wed Nov 14 09:45:28 2012 +0900 @@ -3,7 +3,7 @@ #include "TaskManagerImpl.h" void * -PreRefDmaManager::dma_load(Scheduler *s, void *buf, memaddr addr, uint32 size, uint32 mask) +PreRefDmaManager::dma_load(Scheduler *s, memaddr addr, uint32 size, uint32 mask) { unsigned long long wait = 0; (this->*start_dmawait_profile)();
--- a/TaskManager/Fifo/PreRefDmaManager.h Tue Oct 02 18:17:34 2012 +0900 +++ b/TaskManager/Fifo/PreRefDmaManager.h Wed Nov 14 09:45:28 2012 +0900 @@ -7,7 +7,7 @@ public: /* functions */ - virtual void *dma_load(Scheduler *s, void *buf, memaddr addr, uint32 size, uint32 mask); + virtual void *dma_load(Scheduler *s, memaddr addr, uint32 size, uint32 mask); virtual void free_(void *buf); } ;
--- a/TaskManager/Fifo/gettime.h Tue Oct 02 18:17:34 2012 +0900 +++ b/TaskManager/Fifo/gettime.h Wed Nov 14 09:45:28 2012 +0900 @@ -10,8 +10,8 @@ */ inline unsigned long long gettime() { - unsigned long long time; -#ifdef __CERIUM_FIFO__ + unsigned long long time = 0; +#ifdef __CERIUM_FIFO__ // ?? struct timespec ts; #ifndef __APPLE__
--- a/TaskManager/Fifo/rdtsc.h Tue Oct 02 18:17:34 2012 +0900 +++ b/TaskManager/Fifo/rdtsc.h Wed Nov 14 09:45:28 2012 +0900 @@ -6,8 +6,8 @@ */ inline unsigned long long rdtsc() { - unsigned long long ret; -#ifdef __CERIUM_FIFO__ + unsigned long long ret = 0; +#ifdef __CERIUM_FIFO__ // ?? __asm__ volatile ("rdtsc" : "=A" (ret)); #endif // __CERIUM_FIFO__ return ret;
--- a/TaskManager/Gpu/GpuScheduler.cc Tue Oct 02 18:17:34 2012 +0900 +++ b/TaskManager/Gpu/GpuScheduler.cc Wed Nov 14 09:45:28 2012 +0900 @@ -3,6 +3,8 @@ #include "PreRefDmaManager.h" #include "SchedTask.h" #include "stdio.h" +// +#include "ListData.h" #include <fcntl.h> #include <sys/stat.h> #include <string.h> @@ -82,15 +84,14 @@ load_kernel(nextTask->command); cl_kernel& kernel = *task_list[nextTask->command].gputask->kernel; - int err = CL_SUCCESS; int param = 0; cl_mem memparam = clCreateBuffer(context, CL_MEM_READ_ONLY, - sizeof(memaddr)*nextTask->param_count, NULL, NULL); - err |= clEnqueueWriteBuffer(command_queue[cur], memparam, CL_TRUE, 0, sizeof(memaddr)*nextTask->param_count, - nextTask->param(0), 0, NULL, NULL); - err |= clSetKernelArg(kernel, param, sizeof(memaddr),(void *)&memparam); + sizeof(memaddr)*nextTask->param_count, NULL, NULL); + ret = clEnqueueWriteBuffer(command_queue[cur], memparam, CL_TRUE, 0, + sizeof(memaddr)*nextTask->param_count,nextTask->param(0), 0, NULL, NULL); + ret = clSetKernelArg(kernel, param, sizeof(memaddr),(void *)&memparam); param++; @@ -99,10 +100,10 @@ if (!flag.flip) { for(int i=0;i<nextTask->inData_count;i++) { memin[i] = clCreateBuffer(context, mem_flag, nextTask->inData(i)->size, NULL, NULL); - - err |= clEnqueueWriteBuffer(command_queue[cur], memin[i], CL_TRUE, 0, - nextTask->inData(i)->size, nextTask->inData(i)->addr, 0, NULL, NULL); - err |= clSetKernelArg(kernel, param, sizeof(memaddr), (void *)&memin[i]); + ListElement *input_buf = nextTask->inData(i); + ret = clEnqueueWriteBuffer(command_queue[cur], memin[i], CL_TRUE, 0, + input_buf->size, input_buf->addr, 0, NULL, NULL); + ret = clSetKernelArg(kernel, param, sizeof(memaddr), (void *)&memin[i]); param++; } @@ -111,39 +112,52 @@ cl_mem *memout = new cl_mem[nextTask->outData_count]; cl_mem_flags out_mem_flag = flag.flip? CL_MEM_READ_WRITE : CL_MEM_WRITE_ONLY; + for(int i=0;i<nextTask->outData_count;i++) { - memout[i] = clCreateBuffer(context, out_mem_flag, nextTask->outData(i)->size, NULL, NULL); + ListElement *output_buf = nextTask->outData(i); + memout[i] = clCreateBuffer(context, out_mem_flag, output_buf->size, NULL, &ret); + if (flag.flip) { // use output buffer as input buffer - err |= clEnqueueWriteBuffer(command_queue[cur], memout[i], CL_TRUE, 0, - nextTask->inData(i)->size, nextTask->inData(i)->addr, 0, NULL, NULL); } - err |= clSetKernelArg(kernel, param, sizeof(memaddr), (void *)&memout[i]); + ListElement *input_buf = nextTask->inData(i); + + ret = clEnqueueWriteBuffer(command_queue[cur], memout[i], CL_TRUE, 0, + input_buf->size, input_buf->addr, 0, NULL, NULL); + } + ret = clSetKernelArg(kernel, param, sizeof(memaddr), (void *)&memout[i]); param++; } + cl_event ev = NULL; - clEnqueueTask(command_queue[cur], kernel, 0, NULL, &ev); - + ret = clEnqueueTask(command_queue[cur], kernel, 0, NULL, &ev); // ndrange flagが0ならdim,global_work_size[0],local_work_size[0] = 1で固定に // clEnqueueNDRange // (command_queue[cur], kernel, dim, NULL,global_work_size[0],local_work_size[0],NULL&ev); for(int i=0;i<nextTask->outData_count;i++) { - err |= clEnqueueReadBuffer(command_queue[cur], memout[i], CL_TRUE, 0, - nextTask->outData(i)->size, nextTask->outData(i)->addr, 1, &ev, NULL); + ListElement *output_buf = nextTask->outData(i); + ret = clEnqueueReadBuffer(command_queue[cur], memout[i], CL_TRUE, 0, + output_buf->size, output_buf->addr, 1, &ev, NULL); + } } reply[cur] = (memaddr)tasklist->waiter; - clFlush(command_queue[1-cur]); // waiting for queued task - if(reply[1-cur]) { - connector->mail_write(reply[1-cur]); + clFlush(command_queue[cur]); // waiting for queued task + //clFinish(command_queue[cur]); // waiting for queued task + // pipeline : 1-cur + // no pipeline : cur + + if(reply[cur]) { + connector->mail_write(reply[cur]); } params_addr = (memaddr)tasklist->next; cur = 1 - cur; } - - clFlush(command_queue[1-cur]); // waiting for queued task - connector->mail_write(reply[1-cur]); + /* + clFlush(command_queue[1-cur]); // waiting for queued task + connector->mail_write(reply[1-cur]); + */ connector->mail_write((memaddr)MY_SPE_STATUS_READY); } @@ -165,29 +179,29 @@ const char *filename = (const char *)task_list[cmd].gputask->kernel; const char *functionname = task_list[cmd].name; - int fp; + int fd; char *source_str; size_t source_size; - fp = open(filename, O_RDONLY); + fd = open(filename, O_RDONLY); - if (!fp) { - fprintf(stderr, "Failed to load kernel.\n"); + if (fd<0) { + fprintf(stderr, "Failed to load kernel %s.\n",filename); exit(1); } struct stat stats; - fstat(fp,&stats); + fstat(fd,&stats); off_t size = stats.st_size; - if (!size) { + if (size<=0) { fprintf(stderr, "Failed to load kernel.\n"); exit(1); } source_str = (char*)alloca(size); - source_size = read(fp, source_str, size); - close(fp); + source_size = read(fd, source_str, size); + close(fd); cl_program program = clCreateProgramWithSource(context, 1, @@ -205,7 +219,6 @@ void gpu_register_task(int cmd, const char* filename, const char* functionname) { - task_list[cmd].gputask->a = 1; task_list[cmd].run = not_ready; // not yet ready task_list[cmd].load = null_loader; task_list[cmd].wait = null_loader; @@ -213,4 +226,12 @@ task_list[cmd].gputask->kernel = (cl_kernel *) filename; } +void +gpu_register_ndrange(int cmd, int dim, size_t* l_work_size) +{ + task_list[cmd].gputask->dim = dim; + task_list[cmd].gputask->l_work_size = l_work_size; + +} + /* end */
--- a/TaskManager/Gpu/GpuScheduler.h Tue Oct 02 18:17:34 2012 +0900 +++ b/TaskManager/Gpu/GpuScheduler.h Wed Nov 14 09:45:28 2012 +0900 @@ -45,9 +45,14 @@ }; -extern void gpu_register_task(int cmd,const char* filename,const char* functionname); +#define GpuSchedRegister(str, filename, functionname) \ + gpu_register_task(str, filename, functionname); + +#define GpuNDRangeRegister(str, dimension, g_worksizePtr) \ + gpu_register_ndrange(str, dimension, g_worksizePtr); #endif -#define GpuSchedRegister(str, filename, functionname) \ - gpu_register_task(str, filename, functionname); +extern void gpu_register_task(int cmd,const char* filename,const char* functionname); +extern void gpu_register_ndrange(int, int, size_t*); +
--- a/TaskManager/Gpu/GpuThreads.cc Tue Oct 02 18:17:34 2012 +0900 +++ b/TaskManager/Gpu/GpuThreads.cc Wed Nov 14 09:45:28 2012 +0900 @@ -30,11 +30,11 @@ void GpuThreads::init() { - args->scheduler = new GpuScheduler(); + args->scheduler = new GpuScheduler(); args->useRefDma = use_refdma; pthread_create(&threads[0], NULL, &gpu_thread_run, args); - + } void * @@ -42,10 +42,10 @@ { gpu_thread_arg_t *argt = (gpu_thread_arg_t *) args; Scheduler *g_scheduler = argt->scheduler; - + TaskManagerImpl *manager = new SpeTaskManagerImpl(); g_scheduler->init(manager, argt->useRefDma); - + manager->set_scheduler(g_scheduler); argt->wait->sem_v();
--- a/TaskManager/Makefile.def Tue Oct 02 18:17:34 2012 +0900 +++ b/TaskManager/Makefile.def Wed Nov 14 09:45:28 2012 +0900 @@ -33,13 +33,13 @@ ABIBIT = 64 -OPT = -g -DMAIL_QUEUE -DNOT_CHECK -DTASK_LIST_MAIL #-DEARLY_TOUCH -DUSE_CACHE -#OPT = -O9 -DMAIL_QUEUE -DNOT_CHECK #-DTASK_LIST_MAIL #-DEARLY_TOUCH -DUSE_CACHE +OPT = -g -O0 -DMAIL_QUEUE -DNOT_CHECK -DTASK_LIST_MAIL #-DEARLY_TOUCH -DUSE_CACHE +#OPT = -O9 -DMAIL_QUEUE -DNOT_CHECK #-DTASK_LIST_MAIL #-DEARLY_TOUCH -DUSE_CACHE -CC = g++ +CC = clang++ CFLAGS = -Wall `sdl-config --cflags` -m$(ABIBIT) $(OPT) LIBS = -m$(ABIBIT)
--- a/TaskManager/kernel/ppe/CpuThreads.cc Tue Oct 02 18:17:34 2012 +0900 +++ b/TaskManager/kernel/ppe/CpuThreads.cc Wed Nov 14 09:45:28 2012 +0900 @@ -9,7 +9,7 @@ #include "SchedNop.h" #include "SpeTaskManagerImpl.h" #include "CellScheduler.h" - +#include <fcntl.h> SchedExternTask(ShowTime); SchedExternTask(StartProfile); @@ -39,7 +39,7 @@ } for (int i = 0; i < cpu_num; i++) { - delete args[i].scheduler; + delete args[i].scheduler; } delete [] threads; @@ -64,7 +64,7 @@ SchedRegister(ShowTime); SchedRegister(StartProfile); - argt->wait->sem_v(); //準備完了したスレッドができるたびに+1していく + argt->wait->sem_v(); //準備完了したスレッドができるたびに+1していく c_scheduler->run(new SchedNop()); c_scheduler->finish(); @@ -90,12 +90,12 @@ } for (int i = 0; i < cpu_num; i++) { - pthread_create(&threads[i], NULL, - &cpu_thread_run, (void*)&args[i]); + pthread_create(&threads[i], NULL, + &cpu_thread_run, (void*)&args[i]); } for (int i = 0; i < cpu_num; i++) { - wait->sem_p(); + wait->sem_p(); } } @@ -109,7 +109,7 @@ */ int CpuThreads::get_mail(int cpuid, int count, memaddr *ret) -{ +{ #ifdef __CERIUM_GPU__ if (is_gpu(cpuid)) return gpu->get_mail(cpuid, count, ret); #endif @@ -128,7 +128,7 @@ } else { return 0; //mailがないとき0を返す } - + } /** * Inbound Mailbox
--- a/TaskManager/kernel/schedule/SchedTask.cc Tue Oct 02 18:17:34 2012 +0900 +++ b/TaskManager/kernel/schedule/SchedTask.cc Wed Nov 14 09:45:28 2012 +0900 @@ -134,7 +134,7 @@ if (outListData.bound != dout) free(outListData.bound); #ifdef TASK_LIST_MAIL if ((cur_index->next() >= list->last()) ) - connector->mail_write(waiter); + connector->mail_write(waiter); #else connector->mail_write(waiter); #endif @@ -147,28 +147,28 @@ if (cur_index == 0) { // 最初の一つ SchedTask *nextSched = new SchedTask(); - nextSched->init(list, &list->tasks[0], scheduler, this->tag^1); - return nextSched; + nextSched->init(list, &list->tasks[0], scheduler, this->tag^1); + return nextSched; } TaskPtr nextTask = cur_index->next(); if (nextTask < list->last()) { - // Task List が残っているので、次を準備 + // Task List が残っているので、次を準備 + + TaskPtr nextTask = cur_index->next(); - TaskPtr nextTask = cur_index->next(); - SchedTask *nextSched = new SchedTask(); - nextSched->init(list, nextTask, scheduler, this->tag^1); - return nextSched; + nextSched->init(list, nextTask, scheduler, this->tag^1); + return nextSched; } else { memaddr nextList = (memaddr)list->next; if (nextList == 0) { - // もう何もする必要がない - + // もう何もする必要がない + return new SchedNop2Ready(scheduler); } else { - // 新しいリストに取り掛かる - int dma_tag_switch = 0; - return new SchedTaskList(nextList, scheduler, dma_tag_switch); + // 新しいリストに取り掛かる + int dma_tag_switch = 0; + return new SchedTaskList(nextList, scheduler, dma_tag_switch); } } } @@ -388,7 +388,7 @@ return manager->create_task(cmd, __builtin_return_address(0)); } -HTaskPtr +HTaskPtr SchedTask::create_task(int cmd, memaddr r, long rs, memaddr w, long ws) { return manager->create_task(cmd,r,rs,w,ws, __builtin_return_address(0)); @@ -422,12 +422,12 @@ manager->set_task_cpu(t, cpu); } -void* SchedTask::allocate(int size) +void* SchedTask::allocate(int size) { return manager->allocate(size) ; } -void* SchedTask::allocate(int size,int align) +void* SchedTask::allocate(int size,int align) { return manager->allocate(size,align) ; } @@ -437,14 +437,14 @@ manager->polling(); } -Scheduler* SchedTask::get_scheduler() +Scheduler* SchedTask::get_scheduler() { return scheduler; } /* system call */ -int +int SchedTask::printf(const char * format, ...) { va_list ap;
--- a/TaskManager/kernel/schedule/Scheduler.h Tue Oct 02 18:17:34 2012 +0900 +++ b/TaskManager/kernel/schedule/Scheduler.h Wed Nov 14 09:45:28 2012 +0900 @@ -37,6 +37,8 @@ typedef struct gpu_task_object { #ifdef __CERIUM_GPU__ cl_kernel *kernel; + int dim; + size_t *l_work_size; #endif } GpuTaskObject;
--- a/TaskManager/test/GpuRunTest/GpuFunc.h Tue Oct 02 18:17:34 2012 +0900 +++ b/TaskManager/test/GpuRunTest/GpuFunc.h Wed Nov 14 09:45:28 2012 +0900 @@ -1,6 +1,7 @@ enum { #include "SysTasks.h" + mogyo, Twice, // Func1, };
--- a/TaskManager/test/GpuRunTest/Makefile Tue Oct 02 18:17:34 2012 +0900 +++ b/TaskManager/test/GpuRunTest/Makefile Wed Nov 14 09:45:28 2012 +0900 @@ -5,7 +5,7 @@ SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP)) OBJS = $(SRCS:.cc=.o) -LIBS += -lGpuManager -framework opencl `sdl-config --libs` +LIBS += -lGpuManager -framework opencl `sdl-config --libs` .SUFFIXES: .cc .o
--- a/TaskManager/test/GpuRunTest/Makefile.def Tue Oct 02 18:17:34 2012 +0900 +++ b/TaskManager/test/GpuRunTest/Makefile.def Wed Nov 14 09:45:28 2012 +0900 @@ -2,8 +2,8 @@ CERIUM = ../../../../Cerium -CC = g++ -CFLAGS = -g -Wall +CC = clang++ +CFLAGS = -g -Wall -O0 INCLUDE = -I${CERIUM}/include/TaskManager -I. -I../.. LIBS = -L${CERIUM}/TaskManager
--- a/TaskManager/test/GpuRunTest/task_init.cc Tue Oct 02 18:17:34 2012 +0900 +++ b/TaskManager/test/GpuRunTest/task_init.cc Wed Nov 14 09:45:28 2012 +0900 @@ -5,5 +5,8 @@ task_init(void) { int cmd = Twice; + int dim = 2; + size_t *l_work_size = new size_t(dim); + GpuNDRangeRegister(cmd, dim, l_work_size); GpuSchedRegister(cmd, "twice.cl", "twice"); }
--- a/example/OpenCL/twice.cl Tue Oct 02 18:17:34 2012 +0900 +++ b/example/OpenCL/twice.cl Wed Nov 14 09:45:28 2012 +0900 @@ -3,18 +3,8 @@ __global int *input_data, __global int *output_data) { - int a = 1; - int b = rec(a); long count = (long)data_count[0]; for (int i = 0; i<count; i++) { - output_data[i] = b; + output_data[i] = input_data[i]*2; } } - -int -rec(int a) -{ - if (a<=1) return a; - return rec(a-1)+rec(a-2); - -}
--- a/example/basic/Makefile.gpu Tue Oct 02 18:17:34 2012 +0900 +++ b/example/basic/Makefile.gpu Wed Nov 14 09:45:28 2012 +0900 @@ -5,7 +5,7 @@ SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP)) OBJS = $(SRCS:.cc=.o) -TASK_DIR = ppe +TASK_DIR = gpu TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc) TASK_SRCS_EXCLUDE = TASK_SRCS = $(filter-out $(TASK_DIR)/$(TASK_SRCS_EXCLUDE),$(TASK_SRCS_TMP))
--- a/example/basic/main.cc Tue Oct 02 18:17:34 2012 +0900 +++ b/example/basic/main.cc Wed Nov 14 09:45:28 2012 +0900 @@ -19,7 +19,7 @@ { printf("%s ---\n", title); for (int i = 0; i < size; i++) { - printf("%2d ", data[i]); + printf("%2d ", data[i]); } printf("\n"); } @@ -57,7 +57,7 @@ int *data = (int*)manager->allocate(sizeof(int)*length); for (int i = 0; i < length; i++) { - data[i] = i; + data[i] = i; } print_data(data, length, "before"); @@ -65,7 +65,7 @@ /** * Create Task * create_task(Task ID); - */ + */ twice = manager->create_task(TWICE_TASK); twice->set_cpu(SPE_ANY); @@ -73,6 +73,7 @@ * Set of Input Data * add_inData(address of input data, size of input data); */ + // twice->set_param(0,(memaddr)&length); twice->set_inData(0,data, sizeof(int)*length); /** @@ -93,14 +94,14 @@ twice->set_post(twice_result, (void*)data, 0); // add Active Queue - twice->spawn(); + twice->spawn(); } int TMmain(TaskManager *manager,int argc, char *argv[]) { if (init(argc, argv) < 0) { - return -1; + return -1; } // Task Register @@ -108,7 +109,7 @@ task_init(); for (int i = 0; i < task; ++i) { - twice_init(manager); + twice_init(manager); } return 0;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/flip/GpuFunc.h Wed Nov 14 09:45:28 2012 +0900 @@ -0,0 +1,6 @@ + +enum { +#include "SysTasks.h" + Twice, + // Func1, +};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/flip/Makefile Wed Nov 14 09:45:28 2012 +0900 @@ -0,0 +1,29 @@ +include ./Makefile.def + +SRCS_TMP = $(wildcard *.cc) +SRCS_EXCLUDE = # 除外するファイルを書く +SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP)) +OBJS = $(SRCS:.cc=.o) + +LIBS += -lGpuManager -framework opencl `sdl-config --libs` + +.SUFFIXES: .cc .o + +.cc.o: + $(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@ + +all: $(TARGET) +gpu: all + +$(TARGET): $(OBJS) + $(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS) + +link: + $(CC) -o $(TARGET) $(OBJS) $(LIBS) + +debug: $(TARGET) + sudo gdb ./$(TARGET) + +clean: + rm -f $(TARGET) $(OBJS) + rm -f *~ \#*
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/flip/Makefile.def Wed Nov 14 09:45:28 2012 +0900 @@ -0,0 +1,9 @@ +TARGET = fliptest + +CERIUM = ../../../Cerium + +CC = g++ +CFLAGS = -g -Wall + +INCLUDE = -I${CERIUM}/include/TaskManager -I. -I../.. +LIBS = -L${CERIUM}/TaskManager
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/flip/main.cc Wed Nov 14 09:45:28 2012 +0900 @@ -0,0 +1,114 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <sys/stat.h> +#include "types.h" +#include "TaskManager.h" +#include "GpuFunc.h" + +#define DEFAULT 5 +static long int length = DEFAULT; +static int task = 1; +int *indata; + +extern void task_init(void); + +char usr_help_str[] = "GpuRun [length]\n"; + +void +print_data(int *data, int size, const char *title) +{ + printf("%s ---\n", title); + for ( int i = 0; i < size; i++) { + printf("%2d ", data[i]); + } + printf("\n"); +} + +/** + * タスク終了後の data1, data2 の確認 + */ +void +twice_result(SchedTask *s, void *a, void *b) +{ + int* data = (int*)a; + long length = (long)b; + print_data(data, length, "after"); +} + + +int +init(int argc, char **argv) +{ + for (int i = 1; argv[i]; ++i) { + if (strcmp(argv[i], "-length") == 0) { + length = atoi(argv[++i]); + } else if (strcmp(argv[i], "-count") == 0) { + task = atoi(argv[++i]); + } + } + + return 0; +} + + +void +tester(int *indata, int *outdata, int num) { + + //チェック + int check = 0; + for (int c=0; c<num; c++) { + if(outdata[c] == indata[c]*2) { + check++; + } + } + + printf("Computed '%d/%d' correct values\n",check,num); + +} + + +void +test(TaskManager *manager) { + indata = new int[length]; + + for (int c=0; c < length ;c++) { + indata[c] = c; + } + + print_data(indata, length, "before"); + + HTaskPtr twice = manager->create_task(Twice); + + twice->set_param(0, (memaddr)length); + twice->set_inData(0, indata, sizeof (int)*length); + twice->set_outData(0, indata, sizeof (int)*length); + twice->set_cpu(GPU_0); + twice->flip(); + + /* + * set_post() で ppe task を渡せるようにしたい + */ + twice->set_post(twice_result, (void*)indata, (void*)length); + + twice->spawn(); +} + +int +TMmain(TaskManager *manager, int argc, char* argv[]) +{ + if (init(argc, argv) < 0) { + return -1; + } + + task_init(); + + for (int i = 0; i < task; ++i) { + test(manager); + } + + return 0; +} + +/* end */
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/flip/task_init.cc Wed Nov 14 09:45:28 2012 +0900 @@ -0,0 +1,12 @@ +#include "GpuFunc.h" +#include "GpuScheduler.h" + +void +task_init(void) +{ + int cmd = Twice; + int dim = 2; + size_t *l_work_size = new size_t(dim); + GpuNDRangeRegister(cmd, dim, l_work_size); + GpuSchedRegister(cmd, "twice.cl", "twice"); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/flip/twice.cl Wed Nov 14 09:45:28 2012 +0900 @@ -0,0 +1,12 @@ +__kernel void +twice(__constant int *data_count, + __global int *input_data) + //__global int *output_data) +{ + long count = (long)data_count[0]; + for (int i = 0; i<count; i++) { + // output_data[i] = 2*input_data[i]; + input_data[i] *= 2; + } + +}
--- a/example/many_task/Func.h Tue Oct 02 18:17:34 2012 +0900 +++ b/example/many_task/Func.h Wed Nov 14 09:45:28 2012 +0900 @@ -1,6 +1,7 @@ enum { #include "SysTasks.h" QUICK_SORT, + QUICK_SORT_LOOP, SortSimple, SortCompat, };
--- a/example/many_task/Makefile Tue Oct 02 18:17:34 2012 +0900 +++ b/example/many_task/Makefile Wed Nov 14 09:45:28 2012 +0900 @@ -16,6 +16,10 @@ @echo "Make for PS3 (Cell)" @$(MAKE) -f Makefile.cell +gpu: FORCE + @echo "Make for OpenCL" + @$(MAKE) -f Makefile.gpu + FORCE: clean:
--- a/example/many_task/Makefile.def Tue Oct 02 18:17:34 2012 +0900 +++ b/example/many_task/Makefile.def Wed Nov 14 09:45:28 2012 +0900 @@ -6,11 +6,11 @@ CERIUM = ../../../Cerium -OPT = -O9 -# OPT = -g -O9 +OPT = -O +OPT = -g -O0 # OPT = -g -CC = g++ -CFLAGS = -DUSE_SIMPLE_TASK -Wall $(OPT) +CC = clang++ +CFLAGS = -Wall $(OPT) -DUSE_SIMPLE_TASK # CFLAGS = -Wall $(OPT) INCLUDE = -I${CERIUM}/include/TaskManager -I. -I..
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/many_task/Makefile.gpu Wed Nov 14 09:45:28 2012 +0900 @@ -0,0 +1,42 @@ +include ./Makefile.def + + +SRCS_TMP = $(wildcard *.cc) +SRCS_EXCLUDE = sort_test.cc task_init.cc # 除外するファイルを書く +SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP)) +OBJS = $(SRCS:.cc=.o) + +TASK_DIR = gpu +TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc) +TASK_SRCS_EXCLUDE = sort_test.cc +TASK_SRCS = $(filter-out $(TASK_DIR)/$(TASK_SRCS_EXCLUDE),$(TASK_SRCS_TMP)) +TASK_OBJS = $(TASK_SRCS:.cc=.o) + +CC += $(ABI) +# CFLAGS = -g -Wall# -O9 #-DDEBUG + +INCLUDE = -I${CERIUM}/include/TaskManager -I. -I.. +LIBS = -L${CERIUM}/TaskManager -DUSE_SIMPLE_TASK -lGpuManager -framework opencl `sdl-config --libs` + +.SUFFIXES: .cc .o + +.cc.o: + $(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@ + +all: $(TARGET) + +$(TARGET): $(OBJS) $(TASK_OBJS) + $(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS) + +link: + $(CC) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(LIBS) + +debug: $(TARGET) + sudo gdb ./$(TARGET) + +clean: + rm -f $(TARGET) $(OBJS) $(TASK_OBJS) + rm -f *~ \#* + rm -f ppe/*~ ppe/\#* + rm -f spe/*~ spe/\#* + rm -f gpu/*~ gpu/\#*
--- a/example/many_task/Makefile.macosx Tue Oct 02 18:17:34 2012 +0900 +++ b/example/many_task/Makefile.macosx Wed Nov 14 09:45:28 2012 +0900 @@ -2,7 +2,7 @@ SRCS_TMP = $(wildcard *.cc) -SRCS_EXCLUDE = # 除外するファイルを書く +SRCS_EXCLUDE = sort_test.cc # 除外するファイルを書く SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP)) OBJS = $(SRCS:.cc=.o) @@ -33,7 +33,7 @@ $(CC) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(LIBS) debug: $(TARGET) - sudo gdb ./$(TARGET) + sudo gdb ./$(TARGET) clean: rm -f $(TARGET) $(OBJS) $(TASK_OBJS)
--- a/example/many_task/README Tue Oct 02 18:17:34 2012 +0900 +++ b/example/many_task/README Wed Nov 14 09:45:28 2012 +0900 @@ -1,3 +1,6 @@ +2012/10/8 tomari +ppe内でmakeしたらCeriumを使わないtest routineが走る + 2010/7/31 kono bitoinc sort の一段落を待って、次のtaskを生成する方法だと、
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/many_task/gpu/Makefile Wed Nov 14 09:45:28 2012 +0900 @@ -0,0 +1,37 @@ +TARGET = sort_test +CERIUM = ../../../../Cerium + +CC = g++ +CFLAGS = -g -Wall + +INCLUDE = -I${CERIUM}/include/TaskManager -I. -I../ +LIBS = -L${CERIUM}/TaskManager + + +SRCS_TMP = $(wildcard *.cc) +SRCS_EXCLUDE = gpu_task_init.cc # 除外するファイルを書く +SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP)) +OBJS = $(SRCS:.cc=.o) + +LIBS += -framework opencl + +.SUFFIXES: .cc .o + +.cc.o: + $(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@ + +all: $(TARGET) +gpu: all + +$(TARGET): $(OBJS) + $(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS) + +link: + $(CC) -o $(TARGET) $(OBJS) $(LIBS) + +debug: $(TARGET) + sudo gdb ./$(TARGET) + +clean: + rm -f $(TARGET) $(OBJS) + rm -f *~ \#*
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/many_task/gpu/QuickSort.cl Wed Nov 14 09:45:28 2012 +0900 @@ -0,0 +1,50 @@ +typedef struct Data { + int index; + int ptr; + int pad[2]; +} Data, *DataPtr; + +inline void +swap(__global Data *data, int left, int right ) +{ + Data tmp = data[left]; + data[left] = data[right]; + data[right] = tmp; +} + +__kernel void +quick_sort(__constant int *count, + __global Data *data) +{ + int begin = 0; + int end = count[0]; + + int stack[1024]; + int sp = 0; + int p; + while (1) { + while (begin < end) { + int where = (begin + end) / 2; + int pivot = data[where].index; + data[where].index = data[begin].index; + int i; + p = begin; + for (i=begin+1; i<=end; i++) { + if (data[i].index < pivot) { + p++; + swap(data, p, i); + } + } + data[begin].index = data[p].index; + data[p].index = pivot; + + stack[sp++] = p + 1; + stack[sp++] = end; + end = p - 1; + } + if (sp == 0) return; + end = stack[--sp]; + begin = stack[--sp]; + begin = p + 1; + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/many_task/gpu/gpu_task_init.cc Wed Nov 14 09:45:28 2012 +0900 @@ -0,0 +1,12 @@ +#include "Func.h" +#include "GpuScheduler.h" +#include "Scheduler.h" + +SchedExternTask(SortSimple); + +void +task_init(void) +{ + SchedRegister(SortSimple); + GpuSchedRegister(QUICK_SORT, "gpu/QuickSort.cl", "quick_sort"); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/many_task/gpu/sort_test.cc Wed Nov 14 09:45:28 2012 +0900 @@ -0,0 +1,183 @@ +#include <stdlib.h> +#include <OpenCL/opencl.h> +#include <stdio.h> +#include <fcntl.h> +#include <string.h> +#include <sys/time.h> +#include <sys/stat.h> +#include "sort.h" +#include "sort_test.h" +#define DEFAULT 432 + +extern int data_length; +extern DataPtr data; + +// 計測用 +static double st_time; +static double ed_time; +static int length = DEFAULT; + +int +init(int argc, char **argv) +{ + for (int i = 1; argv[i]; ++i) { + if (strcmp(argv[i], "--length") == 0 || strcmp(argv[i], "-l") == 0) { + length = atoi(argv[++i]); + } + } + + return 0; +} + +int +get_split_num(int len, int num) +{ + if (len / num < MAX_BLOCK_SIZE) { + return num; + } else { + // 切り上げ + return (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE; + } +} + + +static double +getTime() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec + (double)tv.tv_usec*1e-6; +} + +void +show( Data *data, int size ) +{ + puts("-----------------------------------------------"); + for(int i=0; i<=size; i++) printf("data[%02d].index = %d\n", i, data[i].index); + puts("-----------------------------------------------"); + return; +} + +Sort sorter; + +static void +check_data() +{ + for(int i=0; i< sorter.data_length-1;i++) { + if (sorter.data[i].index>sorter.data[i+1].index) { + printf("Data are not sorted at %d. %d > %d \n",i, sorter.data[i].index,sorter.data[i+1].index); + return; + } + } + printf("Data are sorted\n"); +} + +void +gpu_init() +{ + clGetPlatformIDs(1, &platform_id, &ret_num_platforms); + clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, + &ret_num_devices); + + context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); + command_queue = clCreateCommandQueue(context, device_id, 0, &ret); + + //ファイルオープン + + const char* filename = "QuickSort.cl"; + const char* functionname = "quick_sort"; + + int fp = open(filename, O_RDONLY); + + if (!fp) { + fprintf(stderr, "Failed to load kernel.\n"); + exit(1); + } + + struct stat stats; + fstat(fp,&stats); + off_t size = stats.st_size; + + if (!size) { + fprintf(stderr, "Failed to load kernel.\n"); + exit(1); + } + + char *kernel_src_str = new char[size]; + size_t kernel_code_size = read(fp, kernel_src_str, size); + close(fp); + + program = clCreateProgramWithSource(context, 1, (const char **)&kernel_src_str, + (const size_t *)&kernel_code_size, &ret); + clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); + kernel = clCreateKernel(program,functionname, &ret); +} + +void +sort_start(Sort s){ + + Sort sorter = s; + int length = sorter.data_length; + + //メモリバッファの作成 + cl_mem mem_count = clCreateBuffer(context, CL_MEM_READ_ONLY,sizeof(int),NULL, &ret); + cl_mem mem_data = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(Data)*length, NULL, &ret); + + st_time = getTime(); + + //メモリバッファに入力データを書き込み + ret = clEnqueueWriteBuffer(command_queue, mem_count, CL_TRUE, 0, + sizeof(int), &length, 0, NULL, NULL); + ret = clEnqueueWriteBuffer(command_queue, mem_data, CL_TRUE, 0, + sizeof(Data)*length, sorter.data, 0, NULL, NULL); + + //print_data(data, count, "before"); + clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&mem_count); + clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&mem_data); + + ev = NULL; + + ret = clEnqueueTask(command_queue, kernel, 0, NULL, &ev); + + //メモリバッファから結果を取得 + ret = clEnqueueReadBuffer(command_queue, mem_data, CL_TRUE, 0,sizeof(Data)*length, sorter.data, 1, &ev, NULL); + clFlush(command_queue); + ed_time = getTime(); + show(sorter.data, length-1); + check_data(); + printf("Time: %0.6f\n",ed_time-st_time); + + clReleaseKernel(kernel); + clReleaseProgram(program); + clReleaseMemObject(mem_data); + clReleaseEvent(ev); + clReleaseCommandQueue(command_queue); + clReleaseContext(context); + +} + +int main(int argc, char *argv[]) { + + // 無効な引数ならデフォルトの値として432を設定 + + + if (argc>1) { + if (init(argc,argv) < 0) { + return -1; + } + } + + gpu_init(); + + sorter.data = new Data[length]; + sorter.data_length = length; + sorter.split_num = get_split_num(sorter.data_length, 1); // (length, cpu_num) + + for (int i = 0; i < length; i++) { + sorter.data[i].index = rand()%10000; + sorter.data[i].ptr = i; + } + + sort_start(sorter); + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/many_task/gpu/sort_test.h Wed Nov 14 09:45:28 2012 +0900 @@ -0,0 +1,11 @@ + cl_platform_id platform_id; + cl_uint ret_num_platforms; + cl_device_id device_id; + cl_uint ret_num_devices; + cl_int ret; + + cl_context context; + cl_command_queue command_queue; + cl_program program; + cl_kernel kernel; + cl_event ev;
--- a/example/many_task/main.cc Tue Oct 02 18:17:34 2012 +0900 +++ b/example/many_task/main.cc Wed Nov 14 09:45:28 2012 +0900 @@ -25,6 +25,7 @@ static double ed_time; static int length = 1200; +CPU_TYPE spe_cpu = SPE_ANY; // prototype void TMend(TaskManager *); @@ -36,17 +37,6 @@ gettimeofday(&tv, NULL); return tv.tv_sec + (double)tv.tv_usec*1e-6; } -/* -static void -show_data(void) -{ - puts("-----------------------------------------------"); - for(int i = 0; i < data_length; i++) { - printf("data[%02d].index = %d\n", i, data[i].index); - } - puts("-----------------------------------------------"); -} -*/ const char *usr_help_str = "Usage: ./sort [option]\n \ options\n\ @@ -64,6 +54,9 @@ if (strcmp(argv[i], "-a") == 0 ) { all = 1; } + if (strcmp(argv[i], "-g") == 0 ) { + spe_cpu = GPU_0; + } if (strcmp(argv[i], "-c") == 0 ) { sort_task = SortCompat; } @@ -79,6 +72,16 @@ Sort sorter; static void +show_data(void) +{ + puts("-----------------------------------------------"); + for(int i = 0; i < sorter.data_length; i++) { + printf("data[%02d].index = %d\n", i, sorter.data[i].index); + } + puts("-----------------------------------------------"); +} + +static void check_data() { for(int i=0; i< sorter.data_length-1;i++) { @@ -111,9 +114,12 @@ sorter.data[i].ptr = i; } + // show_data(); HTaskPtr restart = manager->create_task(sort_task,0,0,0,0); + // default ではSortSimpleがsetされている。SortSimpleはsort.ccに + restart->set_param(0,(memaddr)&sorter); - //set flip flag + // set flip flag restart->spawn(); } @@ -129,7 +135,6 @@ task_init(); int cpu = manager->get_cpuNum(); - // in case of -cpu 0 if (cpu==0) cpu = 1; if (1) { @@ -152,7 +157,7 @@ TMend(TaskManager *manager) { ed_time = getTime(); - //show_data(); + show_data(); check_data(); printf("Time: %0.6f\n",ed_time-st_time); }
--- a/example/many_task/ppe/Makefile Tue Oct 02 18:17:34 2012 +0900 +++ b/example/many_task/ppe/Makefile Wed Nov 14 09:45:28 2012 +0900 @@ -25,6 +25,12 @@ debug: $(TARGET) sudo gdb ./$(TARGET) +loop: $(OBJS) $(TASK_OBJS) + $(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS) $(LOOP) + +rec: $(OBJS) $(TASK_OBJS) + $(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS) $(REC) + clean: rm -f $(TARGET) $(OBJS) $(TASK_OBJS) rm -f *~ \#*
--- a/example/many_task/ppe/QuickSort.cc Tue Oct 02 18:17:34 2012 +0900 +++ b/example/many_task/ppe/QuickSort.cc Wed Nov 14 09:45:28 2012 +0900 @@ -9,17 +9,17 @@ static void swap( Data *data, int left, int right ) { - Data tmp = data[left]; + Data tmp = data[left]; data[left] = data[right]; data[right] = tmp; } -// #define USE_MEMCPY +//#define USE_MEMCPY static int run(SchedTask *s, void* rbuff, void* wbuff) { // copy value - int begin = 0; + int begin = 0; #if USE_SIMPLE_TASK int end = s->read_size()/sizeof(Data); Data *r_data = (Data*)rbuff; @@ -37,7 +37,7 @@ // printf("[PPE] Quick: length:%d addr->%x \n",end, (int)rbuff); // printf("[PPE] Quick: data[0]: %ld addr->%lx\n",sizeof(r_data),(long)r_data); - quick_sort(r_data, begin, end-1); + quick_sort(r_data, begin, end); #ifdef USE_MEMCPY memcpy(w_data, r_data, sizeof(Data)*end); @@ -50,32 +50,38 @@ void qsort_test(Data *data, int begin, int end ) { - quick_sort(data, begin, end); - printf("end is %d\n",end); + quick_sort(data, begin, end-1); } static void -quick_sort(Data *data, int begin, int end ) { +quick_sort( Data *data, int begin, int end ) { + int stack[1024]; + int sp = 0; + int p; + while (1) { + while (begin < end) { + int where = (begin + end) / 2; + int pivot = data[where].index; + data[where].index = data[begin].index; + int i; + p = begin; + for (i=begin+1; i<=end; i++) { + if (data[i].index < pivot) { + p++; + swap(data, p, i); + } + } + data[begin].index = data[p].index; + data[p].index = pivot; - if (begin < end) { - int where = (begin + end) / 2; - int pivot = data[where].index; - data[where].index = data[begin].index; - int p = begin; - int i; - for (i=begin+1; i<end; i++) { - if (data[i].index < pivot) { - p++; - swap(data, p, i); - } + stack[sp++] = p + 1; + stack[sp++] = end; + end = p - 1; } - data[begin].index = data[p].index; - data[p].index = pivot; - - quick_sort(data, begin, p-1); - quick_sort(data, p+1, end); // tail call + if (sp == 0) return; + end = stack[--sp]; + begin = stack[--sp]; + begin = p + 1; } } - - /* end */
--- a/example/many_task/ppe/QuickSort.cc.loop Tue Oct 02 18:17:34 2012 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,97 +0,0 @@ -#include "QuickSort.h" -#include <stdio.h> -#include <string.h> - -SchedDefineTask(QuickSort); - -static void quick_sort( Data *data, int begin, int end ) ; -extern void show_data(DataPtr, int); - -static void -swap( Data *data, int left, int right ) -{ - Data tmp = data[left]; - data[left] = data[right]; - data[right] = tmp; -} - -// #define USE_MEMCPY - -static int -run(SchedTask *s, void* rbuff, void* wbuff) -{ - // copy value - int begin = 0; -#if USE_SIMPLE_TASK - int end = s->read_size()/sizeof(Data); - Data *r_data = (Data*)rbuff; -#ifdef USE_MEMCPY - Data *w_data = (Data*)wbuff; -#endif -#else - int end = s->get_inputSize(0)/sizeof(Data); - DataPtr r_data = (DataPtr)s->get_input(0); -#ifdef USE_MEMCPY - DataPtr w_data = (DataPtr)s->get_output(0); -#endif -#endif - - printf("[PPE] Quick: data[0]: %ld addr->%lx\n",sizeof(r_data),(long)r_data); - - // show_data(r_data, end); - quick_sort(r_data, begin, end-1); - // show_data(r_data, end); -#ifdef USE_MEMCPY - memcpy(w_data, r_data, sizeof(Data)*end); -#else - s->swap(); -#endif - - return 0; -} - -void -qsort_test(Data *data, int begin, int end ) { - quick_sort(data, begin, end); - printf("end is %d\n",end); -} - -static void -quick_sort( Data *data, int begin, int end ) { - int stack[1024]; - int sp = 0; - int p = begin; - while (begin < end) { - while (begin < end) { - int where = (begin + end) / 2; - int pivot = data[where].index; - data[where].index = data[begin].index; - int i; - for (i=begin+1; i<end; i++) { - if (data[i].index < pivot) { - p++; - swap(data, p, i); - } - } - data[begin].index = data[p].index; - data[p].index = pivot; - - stack[sp++] = begin; - stack[sp++] = end; - end = p-1; - - // quick_sort(data, begin, p-1); - // beginとp-1のみが変わっている(これだけを保持) - // beginとp-1(end)用のスタックを作ってやればよい - } - if (sp == 0) return; - end = stack[--sp]; - begin = stack[--sp]; - begin = p+1; - // quick_sort(data, p+1, end); // tail call - // そのままループに - } -} - - -/* end */
--- a/example/many_task/ppe/sort_test.cc Tue Oct 02 18:17:34 2012 +0900 +++ b/example/many_task/ppe/sort_test.cc Wed Nov 14 09:45:28 2012 +0900 @@ -3,13 +3,21 @@ #include <stdlib.h> //#include "sort.h" #include "QuickSort.h" +#include <sys/time.h> // sort.cc extern int data_length; extern DataPtr data; -extern void quick_sort(DataPtr, int, int); static int length = 1200; extern void qsort_test(Data*, int, int); +static double +getTime() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec + (double)tv.tv_usec*1e-6; +} + void show( Data *data, int size ) { @@ -33,6 +41,19 @@ Sort sorter; +void +check_data() +{ + for(int i=0; i< sorter.data_length-1;i++) { + if (sorter.data[i].index>sorter.data[i+1].index) { + printf("Data are not sorted at %d. %d > %d \n",i, sorter.data[i].index,sorter.data[i+1].index); + return; + } + } + printf("Data are sorted\n"); +} + + int main(int argc, char *argv[]) { @@ -50,9 +71,15 @@ int begin = 0; int end = length; - show(sorter.data, end-1); + double st_time; + double ed_time; + //show(sorter.data, end-1); + st_time = getTime(); qsort_test(sorter.data, begin, end); - show(sorter.data, end-1); + ed_time = getTime(); + printf("Time: %0.6f\n",ed_time-st_time); + //show(sorter.data, end-1); + check_data(); return 0; }
--- a/example/many_task/ppe/task_init.cc Tue Oct 02 18:17:34 2012 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ -#include "Func.h" -#include "Scheduler.h" - -SchedExternTask(QuickSort); -SchedExternTask(SortSimple); -SchedExternTask(SortCompat); - -void -task_init(void) -{ - // ex - // SchedRegisterNDRange(dim,global_size, local_size) - - SchedRegisterTask(QUICK_SORT, QuickSort); - SchedRegister(SortSimple); - SchedRegister(SortCompat); -}
--- a/example/many_task/sort-compat.cc Tue Oct 02 18:17:34 2012 +0900 +++ b/example/many_task/sort-compat.cc Wed Nov 14 09:45:28 2012 +0900 @@ -6,6 +6,7 @@ extern void check_data(); extern int all; // allocate task at once +extern CPU_TYPE spe_cpu; SchedDefineTask1(SortCompat, sort_start_compat ); @@ -37,7 +38,7 @@ if (i<s->split_num-2 && s->bsort[i]) { s->fsort[i]->wait_for(s->bsort[i]); } - s->fsort[i]->set_cpu(SPE_ANY); + s->fsort[i]->set_cpu(spe_cpu); } // 最後の block は端数なので last_block_num を使う @@ -50,7 +51,7 @@ if (i>0 && s->bsort[i-1]) { s->fsort[i]->wait_for(s->bsort[i-1]); } - s->fsort[i]->set_cpu(SPE_ANY); + s->fsort[i]->set_cpu(spe_cpu); } if (s->split_num > 1) { @@ -62,7 +63,7 @@ sizeof(Data)*block_num); s->bsort[i]->set_outData(0,&s->data[i*block_num+half_block_num], sizeof(Data)*block_num); - s->bsort[i]->set_cpu(SPE_ANY); + s->bsort[i]->set_cpu(spe_cpu); } { @@ -74,7 +75,7 @@ sizeof(Data)*last_half_block_num); s->bsort[i]->set_outData(0,&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num); - s->bsort[i]->set_cpu(SPE_ANY); + s->bsort[i]->set_cpu(spe_cpu); } for (int i = 0; i < half_num; i++) {
--- a/example/many_task/sort.cc Tue Oct 02 18:17:34 2012 +0900 +++ b/example/many_task/sort.cc Wed Nov 14 09:45:28 2012 +0900 @@ -6,6 +6,7 @@ extern int get_split_num(int len, int num); extern int all; // allocate task at once +extern CPU_TYPE spe_cpu ; /** * 一つの block にある data の数が MAX_BLOCK_SIZE 超えないような @@ -23,12 +24,12 @@ get_split_num(int len, int num) { if (len / num < MAX_BLOCK_SIZE) { - return num; + return num; } else { - // 切り上げ - return (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE; + // 切り上げ + return (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE; } -} +} /** @@ -53,79 +54,91 @@ int last_half_block_num = half_block_num+(last_block_num/2); if (--sort_count < 0) { - return 0; + return 0; } + for (int i = 0; i < s->split_num-1; i++) { - s->fsort[i] = manager->create_task(QUICK_SORT, - (memaddr)&s->data[i*block_num], sizeof(Data)*block_num, - (memaddr)&s->data[i*block_num], sizeof(Data)*block_num); - if (i>0 && s->bsort[i-1]) { - s->fsort[i]->wait_for(s->bsort[i-1]); - } - if (i<s->split_num-2 && s->bsort[i]) { - s->fsort[i]->wait_for(s->bsort[i]); - } - s->fsort[i]->set_cpu(SPE_ANY); + s->fsort[i] = manager->create_task(QUICK_SORT, + (memaddr)&s->data[i*block_num], sizeof(Data)*block_num, + (memaddr)&s->data[i*block_num], sizeof(Data)*block_num); + + s->fsort[i]->flip(); + + if (i>0 && s->bsort[i-1]) { + s->fsort[i]->wait_for(s->bsort[i-1]); + } + if (i<s->split_num-2 && s->bsort[i]) { + s->fsort[i]->wait_for(s->bsort[i]); + } + s->fsort[i]->set_cpu(GPU_0); + s->fsort[i]->set_param(0,(memaddr)block_num); } // 最後の block は端数なので last_block_num を使う { - int i = s->split_num-1; + int i = s->split_num-1; - s->fsort[i] = manager->create_task(QUICK_SORT, - (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num, - (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num); - if (i>0 && s->bsort[i-1]) { - s->fsort[i]->wait_for(s->bsort[i-1]); - } - s->fsort[i]->set_cpu(SPE_ANY); + s->fsort[i] = manager->create_task(QUICK_SORT, + (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num, + (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num); + s->fsort[i]->flip(); + if (i>0 && s->bsort[i-1]) { + s->fsort[i]->wait_for(s->bsort[i-1]); + } + s->fsort[i]->set_cpu(GPU_0); + s->fsort[i]->set_param(0,(memaddr)last_block_num); } if (s->split_num > 1) { - for (int i = 0; i < half_num-1; i++) { - if (s->bsort[i]) manager->free_htask(s->bsort[i]); - s->bsort[i] = manager->create_task(QUICK_SORT, - (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num, - (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num); - s->bsort[i]->set_cpu(SPE_ANY); - } + for (int i = 0; i < half_num-1; i++) { + if (s->bsort[i]) manager->free_htask(s->bsort[i]); + s->bsort[i] = manager->create_task(QUICK_SORT, + (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num, + (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num); + s->bsort[i]->flip(); + s->bsort[i]->set_cpu(GPU_0); + s->bsort[i]->set_param(0,(memaddr)block_num); + } - { - int i = half_num-1; + { + int i = half_num-1; - if (s->bsort[i]) manager->free_htask(s->bsort[i]); - s->bsort[i] = manager->create_task(QUICK_SORT, - (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num, - (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num); - s->bsort[i]->set_cpu(SPE_ANY); - } - - for (int i = 0; i < half_num; i++) { - s->bsort[i]->wait_for(s->fsort[i]); - s->bsort[i]->wait_for(s->fsort[i+1]); - s->bsort[i]->no_auto_free(); - s->bsort[i]->spawn(); - } + if (s->bsort[i]) manager->free_htask(s->bsort[i]); + s->bsort[i] = manager->create_task(QUICK_SORT, + (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num, + (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num); + s->bsort[i]->flip(); + s->bsort[i]->set_cpu(GPU_0); + s->bsort[i]->set_param(0,(memaddr)last_half_block_num); + } + + for (int i = 0; i < half_num; i++) { + s->bsort[i]->wait_for(s->fsort[i]); + s->bsort[i]->wait_for(s->fsort[i+1]); + s->bsort[i]->no_auto_free(); + s->bsort[i]->spawn(); + } } HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0); restart->set_param(0,(memaddr)s); if (!all) restart->wait_for(s->fsort[0]); for (int i = 0; i < s->split_num; i++) { - s->fsort[i]->spawn(); + s->fsort[i]->spawn(); } if (sort_count == 1) { - // last loop wait for all task - // we should not need this? - for (int i = 0; i < half_num; i++) { - restart->wait_for(s->bsort[i]); - s->bsort[i]->auto_free(); - } + // last loop wait for all task + // we should not need this? + for (int i = 0; i < half_num; i++) { + restart->wait_for(s->bsort[i]); + s->bsort[i]->auto_free(); + } } restart->spawn(); + return 0; }
--- a/example/many_task/task_init.cc Tue Oct 02 18:17:34 2012 +0900 +++ b/example/many_task/task_init.cc Wed Nov 14 09:45:28 2012 +0900 @@ -1,14 +1,23 @@ #include "Func.h" #include "Scheduler.h" +#include "GpuScheduler.h" +#ifndef __CERIUM_GPU__ SchedExternTask(QuickSort); +#endif // __CERIUM_GPU__ SchedExternTask(SortSimple); SchedExternTask(SortCompat); void task_init(void) { +#ifdef __CERIUM_GPU__ + GpuSchedRegister(QUICK_SORT, "sort.cl", "sort"); +#else SchedRegisterTask(QUICK_SORT, QuickSort); +#endif + + SchedRegister(SortSimple); SchedRegister(SortCompat); }