# HG changeset patch # User Shinji KONO # Date 1387681143 -32400 # Node ID c21bd32e20b90b93eb835f317b86caa806311e3f # Parent f800f61a031144db04e7af023cebebb8ef69b566 fix sort with task array ( wrong result ) diff -r f800f61a0311 -r c21bd32e20b9 TaskManager/kernel/schedule/Scheduler.h --- a/TaskManager/kernel/schedule/Scheduler.h Sat Dec 21 19:56:58 2013 +0900 +++ b/TaskManager/kernel/schedule/Scheduler.h Sun Dec 22 11:59:03 2013 +0900 @@ -180,6 +180,10 @@ inline void loadSchedTask(Scheduler *scheduler,int command) { + if ( task_list[command].load == 0) { + fprintf(stderr,"no such command %d\n", command); + exit(1); + } task_list[command].load(scheduler,command); } diff -r f800f61a0311 -r c21bd32e20b9 example/many_task/Func.h --- a/example/many_task/Func.h Sat Dec 21 19:56:58 2013 +0900 +++ b/example/many_task/Func.h Sun Dec 22 11:59:03 2013 +0900 @@ -4,4 +4,5 @@ QUICK_SORT_LOOP, SortSimple, SortCompat, + SortTaskArray, }; diff -r f800f61a0311 -r c21bd32e20b9 example/many_task/Makefile.gpu --- a/example/many_task/Makefile.gpu Sat Dec 21 19:56:58 2013 +0900 +++ b/example/many_task/Makefile.gpu Sun Dec 22 11:59:03 2013 +0900 @@ -1,7 +1,7 @@ include ./Makefile.def SRCS_TMP = $(wildcard *.cc) -SRCS_EXCLUDE = sort-compat.cc sort_test.cc gpu/gpu_task_init.cc # 除外するファイルを書く +SRCS_EXCLUDE = sort_test.cc gpu/gpu_task_init.cc # 除外するファイルを書く SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP)) OBJS = $(SRCS:.cc=.o) diff -r f800f61a0311 -r c21bd32e20b9 example/many_task/gpu/task_init.cc --- a/example/many_task/gpu/task_init.cc Sat Dec 21 19:56:58 2013 +0900 +++ b/example/many_task/gpu/task_init.cc Sun Dec 22 11:59:03 2013 +0900 @@ -5,13 +5,16 @@ SchedExternTask(QuickSort); SchedExternTask(SortSimple); SchedExternTask(SortCompat); +SchedExternTask(SortTaskArray); void task_init(void) { SchedRegister(SortSimple); SchedRegisterTask(QUICK_SORT, QuickSort); - // SchedRegister(SortCompat); + SchedRegister(SortCompat); + SchedRegister(SortTaskArray); + 
GpuSchedRegister(QUICK_SORT,"gpu/QuickSort.cl","quick_sort"); } diff -r f800f61a0311 -r c21bd32e20b9 example/many_task/main.cc --- a/example/many_task/main.cc Sat Dec 21 19:56:58 2013 +0900 +++ b/example/many_task/main.cc Sun Dec 22 11:59:03 2013 +0900 @@ -68,7 +68,7 @@ } if (strcmp(argv[i], "-ta") == 0 ) { use_task_array = 1; - + sort_task = SortTaskArray; } if (strcmp(argv[i], "-any") == 0 ) { spe_cpu = ANY_ANY; diff -r f800f61a0311 -r c21bd32e20b9 example/many_task/ppe/task_init.cc --- a/example/many_task/ppe/task_init.cc Sat Dec 21 19:56:58 2013 +0900 +++ b/example/many_task/ppe/task_init.cc Sun Dec 22 11:59:03 2013 +0900 @@ -4,11 +4,13 @@ SchedExternTask(QuickSort); SchedExternTask(SortSimple); SchedExternTask(SortCompat); +SchedExternTask(SortTaskArray); void task_init(void) { SchedRegisterTask(QUICK_SORT, QuickSort); SchedRegister(SortSimple); - // SchedRegister(SortCompat); + SchedRegister(SortCompat); + SchedRegister(SortTaskArray); } diff -r f800f61a0311 -r c21bd32e20b9 example/many_task/sort-array.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/example/many_task/sort-array.cc Sun Dec 22 11:59:03 2013 +0900 @@ -0,0 +1,137 @@ +#include "TaskManager.h" +#include "SchedTask.h" +#include "sort.h" +#include "Func.h" +#include + +extern int get_split_num(int len, int num); +extern int all; // allocate task at once +extern CPU_TYPE spe_cpu ; +extern int use_task_array; +/** + * 一つの block にある data の数が MAX_BLOCK_SIZE 超えないような + * len の分割数を返す + * + * @param len sort する data の総数 + * @param num 使用する SPE の数 + * + * @return data の分割数 + * + * TODO: + * len が num 以下とか考えてません + */ +extern int get_split_num(int len, int num); + +/** + * btask が全て終了したら、再び sort_start を実行する + * @param d 生成された btask の数 + */ + +SchedDefineTask1(SortTaskArray, sort_start_array ); + +static int +sort_start_array(SchedTask *manager, void *d, void *e) +{ + Sort *s = (Sort*)manager->get_param(0); + long half_num = s->split_num-1; + static long sort_count = s->split_num; // sort 完了に必要な回数 + + // 
一つのタスクで sort する data 数 + long block_num = (s->data_length + s->split_num -1)/s->split_num; + long half_block_num = block_num/2; + + long last_block_num = s->data_length - (s->split_num-1)*block_num; + long last_half_block_num = half_block_num+(last_block_num/2); + + if (--sort_count < 0) { + return 0; + } + HTask **task_array_f = (HTask**)manager->allocate(sizeof(HTask*)*s->split_num); + HTask **task_array_b = (HTask**)manager->allocate(sizeof(HTask*)*half_num); + + for (int i = 0; i < s->split_num;i++) { + task_array_f[i] = manager->create_task_array(QUICK_SORT, s->split_num,1,1,1); + s->fsort_task[i]=0; + } + for (int i = 0; i<half_num; i++) { + task_array_b[i] = manager->create_task_array(QUICK_SORT, half_num,1,1,1); + s->bsort_task[i]=0; + } + for (int i = 0; i < s->split_num-1; i++) { + s->fsort_task[i] = task_array_f[i]->next_task_array(QUICK_SORT,s->fsort_task[i]); + s->fsort_task[i]->set_param(0,(memaddr)block_num); + s->fsort_task[i]->set_inData(0,(memaddr)&s->data[i*block_num], sizeof(Data)*block_num); + if (i>0 && s->bsort_task[i-1]) { + task_array_f[i]->wait_for(task_array_b[i-1]); + } + if (i<s->split_num-2 && s->bsort_task[i]) { + task_array_f[i]->wait_for(task_array_b[i]); + } + } + + // 最後の block は端数なので last_block_num を使う + { + + int i = s->split_num-1; + + s->fsort_task[i] = task_array_f[i]->next_task_array(QUICK_SORT,s->fsort_task[i]); + s->fsort_task[i]->set_param(0,(memaddr)last_block_num); + s->fsort_task[i]->set_inData(0,(memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num); + if (i>0 && s->bsort_task[i-1]) { + task_array_f[i]->wait_for(task_array_b[i-1]); + } + } + + if (s->split_num > 1) { + + for (int i = 0; i < half_num-1; i++) { + if (s->bsort_task[i]) s->bsort_task[i]=0; + s->bsort_task[i] = task_array_b[i]->next_task_array(QUICK_SORT,s->bsort_task[i]); + s->bsort_task[i]->set_inData(0,(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num); + s->bsort_task[i]->set_param(0,(memaddr)block_num); + } + + { + int i = half_num-1; + + if (s->bsort_task[i]) 
s->bsort_task[i]=0; + s->bsort_task[i] = task_array_b[i]->next_task_array(QUICK_SORT,s->bsort_task[i]); + s->bsort_task[i]->set_inData(0,(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num); + s->bsort_task[i]->set_param(0,(memaddr)last_half_block_num); + } + + for (int i = 0; i < half_num; i++) { + task_array_b[i]->wait_for(task_array_f[i]); + task_array_b[i]->wait_for(task_array_f[i+1]); + task_array_b[i]->no_auto_free(); + task_array_b[i]->spawn_task_array(s->bsort_task[i]->next()); + task_array_b[i]->set_cpu(spe_cpu); + task_array_b[i]->flip(); + task_array_b[i]->spawn(); + } + } + + HTaskPtr restart = manager->create_task(SortTaskArray,0,0,0,0); + restart->set_param(0,(memaddr)s); + if (!all) restart->wait_for(task_array_f[0]); + for (int i = 0; i < s->split_num; i++) { + task_array_f[i]->spawn_task_array(s->fsort_task[i]->next()); + task_array_f[i]->set_cpu(spe_cpu); + task_array_f[i]->flip(); + task_array_f[i]->spawn(); + } + if (sort_count == 1) { + // last loop wait for all task + // we should not need this? 
+ for (int i = 0; i < half_num; i++) { + restart->wait_for(task_array_b[i]); + task_array_b[i]->auto_free(); + } + } + restart->spawn(); + + return 0; +} + + +/* end */ diff -r f800f61a0311 -r c21bd32e20b9 example/many_task/sort.cc --- a/example/many_task/sort.cc Sat Dec 21 19:56:58 2013 +0900 +++ b/example/many_task/sort.cc Sun Dec 22 11:59:03 2013 +0900 @@ -7,7 +7,6 @@ extern int get_split_num(int len, int num); extern int all; // allocate task at once extern CPU_TYPE spe_cpu ; -int task_array_num = 3; extern int use_task_array; /** * 一つの block にある data の数が MAX_BLOCK_SIZE 超えないような @@ -57,171 +56,84 @@ if (--sort_count < 0) { return 0; } - if (use_task_array) { - HTask **task_array_f = (HTask**)manager->allocate(sizeof(HTask*)*s->split_num); - HTask **task_array_b = (HTask**)manager->allocate(sizeof(HTask*)*half_num); + for (int i = 0; i < s->split_num-1; i++) { + s->fsort[i] = manager->create_task(QUICK_SORT, + (memaddr)&s->data[i*block_num], sizeof(Data)*block_num, + (memaddr)&s->data[i*block_num], sizeof(Data)*block_num); - for (int i = 0; i < s->split_num;i++) { - task_array_f[i] = manager->create_task_array(QUICK_SORT, s->split_num,1,1,1); - s->fsort_task[i]=0; - } - for (int i = 0; i<half_num; i++) { - task_array_b[i] = manager->create_task_array(QUICK_SORT, half_num,1,1,1); - s->bsort_task[i]=0; - } - for (int i = 0; i < s->split_num-1; i++) { - s->fsort_task[i] = task_array_f[i]->next_task_array(QUICK_SORT,s->fsort_task[i]); - s->fsort_task[i]->set_param(0,(memaddr)block_num); - s->fsort_task[i]->set_inData(0,(memaddr)&s->data[i*block_num], sizeof(Data)*block_num); - if (i>0 && s->bsort_task[i-1]) { - task_array_f[i]->wait_for(task_array_b[i-1]); - } - if (i<s->split_num-2 && s->bsort_task[i]) { - task_array_f[i]->wait_for(task_array_b[i]); - } - } + s->fsort[i]->flip(); - // 最後の block は端数なので last_block_num を使う - { - - int i = s->split_num-1; - - s->fsort_task[i] = task_array_f[i]->next_task_array(QUICK_SORT,s->fsort_task[i]); - s->fsort_task[i]->set_param(0,(memaddr)last_block_num); - 
s->fsort_task[i]->set_inData(0,(memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num); - if (i>0 && s->bsort_task[i-1]) { - task_array_f[i]->wait_for(task_array_b[i-1]); - } + if (i>0 && s->bsort[i-1]) { + s->fsort[i]->wait_for(s->bsort[i-1]); + } + if (i<s->split_num-2 && s->bsort[i]) { + s->fsort[i]->wait_for(s->bsort[i]); } - - if (s->split_num > 1) { + s->fsort[i]->set_cpu(spe_cpu); + s->fsort[i]->set_param(0,(memaddr)block_num); + } - for (int i = 0; i < half_num-1; i++) { - if (s->bsort_task[i]) s->bsort_task[i]=0; - s->bsort_task[i] = task_array_b[i]->next_task_array(QUICK_SORT,s->bsort_task[i]); - s->bsort_task[i]->set_inData(0,(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num); - s->bsort_task[i]->set_param(0,(memaddr)block_num); - } - - { - int i = half_num-1; + { + int i = s->split_num-1; - if (s->bsort_task[i]) s->bsort_task[i]=0; - s->bsort_task[i] = task_array_b[i]->next_task_array(QUICK_SORT,s->bsort_task[i]); - s->bsort_task[i]->set_inData(0,(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num); - s->bsort_task[i]->set_param(0,(memaddr)last_half_block_num); - } - - for (int i = 0; i < half_num; i++) { - task_array_b[i]->wait_for(task_array_f[i]); - task_array_b[i]->wait_for(task_array_f[i+1]); - task_array_b[i]->no_auto_free(); - task_array_b[i]->spawn_task_array(s->bsort_task[i]->next()); - task_array_b[i]->set_cpu(spe_cpu); - task_array_b[i]->flip(); - task_array_b[i]->spawn(); - } - } - - HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0); - restart->set_param(0,(memaddr)s); - if (!all) restart->wait_for(task_array_f[0]); - for (int i = 0; i < s->split_num; i++) { - task_array_f[i]->spawn_task_array(s->fsort_task[i]->next()); - task_array_f[i]->set_cpu(spe_cpu); - task_array_f[i]->flip(); - task_array_f[i]->spawn(); + s->fsort[i] = manager->create_task(QUICK_SORT, + (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num, + (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num); 
+ s->fsort[i]->flip(); + if (i>0 && s->bsort[i-1]) { + s->fsort[i]->wait_for(s->bsort[i-1]); } - if (sort_count == 1) { - // last loop wait for all task - // we should not need this? - for (int i = 0; i < half_num; i++) { - restart->wait_for(task_array_b[i]); - task_array_b[i]->auto_free(); - } - } - restart->spawn(); - } else { - - for (int i = 0; i < s->split_num-1; i++) { - s->fsort[i] = manager->create_task(QUICK_SORT, - (memaddr)&s->data[i*block_num], sizeof(Data)*block_num, - (memaddr)&s->data[i*block_num], sizeof(Data)*block_num); + s->fsort[i]->set_cpu(spe_cpu); + s->fsort[i]->set_param(0,(memaddr)last_block_num); + } + + if (s->split_num > 1) { - s->fsort[i]->flip(); - - if (i>0 && s->bsort[i-1]) { - s->fsort[i]->wait_for(s->bsort[i-1]); - } - if (i<s->split_num-2 && s->bsort[i]) { - s->fsort[i]->wait_for(s->bsort[i]); - } - s->fsort[i]->set_cpu(spe_cpu); - s->fsort[i]->set_param(0,(memaddr)block_num); + for (int i = 0; i < half_num-1; i++) { + if (s->bsort[i]) manager->free_htask(s->bsort[i]); + s->bsort[i] = manager->create_task(QUICK_SORT, + (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num, + (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num); + s->bsort[i]->flip(); + s->bsort[i]->set_cpu(spe_cpu); + s->bsort[i]->set_param(0,(memaddr)block_num); } { - int i = s->split_num-1; + int i = half_num-1; - s->fsort[i] = manager->create_task(QUICK_SORT, - (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num, - (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num); - s->fsort[i]->flip(); - if (i>0 && s->bsort[i-1]) { - s->fsort[i]->wait_for(s->bsort[i-1]); - } - s->fsort[i]->set_cpu(spe_cpu); - s->fsort[i]->set_param(0,(memaddr)last_block_num); + if (s->bsort[i]) manager->free_htask(s->bsort[i]); + s->bsort[i] = manager->create_task(QUICK_SORT, + (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num, + (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num); + 
s->bsort[i]->flip(); + s->bsort[i]->set_cpu(spe_cpu); + s->bsort[i]->set_param(0,(memaddr)last_half_block_num); } - if (s->split_num > 1) { - - for (int i = 0; i < half_num-1; i++) { - if (s->bsort[i]) manager->free_htask(s->bsort[i]); - s->bsort[i] = manager->create_task(QUICK_SORT, - (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num, - (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num); - s->bsort[i]->flip(); - s->bsort[i]->set_cpu(spe_cpu); - s->bsort[i]->set_param(0,(memaddr)block_num); - } - - { - int i = half_num-1; - - if (s->bsort[i]) manager->free_htask(s->bsort[i]); - s->bsort[i] = manager->create_task(QUICK_SORT, - (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num, - (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num); - s->bsort[i]->flip(); - s->bsort[i]->set_cpu(spe_cpu); - s->bsort[i]->set_param(0,(memaddr)last_half_block_num); - } + for (int i = 0; i < half_num; i++) { + s->bsort[i]->wait_for(s->fsort[i]); + s->bsort[i]->wait_for(s->fsort[i+1]); + s->bsort[i]->no_auto_free(); + s->bsort[i]->spawn(); + } + } - for (int i = 0; i < half_num; i++) { - s->bsort[i]->wait_for(s->fsort[i]); - s->bsort[i]->wait_for(s->fsort[i+1]); - s->bsort[i]->no_auto_free(); - s->bsort[i]->spawn(); - } + HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0); + restart->set_param(0,(memaddr)s); + if (!all) restart->wait_for(s->fsort[0]); + for (int i = 0; i < s->split_num; i++) { + s->fsort[i]->spawn(); + } + if (sort_count == 1) { + // last loop wait for all task + // we should not need this? 
+ for (int i = 0; i < half_num; i++) { + restart->wait_for(s->bsort[i]); + s->bsort[i]->auto_free(); } - - HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0); - restart->set_param(0,(memaddr)s); - if (!all) restart->wait_for(s->fsort[0]); - for (int i = 0; i < s->split_num; i++) { - s->fsort[i]->spawn(); - } - if (sort_count == 1) { - // last loop wait for all task - // we should not need this? - for (int i = 0; i < half_num; i++) { - restart->wait_for(s->bsort[i]); - s->bsort[i]->auto_free(); - } - } - restart->spawn(); } + restart->spawn(); return 0; }