Mercurial > hg > Game > Cerium
changeset 1546:61164c687b29 draft
fix GpuScheduler flip
author | Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 05 Feb 2013 13:15:46 +0900 |
parents | d9eb89610733 |
children | 2983e9e93d24 |
files | TaskManager/Gpu/GpuScheduler.cc example/flip/Makefile.def example/flip/twice.cl example/many_task/sort_ta.cc |
diffstat | 4 files changed, 19 insertions(+), 248 deletions(-) [+] |
line wrap: on
line diff
--- a/TaskManager/Gpu/GpuScheduler.cc Tue Feb 05 11:02:46 2013 +0900
+++ b/TaskManager/Gpu/GpuScheduler.cc Tue Feb 05 13:15:46 2013 +0900
@@ -145,7 +145,6 @@
 param++;
 }
 }
-
 cl_mem *memout;
 cl_mem_flags out_mem_flag;
 if (flag.flip) {
@@ -158,8 +157,12 @@
 for(int i=0;i<nextTask->outData_count;i++) {
 // set output data
- ListElement *output_buf = nextTask->outData(i);
+ ListElement *output_buf = flag.flip? nextTask->inData(i) : nextTask->outData(i);
 memout[i] = clCreateBuffer(context, out_mem_flag, output_buf->size, NULL, &ret);
+ if (ret<0) {
+ const char *msg=convert_error_status(ret);
+ error(msg);
+ }
 if (flag.flip) {
 // use output buffer as input buffer
 ListElement *input_buf = nextTask->inData(i);
@@ -172,10 +175,10 @@
 }
 }
 ret = clSetKernelArg(kernel, param, sizeof(memaddr), (void *)&memout[i]);
- if (ret<0) {
- const char *msg=convert_error_status(ret);
- error(msg);
- }
+ if (ret<0) {
+ const char *msg=convert_error_status(ret);
+ error(msg);
+ }
 param++;
 }
@@ -192,13 +195,13 @@
 // (command_queue[cur], kernel, dim, NULL,global_work_size[0],local_work_size[0],NULL&ev);
 for(int i=0;i<nextTask->outData_count;i++) {
 // read output data
- ListElement *output_buf = nextTask->outData(i);
+ ListElement *output_buf = flag.flip? nextTask->inData(i) :nextTask->outData(i);
 ret = clEnqueueReadBuffer(command_queue[cur], memout[i], CL_TRUE, 0, output_buf->size, output_buf->addr, 1, &ev, NULL);
- if (ret<0) {
- const char *msg=convert_error_status(ret);
- error(msg);
- }
+ if (ret<0) {
+ const char *msg=convert_error_status(ret);
+ error(msg);
+ }
 }
 }
--- a/example/flip/Makefile.def Tue Feb 05 11:02:46 2013 +0900
+++ b/example/flip/Makefile.def Tue Feb 05 13:15:46 2013 +0900
@@ -2,7 +2,7 @@
 CERIUM = ../../../Cerium
-CC = g++
+CC = clang++
 CFLAGS = -g -Wall
 INCLUDE = -I${CERIUM}/include/TaskManager -I. -I../..
--- a/example/flip/twice.cl Tue Feb 05 11:02:46 2013 +0900
+++ b/example/flip/twice.cl Tue Feb 05 13:15:46 2013 +0900
@@ -1,12 +1,12 @@
 __kernel void twice(__constant int *data_count,
- __global int *input_data,
- __global int *output_data)
+ __global int *input_data)
+ // __global int *output_data)
 {
 long count = (long)data_count[0];
 for (int i = 0; i<count; i++) {
- output_data[i] = 2*input_data[i];
- //input_data[i] *= 2;
+ // output_data[i] = 2*input_data[i];
+ input_data[i] *= 2;
 }
 }
--- a/example/many_task/sort_ta.cc Tue Feb 05 11:02:46 2013 +0900 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,232 +0,0 @@ -#include "TaskManager.h" -#include "SchedTask.h" -#include "sort.h" -#include "Func.h" -#include <string.h> - -extern int get_split_num(int len, int num); -extern int all; // allocate task at once -extern CPU_TYPE spe_cpu ; -extern int task_array_num; - -/** - * 一つの block にある data の数が MAX_BLOCK_SIZE 超えないような - * len の分割数を返す - * - * @param len sort する data の総数 - * @param num 使用する SPE の数 - * - * @return data の分割数 - * - * TODO: - * len が num 以下とか考えてません - */ -int -get_split_num(int len, int num) -{ - if (len / num < MAX_BLOCK_SIZE) { - return num; - } else { - // 切り上げ - return (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE; - } -} - - -/** - * btask が全て終了したら、再び sort_start を実行する - * @param d 生成された btask の数 - */ - -SchedDefineTask1(SortSimple, sort_start ); - -static int -sort_start(SchedTask *manager, void *d, void *e) -{ - Sort *s = (Sort*)manager->get_param(0); - int half_num = s->split_num-1; - static int sort_count = s->split_num; // sort 完了に必要な回数 - - // 一つのタスクで sort する data 数 - int block_num = (s->data_length + s->split_num -1)/s->split_num; - int half_block_num = block_num/2; - - int last_block_num = s->data_length - (s->split_num-1)*block_num; - int last_half_block_num = half_block_num+(last_block_num/2); - - if (--sort_count < 0) { - return 0; - } - - if (task_array_num > 0) { - // run task array - HTask **task_array_f = (HTask**)manager->allocate(sizeof(HTask*)*s->split_num); - HTask **task_array_b = (HTask**)manager->allocate(sizeof(HTask*)*s->split_num); - - for (int i = 0; i < s->split_num;i++) { - task_array_f[i] = manager->create_task_array(QUICK_SORT, task_array_num,1,1,1); - s->fsort[i]=0; - } - for (int i = 0; i<half_num;i++) { - task_array_b[i] = manager->create_task_array(QUICK_SORT, task_array_num,1,1,1); - s->bsort[i]=0; - } - for (int i = 0; i < s->split_num-1; i++) { - s->fsort[i] = 
task_array_f[i]->next_task_array(QUICK_SORT,s->fsort[i]); - s->fsort[i]->set_param(0,(memaddr)block_num); - s->fsort[i]->set_inData(0,(memaddr)&s->data[i*block_num], sizeof(Data)*block_num); - if (i>0 && s->bsort[i-1]) { - task_array_f[i]->wait_for(task_array_b[i-1]); - } - if (i<s->split_num-2 && s->bsort[i]) { - task_array_f[i]->wait_for(task_array_b[i]); - } - } - - // 最後の block は端数なので last_block_num を使う - { - - int i = s->split_num-1; - - s->fsort[i] = task_array_f[i]->next_task_array(QUICK_SORT,s->fsort[i]); - s->fsort[i]->set_param(0,(memaddr)last_block_num); - s->fsort[i]->set_inData(0,(memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num); - if (i>0 && s->bsort[i-1]) { - task_array_f[i]->wait_for(task_array_b[i-1]); - } - } - - if (s->split_num > 1) { - - for (int i = 0; i < half_num-1; i++) { - if (s->bsort[i]) s->bsort[i]=0; - s->bsort[i] = task_array_b[i]->next_task_array(QUICK_SORT,s->bsort[i]); - s->bsort[i]->set_inData(0,(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num); - s->bsort[i]->set_param(0,(memaddr)block_num); - } - - { - int i = half_num-1; - - if (s->bsort[i]) s->bsort[i]=0; - s->bsort[i] = task_array_b[i]->next_task_array(QUICK_SORT,s->bsort[i]); - s->bsort[i]->set_inData(0,(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num); - s->bsort[i]->set_param(0,(memaddr)last_half_block_num); - } - - for (int i = 0; i < half_num; i++) { - task_array_b[i]->wait_for(task_array_f[i]); - task_array_b[i]->wait_for(task_array_f[i+1]); - task_array_b[i]->no_auto_free(); - task_array_b[i]->spawn_task_array(s->bsort[i]->next()); - task_array_b[i]->set_cpu(spe_cpu); - task_array_b[i]->flip(); - task_array_b[i]->spawn(); - } - } - - HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0); - restart->set_param(0,(memaddr)s); - if (!all) restart->wait_for(task_array_f[0]); - for (int i = 0; i < s->split_num; i++) { - task_array_f[i]->spawn_task_array(s->fsort[i]->next()); - 
task_array_f[i]->set_cpu(spe_cpu); - task_array_f[i]->flip(); - task_array_f[i]->spawn(); - } - if (sort_count == 1) { - // last loop wait for all task - // we should not need this? - for (int i = 0; i < half_num; i++) { - restart->wait_for(task_array_b[i]); - task_array_b[i]->auto_free(); - } - } - restart->spawn(); - } else { - - for (int i = 0; i < s->split_num-1; i++) { - s->fsort[i] = manager->create_task(QUICK_SORT, - (memaddr)&s->data[i*block_num], sizeof(Data)*block_num, - (memaddr)&s->data[i*block_num], sizeof(Data)*block_num); - - s->fsort[i]->flip(); - - if (i>0 && s->bsort[i-1]) { - s->fsort[i]->wait_for(s->bsort[i-1]); - } - if (i<s->split_num-2 && s->bsort[i]) { - s->fsort[i]->wait_for(s->bsort[i]); - } - s->fsort[i]->set_cpu(spe_cpu); - s->fsort[i]->set_param(0,(memaddr)block_num); - } - - // 最後の block は端数なので last_block_num を使う - { - int i = s->split_num-1; - - s->fsort[i] = manager->create_task(QUICK_SORT, - (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num, - (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num); - s->fsort[i]->flip(); - if (i>0 && s->bsort[i-1]) { - s->fsort[i]->wait_for(s->bsort[i-1]); - } - s->fsort[i]->set_cpu(spe_cpu); - s->fsort[i]->set_param(0,(memaddr)last_block_num); - } - - if (s->split_num > 1) { - - for (int i = 0; i < half_num-1; i++) { - if (s->bsort[i]) manager->free_htask(s->bsort[i]); - s->bsort[i] = manager->create_task(QUICK_SORT, - (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num, - (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num); - s->bsort[i]->flip(); - s->bsort[i]->set_cpu(spe_cpu); - s->bsort[i]->set_param(0,(memaddr)block_num); - } - - { - int i = half_num-1; - - if (s->bsort[i]) manager->free_htask(s->bsort[i]); - s->bsort[i] = manager->create_task(QUICK_SORT, - (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num, - (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num); - s->bsort[i]->flip(); 
- s->bsort[i]->set_cpu(spe_cpu); - s->bsort[i]->set_param(0,(memaddr)last_half_block_num); - } - - for (int i = 0; i < half_num; i++) { - s->bsort[i]->wait_for(s->fsort[i]); - s->bsort[i]->wait_for(s->fsort[i+1]); - s->bsort[i]->no_auto_free(); - s->bsort[i]->spawn(); - } - } - - HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0); - restart->set_param(0,(memaddr)s); - if (!all) restart->wait_for(s->fsort[0]); - for (int i = 0; i < s->split_num; i++) { - s->fsort[i]->spawn(); - } - if (sort_count == 1) { - // last loop wait for all task - // we should not need this? - for (int i = 0; i < half_num; i++) { - restart->wait_for(s->bsort[i]); - s->bsort[i]->auto_free(); - } - } - restart->spawn(); - } - return 0; -} - - -/* end */