changeset 1546:61164c687b29 draft

fix GpuScheduler flip
author Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date Tue, 05 Feb 2013 13:15:46 +0900
parents d9eb89610733
children 2983e9e93d24
files TaskManager/Gpu/GpuScheduler.cc example/flip/Makefile.def example/flip/twice.cl example/many_task/sort_ta.cc
diffstat 4 files changed, 19 insertions(+), 248 deletions(-) [+]
line wrap: on
line diff
--- a/TaskManager/Gpu/GpuScheduler.cc	Tue Feb 05 11:02:46 2013 +0900
+++ b/TaskManager/Gpu/GpuScheduler.cc	Tue Feb 05 13:15:46 2013 +0900
@@ -145,7 +145,6 @@
                         param++;
                     }
                 }
-
                 cl_mem *memout;
                 cl_mem_flags out_mem_flag;
                 if (flag.flip) {
@@ -158,8 +157,12 @@
 
 
                 for(int i=0;i<nextTask->outData_count;i++) { // set output data
-                    ListElement *output_buf = nextTask->outData(i);
+                    ListElement *output_buf = flag.flip? nextTask->inData(i) : nextTask->outData(i);
                     memout[i] = clCreateBuffer(context, out_mem_flag, output_buf->size, NULL, &ret);
+                    if (ret<0) {
+                        const char *msg=convert_error_status(ret);
+                        error(msg);
+                    }
 
                     if (flag.flip) { // use output buffer as input buffer
                         ListElement *input_buf = nextTask->inData(i);
@@ -172,10 +175,10 @@
                         }
                     }
                     ret = clSetKernelArg(kernel,  param, sizeof(memaddr), (void *)&memout[i]);
-                if (ret<0) {
-                    const char *msg=convert_error_status(ret);
-                    error(msg);
-                }
+                    if (ret<0) {
+                        const char *msg=convert_error_status(ret);
+                        error(msg);
+                    }
                     param++;
                 }
 
@@ -192,13 +195,13 @@
                 // (command_queue[cur], kernel, dim, NULL,global_work_size[0],local_work_size[0],NULL&ev);
 
                 for(int i=0;i<nextTask->outData_count;i++) { // read output data
-                    ListElement *output_buf = nextTask->outData(i);
+                    ListElement *output_buf = flag.flip? nextTask->inData(i) :nextTask->outData(i);
                     ret = clEnqueueReadBuffer(command_queue[cur], memout[i], CL_TRUE, 0,
                                               output_buf->size, output_buf->addr, 1, &ev, NULL);
-                if (ret<0) {
-                    const char *msg=convert_error_status(ret);
-                    error(msg);
-                }
+                    if (ret<0) {
+                        const char *msg=convert_error_status(ret);
+                        error(msg);
+                    }
                 }
             }
 
--- a/example/flip/Makefile.def	Tue Feb 05 11:02:46 2013 +0900
+++ b/example/flip/Makefile.def	Tue Feb 05 13:15:46 2013 +0900
@@ -2,7 +2,7 @@
 
 CERIUM = ../../../Cerium
 
-CC      = g++
+CC      = clang++
 CFLAGS  =   -g -Wall
 
 INCLUDE = -I${CERIUM}/include/TaskManager -I. -I../..
--- a/example/flip/twice.cl	Tue Feb 05 11:02:46 2013 +0900
+++ b/example/flip/twice.cl	Tue Feb 05 13:15:46 2013 +0900
@@ -1,12 +1,12 @@
 __kernel void
 twice(__constant int *data_count,
-      __global int *input_data,
-      __global int *output_data)
+      __global int *input_data)
+      //      __global int *output_data)
 {
     long count = (long)data_count[0];
     for (int i = 0; i<count; i++) {
-        output_data[i] = 2*input_data[i];
-        //input_data[i] *= 2;
+        // output_data[i] = 2*input_data[i];
+        input_data[i] *= 2;
     }
 
 }
--- a/example/many_task/sort_ta.cc	Tue Feb 05 11:02:46 2013 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,232 +0,0 @@
-#include "TaskManager.h"
-#include "SchedTask.h"
-#include "sort.h"
-#include "Func.h"
-#include <string.h>
-
-extern int get_split_num(int len, int num);
-extern int all;  // allocate task at once
-extern CPU_TYPE spe_cpu ;
-extern int task_array_num;
-
-/**
- * 一つの block にある data の数が MAX_BLOCK_SIZE 超えないような
- * len の分割数を返す
- *
- * @param  len  sort する data の総数
- * @param  num  使用する SPE の数
- *
- * @return data の分割数
- *
- * TODO:
- *   len が num 以下とか考えてません
- */
-int
-get_split_num(int len, int num)
-{
-    if (len / num < MAX_BLOCK_SIZE) {
-        return num;
-    } else {
-        // 切り上げ
-        return (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE;
-    }
-}
-
-
-/**
- * btask が全て終了したら、再び sort_start を実行する
- * @param d 生成された btask の数
- */
-
-SchedDefineTask1(SortSimple, sort_start );
-
-static int
-sort_start(SchedTask *manager, void *d, void *e)
-{
-    Sort *s =  (Sort*)manager->get_param(0);
-    int half_num = s->split_num-1;
-    static int sort_count = s->split_num; // sort 完了に必要な回数
-
-    // 一つのタスクで sort する data 数
-    int block_num = (s->data_length + s->split_num -1)/s->split_num;
-    int half_block_num = block_num/2;
-
-    int last_block_num = s->data_length - (s->split_num-1)*block_num;
-    int last_half_block_num = half_block_num+(last_block_num/2);
-
-    if (--sort_count < 0) {
-        return 0;
-    }
-
-    if (task_array_num > 0) {
-        // run task array
-        HTask **task_array_f = (HTask**)manager->allocate(sizeof(HTask*)*s->split_num);
-        HTask **task_array_b = (HTask**)manager->allocate(sizeof(HTask*)*s->split_num);
-
-        for (int i = 0; i < s->split_num;i++) {
-            task_array_f[i] = manager->create_task_array(QUICK_SORT, task_array_num,1,1,1);
-            s->fsort[i]=0;
-        }
-        for (int i = 0; i<half_num;i++) {
-            task_array_b[i] = manager->create_task_array(QUICK_SORT, task_array_num,1,1,1);
-            s->bsort[i]=0;
-        }
-        for (int i = 0; i < s->split_num-1; i++) {
-            s->fsort[i] = task_array_f[i]->next_task_array(QUICK_SORT,s->fsort[i]);
-            s->fsort[i]->set_param(0,(memaddr)block_num);
-            s->fsort[i]->set_inData(0,(memaddr)&s->data[i*block_num], sizeof(Data)*block_num);
-            if (i>0 && s->bsort[i-1]) {
-                task_array_f[i]->wait_for(task_array_b[i-1]);
-            }
-            if (i<s->split_num-2 && s->bsort[i]) {
-                task_array_f[i]->wait_for(task_array_b[i]);
-            }
-        }
-
-        // 最後の block は端数なので last_block_num を使う
-        {
-
-            int i = s->split_num-1;
-
-            s->fsort[i] = task_array_f[i]->next_task_array(QUICK_SORT,s->fsort[i]);
-            s->fsort[i]->set_param(0,(memaddr)last_block_num);
-            s->fsort[i]->set_inData(0,(memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num);
-            if (i>0 && s->bsort[i-1]) {
-                task_array_f[i]->wait_for(task_array_b[i-1]);
-            }
-        }
-
-        if (s->split_num > 1) {
-
-            for (int i = 0; i < half_num-1; i++) {
-                if (s->bsort[i]) s->bsort[i]=0;
-                s->bsort[i] = task_array_b[i]->next_task_array(QUICK_SORT,s->bsort[i]);
-                s->bsort[i]->set_inData(0,(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num);
-                s->bsort[i]->set_param(0,(memaddr)block_num);
-            }
-
-            {
-                int i = half_num-1;
-
-                if (s->bsort[i]) s->bsort[i]=0;
-                s->bsort[i] = task_array_b[i]->next_task_array(QUICK_SORT,s->bsort[i]);
-                s->bsort[i]->set_inData(0,(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num);
-                s->bsort[i]->set_param(0,(memaddr)last_half_block_num);
-            }
-
-            for (int i = 0; i < half_num; i++) {
-                task_array_b[i]->wait_for(task_array_f[i]);
-                task_array_b[i]->wait_for(task_array_f[i+1]);
-                task_array_b[i]->no_auto_free();
-                task_array_b[i]->spawn_task_array(s->bsort[i]->next());
-                task_array_b[i]->set_cpu(spe_cpu);
-                task_array_b[i]->flip();
-                task_array_b[i]->spawn();
-            }
-        }
-
-        HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0);
-        restart->set_param(0,(memaddr)s);
-        if (!all) restart->wait_for(task_array_f[0]);
-        for (int i = 0; i < s->split_num; i++) {
-            task_array_f[i]->spawn_task_array(s->fsort[i]->next());
-            task_array_f[i]->set_cpu(spe_cpu);
-            task_array_f[i]->flip();
-            task_array_f[i]->spawn();
-        }
-        if (sort_count == 1) {
-            // last loop wait for all task
-            // we should not need this?
-            for (int i = 0; i < half_num; i++) {
-                restart->wait_for(task_array_b[i]);
-                task_array_b[i]->auto_free();
-            }
-        }
-        restart->spawn();
-    } else {
-
-        for (int i = 0; i < s->split_num-1; i++) {
-            s->fsort[i] = manager->create_task(QUICK_SORT,
-                                               (memaddr)&s->data[i*block_num], sizeof(Data)*block_num,
-                                               (memaddr)&s->data[i*block_num], sizeof(Data)*block_num);
-
-            s->fsort[i]->flip();
-
-            if (i>0 && s->bsort[i-1]) {
-                s->fsort[i]->wait_for(s->bsort[i-1]);
-            }
-            if (i<s->split_num-2 && s->bsort[i]) {
-                s->fsort[i]->wait_for(s->bsort[i]);
-            }
-            s->fsort[i]->set_cpu(spe_cpu);
-            s->fsort[i]->set_param(0,(memaddr)block_num);
-        }
-
-        // 最後の block は端数なので last_block_num を使う
-        {
-            int i = s->split_num-1;
-
-            s->fsort[i] = manager->create_task(QUICK_SORT,
-                                               (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num,
-                                               (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num);
-            s->fsort[i]->flip();
-            if (i>0 && s->bsort[i-1]) {
-                s->fsort[i]->wait_for(s->bsort[i-1]);
-            }
-            s->fsort[i]->set_cpu(spe_cpu);
-            s->fsort[i]->set_param(0,(memaddr)last_block_num);
-        }
-
-        if (s->split_num > 1) {
-
-            for (int i = 0; i < half_num-1; i++) {
-                if (s->bsort[i]) manager->free_htask(s->bsort[i]);
-                s->bsort[i] = manager->create_task(QUICK_SORT,
-                                                   (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num,
-                                                   (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num);
-                s->bsort[i]->flip();
-                s->bsort[i]->set_cpu(spe_cpu);
-                s->bsort[i]->set_param(0,(memaddr)block_num);
-            }
-
-            {
-                int i = half_num-1;
-
-                if (s->bsort[i]) manager->free_htask(s->bsort[i]);
-                s->bsort[i] = manager->create_task(QUICK_SORT,
-                                                   (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num,
-                                                   (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num);
-                s->bsort[i]->flip();
-                s->bsort[i]->set_cpu(spe_cpu);
-                s->bsort[i]->set_param(0,(memaddr)last_half_block_num);
-            }
-
-            for (int i = 0; i < half_num; i++) {
-                s->bsort[i]->wait_for(s->fsort[i]);
-                s->bsort[i]->wait_for(s->fsort[i+1]);
-                s->bsort[i]->no_auto_free();
-                s->bsort[i]->spawn();
-            }
-        }
-
-        HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0);
-        restart->set_param(0,(memaddr)s);
-        if (!all) restart->wait_for(s->fsort[0]);
-        for (int i = 0; i < s->split_num; i++) {
-            s->fsort[i]->spawn();
-        }
-        if (sort_count == 1) {
-            // last loop wait for all task
-            // we should not need this?
-            for (int i = 0; i < half_num; i++) {
-                restart->wait_for(s->bsort[i]);
-                s->bsort[i]->auto_free();
-            }
-        }
-        restart->spawn();
-    }
-    return 0;
-}
-
-
-/* end */