view example/many_task/sort.cc @ 1592:afcb4a3f3526 draft

merge
author Masa <e085726@ie.u-ryukyu.ac.jp>
date Mon, 01 Apr 2013 18:00:35 +0900
parents e8c9a7099bcc
children 8bf0a27f5dff
line wrap: on
line source

#include "TaskManager.h"
#include "SchedTask.h"
#include "sort.h"
#include "Func.h"
#include <string.h>

extern int get_split_num(int len, int num);
extern int all;  // allocate task at once
extern CPU_TYPE spe_cpu ;
int task_array_num = 3;
extern int use_task_array;
/**
 * 一つの block にある data の数が MAX_BLOCK_SIZE 超えないような
 * len の分割数を返す
 *
 * @param  len  sort する data の総数
 * @param  num  使用する SPE の数
 *
 * @return data の分割数
 *
 * TODO:
 *   len が num 以下とか考えてません
 */
int
get_split_num(int len, int num)
{
    if (len / num < MAX_BLOCK_SIZE) {
        return num;
    } else {
        // 切り上げ
        return (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE;
    }
}


/**
 * btask が全て終了したら、再び sort_start を実行する
 * @param d 生成された btask の数
 */

SchedDefineTask1(SortSimple, sort_start );

static int
sort_start(SchedTask *manager, void *d, void *e)
{
    Sort *s =  (Sort*)manager->get_param(0);
    int half_num = s->split_num-1;
    static int sort_count = s->split_num; // sort 完了に必要な回数

    // 一つのタスクで sort する data 数
    int block_num = (s->data_length + s->split_num -1)/s->split_num;
    int half_block_num = block_num/2;

    int last_block_num = s->data_length - (s->split_num-1)*block_num;
    int last_half_block_num = half_block_num+(last_block_num/2);

    if (--sort_count < 0) {
        return 0;
    }
    if (use_task_array) {
        HTask **task_array_f = (HTask**)manager->allocate(sizeof(HTask*)*s->split_num);
        HTask **task_array_b = (HTask**)manager->allocate(sizeof(HTask*)*half_num);

        for (int i = 0; i < s->split_num;i++) {
            task_array_f[i] = manager->create_task_array(QUICK_SORT, s->split_num,1,1,1);
            s->fsort_task[i]=0;
        }
        for (int i = 0; i<half_num;i++) {
            task_array_b[i] = manager->create_task_array(QUICK_SORT, half_num,1,1,1);
            s->bsort_task[i]=0;
        }
        for (int i = 0; i < s->split_num-1; i++) {
            s->fsort_task[i] = task_array_f[i]->next_task_array(QUICK_SORT,s->fsort_task[i]);
            s->fsort_task[i]->set_param(0,(memaddr)block_num);
            s->fsort_task[i]->set_inData(0,(memaddr)&s->data[i*block_num], sizeof(Data)*block_num);
            if (i>0 && s->bsort_task[i-1]) {
                task_array_f[i]->wait_for(task_array_b[i-1]);
            }
            if (i<s->split_num-2 && s->bsort_task[i]) {
                task_array_f[i]->wait_for(task_array_b[i]);
            }
        }

        // 最後の block は端数なので last_block_num を使う
        {

            int i = s->split_num-1;

            s->fsort_task[i] = task_array_f[i]->next_task_array(QUICK_SORT,s->fsort_task[i]);
            s->fsort_task[i]->set_param(0,(memaddr)last_block_num);
            s->fsort_task[i]->set_inData(0,(memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num);
            if (i>0 && s->bsort_task[i-1]) {
                task_array_f[i]->wait_for(task_array_b[i-1]);
            }
        }

        if (s->split_num > 1) {

            for (int i = 0; i < half_num-1; i++) {
                if (s->bsort_task[i]) s->bsort_task[i]=0;
                s->bsort_task[i] = task_array_b[i]->next_task_array(QUICK_SORT,s->bsort_task[i]);
                s->bsort_task[i]->set_inData(0,(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num);
                s->bsort_task[i]->set_param(0,(memaddr)block_num);
            }

            {
                int i = half_num-1;

                if (s->bsort_task[i]) s->bsort_task[i]=0;
                s->bsort_task[i] = task_array_b[i]->next_task_array(QUICK_SORT,s->bsort_task[i]);
                s->bsort_task[i]->set_inData(0,(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num);
                s->bsort_task[i]->set_param(0,(memaddr)last_half_block_num);
            }

            for (int i = 0; i < half_num; i++) {
                task_array_b[i]->wait_for(task_array_f[i]);
                task_array_b[i]->wait_for(task_array_f[i+1]);
                task_array_b[i]->no_auto_free();
                task_array_b[i]->spawn_task_array(s->bsort_task[i]->next());
                task_array_b[i]->set_cpu(spe_cpu);
                task_array_b[i]->flip();
                task_array_b[i]->spawn();
            }
        }

        HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0);
        restart->set_param(0,(memaddr)s);
        if (!all) restart->wait_for(task_array_f[0]);
        for (int i = 0; i < s->split_num; i++) {
            task_array_f[i]->spawn_task_array(s->fsort_task[i]->next());
            task_array_f[i]->set_cpu(spe_cpu);
            task_array_f[i]->flip();
            task_array_f[i]->spawn();
        }
        if (sort_count == 1) {
            // last loop wait for all task
            // we should not need this?
            for (int i = 0; i < half_num; i++) {
                restart->wait_for(task_array_b[i]);
                task_array_b[i]->auto_free();
            }
        }
        restart->spawn();
    } else {
        
        for (int i = 0; i < s->split_num-1; i++) {
            s->fsort[i] = manager->create_task(QUICK_SORT,
                                               (memaddr)&s->data[i*block_num], sizeof(Data)*block_num,
                                               (memaddr)&s->data[i*block_num], sizeof(Data)*block_num);

            s->fsort[i]->flip();

            if (i>0 && s->bsort[i-1]) {
                s->fsort[i]->wait_for(s->bsort[i-1]);
            }
            if (i<s->split_num-2 && s->bsort[i]) {
                s->fsort[i]->wait_for(s->bsort[i]);
            }
            s->fsort[i]->set_cpu(spe_cpu);
            s->fsort[i]->set_param(0,(memaddr)block_num);
        }

        {
            int i = s->split_num-1;

            s->fsort[i] = manager->create_task(QUICK_SORT,
                                               (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num,
                                               (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num);
            s->fsort[i]->flip();
            if (i>0 && s->bsort[i-1]) {
                s->fsort[i]->wait_for(s->bsort[i-1]);
            }
            s->fsort[i]->set_cpu(spe_cpu);
            s->fsort[i]->set_param(0,(memaddr)last_block_num);
        }

        if (s->split_num > 1) {

            for (int i = 0; i < half_num-1; i++) {
                if (s->bsort[i]) manager->free_htask(s->bsort[i]);
                s->bsort[i] = manager->create_task(QUICK_SORT,
                                                   (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num,
                                                   (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num);
                s->bsort[i]->flip();
                s->bsort[i]->set_cpu(spe_cpu);
                s->bsort[i]->set_param(0,(memaddr)block_num);
            }

            {
                int i = half_num-1;

                if (s->bsort[i]) manager->free_htask(s->bsort[i]);
                s->bsort[i] = manager->create_task(QUICK_SORT,
                                                   (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num,
                                                   (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num);
                s->bsort[i]->flip();
                s->bsort[i]->set_cpu(spe_cpu);
                s->bsort[i]->set_param(0,(memaddr)last_half_block_num);
            }

            for (int i = 0; i < half_num; i++) {
                s->bsort[i]->wait_for(s->fsort[i]);
                s->bsort[i]->wait_for(s->fsort[i+1]);
                s->bsort[i]->no_auto_free();
                s->bsort[i]->spawn();
            }
        }

        HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0);
        restart->set_param(0,(memaddr)s);
        if (!all) restart->wait_for(s->fsort[0]);
        for (int i = 0; i < s->split_num; i++) {
            s->fsort[i]->spawn();
        }
        if (sort_count == 1) {
            // last loop wait for all task
            // we should not need this?
            for (int i = 0; i < half_num; i++) {
                restart->wait_for(s->bsort[i]);
                s->bsort[i]->auto_free();
            }
        }
        restart->spawn();
    }

    return 0;
}


/* end */