view example/many_task/sort-array.cc @ 2044:66aa91f6f4df draft

merge
author Shin,ichi Uehara
date Wed, 25 Mar 2015 19:13:56 +0900
parents c21bd32e20b9
children
line wrap: on
line source

#include "TaskManager.h"
#include "SchedTask.h"
#include "sort.h"
#include "Func.h"
#include <string.h>

extern int get_split_num(int len, int num);
extern int all;  // allocate task at once
extern CPU_TYPE spe_cpu ;
extern int use_task_array;
/**
 * 一つの block にある data の数が MAX_BLOCK_SIZE 超えないような
 * len の分割数を返す
 *
 * @param  len  sort する data の総数
 * @param  num  使用する SPE の数
 *
 * @return data の分割数
 *
 * TODO:
 *   len が num 以下とか考えてません
 */
extern int get_split_num(int len, int num);

/**
 * btask が全て終了したら、再び sort_start を実行する
 * @param d 生成された btask の数
 */

SchedDefineTask1(SortTaskArray, sort_start_array );

static int
sort_start_array(SchedTask *manager, void *d, void *e)
{
    Sort *s =  (Sort*)manager->get_param(0);
    long half_num = s->split_num-1;
    static long sort_count = s->split_num; // sort 完了に必要な回数

    // 一つのタスクで sort する data 数
    long block_num = (s->data_length + s->split_num -1)/s->split_num;
    long half_block_num = block_num/2;

    long last_block_num = s->data_length - (s->split_num-1)*block_num;
    long last_half_block_num = half_block_num+(last_block_num/2);

    if (--sort_count < 0) {
        return 0;
    }
    HTask **task_array_f = (HTask**)manager->allocate(sizeof(HTask*)*s->split_num);
    HTask **task_array_b = (HTask**)manager->allocate(sizeof(HTask*)*half_num);

    for (int i = 0; i < s->split_num;i++) {
        task_array_f[i] = manager->create_task_array(QUICK_SORT, s->split_num,1,1,1);
        s->fsort_task[i]=0;
    }
    for (int i = 0; i<half_num;i++) {
        task_array_b[i] = manager->create_task_array(QUICK_SORT, half_num,1,1,1);
        s->bsort_task[i]=0;
    }
    for (int i = 0; i < s->split_num-1; i++) {
        s->fsort_task[i] = task_array_f[i]->next_task_array(QUICK_SORT,s->fsort_task[i]);
        s->fsort_task[i]->set_param(0,(memaddr)block_num);
        s->fsort_task[i]->set_inData(0,(memaddr)&s->data[i*block_num], sizeof(Data)*block_num);
        if (i>0 && s->bsort_task[i-1]) {
            task_array_f[i]->wait_for(task_array_b[i-1]);
        }
        if (i<s->split_num-2 && s->bsort_task[i]) {
            task_array_f[i]->wait_for(task_array_b[i]);
        }
    }

    // 最後の block は端数なので last_block_num を使う
    {

        int i = s->split_num-1;

        s->fsort_task[i] = task_array_f[i]->next_task_array(QUICK_SORT,s->fsort_task[i]);
        s->fsort_task[i]->set_param(0,(memaddr)last_block_num);
        s->fsort_task[i]->set_inData(0,(memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num);
        if (i>0 && s->bsort_task[i-1]) {
            task_array_f[i]->wait_for(task_array_b[i-1]);
        }
    }

    if (s->split_num > 1) {

        for (int i = 0; i < half_num-1; i++) {
            if (s->bsort_task[i]) s->bsort_task[i]=0;
            s->bsort_task[i] = task_array_b[i]->next_task_array(QUICK_SORT,s->bsort_task[i]);
            s->bsort_task[i]->set_inData(0,(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num);
            s->bsort_task[i]->set_param(0,(memaddr)block_num);
        }

        {
            int i = half_num-1;

            if (s->bsort_task[i]) s->bsort_task[i]=0;
            s->bsort_task[i] = task_array_b[i]->next_task_array(QUICK_SORT,s->bsort_task[i]);
            s->bsort_task[i]->set_inData(0,(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num);
            s->bsort_task[i]->set_param(0,(memaddr)last_half_block_num);
        }

        for (int i = 0; i < half_num; i++) {
            task_array_b[i]->wait_for(task_array_f[i]);
            task_array_b[i]->wait_for(task_array_f[i+1]);
            task_array_b[i]->no_auto_free();
            task_array_b[i]->spawn_task_array(s->bsort_task[i]->next());
            task_array_b[i]->set_cpu(spe_cpu);
            task_array_b[i]->flip();
            task_array_b[i]->spawn();
        }
    }

    HTaskPtr restart = manager->create_task(SortTaskArray,0,0,0,0);
    restart->set_param(0,(memaddr)s);
    if (!all) restart->wait_for(task_array_f[0]);
    for (int i = 0; i < s->split_num; i++) {
        task_array_f[i]->spawn_task_array(s->fsort_task[i]->next());
        task_array_f[i]->set_cpu(spe_cpu);
        task_array_f[i]->flip();
        task_array_f[i]->spawn();
    }
    if (sort_count == 1) {
        // last loop wait for all task
        // we should not need this?
        for (int i = 0; i < half_num; i++) {
            restart->wait_for(task_array_b[i]);
            task_array_b[i]->auto_free();
        }
    }
    restart->spawn();

    return 0;
}


/* end */