changeset 1515:b3644b73d2cf draft

add flip flag test
author Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date Tue, 23 Oct 2012 14:41:27 +0900
parents 99ea7b932470
children e544f9747169
files TaskManager/Gpu/GpuScheduler.cc TaskManager/Gpu/GpuScheduler.h TaskManager/test/GpuRunTest/GpuRunTest.cc TaskManager/test/GpuRunTest/Makefile example/basic/Makefile.gpu example/basic/main.cc example/flip/GpuFunc.h example/flip/Makefile example/flip/Makefile.def example/flip/main.cc example/flip/task_init.cc example/flip/twice.cl example/many_task/Makefile example/many_task/Makefile.def example/many_task/Makefile.macosx example/many_task/main.cc example/many_task/ppe/sort_test.cc example/many_task/sort.cc
diffstat 18 files changed, 305 insertions(+), 96 deletions(-) [+]
line wrap: on
line diff
--- a/TaskManager/Gpu/GpuScheduler.cc	Sun Oct 14 02:40:05 2012 +0900
+++ b/TaskManager/Gpu/GpuScheduler.cc	Tue Oct 23 14:41:27 2012 +0900
@@ -82,15 +82,14 @@
                 load_kernel(nextTask->command);
 
                 cl_kernel& kernel = *task_list[nextTask->command].gputask->kernel;
-                int err = CL_SUCCESS;
 
                 int param = 0;
 
                 cl_mem memparam = clCreateBuffer(context, CL_MEM_READ_ONLY,
-                                               sizeof(memaddr)*nextTask->param_count, NULL, NULL);
-                err |= clEnqueueWriteBuffer(command_queue[cur], memparam, CL_TRUE, 0, sizeof(memaddr)*nextTask->param_count,
-                                            nextTask->param(0), 0, NULL, NULL);
-                err |= clSetKernelArg(kernel, param, sizeof(memaddr),(void *)&memparam);
+                                                 sizeof(memaddr)*nextTask->param_count, NULL, NULL);
+                ret = clEnqueueWriteBuffer(command_queue[cur], memparam, CL_TRUE, 0,
+                                           sizeof(memaddr)*nextTask->param_count,nextTask->param(0), 0, NULL, NULL);
+                ret = clSetKernelArg(kernel, param, sizeof(memaddr),(void *)&memparam);
 
                 param++;
 
@@ -100,9 +99,9 @@
                     for(int i=0;i<nextTask->inData_count;i++) {
                         memin[i] = clCreateBuffer(context, mem_flag, nextTask->inData(i)->size, NULL, NULL);
 
-                        err |= clEnqueueWriteBuffer(command_queue[cur], memin[i], CL_TRUE, 0,
+                        ret = clEnqueueWriteBuffer(command_queue[cur], memin[i], CL_TRUE, 0,
                                                     nextTask->inData(i)->size, nextTask->inData(i)->addr, 0, NULL, NULL);
-                        err |= clSetKernelArg(kernel,  param, sizeof(memaddr), (void *)&memin[i]);
+                        ret = clSetKernelArg(kernel,  param, sizeof(memaddr), (void *)&memin[i]);
 
                         param++;
                     }
@@ -111,24 +110,26 @@
                 cl_mem *memout = new cl_mem[nextTask->outData_count];
                 cl_mem_flags out_mem_flag = flag.flip? CL_MEM_READ_WRITE : CL_MEM_WRITE_ONLY;
 
+
                 for(int i=0;i<nextTask->outData_count;i++) {
-                    memout[i] = clCreateBuffer(context, out_mem_flag, nextTask->outData(i)->size, NULL, NULL);
+                    memout[i] = clCreateBuffer(context, out_mem_flag, nextTask->outData(i)->size, NULL, &ret);
                     if (flag.flip) { // use output buffer as input buffer
-                        err |= clEnqueueWriteBuffer(command_queue[cur], memout[i], CL_TRUE, 0,
-                                                    nextTask->inData(i)->size, nextTask->inData(i)->addr, 0, NULL, NULL);                    }
-                    err |= clSetKernelArg(kernel,  param, sizeof(memaddr), (void *)&memout[i]);
+                        ret = clEnqueueWriteBuffer(command_queue[cur], memout[i], CL_TRUE, 0,
+                                                    nextTask->inData(i)->size, nextTask->inData(i)->addr, 0, NULL, NULL);
+                    }
+                    ret = clSetKernelArg(kernel,  param, sizeof(memaddr), (void *)&memout[i]);
                     param++;
                 }
 
+
                 cl_event ev = NULL;
-                clEnqueueTask(command_queue[cur], kernel, 0, NULL, &ev);
-
+                ret = clEnqueueTask(command_queue[cur], kernel, 0, NULL, &ev);
                 // ndrange flagが0ならdim,global_work_size[0],local_work_size[0] = 1で固定に
                 // clEnqueueNDRange
                 // (command_queue[cur], kernel, dim, NULL,global_work_size[0],local_work_size[0],NULL&ev);
                 for(int i=0;i<nextTask->outData_count;i++) {
-                    err |= clEnqueueReadBuffer(command_queue[cur], memout[i], CL_TRUE, 0,
-                                               nextTask->outData(i)->size, nextTask->outData(i)->addr, 1, &ev, NULL);
+                    ret = clEnqueueReadBuffer(command_queue[cur], memout[i], CL_TRUE, 0,
+                                              nextTask->outData(i)->size, nextTask->outData(i)->addr, 1, &ev, NULL);
                 }
             }
 
--- a/TaskManager/Gpu/GpuScheduler.h	Sun Oct 14 02:40:05 2012 +0900
+++ b/TaskManager/Gpu/GpuScheduler.h	Tue Oct 23 14:41:27 2012 +0900
@@ -55,3 +55,4 @@
 
 extern void gpu_register_task(int cmd,const char* filename,const char* functionname);
 extern void gpu_register_ndrange(int, int, size_t*);
+
--- a/TaskManager/test/GpuRunTest/GpuRunTest.cc	Sun Oct 14 02:40:05 2012 +0900
+++ b/TaskManager/test/GpuRunTest/GpuRunTest.cc	Tue Oct 23 14:41:27 2012 +0900
@@ -82,7 +82,7 @@
 
     HTaskPtr twice = manager->create_task(Twice);
 
-    twice->set_param(0, (memaddr)length);
+    twice->set_param(0, (memaddr)&length);
     twice->set_inData(0, indata, sizeof (int)*length);
     twice->set_outData(0, outdata, sizeof (int)*length);
     twice->set_cpu(GPU_0);
--- a/TaskManager/test/GpuRunTest/Makefile	Sun Oct 14 02:40:05 2012 +0900
+++ b/TaskManager/test/GpuRunTest/Makefile	Tue Oct 23 14:41:27 2012 +0900
@@ -5,7 +5,7 @@
 SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
 OBJS = $(SRCS:.cc=.o)
 
-LIBS += -lGpuManager  -framework opencl `sdl-config --libs`
+LIBS += -lGpuManager -framework opencl `sdl-config --libs`
 
 .SUFFIXES: .cc .o
 
--- a/example/basic/Makefile.gpu	Sun Oct 14 02:40:05 2012 +0900
+++ b/example/basic/Makefile.gpu	Tue Oct 23 14:41:27 2012 +0900
@@ -5,7 +5,7 @@
 SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
 OBJS = $(SRCS:.cc=.o)
 
-TASK_DIR  = ppe
+TASK_DIR  = gpu
 TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc)
 TASK_SRCS_EXCLUDE = 
 TASK_SRCS = $(filter-out $(TASK_DIR)/$(TASK_SRCS_EXCLUDE),$(TASK_SRCS_TMP))
--- a/example/basic/main.cc	Sun Oct 14 02:40:05 2012 +0900
+++ b/example/basic/main.cc	Tue Oct 23 14:41:27 2012 +0900
@@ -19,7 +19,7 @@
 {
     printf("%s ---\n", title);
     for (int i = 0; i < size; i++) {
-	printf("%2d ", data[i]);
+        printf("%2d ", data[i]);
     }
     printf("\n");
 }
@@ -57,7 +57,7 @@
     int *data = (int*)manager->allocate(sizeof(int)*length);
 
     for (int i = 0; i < length; i++) {
-	data[i] = i;
+        data[i] = i;
     }
 
     print_data(data, length, "before");
@@ -65,7 +65,7 @@
     /**
      * Create Task
      *   create_task(Task ID);
-     */ 
+     */
     twice = manager->create_task(TWICE_TASK);
     twice->set_cpu(SPE_ANY);
 
@@ -73,6 +73,7 @@
      * Set of Input Data
      *   add_inData(address of input data, size of input data);
      */
+    // twice->set_param(0,(memaddr)&length);
     twice->set_inData(0,data, sizeof(int)*length);
 
     /**
@@ -93,14 +94,14 @@
     twice->set_post(twice_result, (void*)data, 0);
 
     // add Active Queue
-    twice->spawn();    
+    twice->spawn();
 }
 
 int
 TMmain(TaskManager *manager,int argc, char *argv[])
 {
     if (init(argc, argv) < 0) {
-	return -1;
+        return -1;
     }
 
     // Task Register
@@ -108,7 +109,7 @@
     task_init();
 
     for (int i = 0; i < task; ++i) {
-	twice_init(manager);
+        twice_init(manager);
     }
 
     return 0;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/flip/GpuFunc.h	Tue Oct 23 14:41:27 2012 +0900
@@ -0,0 +1,6 @@
+
+enum {
+#include "SysTasks.h"
+    Twice,
+    //    Func1,
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/flip/Makefile	Tue Oct 23 14:41:27 2012 +0900
@@ -0,0 +1,29 @@
+include ./Makefile.def
+
+SRCS_TMP = $(wildcard *.cc)
+SRCS_EXCLUDE =   # 除外するファイルを書く
+SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
+OBJS = $(SRCS:.cc=.o)
+
+LIBS += -lGpuManager -framework opencl `sdl-config --libs`
+
+.SUFFIXES: .cc .o
+
+.cc.o:
+	$(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@
+
+all: $(TARGET)
+gpu: all
+
+$(TARGET): $(OBJS)
+	$(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS)
+
+link:
+	$(CC) -o $(TARGET) $(OBJS) $(LIBS)
+
+debug: $(TARGET)
+	sudo gdb ./$(TARGET) 
+
+clean:
+	rm -f $(TARGET) $(OBJS)
+	rm -f *~ \#*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/flip/Makefile.def	Tue Oct 23 14:41:27 2012 +0900
@@ -0,0 +1,9 @@
+TARGET = fliptest
+
+CERIUM = ../../../Cerium
+
+CC      = g++
+CFLAGS  =   -g -Wall
+
+INCLUDE = -I${CERIUM}/include/TaskManager -I. -I../..
+LIBS = -L${CERIUM}/TaskManager
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/flip/main.cc	Tue Oct 23 14:41:27 2012 +0900
@@ -0,0 +1,114 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include "types.h"
+#include "TaskManager.h"
+#include "GpuFunc.h"
+
+#define DEFAULT 5
+static long int length = DEFAULT;
+static int task = 1;
+int *indata;
+
+extern void task_init(void);
+
+char usr_help_str[] = "GpuRun [length]\n";
+
+void
+print_data(int *data, int size, const char *title)
+{
+    printf("%s ---\n", title);
+    for ( int i = 0; i < size; i++) {
+        printf("%2d ", data[i]);
+    }
+    printf("\n");
+}
+
+/**
+ * タスク終了後の data1, data2 の確認
+ */
+void
+twice_result(SchedTask *s, void *a, void *b)
+{
+    int* data = (int*)a;
+    long length = (long)b;
+    print_data(data, length, "after");
+}
+
+
+int
+init(int argc, char **argv)
+{
+    for (int i = 1; argv[i]; ++i) {
+        if (strcmp(argv[i], "-length") == 0) {
+            length = atoi(argv[++i]);
+        } else if (strcmp(argv[i], "-count") == 0) {
+            task = atoi(argv[++i]);
+        }
+    }
+
+    return 0;
+}
+
+
+void
+tester(int *indata, int *outdata, int num) {
+
+    //チェック
+    int check = 0;
+    for (int c=0; c<num; c++) {
+        if(outdata[c] == indata[c]*2) {
+            check++;
+        }
+    }
+
+    printf("Computed '%d/%d' correct values\n",check,num);
+
+}
+
+
+void
+test(TaskManager *manager) {
+    indata = new int[length];
+
+    for (int c=0; c < length ;c++) {
+        indata[c] = c;
+    }
+
+    print_data(indata, length, "before");
+
+    HTaskPtr twice = manager->create_task(Twice);
+
+    twice->set_param(0, (memaddr)length);
+    twice->set_inData(0, indata, sizeof (int)*length);
+    twice->set_outData(0, indata, sizeof (int)*length);
+    twice->set_cpu(GPU_0);
+    twice->flip();
+
+    /*
+     * set_post() で ppe task を渡せるようにしたい
+     */
+    twice->set_post(twice_result, (void*)indata, (void*)length);
+
+    twice->spawn();
+}
+
+int
+TMmain(TaskManager *manager, int argc, char* argv[])
+{
+    if (init(argc, argv) < 0) {
+        return -1;
+    }
+
+    task_init();
+
+    for (int i = 0; i < task; ++i) {
+        test(manager);
+    }
+
+    return 0;
+}
+
+/* end */
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/flip/task_init.cc	Tue Oct 23 14:41:27 2012 +0900
@@ -0,0 +1,12 @@
+#include "GpuFunc.h"
+#include "GpuScheduler.h"
+
+void
+task_init(void)
+{
+    int cmd = Twice;
+    int dim = 2;
+    size_t *l_work_size = new size_t(dim);
+    GpuNDRangeRegister(cmd, dim, l_work_size);
+    GpuSchedRegister(cmd, "twice.cl", "twice");
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/flip/twice.cl	Tue Oct 23 14:41:27 2012 +0900
@@ -0,0 +1,12 @@
+__kernel void
+twice(__constant int *data_count,
+      __global int *input_data)
+      //__global int *output_data)
+{
+    long count = (long)data_count[0];
+        for (int i = 0; i<count; i++) {
+            // output_data[i] = 2*input_data[i];
+            input_data[i] *= 2;
+        }
+
+}
--- a/example/many_task/Makefile	Sun Oct 14 02:40:05 2012 +0900
+++ b/example/many_task/Makefile	Tue Oct 23 14:41:27 2012 +0900
@@ -16,6 +16,11 @@
 	@echo "Make for PS3 (Cell)"
 	@$(MAKE) -f Makefile.cell
 
+gpu: FORCE
+	@echo "Make for OpenCL"
+	@$(MAKE) -f Makefile.gpu
+
+
 FORCE:
 
 clean:
--- a/example/many_task/Makefile.def	Sun Oct 14 02:40:05 2012 +0900
+++ b/example/many_task/Makefile.def	Tue Oct 23 14:41:27 2012 +0900
@@ -10,7 +10,7 @@
 # OPT = -g -O9
 # OPT = -g
 CC      = g++
-CFLAGS  =  -DUSE_SIMPLE_TASK -Wall  $(OPT)
+CFLAGS  =  -Wall  $(OPT) -DUSE_SIMPLE_TASK
 # CFLAGS  =   -Wall  $(OPT)
 
 INCLUDE = -I${CERIUM}/include/TaskManager -I. -I..
--- a/example/many_task/Makefile.macosx	Sun Oct 14 02:40:05 2012 +0900
+++ b/example/many_task/Makefile.macosx	Tue Oct 23 14:41:27 2012 +0900
@@ -2,7 +2,7 @@
 
 
 SRCS_TMP = $(wildcard *.cc)
-SRCS_EXCLUDE = sort_test.cc ppe/task_init.cc # 除外するファイルを書く
+SRCS_EXCLUDE = sort_test.cc # 除外するファイルを書く
 SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
 OBJS = $(SRCS:.cc=.o)
 
--- a/example/many_task/main.cc	Sun Oct 14 02:40:05 2012 +0900
+++ b/example/many_task/main.cc	Tue Oct 23 14:41:27 2012 +0900
@@ -37,18 +37,6 @@
     return tv.tv_sec + (double)tv.tv_usec*1e-6;
 }
 
-/*
-static void
-show_data(void)
-{
-    puts("-----------------------------------------------");
-    for(int i = 0; i < data_length; i++) {
-        printf("data[%02d].index = %d\n", i, data[i].index);
-    }
-    puts("-----------------------------------------------");
-}
-*/
-
 const char *usr_help_str = "Usage: ./sort [option]\n \
 options\n\
   -cpu     Number of SPE used (default 1)\n\
@@ -80,6 +68,16 @@
 Sort sorter;
 
 static void
+show_data(void)
+{
+    puts("-----------------------------------------------");
+    for(int i = 0; i < sorter.data_length; i++) {
+        printf("data[%02d].index = %d\n", i, sorter.data[i].index);
+    }
+    puts("-----------------------------------------------");
+}
+
+static void
 check_data()
 {
     for(int i=0; i< sorter.data_length-1;i++) {
@@ -112,11 +110,11 @@
         sorter.data[i].ptr   = i;
     }
 
+    // show_data();
     HTaskPtr restart = manager->create_task(sort_task,0,0,0,0);
+    // default ではSortSimpleがsetされている。SortSimpleはsort.ccに
+
     restart->set_param(0,(memaddr)&sorter);
-    //restart->set_inData(0,sorter.data,sizeof(Data)*length);
-    //restart->set_param(0,(memaddr)&length);
-    // restart->set_outData(0,sorter.data,sizeof(Data)*(int)length);
     // set flip flag
     restart->spawn();
 }
@@ -133,7 +131,6 @@
     task_init();
 
     int cpu  = manager->get_cpuNum();
-
     // in case of -cpu 0
     if (cpu==0) cpu = 1;
     if (1) {
@@ -156,7 +153,7 @@
 TMend(TaskManager *manager)
 {
     ed_time = getTime();
-    //show_data();
+    // show_data();
     check_data();
     printf("Time: %0.6f\n",ed_time-st_time);
 }
--- a/example/many_task/ppe/sort_test.cc	Sun Oct 14 02:40:05 2012 +0900
+++ b/example/many_task/ppe/sort_test.cc	Tue Oct 23 14:41:27 2012 +0900
@@ -3,13 +3,21 @@
 #include <stdlib.h>
 //#include "sort.h"
 #include "QuickSort.h"
+#include <sys/time.h>
 // sort.cc
 extern int data_length;
 extern DataPtr data;
-extern void quick_sort(DataPtr, int, int);
 static int length = 1200;
 extern void qsort_test(Data*, int, int);
 
+static double
+getTime()
+{
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return tv.tv_sec + (double)tv.tv_usec*1e-6;
+}
+
 void
 show( Data *data, int size )
 {
@@ -63,8 +71,14 @@
 
     int begin = 0;
     int end = length;
+    double st_time;
+    double ed_time;
     //show(sorter.data, end-1);
+    st_time = getTime();
     qsort_test(sorter.data, begin, end);
+    ed_time = getTime();
+    printf("Time: %0.6f\n",ed_time-st_time);
+
     //show(sorter.data, end-1);
     check_data();
     return 0;
--- a/example/many_task/sort.cc	Sun Oct 14 02:40:05 2012 +0900
+++ b/example/many_task/sort.cc	Tue Oct 23 14:41:27 2012 +0900
@@ -23,12 +23,12 @@
 get_split_num(int len, int num)
 {
     if (len / num < MAX_BLOCK_SIZE) {
-	return num;
+        return num;
     } else {
-	// 切り上げ
-	return (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE;
+        // 切り上げ
+        return (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE;
     }
-}	
+}
 
 
 /**
@@ -53,77 +53,85 @@
     int last_half_block_num = half_block_num+(last_block_num/2);
 
     if (--sort_count < 0) {
-	return 0;
+        return 0;
     }
 
 
     for (int i = 0; i < s->split_num-1; i++) {
-	s->fsort[i] = manager->create_task(QUICK_SORT,
-	    (memaddr)&s->data[i*block_num], sizeof(Data)*block_num,
-	    (memaddr)&s->data[i*block_num], sizeof(Data)*block_num);
-	if (i>0 && s->bsort[i-1]) {
-	    s->fsort[i]->wait_for(s->bsort[i-1]);
-	}
-	if (i<s->split_num-2 && s->bsort[i]) {
-	    s->fsort[i]->wait_for(s->bsort[i]);
-	}
-	s->fsort[i]->set_cpu(SPE_ANY);
+        s->fsort[i] = manager->create_task(QUICK_SORT,
+            (memaddr)&s->data[i*block_num], sizeof(Data)*block_num,
+            (memaddr)&s->data[i*block_num], sizeof(Data)*block_num);
+        s->fsort[i]->flip();
+        if (i>0 && s->bsort[i-1]) {
+            s->fsort[i]->wait_for(s->bsort[i-1]);
+        }
+        if (i<s->split_num-2 && s->bsort[i]) {
+            s->fsort[i]->wait_for(s->bsort[i]); 
+        }
+        s->fsort[i]->set_cpu(SPE_ANY);
+        s->fsort[i]->set_param(0,(memaddr)block_num);
     }
 
     // 最後の block は端数なので last_block_num を使う
     {
-	int i = s->split_num-1;
+        int i = s->split_num-1;
 
-	s->fsort[i] = manager->create_task(QUICK_SORT,
-	    (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num,
-	    (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num);
-	if (i>0 && s->bsort[i-1]) {
-	    s->fsort[i]->wait_for(s->bsort[i-1]);
-	}
-	s->fsort[i]->set_cpu(SPE_ANY);
+        s->fsort[i] = manager->create_task(QUICK_SORT,
+            (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num,
+            (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num);
+        s->fsort[i]->flip();
+        if (i>0 && s->bsort[i-1]) {
+            s->fsort[i]->wait_for(s->bsort[i-1]);
+        }
+        s->fsort[i]->set_cpu(SPE_ANY);
+        s->fsort[i]->set_param(0,(memaddr)last_block_num);
    }
 
     if (s->split_num > 1) {
 
-	for (int i = 0; i < half_num-1; i++) {
-	    if (s->bsort[i]) manager->free_htask(s->bsort[i]);
-	    s->bsort[i] = manager->create_task(QUICK_SORT,
-		(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num,
-		(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num);
-	    s->bsort[i]->set_cpu(SPE_ANY);
-	}
+        for (int i = 0; i < half_num-1; i++) {
+            if (s->bsort[i]) manager->free_htask(s->bsort[i]);
+            s->bsort[i] = manager->create_task(QUICK_SORT,
+                (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num,
+                (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num);
+            s->bsort[i]->flip();
+            s->bsort[i]->set_cpu(SPE_ANY);
+            s->bsort[i]->set_param(0,(memaddr)block_num);
+        }
 
-	{
-	    int i = half_num-1;
+        {
+            int i = half_num-1;
 
-	    if (s->bsort[i]) manager->free_htask(s->bsort[i]);
-	    s->bsort[i] = manager->create_task(QUICK_SORT,
-		(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num,
-		(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num);
-	    s->bsort[i]->set_cpu(SPE_ANY);	
-	}
-	
-	for (int i = 0; i < half_num; i++) {
-	    s->bsort[i]->wait_for(s->fsort[i]);
-	    s->bsort[i]->wait_for(s->fsort[i+1]);
-	    s->bsort[i]->no_auto_free();
-	    s->bsort[i]->spawn();
-	}
+            if (s->bsort[i]) manager->free_htask(s->bsort[i]);
+            s->bsort[i] = manager->create_task(QUICK_SORT,
+                (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num,
+                (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num);
+            s->bsort[i]->flip();
+            s->bsort[i]->set_cpu(SPE_ANY);
+            s->bsort[i]->set_param(0,(memaddr)last_half_block_num);
+        }
+
+        for (int i = 0; i < half_num; i++) {
+            s->bsort[i]->wait_for(s->fsort[i]);
+            s->bsort[i]->wait_for(s->fsort[i+1]);
+            s->bsort[i]->no_auto_free();
+            s->bsort[i]->spawn();
+        }
     }
 
     HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0);
     restart->set_param(0,(memaddr)s);
     if (!all) restart->wait_for(s->fsort[0]);
     for (int i = 0; i < s->split_num; i++) {
-	s->fsort[i]->spawn();
+        s->fsort[i]->spawn();
     }
     if (sort_count == 1) {
-	// last loop wait for all task 
-	// we should not need this?
-	for (int i = 0; i < half_num; i++) {
-	    restart->wait_for(s->bsort[i]);
-	    s->bsort[i]->auto_free();
-	}
+        // last loop wait for all task
+        // we should not need this?
+        for (int i = 0; i < half_num; i++) {
+            restart->wait_for(s->bsort[i]);
+            s->bsort[i]->auto_free();
+        }
     }
     restart->spawn();
     return 0;