changeset 1506:a7895ab4d0e3 draft

add flip flag and NDRange flag
author Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date Mon, 10 Sep 2012 15:04:39 +0900
parents 0ad321ee074d
children 7abad41d12af
files TaskManager/ChangeLog TaskManager/Gpu/GpuScheduler.cc TaskManager/Gpu/GpuScheduler.h TaskManager/kernel/ppe/HTask.h TaskManager/kernel/ppe/TaskList.h TaskManager/test/GpuRegistTaskTest/GpuRegistTaskTest.cc TaskManager/test/GpuRegistTaskTest/Makefile TaskManager/test/GpuRegistTaskTest/Makefile.orig TaskManager/test/GpuRegistTaskTest/twice.cl TaskManager/test/GpuRunTest/GpuRunTest.cc example/OpenCL/twice.cl example/many_task/Makefile.macosx example/many_task/main.cc example/word_count/a.txt
diffstat 14 files changed, 152 insertions(+), 240 deletions(-) [+]
line wrap: on
line diff
--- a/TaskManager/ChangeLog	Fri Aug 24 18:03:12 2012 +0900
+++ b/TaskManager/ChangeLog	Mon Sep 10 15:04:39 2012 +0900
@@ -1,3 +1,22 @@
+2012-9-5 Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
+
+	set_cpu(SPE_ANY)
+	CPUで実行するかGPUで実行するか選択可能にする
+	CPU_ANYとGPU_ANYを追加
+	SPE_ANYを選択したときにGPUで実行できるかはコマンドラインで選択した方がよい
+	-cpuと-gpuで選択する
+	ベンチマークを取る事も考える
+
+	sortではflipを使っている
+	 flip:input data上で計算を行ったときにそのinput bufferをそのままoutput bufferにする機能
+	これをGPUで実現するにはbufferをread writeにすればよい
+	そのためにはtask投入時にflipするかどうかを知っている必要がある
+	taskにbit fieldを使ったflagがあるので、そのAPIを足す。かなりの変更が必要
+	schedTaskのflipはやめる
+
+	将来的にはCLのソースとCeriumのソースは同じにしたい
+	CLに合わせるか、CLを生成するかのどちらか
+	
 2012-8-22 Shinji KONO <kono@ie.u-ryukyu.ac.jp>
 
 	今後の課題
--- a/TaskManager/Gpu/GpuScheduler.cc	Fri Aug 24 18:03:12 2012 +0900
+++ b/TaskManager/Gpu/GpuScheduler.cc	Mon Sep 10 15:04:39 2012 +0900
@@ -5,6 +5,7 @@
 #include "stdio.h"
 #include <fcntl.h>
 #include <sys/stat.h>
+#include <string.h>
 
 GpuScheduler::GpuScheduler()
 {
@@ -16,11 +17,11 @@
 GpuScheduler::init_impl(int useRefDma)
 {
     if (useRefDma & 0x10) {
-        fifoDmaManager = new PreRefDmaManager();
+        fifoDmaManager = new PreRefDmaManager(); // Prefetch command and no copy
     } else if (useRefDma & 0x01) {
-        fifoDmaManager = new FifoDmaManager();
+        fifoDmaManager = new FifoDmaManager(); // memcpy
     } else {
-        fifoDmaManager = new ReferencedDmaManager();
+        fifoDmaManager = new ReferencedDmaManager(); // no copy
     }
     connector = fifoDmaManager;
 }
@@ -28,7 +29,7 @@
 void
 GpuScheduler::init_gpu()
 {
-    clGetPlatformIDs(1, &platform_id, &ret_num_platforms); 
+    clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
     clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices);
     // unavailable GPU
     if(ret_num_devices == 0) {
@@ -38,26 +39,38 @@
     command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
 }
 
+GpuScheduler::~GpuScheduler()
+{
+    clReleaseCommandQueue(command_queue);
+    clReleaseContext(context);
+}
 
 void
 GpuScheduler::run()
 {
     for (;;) {
         memaddr params_addr = connector->task_list_mail_read();
-        
         // Get OpenCL infomation
-        
+
         if ((memaddr)params_addr == (memaddr)MY_SPE_COMMAND_EXIT) {
             clFinish(command_queue);
             return ;
         }
-        
+
+        HTask::htask_flag flag;
+
+        memset(&flag, 0, sizeof(HTask::htask_flag));
+
         while (params_addr) {
             // since we are on the same memory space, we don't hae to use dma_load here
-            TaskListPtr tasklist = (TaskListPtr)connector->dma_load(this, params_addr, 
+            TaskListPtr tasklist = (TaskListPtr)connector->dma_load(this, params_addr,
                                                         sizeof(TaskList), DMA_READ_TASKLIST);
-            
-            for (TaskPtr nextTask = tasklist->tasks; nextTask < tasklist->last(); nextTask = nextTask->next()) {
+            if (tasklist->self) {
+                flag = tasklist->self->flag;
+            }
+
+            for (TaskPtr nextTask = tasklist->tasks;
+                 nextTask < tasklist->last(); nextTask = nextTask->next()) {
 
                 load_kernel(nextTask->command);
 
@@ -65,40 +78,52 @@
                 int err = CL_SUCCESS;
 
                 int param = 0;
-                cl_mem memin = clCreateBuffer(context, CL_MEM_READ_ONLY, 
+
+
+                cl_mem memparam = clCreateBuffer(context, CL_MEM_READ_ONLY,
                                                sizeof(memaddr)*nextTask->param_count, NULL, NULL);
-                err |= clEnqueueWriteBuffer(command_queue, memin, CL_TRUE, 0, sizeof(memaddr)*nextTask->param_count, 
+                err |= clEnqueueWriteBuffer(command_queue, memparam, CL_TRUE, 0, sizeof(memaddr)*nextTask->param_count,
                                             nextTask->param(0), 0, NULL, NULL);
-                err |= clSetKernelArg(kernel, param, sizeof(memaddr),(void *)&memin);
-                
+                err |= clSetKernelArg(kernel, param, sizeof(memaddr),(void *)&memparam);
+
                 param++;
-                for(int i=0;i<nextTask->inData_count;i++) {
-                    cl_mem memin = clCreateBuffer(context, CL_MEM_READ_ONLY, nextTask->inData(i)->size, NULL, NULL);
-                    err |= clEnqueueWriteBuffer(command_queue, memin, CL_TRUE, 0, 
-                                                nextTask->inData(i)->size, nextTask->inData(i)->addr, 0, NULL, NULL);
-                    err |= clSetKernelArg(kernel,  param, sizeof(memaddr), (void *)&memin);
+
+                cl_mem_flags mem_flag = CL_MEM_READ_ONLY;
+                cl_mem *memin = new cl_mem[nextTask->inData_count];
+                if (!flag.flip) {
+                    for(int i=0;i<nextTask->inData_count;i++) {
+                        memin[i] = clCreateBuffer(context, mem_flag, nextTask->inData(i)->size, NULL, NULL);
+
+                        err |= clEnqueueWriteBuffer(command_queue, memin[i], CL_TRUE, 0,
+                                                    nextTask->inData(i)->size, nextTask->inData(i)->addr, 0, NULL, NULL);
+                        err |= clSetKernelArg(kernel,  param, sizeof(memaddr), (void *)&memin[i]);
 
-                    param++;
+                        param++;
+                    }
                 }
-                
+
                 cl_mem *memout = new cl_mem[nextTask->outData_count];
-                
+                cl_mem_flags out_mem_flag = flag.flip? CL_MEM_READ_WRITE : CL_MEM_WRITE_ONLY;
+
                 for(int i=0;i<nextTask->outData_count;i++) {
-                    memout[i] = clCreateBuffer(context, CL_MEM_WRITE_ONLY, nextTask->outData(i)->size, NULL, NULL);
+                    memout[i] = clCreateBuffer(context, out_mem_flag, nextTask->outData(i)->size, NULL, NULL);
+                    if (flag.flip) { // use output buffer as input buffer
+                        err |= clEnqueueWriteBuffer(command_queue, memout[i], CL_TRUE, 0,
+                                                    nextTask->inData(i)->size, nextTask->inData(i)->addr, 0, NULL, NULL);                    }
                     err |= clSetKernelArg(kernel,  param, sizeof(memaddr), (void *)&memout[i]);
                     param++;
                 }
-                
+
                 cl_event ev = NULL;
                 clEnqueueTask(command_queue, kernel, 0, NULL, &ev);
-                
+
                 for(int i=0;i<nextTask->outData_count;i++) {
-                    err |= clEnqueueReadBuffer(command_queue, memout[i], CL_TRUE, 0, 
+                    err |= clEnqueueReadBuffer(command_queue, memout[i], CL_TRUE, 0,
                                                nextTask->outData(i)->size, nextTask->outData(i)->addr, 1, &ev, NULL);
                 }
-                delete memout;
             }
             clFlush(command_queue); // waiting for queued task
+
             connector->mail_write((memaddr)(tasklist->waiter));
             params_addr = (memaddr)tasklist->next;
         }
@@ -125,29 +150,31 @@
     int fp;
     char *source_str;
     size_t source_size;
-    
+
     fp = open(filename, O_RDONLY);
-    
+
     if (!fp) {
         fprintf(stderr, "Failed to load kernel.\n");
         exit(1);
     }
-    
+
     struct stat stats;
     fstat(fp,&stats);
     off_t size = stats.st_size;
-    
+
     if (!size) {
         fprintf(stderr, "Failed to load kernel.\n");
         exit(1);
     }
-    
+
     source_str = (char*)alloca(size);
     source_size = read(fp, source_str, size);
     close(fp);
 
-    cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source_str,
-                                               (const size_t *)&source_size, &ret);
+    cl_program program =
+        clCreateProgramWithSource(context, 1,
+                                  (const char **)&source_str,
+                                  (const size_t *)&source_size, &ret);
     clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
 
     cl_kernel *kernel = new cl_kernel;
@@ -159,7 +186,7 @@
 
 void
 gpu_register_task(int cmd, const char* filename, const char* functionname)
-{     
+{
     task_list[cmd].run = not_ready;  // not yet ready
     task_list[cmd].load = null_loader;
     task_list[cmd].wait = null_loader;
--- a/TaskManager/Gpu/GpuScheduler.h	Fri Aug 24 18:03:12 2012 +0900
+++ b/TaskManager/Gpu/GpuScheduler.h	Mon Sep 10 15:04:39 2012 +0900
@@ -4,6 +4,7 @@
 #include "Scheduler.h"
 #include "FifoDmaManager.h"
 #include "GpuThreads.h"
+#include "HTask.h"
 
 #ifdef __APPLE__
 #include <OpenCL/opencl.h>
@@ -11,11 +12,10 @@
 #include <CL/cl.h>
 #endif
 
-
 class GpuScheduler : public Scheduler {
 public:
     GpuScheduler();
-
+    virtual ~GpuScheduler();
     void init_impl(int useRefDma);
     void init_gpu();
     void run();
--- a/TaskManager/kernel/ppe/HTask.h	Fri Aug 24 18:03:12 2012 +0900
+++ b/TaskManager/kernel/ppe/HTask.h	Mon Sep 10 15:04:39 2012 +0900
@@ -19,9 +19,9 @@
 
 /*!
   @class
-  
+
   @brief
-  
+
   Cerium の Task で、spawn() でキューに格納されて順次実行される。
   cpu の指定があれば並列に実行される。
   特定の Task を待ち合わせる事が可能。
@@ -50,13 +50,15 @@
     HTask *next;
     HTask *prev;
 
-    struct {
-	unsigned no_auto_free:1;        //      bit 0    auto free flag (0 .. auto, 1 manual)
+    struct htask_flag {
+        unsigned no_auto_free:1;        //      bit 0    auto free flag (0 .. auto, 1 manual)
+        unsigned flip:1;                //      use read write buffers for all
+        unsigned nd_range:1;            //      openCL nd_range
     } flag;
 
     void spawn();
     void wait_for(HTask *);
-    void set_cpu(CPU_TYPE type);    
+    void set_cpu(CPU_TYPE type);
     void set_post(PostFunction func, void *read, void *write);
     Task *create_task_array(int task_id, int num_task, int num_param, int num_inData, int num_outData);
     Task *next_task_array(int task_id, Task *t);
@@ -64,10 +66,10 @@
     void spawn_task_array(Task *t);
 
     HTask *init(int cmd, memaddr rbuf, int rs, memaddr wbuf, int ws) {
-	init(cmd);
-	set_input(rbuf, rs);
-	set_output(wbuf, ws);
-	return this;
+        init(cmd);
+        set_input(rbuf, rs);
+        set_output(wbuf, ws);
+        return this;
     }
 
     void initOnce() {
@@ -87,26 +89,26 @@
     void set_inData_t(int index, memaddr addr, int size) {
 #ifdef EARLY_TOUCH
         if ((unsigned long)addr&0xf) {
-	  printf("inData is not aligned. command = %d, index = %d, addr = 0x%lx, size = %d\n", 
-		 command, index, (unsigned long)addr, size);
-	}
+          printf("inData is not aligned. command = %d, index = %d, addr = 0x%lx, size = %d\n",
+                 command, index, (unsigned long)addr, size);
+        }
         char *p = (char *)addr; char b = *p;
         p = (char *)(addr+size-1); b += *p;
 #endif
-	Task *t = ((TaskList*)rbuf)->tasks;
-	t->set_inData_t(index, addr,size);
+        Task *t = ((TaskList*)rbuf)->tasks;
+        t->set_inData_t(index, addr,size);
     }
-    void set_outData_t(int index, memaddr addr, int size) { 
+    void set_outData_t(int index, memaddr addr, int size) {
 #ifdef EARLY_TOUCH
-	if ((unsigned long)addr&0xf) {
-	  printf("inData is not aligned. command = %d, index = %d, addr = 0x%lx, size = %d\n", 
-		 command, index, (unsigned long)addr, size);
-	}
+        if ((unsigned long)addr&0xf) {
+          printf("inData is not aligned. command = %d, index = %d, addr = 0x%lx, size = %d\n",
+                 command, index, (unsigned long)addr, size);
+        }
         char *p = (char *)addr; char b = *p;
         p = (char *)(addr+size-1); b += *p;
 #endif
-	Task *t = ((TaskList*)rbuf)->tasks;
-	t->set_outData_t(index, addr,size);
+        Task *t = ((TaskList*)rbuf)->tasks;
+        t->set_outData_t(index, addr,size);
     }
     void set_param_t(int index, memaddr param) {
         Task *t = ((TaskList*)rbuf)->tasks;
@@ -114,28 +116,41 @@
     }
 
     void no_auto_free() {
-	flag.no_auto_free = 1;
+        flag.no_auto_free = 1;
     }
     void auto_free() {
-	flag.no_auto_free = 0;
+        flag.no_auto_free = 0;
+    }
+
+    void flip() {
+        flag.flip = 1;
+    }
+    void no_flip() {
+        flag.flip = 0;
+    }
+
+    htask_flag get_flag(){
+        return flag;
     }
 
     void init() {
-	next = prev = NULL;
-	waiter = NULL;
+        next = prev = NULL;
+        waiter = NULL;
     }
 
     void init(int cmd) {
-	command  = cmd;
-	flag.no_auto_free = 0;
-	self = (memaddr) this;
+        command  = cmd;
+        flag.no_auto_free = 0;
+        flag.flip = 0;
+        flag.nd_range = 0;
+        self = (memaddr) this;
 
-	post_func = NULL;
-	mimpl     = NULL;
-	cpu_type  = CPU_PPE;
+        post_func = NULL;
+        mimpl     = NULL;
+        cpu_type  = CPU_PPE;
 
-	post_arg1 = NULL;
-	post_arg2 = NULL;
+        post_arg1 = NULL;
+        post_arg2 = NULL;
     }
 #define set_param(index,param) set_param_t(index, (memaddr) (param))
 
--- a/TaskManager/kernel/ppe/TaskList.h	Fri Aug 24 18:03:12 2012 +0900
+++ b/TaskManager/kernel/ppe/TaskList.h	Mon Sep 10 15:04:39 2012 +0900
@@ -19,6 +19,7 @@
     HTask *self; // 4 byte
     long dummy[3]; // 16 byte
     Task tasks[TASK_MAX_SIZE]; // 32*TASK_MAX_SIZE
+    
 
     TaskPtr last() { return (TaskPtr)(((memaddr)tasks)+lastTask); }
     void set_last(Task *t) { lastTask = ((memaddr)t) - ((memaddr)tasks); }
--- a/TaskManager/test/GpuRegistTaskTest/GpuRegistTaskTest.cc	Fri Aug 24 18:03:12 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,109 +0,0 @@
-#include <stdio.h>
-#include <OpenCL/opencl.h>
-#include "GpuThreads.h"
-#include "GpuScheduler.h"
-#include "CellTaskManagerImpl.h"
-
-#define DEFAULT 5
-extern void gpu_register_task(GpuThreads*, int, const char*, const char*);
-
-void
-print_data(int *data, int size, const char *title){
-    printf("%s ---\n", title);
-
-    for ( int i = 0; i < size; i++) {
-        printf("%2d ", data[i]);
-    }
-
-    printf("\n");
-}
-
-void
-tester(int *indata,int *outdata, int num)
-{
-
-    //check
-    int check = 0;
-    for (int c=0; c<num; c++){
-        if(outdata[c] == indata[c]*2){
-            check++;
-        }
-    }
-    printf("Computed '%d/%d' correct values\n",check,num);    
-    
-}
-
-void
-test(int task_array_num)
-{
-    
-    GpuScheduler* g_scheduler = new GpuScheduler();
-
-    int *indata,*outdata;
-    int count;
-
-    indata = new int(task_array_num);
-    outdata = new int(task_array_num);
-
-    // prepare input data
-    for (count=0; count < task_array_num ;count++) {
-        indata[count] = count;
-    }
-    
-    g_scheduler->regist_task(1,"./twice.cl","twice");
-
-
-    cl_int ret;
-    cl_context context = (cl_context)g_scheduler->command_queue;
-
-    cl_mem memobj_in  = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*count, NULL, &ret);
-    cl_mem memobj_out = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*count, NULL, &ret);
-    cl_mem data_count = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*count, NULL, &ret);
-    
-    cl_command_queue command_queue = g_scheduler->command_queue;
-    ret = clEnqueueWriteBuffer(command_queue, memobj_in, CL_TRUE, 0,
-                               sizeof(int)*count, indata, 0, NULL, NULL);
-    ret = clEnqueueWriteBuffer(command_queue, data_count, CL_TRUE, 0,
-                               sizeof(count), &count, 0, NULL, NULL);
-    
-    print_data(indata, count, "before");
-    
-    
-    cl_kernel *kernel = task_list[1].kernel;    
-    clSetKernelArg(*kernel, 0, sizeof(cl_mem), (void *)&memobj_in);
-    clSetKernelArg(*kernel, 1, sizeof(cl_mem), (void *)&memobj_out);
-    clSetKernelArg(*kernel, 2, sizeof(cl_mem), (void *)&data_count);
-
-    cl_event ev;
-    clEnqueueTask(command_queue, *kernel, 0, NULL,  &ev);
-    
-    clEnqueueReadBuffer(command_queue, memobj_out, CL_TRUE, 0,
-                        sizeof(int)*count, outdata, 1, &ev, NULL);
-
-
-    print_data(outdata, count, "after");
-    tester(indata,outdata,count);
-
-    delete [] indata;
-    delete [] outdata;
-    delete [] g_scheduler;
-    clReleaseCommandQueue(command_queue);
-    clReleaseContext(context);
-    
-}
-
-int
-main(int argc, char* argv[])
-{   
-    
-    int length = DEFAULT;
-    
-    if (argc > 1) { // if exist arg
-        if(atoi(argv[1])) {// if arg is number
-            length = atoi(argv[1]);
-        }
-    }
-
-    test(length);
-    
-}
--- a/TaskManager/test/GpuRegistTaskTest/Makefile	Fri Aug 24 18:03:12 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
-include ../../Makefile.def
-
-CPPFLAGS += -g -Wall -I../../../include/TaskManager -m$(ABIBIT)
-
-TARGET=GpuRegistTaskTest
-
-
-
-LIBS += ../../libGpuManager.a -framework opencl
-
-GpuRegistTaskTest : GpuRegistTaskTest.o
-	$(CC) $(CFLAGS) -o $@ $? $(LIBS)
-
-clean:
-	rm -rf *.o $(TARGET)
--- a/TaskManager/test/GpuRegistTaskTest/Makefile.orig	Fri Aug 24 18:03:12 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,15 +0,0 @@
-include ../../Makefile.def
-
-CPPFLAGS += -g -Wall -I../../../include/TaskManager -m$(ABIBIT) 
-
-TARGET= GpuRegistTaskTest -framework opencl
-
-$(TARGET) :
-
-LIBS += ../../libGpuManager.a
-
-CpuRegistTaskTest : GpuRegistTaskTest.o
-	$(CC) $(CFLAGS) -o $@ $? $(LIBS)
-
-clean:
-	rm -rf *.o $(TARGET)
--- a/TaskManager/test/GpuRegistTaskTest/twice.cl	Fri Aug 24 18:03:12 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-__kernel void
-twice(__global int *input_data,
-      __global int *output_data,
-      __global int *data_count)
-{
-    int count = *data_count;
-	for (int i = 0; i<count; i++) {
-	    output_data[i] = input_data[i] * 2;
-	}
-        
-}
\ No newline at end of file
--- a/TaskManager/test/GpuRunTest/GpuRunTest.cc	Fri Aug 24 18:03:12 2012 +0900
+++ b/TaskManager/test/GpuRunTest/GpuRunTest.cc	Mon Sep 10 15:04:39 2012 +0900
@@ -54,7 +54,7 @@
 
 void
 tester(int *indata, int *outdata, int num) {
-    
+
     //チェック
     int check = 0;
     for (int c=0; c<num; c++) {
@@ -62,30 +62,31 @@
             check++;
         }
     }
-   
+
     printf("Computed '%d/%d' correct values\n",check,num);
-    
+
 }
 
 
 void
 test(TaskManager *manager) {
-    
+
     int *indata  = new int[length];
     int *outdata = new int[length];
-    
+
     for (int c=0; c < length ;c++) {
         indata[c] = c;
     }
 
     print_data(indata, length, "before");
-    
+
     HTaskPtr twice = manager->create_task(Twice);
 
     twice->set_param(0, (memaddr)length);
     twice->set_inData(0, indata, sizeof (int)*length);
     twice->set_outData(0, outdata, sizeof (int)*length);
     twice->set_cpu(GPU_0);
+    twice->no_flip();
 
     /*
      * set_post() で ppe task を渡せるようにしたい
@@ -93,22 +94,22 @@
     twice->set_post(twice_result, (void*)outdata, (void*)length);
 
     twice->spawn();
-    
+
 }
-    
+
 int
 TMmain(TaskManager *manager, int argc, char* argv[])
 {
     if (init(argc, argv) < 0) {
         return -1;
     }
-    
+
     task_init();
 
     for (int i = 0; i < task; ++i) {
         test(manager);
     }
-    
+
     return 0;
 }
 
--- a/example/OpenCL/twice.cl	Fri Aug 24 18:03:12 2012 +0900
+++ b/example/OpenCL/twice.cl	Mon Sep 10 15:04:39 2012 +0900
@@ -1,11 +1,11 @@
 __kernel void
 twice(__global int *input_data,
       __global int *output_data,
-      __global int *data_count,)
+      __global int *data_count)
 {
+
     int count = *data_count;
 	for (int i = 0; i<count; i++) {
 	    output_data[i] = input_data[i] * 2;
-	}
-        
-}
\ No newline at end of file
+	}        
+}
--- a/example/many_task/Makefile.macosx	Fri Aug 24 18:03:12 2012 +0900
+++ b/example/many_task/Makefile.macosx	Mon Sep 10 15:04:39 2012 +0900
@@ -1,3 +1,4 @@
+
 include ./Makefile.def
 
 
@@ -40,3 +41,4 @@
 	rm -f *~ \#*
 	rm -f ppe/*~ ppe/\#*
 	rm -f spe/*~ spe/\#*
+	rm -f gpu/*~ gpu/\#*
--- a/example/many_task/main.cc	Fri Aug 24 18:03:12 2012 +0900
+++ b/example/many_task/main.cc	Mon Sep 10 15:04:39 2012 +0900
@@ -25,6 +25,7 @@
 static double ed_time;
 
 static int length = 1200;
+static int 
 
 // prototype
 void TMend(TaskManager *);
@@ -65,7 +66,7 @@
             all = 1;
         }
         if (strcmp(argv[i], "-c") == 0 ) {
-            sort_task = SortCompat;
+            sort_task = SortCompat
         }
         if (strcmp(argv[i], "-s") == 0 ) {
             sort_task = SortSimple;
@@ -113,6 +114,7 @@
 
     HTaskPtr restart = manager->create_task(sort_task,0,0,0,0);
     restart->set_param(0,(memaddr)&sorter);
+    //set flip flag
     restart->spawn();
 }
 
--- a/example/word_count/a.txt	Fri Aug 24 18:03:12 2012 +0900
+++ b/example/word_count/a.txt	Mon Sep 10 15:04:39 2012 +0900
@@ -4,8 +4,3 @@
 aaa
 aaa
 
-
-
-
-
-