changeset 1542:9ccfdc408d51 draft

fix gpu word count.but not count line num.
author Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date Sun, 13 Jan 2013 17:55:03 +0900
parents 99c9ed2932a0
children 890cb39418ca
files TaskManager/Gpu/GpuScheduler.cc TaskManager/kernel/ppe/TaskManager.cc TaskManager/kernel/ppe/TaskManager.h TaskManager/kernel/ppe/TaskManagerImpl.cc TaskManager/kernel/ppe/TaskManagerImpl.h TaskManager/test/GpuRunTest/Makefile TaskManager/test/GpuRunTest/task_init.cc example/Bulk/main.cc example/OpenCL/twice.cc example/many_task/Makefile.def example/many_task/Makefile.macosx example/many_task/gpu/QuickSort.cl example/many_task/ppe/QuickSort.cc example/regex/main.cc example/word_count/a.txt example/word_count/gpu/Exec.cl example/word_count/main.cc example/word_count/ppe/Exec.cc
diffstat 18 files changed, 361 insertions(+), 354 deletions(-) [+]
line wrap: on
line diff
--- a/TaskManager/Gpu/GpuScheduler.cc	Tue Dec 18 15:32:43 2012 +0900
+++ b/TaskManager/Gpu/GpuScheduler.cc	Sun Jan 13 17:55:03 2013 +0900
@@ -1,3 +1,4 @@
+#include "TaskManager.h"
 #include "GpuScheduler.h"
 #include "ReferencedDmaManager.h"
 #include "PreRefDmaManager.h"
@@ -91,7 +92,7 @@
             if (tasklist->self) {
                 /*
                  * get flip flag
-                 * flip : When cluculate on input data, to treat this as a output data
+                 * flip : When caluculate on input data, to treat this as a output data
                  */
                 flag = tasklist->self->flag;
             }
@@ -234,8 +235,11 @@
 
     char *log = new char[1024];
     size_t s = 1024;
-    ret = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, s,log,NULL);
-    printf("\n%s\n",log);
+    if(ret<0) {
+        int build_ret = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, s,log,NULL);
+        printf("\n%s\n",log);
+        exit(ret);
+    }
     cl_kernel *kernel = new cl_kernel;
     *kernel = clCreateKernel(program, functionname, &ret);
     task_list[cmd].gputask->kernel = kernel;
--- a/TaskManager/kernel/ppe/TaskManager.cc	Tue Dec 18 15:32:43 2012 +0900
+++ b/TaskManager/kernel/ppe/TaskManager.cc	Sun Jan 13 17:55:03 2013 +0900
@@ -56,13 +56,13 @@
     return m_impl->create_task(cmd,__builtin_return_address(0));
 }
 
-HTaskPtr 
+HTaskPtr
 TaskManager::create_task(int cmd, memaddr r, long rs, memaddr w, long ws)
 {
     return m_impl->create_task(cmd,r,rs,w,ws,__builtin_return_address(0));
 }
 
-HTaskPtr 
+HTaskPtr
 TaskManager::create_task_array(int id, int num_task, int num_param, int num_inData, int num_outData) {
     return m_impl->create_task_array(id, num_task, num_param, num_inData, num_outData,__builtin_return_address(0)) ;
 }
@@ -110,4 +110,9 @@
     return m_impl->get_scheduler();
 }
 
+void
+TaskManager::error(const char* error_message) {
+    printf("%s \n",error_message);
+    exit(1);
+}
 /* end */
--- a/TaskManager/kernel/ppe/TaskManager.h	Tue Dec 18 15:32:43 2012 +0900
+++ b/TaskManager/kernel/ppe/TaskManager.h	Sun Jan 13 17:55:03 2013 +0900
@@ -36,13 +36,13 @@
     void export_task_log() { m_impl->export_task_log(); }
 
     SchedTask *get_schedTask() {
-	return m_impl->schedTaskManager;
+    return m_impl->schedTaskManager;
     }
 
     /* functions */
     void init(int spuIdle, int export_task_log, int useRefDma);
     void finish();
-
+    void error(const char* str);
 private:
     int machineNum;
 }  ;
--- a/TaskManager/kernel/ppe/TaskManagerImpl.cc	Tue Dec 18 15:32:43 2012 +0900
+++ b/TaskManager/kernel/ppe/TaskManagerImpl.cc	Sun Jan 13 17:55:03 2013 +0900
@@ -32,7 +32,7 @@
     // HTask の factory. QueueInfo<HTask> ならなんでもいい。
     htaskImpl = waitTaskQueue ;             // any QueueInfo<HTask>
     // Task の dependency を表現する double linked list. QueueInfo<HTask> とは別に必要。
-    taskQueueImpl = new QueueInfo<TaskQueue>(taskQueuePool); 
+    taskQueueImpl = new QueueInfo<TaskQueue>(taskQueuePool);
 
 }
 
@@ -82,20 +82,20 @@
 
 #ifdef EARLY_TOUCH
         if (rbuf) {
-	    if ((unsigned long)rbuf&0xf) {
-	      printf("Data is not aligned. command = %d, addr = 0x%lx, size = %ld\n",
-		     cmd, (unsigned long)rbuf, r_size);
-	    }
-	    char *p = (char *)rbuf; char b = *p; // これはコンパイラが落としてしまうのではないか...
-	    p = (char *)(rbuf+r_size-1); b += *p;
+        if ((unsigned long)rbuf&0xf) {
+          printf("Data is not aligned. command = %d, addr = 0x%lx, size = %ld\n",
+             cmd, (unsigned long)rbuf, r_size);
+        }
+        char *p = (char *)rbuf; char b = *p; // これはコンパイラが落としてしまうのではないか...
+        p = (char *)(rbuf+r_size-1); b += *p;
         }
         if (wbuf) {
-	    if ((unsigned long)wbuf&0xf) {
-	      printf("Data is not aligned. command = %d, addr = 0x%lx, size = %ld\n",
-		     cmd, (unsigned long)wbuf, w_size);
-	    }
-	    char *p = (char *)wbuf; char b = *p;
-	    p = (char *)(wbuf+w_size-1); b += *p;
+        if ((unsigned long)wbuf&0xf) {
+          printf("Data is not aligned. command = %d, addr = 0x%lx, size = %ld\n",
+             cmd, (unsigned long)wbuf, w_size);
+        }
+        char *p = (char *)wbuf; char b = *p;
+        p = (char *)(wbuf+w_size-1); b += *p;
         }
 #endif
 
@@ -154,22 +154,22 @@
 
 #ifndef NOT_CHECK
 
-	  int flag = 0;
-	  
-	  for (int i = 0; i < MAX_TASK_OBJECT; i++) {
-	    if (entry_cmd[i] == cmd) {
-	      flag = 1;
-	      break;
-	    }
-	  }
+      int flag = 0;
 
-	  if (flag == 0) {
-	    printf("cmd %d is not registered on task_list\n", cmd);
-	    return NULL;
-	  }
-			  
+      for (int i = 0; i < MAX_TASK_OBJECT; i++) {
+        if (entry_cmd[i] == cmd) {
+          flag = 1;
+          break;
+        }
+      }
+
+      if (flag == 0) {
+        printf("cmd %d is not registered on task_list\n", cmd);
+        return NULL;
+      }
+
 #endif
-	  return task_list[cmd].name;
+      return task_list[cmd].name;
     }
     else {
       return NULL;
@@ -177,19 +177,19 @@
 }
 const char *
 TaskManagerImpl::get_task_name(TaskPtr task) {
-	return task != NULL ? get_task_name(task->command) : NULL;
+    return task != NULL ? get_task_name(task->command) : NULL;
 }
 const char *
 TaskManagerImpl::get_task_name(SimpleTaskPtr simple_task) {
-	return simple_task != NULL ? get_task_name(simple_task->command) : NULL;
+    return simple_task != NULL ? get_task_name(simple_task->command) : NULL;
 }
 const char *
 TaskManagerImpl::get_task_name(SchedTaskBase *sched_task) {
-	if (sched_task == NULL) return NULL;
-	if (sched_task->atask != NULL) {
+    if (sched_task == NULL) return NULL;
+    if (sched_task->atask != NULL) {
             return get_task_name(sched_task->atask->command);
-	}
-	return NULL;
+    }
+    return NULL;
 }
 const char *
 TaskManagerImpl::get_task_name(HTaskPtr htask) {
@@ -199,21 +199,21 @@
 }
 const char *
 TaskManagerImpl::get_task_name(HTaskPtr htask, int index) {
-	if (!htask)	return NULL;
-	switch (htask->command) {
-	case TaskArray1:
-		return get_task_name((TaskPtr)htask->rbuf);
-		break;
-	case TaskArray: {
+    if (!htask) return NULL;
+    switch (htask->command) {
+    case TaskArray1:
+        return get_task_name((TaskPtr)htask->rbuf);
+        break;
+    case TaskArray: {
 
-		TaskPtr tmp = (TaskPtr)htask->rbuf;
-		return get_task_name(tmp[0].command);
+        TaskPtr tmp = (TaskPtr)htask->rbuf;
+        return get_task_name(tmp[0].command);
 
-	}
-	default:
-		return get_task_name(htask->command);
-	}
-	return NULL;
+    }
+    default:
+        return get_task_name(htask->command);
+    }
+    return NULL;
 }
 
 /**
@@ -273,23 +273,23 @@
 {
     task_list->cpu_type = type;
     if (machineNum==0)
-	task->cpu_type = CPU_PPE ;
+    task->cpu_type = CPU_PPE ;
     else
-	task->cpu_type = type;
+    task->cpu_type = type;
 }
 
 #if 0
-static void 
+static void
 check_wait(TaskManagerImpl *tm, QueueInfo<TaskQueue> *wait_i) {
     for(TaskQueue *t = wait_i->getFirst(); t; t = wait_i->getNext(t)) {
-	if (!tm->waitTaskQueue->find(t->task)) {
-	  //this->printf("stray waiting task%d %lx\n",t->task->command, (long)t->task);
-	  printf("stray waiting task%d %lx\n",t->task->command, (long)t->task);
-	} else if (tm->activeTaskQueue->find(t->task)) {
-	  //this->printf(" active task%d in waiting queue %lx\n",t->task->command, (long)t->task);
-	  printf(" active task%d in waiting queue %lx\n",t->task->command, (long)t->task);
-	} else
-	    printf(".");
+    if (!tm->waitTaskQueue->find(t->task)) {
+      //this->printf("stray waiting task%d %lx\n",t->task->command, (long)t->task);
+      printf("stray waiting task%d %lx\n",t->task->command, (long)t->task);
+    } else if (tm->activeTaskQueue->find(t->task)) {
+      //this->printf(" active task%d in waiting queue %lx\n",t->task->command, (long)t->task);
+      printf(" active task%d in waiting queue %lx\n",t->task->command, (long)t->task);
+    } else
+        printf(".");
     }
 }
 #endif
@@ -307,24 +307,24 @@
         me->tasklog->finish_time = rdtsc();
 
     while(TaskQueue *p = me->wait_me->poll()) {
-	HTaskPtr you = p->task;
-	QueueInfo<TaskQueue> *wait_i = you->wait_i;
-	// 相手の wait queue から自分(を指しているTaskQueue)を削除
-	wait_i->remove(p->waiter);
-	// queue を free する
-	wait_i->free_(p->waiter);
+    HTaskPtr you = p->task;
+    QueueInfo<TaskQueue> *wait_i = you->wait_i;
+    // 相手の wait queue から自分(を指しているTaskQueue)を削除
+    wait_i->remove(p->waiter);
+    // queue を free する
+    wait_i->free_(p->waiter);
 
-	if (wait_i->empty()) {
-	    wait_queue->remove(you);
-	    append_activeTask(you);
-	}
+    if (wait_i->empty()) {
+        wait_queue->remove(you);
+        append_activeTask(you);
+    }
 
-	wait_i->free_(p);   // p->wait_i, p->wait_me は再利用される
+    wait_i->free_(p);   // p->wait_i, p->wait_me は再利用される
     }
 
     // このTaskList は終わったので、今 free して良いが、TaskListInfo に入っているので、
     // MY_SPE_STATUS_READY 時に、まとめてfree する。FifoTaskManager/CellTaskManager
-    
+
     // me を誰かが持っていて、me が finish した後に、
     // me->wait_for(i) とか、やられると気まずい。
     // 特に、me が他人に再利用されていると。そういう時には、
@@ -334,7 +334,7 @@
 
     me->self = 0;
     if (!me->flag.no_auto_free)
-	htaskImpl->free_(me);
+    htaskImpl->free_(me);
 }
 
 /**
@@ -364,16 +364,16 @@
 }
 
 /**
- @brief htask のTaskListを DMA でCPUに渡すための TaskListQueue に入れる 
- @param htask     
- @param taskList 
+ @brief htask のTaskListを DMA でCPUに渡すための TaskListQueue に入れる
+ @param htask
+ @param taskList
  */
 void
 TaskManagerImpl::set_taskList(HTaskPtr htask, QueueInfo<TaskList> * taskList)
 {
     if (_export_task_log)
         htask->tasklog->execute_time = rdtsc();
-    
+
     TaskListPtr tl = (TaskList*)htask->rbuf;
     while(tl->prev) tl=tl->prev;
     while(tl) {
@@ -384,5 +384,4 @@
 }
 
 
-
 /* end */
--- a/TaskManager/kernel/ppe/TaskManagerImpl.h	Tue Dec 18 15:32:43 2012 +0900
+++ b/TaskManager/kernel/ppe/TaskManagerImpl.h	Sun Jan 13 17:55:03 2013 +0900
@@ -73,46 +73,45 @@
 
     void free_htask(HTaskPtr htask) {
 #if !defined(__SPU__)
-	if (htask->self) {
-	    htask->flag.no_auto_free = 0;
-	    return;
-	}
-	htaskImpl->free_(htask);
+    if (htask->self) {
+        htask->flag.no_auto_free = 0;
+        return;
+    }
+    htaskImpl->free_(htask);
 #endif
     }
 
     void* allocate(int size, int alignment)
     {
 
-	void *buff = 0;
-	if (size==0) return 0;
+    void *buff = 0;
+    if (size==0) return 0;
 #if defined(__SPU__) || ! defined(HAS_POSIX_MEMALIGN)
-	buff =  malloc(size);
+    buff =  malloc(size);
 #else
-	posix_memalign(&buff, alignment, size);
+    posix_memalign(&buff, alignment, size);
 #endif
-	if (buff==0) 
-		get_scheduler()->printf("Can't allocate memory\n");
-	return buff;
+    if (buff==0)
+        get_scheduler()->printf("Can't allocate memory\n");
+    return buff;
     }
 
     void* allocate(int size)
     {
 
-	void *buff = 0;
-	if (size==0) return 0;
+    void *buff = 0;
+    if (size==0) return 0;
 #if defined(__SPU__) || ! defined(HAS_POSIX_MEMALIGN)
-	buff =  malloc(size);
+    buff =  malloc(size);
 #else
-	posix_memalign(&buff, DEFAULT_ALIGNMENT, size);
+    posix_memalign(&buff, DEFAULT_ALIGNMENT, size);
 #endif
         if (buff==0)
-		get_scheduler()->printf("Can't allocate memory\n");
-	return buff;
+        get_scheduler()->printf("Can't allocate memory\n");
+    return buff;
     }
 
     Scheduler* get_scheduler() { return scheduler; }
     void set_scheduler(Scheduler *s) {  scheduler = s; }
 }  __attribute__ ((aligned (DEFAULT_ALIGNMENT)));
-
 #endif
--- a/TaskManager/test/GpuRunTest/Makefile	Tue Dec 18 15:32:43 2012 +0900
+++ b/TaskManager/test/GpuRunTest/Makefile	Sun Jan 13 17:55:03 2013 +0900
@@ -5,7 +5,7 @@
 SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
 OBJS = $(SRCS:.cc=.o)
 
-LIBS += -lGpuManager -framework opencl # `sdl-config --libs`
+LIBS += -lGpuManager -framework opencl `sdl-config --libs`
 
 .SUFFIXES: .cc .o
 
--- a/TaskManager/test/GpuRunTest/task_init.cc	Tue Dec 18 15:32:43 2012 +0900
+++ b/TaskManager/test/GpuRunTest/task_init.cc	Sun Jan 13 17:55:03 2013 +0900
@@ -2,11 +2,6 @@
 #include "GpuScheduler.h"
 
 void
-task_init(void)
-{
-    int cmd = Twice;
-    int dim = 2;
-    size_t *l_work_size = new size_t(dim);
-    GpuNDRangeRegister(cmd, dim, l_work_size);
-    GpuSchedRegister(cmd, "twice.cl", "twice");
+task_init(void) {
+    GpuSchedRegister(Twice, "twice.cl", "twice");
 }
--- a/example/Bulk/main.cc	Tue Dec 18 15:32:43 2012 +0900
+++ b/example/Bulk/main.cc	Sun Jan 13 17:55:03 2013 +0900
@@ -100,9 +100,9 @@
 
     t = 0;
     for(int i = 0;i<task_array_num;i++) {
-	t = twice_main->next_task_array(Twice, t);
-	int block_size = length/block_num;
-	t->set_param(0, (memaddr)block_num);
+        t = twice_main->next_task_array(Twice, t);
+        int block_size = length/block_num;
+        t->set_param(0, (memaddr)block_num);
 
 	for(int j = 0;j<block_num;j++) {
 	    /**
--- a/example/OpenCL/twice.cc	Tue Dec 18 15:32:43 2012 +0900
+++ b/example/OpenCL/twice.cc	Sun Jan 13 17:55:03 2013 +0900
@@ -3,7 +3,6 @@
 #include <stdio.h>
 #include <fcntl.h>
 #include <sys/stat.h>
-
 #define DEFAULT 432
 
 void
@@ -86,6 +85,7 @@
     //メモリバッファに入力データを書き込み
     ret = clEnqueueWriteBuffer(command_queue, data_count, CL_TRUE, 0,
                                sizeof(count), &count, 0, NULL, NULL);
+
     ret = clEnqueueWriteBuffer(command_queue, memobj_in, CL_TRUE, 0,
                                sizeof(int)*count, data, 0, NULL, NULL);
 
--- a/example/many_task/Makefile.def	Tue Dec 18 15:32:43 2012 +0900
+++ b/example/many_task/Makefile.def	Sun Jan 13 17:55:03 2013 +0900
@@ -7,11 +7,12 @@
 CERIUM = ../../../Cerium
 
 OPT = -O
-OPT = -g -O0
+OPT = -g -O2
 # OPT = -g
 CC      = clang++
+CXX = clang++
 CFLAGS  =  -Wall  $(OPT) -DUSE_SIMPLE_TASK
-# CFLAGS  =   -Wall  $(OPT)
+CXXFLAGS  = ${CFLAGS}
 
 INCLUDE = -I${CERIUM}/include/TaskManager -I. -I..
 LIBS = -L${CERIUM}/TaskManager
--- a/example/many_task/Makefile.macosx	Tue Dec 18 15:32:43 2012 +0900
+++ b/example/many_task/Makefile.macosx	Sun Jan 13 17:55:03 2013 +0900
@@ -12,7 +12,7 @@
 TASK_SRCS = $(filter-out $(TASK_DIR)/$(TASK_SRCS_EXCLUDE),$(TASK_SRCS_TMP))
 TASK_OBJS = $(TASK_SRCS:.cc=.o)
 
-CC      = g++
+CC      = clang++
 CC += $(ABI)
 # CFLAGS  = -g -Wall# -O9 #-DDEBUG
 
--- a/example/many_task/gpu/QuickSort.cl	Tue Dec 18 15:32:43 2012 +0900
+++ b/example/many_task/gpu/QuickSort.cl	Sun Jan 13 17:55:03 2013 +0900
@@ -33,14 +33,15 @@
     int stack[1024];
     int sp = 0;
     int p;
-    // bubble_sort(data,begin,end);
 
     while (1) {
-        while (begin < end) {
-            if (end-begin <= 50) {
-                bubble_sort(data,begin,end);
-                break;
-            }
+        while (beegin < end) {
+            /*
+             * if (end-begin <= 50) {
+             *     bubble_sort(data,begin,end);
+             *     break;
+             * }
+            */
             int where = (begin + end) / 2;
             int pivot = data[where].index;
             data[where].index = data[begin].index;
--- a/example/many_task/ppe/QuickSort.cc	Tue Dec 18 15:32:43 2012 +0900
+++ b/example/many_task/ppe/QuickSort.cc	Sun Jan 13 17:55:03 2013 +0900
@@ -70,10 +70,12 @@
 
     while (1) {
         while (begin < end) {
+
             if (end-begin <= 50) {
-                bubble_sort(data, begin, end);
-                break;
+                //bubble_sort(data, begin, end);
+                //break;
             }
+
             int where = (begin + end) / 2;
             int pivot = data[where].index;
             data[where].index = data[begin].index;
--- a/example/regex/main.cc	Tue Dec 18 15:32:43 2012 +0900
+++ b/example/regex/main.cc	Sun Jan 13 17:55:03 2013 +0900
@@ -456,35 +456,35 @@
     char *filename = 0;
     
     for (int i = 1; argv[i]; ++i) {	
-	if (strcmp(argv[i], "-file") == 0) {
-	    filename = argv[i+1];
-	} else if (strcmp(argv[i], "-division") == 0) {
-	    division = atoi(argv[i+1]);
-	} else if (strcmp(argv[i], "-block") == 0) {
-	    blocks = atoi(argv[i+1]);
-	} else if (strcmp(argv[i], "-a") == 0) {
-	    // create task all at once
-	    all = 1;
-	} else if (strcmp(argv[i], "-c") == 0) {
-	    use_task_array = 0;
-	    use_compat = 1;
-	} else if (strcmp(argv[i], "-s") == 0) {
-	    use_task_array = 0;
-	    use_compat = 0;
-	} else if (strcmp(argv[i], "-t") == 0) {
-	    use_task_creater = 1;
-	    use_task_array = 0;
-	    use_compat = 0;
-	} else if (strcmp(argv[i], "-anum") == 0) {
-	    array_task_num = atoi(argv[i+1]);
-	} else if (strcmp(argv[i], "-cpu") == 0) {
-	    spe_num = atoi(argv[i+1]);
-	    if (spe_num==0) spe_num = 1;
-	}
+        if (strcmp(argv[i], "-file") == 0) {
+            filename = argv[i+1];
+        } else if (strcmp(argv[i], "-division") == 0) {
+            division = atoi(argv[i+1]);
+        } else if (strcmp(argv[i], "-block") == 0) {
+            blocks = atoi(argv[i+1]);
+        } else if (strcmp(argv[i], "-a") == 0) {
+            // create task all at once
+            all = 1;
+        } else if (strcmp(argv[i], "-c") == 0) {
+            use_task_array = 0;
+            use_compat = 1;
+        } else if (strcmp(argv[i], "-s") == 0) {
+            use_task_array = 0;
+            use_compat = 0;
+        } else if (strcmp(argv[i], "-t") == 0) {
+            use_task_creater = 1;
+            use_task_array = 0;
+            use_compat = 0;
+        } else if (strcmp(argv[i], "-anum") == 0) {
+            array_task_num = atoi(argv[i+1]);
+        } else if (strcmp(argv[i], "-cpu") == 0) {
+            spe_num = atoi(argv[i+1]);
+            if (spe_num==0) spe_num = 1;
+        }
     }
     if (filename==0) {
         puts(usr_help_str);
-	exit(1);
+        exit(1);
     }
     
     return filename;
--- a/example/word_count/a.txt	Tue Dec 18 15:32:43 2012 +0900
+++ b/example/word_count/a.txt	Sun Jan 13 17:55:03 2013 +0900
@@ -1,6 +1,5 @@
+aaa bbb
+aaa bbb ccc
 aaa
 aaa
 aaa
-aaa
-aaa
-
--- a/example/word_count/gpu/Exec.cl	Tue Dec 18 15:32:43 2012 +0900
+++ b/example/word_count/gpu/Exec.cl	Sun Jan 13 17:55:03 2013 +0900
@@ -1,11 +1,16 @@
 __kernel void
 run(__global int *data_count,
-    __global void *r_buf,
-    __global void *w_buf)
+    __global char *i_data,
+    __global unsigned long long  *o_data)
 {
-    __global char *i_data =  (char *)r_buf;
-    __global unsigned long long *o_data = (unsigned long long*)w_buf;
-    __global unsigned long long *head_tail_flag = o_data +2;
+    /*
+     * Todo:ここの書式もCerium側に合わせる。
+     * 第2、第3引数をvoid *r_buf, void *w_bufにして、以下のような感じ
+     * __global char *i_data =  (char *)r_buf;
+     * __global unsigned long long *o_data = (unsigned long long*)w_buf;
+     * __global unsigned long long *head_tail_flag = o_data +2;
+     */
+    __global  unsigned long long *head_tail_flag = o_data +2;
     int length = data_count[0];
     int word_flag = 0;
     int word_num = 0;
--- a/example/word_count/main.cc	Tue Dec 18 15:32:43 2012 +0900
+++ b/example/word_count/main.cc	Sun Jan 13 17:55:03 2013 +0900
@@ -25,110 +25,108 @@
 int spe_num = 1;
 CPU_TYPE spe_cpu = SPE_ANY;
 const char *usr_help_str = "Usage: ./word_count [-a -c -s] [-cpu spe_num] [-file filename]\n";
+
 typedef struct {
     caddr_t file_mmap;
     off_t size;
 } st_mmap_t;
 
-
 static void simple_task_creater(int in_total_size, int out_total_size,
-                             int command, int in_data_size, int out_data_size,
-                             void *in_data, void *out_data, SchedTask *manager,
-                             HTask *wait_i, HTask *wait_me) {
-
+                                int command, int in_data_size, int out_data_size,
+                                void *in_data, void *out_data, SchedTask *manager,
+                                HTask *wait_i, HTask *wait_me) {
 
-  int in_task_size = 0;
-  int out_task_size = 0;
-  int length = in_data_size/sizeof(char);
+    int in_task_size = 0;
+    int out_task_size = 0;
+    int length = in_data_size/sizeof(char);
 
-  if (in_total_size != 0) {
-    in_task_size = in_total_size / in_data_size;
-    if (in_total_size != in_task_size * in_data_size) {
-      printf("mismatch of in_total_size and in_data_size\n");
+    if (in_total_size != 0) {
+        in_task_size = in_total_size / in_data_size;
+        if (in_total_size != in_task_size * in_data_size) {
+            printf("mismatch of in_total_size and in_data_size\n");
+        }
     }
-  }
 
-  if (out_total_size != 0) {
-    out_task_size = out_total_size / out_data_size;
-    if (out_total_size != out_task_size * out_data_size) {
-      printf("mismatch of out_total_size and out_data_size\n");
+    if (out_total_size != 0) {
+        out_task_size = out_total_size / out_data_size;
+        if (out_total_size != out_task_size * out_data_size) {
+            printf("mismatch of out_total_size and out_data_size\n");
+        }
     }
-  }
 
-  /*in, out の大きい方に合わせるのがいいかな? Taskの数は1Task分に使うデータの大きいほうを取るような仕様がいいかな*/
-  int task_num = (in_task_size > out_task_size) ? in_task_size : out_task_size;
+    /*in, out の大きい方に合わせるのがいいかな? Taskの数は1Task分に使うデータの大きいほうを取るような仕様がいいかな*/
+    int task_num = (in_task_size > out_task_size) ? in_task_size : out_task_size;
 
-  if (task_num == 0) task_num = 1;
+    if (task_num == 0) task_num = 1;
 
-  /*spe分あればいいのかな?*/
+    /*spe分あればいいのかな?*/
 
-  int array_num = spe_num;
-  if (task_num < array_num) {
-    array_num = task_num;
-  }
+    int array_num = spe_num;
+    if (task_num < array_num) {
+        array_num = task_num;
+    }
 
 
-  HTaskPtr *task_array = (HTask**)manager->allocate(sizeof(HTask*)*array_num);
-  TaskPtr *t_exec = (Task**)manager->allocate(sizeof(Task*)*array_num);
-
-  int array_length = task_num / array_num;
-  int rest = task_num % array_num;
-
-  int index = 0;
-
-  for (int k = 0; k < array_num; k++) {
+    HTaskPtr *task_array = (HTask**)manager->allocate(sizeof(HTask*)*array_num);
+    TaskPtr *t_exec = (Task**)manager->allocate(sizeof(Task*)*array_num);
 
-    task_array[k] = manager->create_task_array(command,array_length,0,1,1);
-    t_exec[k] = 0;
+    int array_length = task_num / array_num;
+    int rest = task_num % array_num;
 
-    if (wait_me != 0) {
-      wait_me->wait_for(task_array[k]);
-    }
-    if (wait_i != 0) {
-      task_array[k]->wait_for(wait_i);
-    }
+    int index = 0;
 
-  }
-
-
-  for (int j = 0; j < array_length; j++) {
     for (int k = 0; k < array_num; k++) {
 
-      t_exec[k] = task_array[k]->next_task_array(command,t_exec[k]);
-      t_exec[k]->set_param(0,(memaddr)length);
-      t_exec[k]->set_inData(0,(char*)in_data + index*in_data_size, in_data_size);
-      t_exec[k]->set_outData(0,(char*)out_data + index*out_data_size, out_data_size);
+        task_array[k] = manager->create_task_array(command,array_length,0,1,1);
+        t_exec[k] = 0;
 
-      index++;
+        if (wait_me != 0) {
+            wait_me->wait_for(task_array[k]);
+        }
+        if (wait_i != 0) {
+            task_array[k]->wait_for(wait_i);
+        }
 
     }
-  }
+
 
-  for (int k = 0; k < array_num; k++) {
-    task_array[k]->spawn_task_array(t_exec[k]->next());
-    task_array[k]->set_cpu(spe_cpu);
-    task_array[k]->spawn();
-  }
+    for (int j = 0; j < array_length; j++) {
+        for (int k = 0; k < array_num; k++) {
 
-  for (int k = 0; k < rest; k++) {
-    HTaskPtr t_exec = manager->create_task(command);
-    t_exec->set_param(0,(memaddr)length);
-    t_exec->set_inData(0,(char*)in_data + index*in_data_size, in_data_size);
-    t_exec->set_outData(0,(char*)out_data + index*out_data_size, out_data_size);
+            t_exec[k] = task_array[k]->next_task_array(command,t_exec[k]);
+            t_exec[k]->set_param(0,(memaddr)length);
+            t_exec[k]->set_inData(0,(char*)in_data + index*in_data_size, in_data_size);
+            t_exec[k]->set_outData(0,(char*)out_data + index*out_data_size, out_data_size);
+            index++;
 
-    index++;
+        }
+    }
 
-    if (wait_me != 0) {
-      wait_me->wait_for(t_exec);
-    }
-    if (wait_i != 0) {
-      t_exec->wait_for(wait_i);
+    for (int k = 0; k < array_num; k++) {
+        task_array[k]->spawn_task_array(t_exec[k]->next());
+        task_array[k]->set_cpu(spe_cpu);
+        task_array[k]->spawn();
     }
 
-    t_exec->spawn();
-    t_exec->set_cpu(spe_cpu);
+    for (int k = 0; k < rest; k++) {
+        HTaskPtr t_exec = manager->create_task(command);
+        t_exec->set_param(0,(memaddr)length);
+        t_exec->set_inData(0,(char*)in_data + index*in_data_size, in_data_size);
+        t_exec->set_outData(0,(char*)out_data + index*out_data_size, out_data_size);
+
+        index++;
 
-  }
+        if (wait_me != 0) {
+            wait_me->wait_for(t_exec);
+        }
+        if (wait_i != 0) {
+            t_exec->wait_for(wait_i);
+        }
+
+        t_exec->set_cpu(spe_cpu);
+        t_exec->spawn();
+
+    }
 
 
 }
@@ -184,150 +182,150 @@
 run_tasks(SchedTask *manager, WordCount *w, int task_count, HTaskPtr t_next, int size)
 {
 
-  if (task_count < array_task_num) {
-    array_task_num = task_count;
-    if (task_count<=0) return;
-  }
+    if (task_count < array_task_num) {
+        array_task_num = task_count;
+        if (task_count<=0) return;
+    }
+
+    if (use_task_creater) {
+        simple_task_creater(w->file_size, w->division_out_size * w->task_num, TASK_EXEC, w->division_size, w->division_out_size,
+                            w->file_mmap, w->o_data, manager, w->t_print, 0);
+    }
+
+    if (use_task_array) {
 
-  if (use_task_creater) {
-    simple_task_creater(w->file_size, w->division_out_size * w->task_num, TASK_EXEC, w->division_size, w->division_out_size,
-                        w->file_mmap, w->o_data, manager, w->t_print, 0);
-  }
+        int spl = spe_num * array_task_num;
+        int loop = (task_count + spl - 1) / spl;
+
+        for (int i = 0; i < loop; i += 1) {
 
-  if (use_task_array) {
+            if (spl > w->task_num) {
+                if (w->task_num >= spe_num) {
+                    array_task_num = w->task_num / spe_num;
+                } else {
 
-    int spl = spe_num * array_task_num;
-    int loop = (task_count + spl - 1) / spl;
-
-    for (int i = 0; i < loop; i += 1) {
+                    int task_num = w->task_num;
 
-      if (spl > w->task_num) {
-        if (w->task_num >= spe_num) {
-          array_task_num = w->task_num / spe_num;
-        } else {
+                    for (int j = 0; j < task_num; j++) {
+                        HTask *h_exec = 0;
+                        int i = w->task_spwaned++;
 
-          int task_num = w->task_num;
+                        if (w->size < size) size = w->size;
 
-          for (int j = 0; j < task_num; j++) {
-            HTask *h_exec = 0;
-            int i = w->task_spwaned++;
+                        h_exec = manager->create_task(TASK_EXEC,
+                                                      (memaddr)(w->file_mmap + i*w->division_size), size,
+                                                      (memaddr)(w->o_data + i*w->out_size), w->division_out_size);
 
-            if (w->size < size) size = w->size;
+                        if (all) {
+                            w->t_print->wait_for(h_exec);
+                        } else {
+                            t_next->wait_for(h_exec);
+                        }
+
+                        h_exec->set_cpu(spe_cpu);
+                        h_exec->spawn();
 
-            h_exec = manager->create_task(TASK_EXEC,
-                                          (memaddr)(w->file_mmap + i*w->division_size), size,
-                                          (memaddr)(w->o_data + i*w->out_size), w->division_out_size);
+                        w->size -= size;
+                        if (w->size == 0) break;
+                        w->task_num--;
 
-            if (all) {
-              w->t_print->wait_for(h_exec);
-            } else {
-              t_next->wait_for(h_exec);
+                    }
+
+                    return;
+                }
             }
 
-            h_exec->set_cpu(spe_cpu);
-            h_exec->spawn();
+            //ここから
+            HTask **task_array = (HTask**)manager->allocate(sizeof(HTask*)*spe_num);
+            Task **t_exec = (Task**)manager->allocate(sizeof(Task*)*spe_num);
 
-            w->size -= size;
-                if (w->size == 0) break;
-            w->task_num--;
-
-          }
-
-          return;
-        }
-      }
+            for (int k = 0; k < spe_num; k++) {
+                task_array[k] = manager->create_task_array(TASK_EXEC,array_task_num,1,1,1);
+                t_exec[k] = 0;
+                if (all) {
+                    w->t_print->wait_for(task_array[k]);
+                } else {
+                    t_next->wait_for(task_array[k]);
+                }
+            }
 
 
-      HTask **task_array = (HTask**)manager->allocate(sizeof(HTask*)*spe_num);
-      Task **t_exec = (Task**)manager->allocate(sizeof(Task*)*spe_num);
+            for (int j = 0; j < array_task_num; j++) {
+                for (int k = 0; k < spe_num; k++) {
+
+                    int a = w->task_spwaned++;
 
-      for (int k = 0; k < spe_num; k++) {
-        task_array[k] = manager->create_task_array(TASK_EXEC,array_task_num,0,1,1);
-        t_exec[k] = 0;
-        if (all) {
-          w->t_print->wait_for(task_array[k]);
-        } else {
-          t_next->wait_for(task_array[k]);
-        }
-      }
+                    if (w->size < size) size = w->size;
 
-
-      for (int j = 0; j < array_task_num; j++) {
-        for (int k = 0; k < spe_num; k++) {
-
-          int a = w->task_spwaned++;
+                    int length = size/sizeof(char);
+                    t_exec[k] = task_array[k]->next_task_array(TASK_EXEC,t_exec[k]);
+                    t_exec[k]->set_param(0,(memaddr)length);
+                    t_exec[k]->set_inData(0,w->file_mmap + a*w->division_size, size);
+                    t_exec[k]->set_outData(0,w->o_data + a*w->out_size, w->division_out_size);
 
-          if (w->size < size) size = w->size;
-
-          int length = size/sizeof(char);
-          t_exec[k] = task_array[k]->next_task_array(TASK_EXEC,t_exec[k]);
-          t_exec[k]->set_param(0,(memaddr)length);
-          t_exec[k]->set_inData(0,w->file_mmap + a*w->division_size, size);
-          t_exec[k]->set_outData(0,w->o_data + a*w->out_size, w->division_out_size);
+                    w->size -= size;
+                    w->task_num--;
+                }
+            }
 
-          w->size -= size;
-          w->task_num--;
-        }
-      }
+            for (int k = 0; k < spe_num; k++) {
+                task_array[k]->spawn_task_array(t_exec[k]->next());
+                task_array[k]->set_cpu(spe_cpu);
+                task_array[k]->spawn();
+            }
 
-      for (int k = 0; k < spe_num; k++) {
-        task_array[k]->spawn_task_array(t_exec[k]->next());
-        task_array[k]->set_cpu(spe_cpu);
-        task_array[k]->spawn();
-      }
+        }
+
+        return;
 
     }
 
-    return;
 
-  }
-
-
-  for (int i = 0; i < task_count; i += array_task_num) {
+    for (int i = 0; i < task_count; i += array_task_num) {
 
-    HTask *h_exec = 0;
-    for (int j = 0; j < array_task_num; j++) {
-        int i = w->task_spwaned++;
-        if (w->size < size) size = w->size;
-        int length = size/sizeof(char);
-        if (size==0) break;
+        HTask *h_exec = 0;
+        for (int j = 0; j < array_task_num; j++) {
+            int i = w->task_spwaned++;
+            if (w->size < size) size = w->size;
+            int length = size/sizeof(char);
+            if (size==0) break;
 
-        if (use_compat) {
-            h_exec = manager->create_task(TASK_EXEC);
-            h_exec->set_param(0,(memaddr)length);
-            h_exec->set_inData(0,w->file_mmap + i*w->division_size, size);
-            h_exec->set_outData(0,w->o_data + i*w->out_size, w->division_out_size);
+            if (use_compat) {
+                h_exec = manager->create_task(TASK_EXEC);
+                h_exec->set_param(0,(memaddr)length);
+                h_exec->set_inData(0,w->file_mmap + i*w->division_size, size);
+                h_exec->set_outData(0,w->o_data + i*w->out_size, w->division_out_size);
 
 
-            if (all) {
-              w->t_print->wait_for(h_exec);
-            } else {
-              t_next->wait_for(h_exec);
-            }
+                if (all) {
+                    w->t_print->wait_for(h_exec);
+                } else {
+                    t_next->wait_for(h_exec);
+                }
 
-            h_exec->set_cpu(spe_cpu);
-            h_exec->spawn();
+                h_exec->set_cpu(spe_cpu);
+                h_exec->spawn();
 
-        } else {
-            h_exec = manager->create_task(TASK_EXEC,
-                (memaddr)(w->file_mmap + i*w->division_size), size,
-                (memaddr)(w->o_data + i*w->out_size), w->division_out_size);
+            } else {
+                h_exec = manager->create_task(TASK_EXEC,
+                                              (memaddr)(w->file_mmap + i*w->division_size), size,
+                                              (memaddr)(w->o_data + i*w->out_size), w->division_out_size);
 
-            if (all) {
-              w->t_print->wait_for(h_exec);
-            } else {
-              t_next->wait_for(h_exec);
-            }
+                if (all) {
+                    w->t_print->wait_for(h_exec);
+                } else {
+                    t_next->wait_for(h_exec);
+                }
 
-            h_exec->set_cpu(spe_cpu);
-            h_exec->spawn();
+                h_exec->set_cpu(spe_cpu);
+                h_exec->spawn();
+            }
+            w->size -= size;
+            w->task_num--;
         }
-        w->size -= size;
-        w->task_num--;
+
     }
 
-  }
-
 }
 
 /**
@@ -352,7 +350,7 @@
         // printf("run16 last %d\n",w->task_num);
     } else {
         HTaskPtr t_next = manager->create_task(RUN_TASK_BLOCKS,
-            (memaddr)&w->self,sizeof(memaddr),0,0);
+                                               (memaddr)&w->self,sizeof(memaddr),0,0);
         w->t_print->wait_for(t_next);
 
         run_tasks(manager,w, w->task_blocks, t_next, w->division_size);
@@ -424,15 +422,14 @@
     /*各SPEの結果を合計して出力するタスク*/
 
     t_print = manager->create_task(TASK_PRINT,
-        (memaddr)&w->self,sizeof(memaddr),0,0);
-
+                                   (memaddr)&w->self,sizeof(memaddr),0,0);
     w->t_print = t_print;
 
     for(int i = 0;i<20;i++) {
         /* Task を task_blocks ずつ起動する Task */
         /* serialize されていると仮定する... */
         HTaskPtr t_exec = manager->create_task(RUN_TASK_BLOCKS,
-            (memaddr)&w->self,sizeof(memaddr),0,0);
+                                               (memaddr)&w->self,sizeof(memaddr),0,0);
         t_print->wait_for(t_exec);
         t_exec->spawn();
     }
--- a/example/word_count/ppe/Exec.cc	Tue Dec 18 15:32:43 2012 +0900
+++ b/example/word_count/ppe/Exec.cc	Sun Jan 13 17:55:03 2013 +0900
@@ -17,20 +17,20 @@
     int word_num = 0;
     int line_num = 0;
     int i = 0;
-    
+
     head_tail_flag[0] = (i_data[0] != 0x20) && (i_data[0] != 0x0A);
     word_num -= 1-head_tail_flag[0];
 
     for (; i < length; i++) {
-	if (i_data[i] == 0x20) {
-	    word_flag = 1;
-	} else if (i_data[i] == 0x0A) {
-	    line_num += 1;
-	    word_flag = 1;
-	} else {
-	    word_num += word_flag;
-	    word_flag = 0;
-	}
+        if (i_data[i] == 0x20) { // 空白
+            word_flag = 1;
+        } else if (i_data[i] == 0x0A) { // 改行
+            line_num += 1;
+            word_flag = 1;
+        } else {
+            word_num += word_flag;
+            word_flag = 0;
+        }
     }
 
     word_num += word_flag;