changeset 1544:5c4e3f0d372a draft

many_task add task array
author Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date Thu, 31 Jan 2013 18:47:13 +0900
parents 890cb39418ca
children d9eb89610733
files TaskManager/Gpu/GpuScheduler.cc example/flip/main.cc example/flip/task_init.cc example/flip/twice.cl example/many_task/Makefile.gpu example/many_task/Makefile.macosx example/many_task/gpu/QuickSort.cl example/many_task/main.cc example/many_task/sort.cc example/many_task/sort.h example/many_task/sort_ta.cc example/many_task/task_init.cc example/word_count/a.txt example/word_count/gpu/Exec.cl example/word_count/main.cc example/word_count/ppe/Print.cc
diffstat 16 files changed, 338 insertions(+), 91 deletions(-) [+]
line wrap: on
line diff
--- a/TaskManager/Gpu/GpuScheduler.cc	Sun Jan 13 22:53:50 2013 +0900
+++ b/TaskManager/Gpu/GpuScheduler.cc	Thu Jan 31 18:47:13 2013 +0900
@@ -115,15 +115,15 @@
                     const char *msg=convert_error_status(ret);
                     error(msg);
                 }
-                
+
                 ret = clSetKernelArg(kernel, param, sizeof(memaddr),(void *)&memparam);
                 if (ret<0) {
                     const char *msg=convert_error_status(ret);
                     error(msg);
                 }
-                
+
                 param++;
-                
+
                 cl_mem_flags mem_flag = CL_MEM_READ_ONLY;
                 cl_mem *memin = new cl_mem[nextTask->inData_count];
                 if (!flag.flip) { // set input data when not flip
@@ -146,8 +146,15 @@
                     }
                 }
 
-                cl_mem *memout = new cl_mem[nextTask->outData_count];
-                cl_mem_flags out_mem_flag = flag.flip? CL_MEM_READ_WRITE : CL_MEM_WRITE_ONLY;
+                cl_mem *memout;
+                cl_mem_flags out_mem_flag;
+                if (flag.flip) {
+                    memout = new cl_mem[nextTask->inData_count];
+                    out_mem_flag = CL_MEM_READ_WRITE;
+                } else {
+                    memout = new cl_mem[nextTask->outData_count];
+                    out_mem_flag = CL_MEM_WRITE_ONLY;
+                }
 
 
                 for(int i=0;i<nextTask->outData_count;i++) { // set output data
--- a/example/flip/main.cc	Sun Jan 13 22:53:50 2013 +0900
+++ b/example/flip/main.cc	Thu Jan 31 18:47:13 2013 +0900
@@ -83,7 +83,7 @@
 
     twice->set_param(0, (memaddr)length);
     twice->set_inData(0, indata, sizeof (int)*length);
-    twice->set_outData(0, indata, sizeof (int)*length);
+    //    twice->set_outData(0, indata, sizeof (int)*length);
     twice->set_cpu(GPU_0);
     twice->flip();
 
--- a/example/flip/task_init.cc	Sun Jan 13 22:53:50 2013 +0900
+++ b/example/flip/task_init.cc	Thu Jan 31 18:47:13 2013 +0900
@@ -4,9 +4,5 @@
 void
 task_init(void)
 {
-    int cmd = Twice;
-    int dim = 2;
-    size_t *l_work_size = new size_t(dim);
-    GpuNDRangeRegister(cmd, dim, l_work_size);
-    GpuSchedRegister(cmd, "twice.cl", "twice");
+    GpuSchedRegister(Twice, "twice.cl", "twice");
 }
--- a/example/flip/twice.cl	Sun Jan 13 22:53:50 2013 +0900
+++ b/example/flip/twice.cl	Thu Jan 31 18:47:13 2013 +0900
@@ -1,12 +1,12 @@
 __kernel void
 twice(__constant int *data_count,
-      __global int *input_data)
-      //__global int *output_data)
+      __global int *input_data,
+      __global int *output_data)
 {
     long count = (long)data_count[0];
-        for (int i = 0; i<count; i++) {
-            // output_data[i] = 2*input_data[i];
-            input_data[i] *= 2;
-        }
+    for (int i = 0; i<count; i++) {
+        output_data[i] = 2*input_data[i];
+        //input_data[i] *= 2;
+    }
 
 }
--- a/example/many_task/Makefile.gpu	Sun Jan 13 22:53:50 2013 +0900
+++ b/example/many_task/Makefile.gpu	Thu Jan 31 18:47:13 2013 +0900
@@ -1,7 +1,7 @@
 include ./Makefile.def
 
 SRCS_TMP = $(wildcard *.cc)
-SRCS_EXCLUDE = sort_test.cc gpu/gpu_task_init.cc # 除外するファイルを書く
+SRCS_EXCLUDE = sort-compat.cc sort_test.cc gpu/gpu_task_init.cc # 除外するファイルを書く
 SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
 OBJS = $(SRCS:.cc=.o)
 
--- a/example/many_task/Makefile.macosx	Sun Jan 13 22:53:50 2013 +0900
+++ b/example/many_task/Makefile.macosx	Thu Jan 31 18:47:13 2013 +0900
@@ -2,7 +2,7 @@
 
 
 SRCS_TMP = $(wildcard *.cc)
-SRCS_EXCLUDE = sort_test.cc # 除外するファイルを書く
+SRCS_EXCLUDE = sort-compat.cc sort_test.cc # 除外するファイルを書く
 SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
 OBJS = $(SRCS:.cc=.o)
 
--- a/example/many_task/gpu/QuickSort.cl	Sun Jan 13 22:53:50 2013 +0900
+++ b/example/many_task/gpu/QuickSort.cl	Thu Jan 31 18:47:13 2013 +0900
@@ -35,7 +35,7 @@
     int p;
 
     while (1) {
-        while (beegin < end) {
+        while (begin < end) {
             /*
              * if (end-begin <= 50) {
              *     bubble_sort(data,begin,end);
--- a/example/many_task/main.cc	Sun Jan 13 22:53:50 2013 +0900
+++ b/example/many_task/main.cc	Thu Jan 31 18:47:13 2013 +0900
@@ -109,8 +109,8 @@
 
     sorter.split_num = get_split_num(sorter.data_length, sorter.cpuNum); // data の分割数
     int half_num = sorter.split_num-1;
-    sorter.fsort = (HTaskPtr*)manager->allocate(sizeof(HTaskPtr)*sorter.split_num);
-    sorter.bsort = (HTaskPtr*)manager->allocate(sizeof(HTaskPtr)*half_num);
+    sorter.fsort = (Task**)manager->allocate(sizeof(Task*)*sorter.split_num);
+    sorter.bsort = (Task**)manager->allocate(sizeof(Task*)*half_num);
     memset((void*)sorter.bsort,0, sizeof(HTaskPtr)*half_num);
 
     for (int i = 0; i < length; i++) {
--- a/example/many_task/sort.cc	Sun Jan 13 22:53:50 2013 +0900
+++ b/example/many_task/sort.cc	Thu Jan 31 18:47:13 2013 +0900
@@ -58,88 +58,86 @@
         return 0;
     }
 
+    HTask **task_array_f = (HTask**)manager->allocate(sizeof(HTask*)*ta);
+    HTask **task_array_b = (HTask**)manager->allocate(sizeof(HTask*)*half_num);
 
-
+    for (int i = 0; i < s->split_num;i++) {
+        task_array_f[i] = manager->create_task_array(QUICK_SORT, task_array_num,1,1,1);
+        s->fsort[i]=0;
+    }
+    for (int i = 0; i<half_num;i++) {
+        task_array_b[i] = manager->create_task_array(QUICK_SORT, task_array_num,1,1,1);
+        s->bsort[i]=0;
+    }
     for (int i = 0; i < s->split_num-1; i++) {
-        if (task_array_num) {
-            s->fsort[i] = manager->create_task_array(QUICK_SORT, task_array_num, block_num+1, block_num, block_num);
-        } else {
-            s->fsort[i] = manager->create_task(QUICK_SORT,
-                                               (memaddr)&s->data[i*block_num], sizeof(Data)*block_num,
-                                               (memaddr)&s->data[i*block_num], sizeof(Data)*block_num);
-        }
-
-        s->fsort[i]->flip();
-
+        s->fsort[i] = task_array_f[i]->next_task_array(QUICK_SORT,s->fsort[i]);
+        s->fsort[i]->set_param(0,(memaddr)block_num);
+        s->fsort[i]->set_inData(0,(memaddr)&s->data[i*block_num], sizeof(Data)*block_num);
         if (i>0 && s->bsort[i-1]) {
-            s->fsort[i]->wait_for(s->bsort[i-1]);
+            task_array_f[i]->wait_for(task_array_b[i-1]);
         }
         if (i<s->split_num-2 && s->bsort[i]) {
-            s->fsort[i]->wait_for(s->bsort[i]);
+            task_array_f[i]->wait_for(task_array_b[i]);
         }
-        s->fsort[i]->set_cpu(spe_cpu);
-        s->fsort[i]->set_param(0,(memaddr)block_num);
     }
 
     // 最後の block は端数なので last_block_num を使う
     {
+
         int i = s->split_num-1;
 
-        s->fsort[i] = manager->create_task(QUICK_SORT,
-            (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num,
-            (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num);
-        s->fsort[i]->flip();
+        s->fsort[i] = task_array_f[i]->next_task_array(QUICK_SORT,s->fsort[i]);
+        s->fsort[i]->set_param(0,(memaddr)last_block_num);
+        s->fsort[i]->set_inData(0,(memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num);
         if (i>0 && s->bsort[i-1]) {
-            s->fsort[i]->wait_for(s->bsort[i-1]);
+            task_array_f[i]->wait_for(task_array_b[i-1]);
         }
-        s->fsort[i]->set_cpu(spe_cpu);
-        s->fsort[i]->set_param(0,(memaddr)last_block_num);
    }
 
     if (s->split_num > 1) {
 
         for (int i = 0; i < half_num-1; i++) {
-            if (s->bsort[i]) manager->free_htask(s->bsort[i]);
-            s->bsort[i] = manager->create_task(QUICK_SORT,
-                (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num,
-                (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num);
-            s->bsort[i]->flip();
-            s->bsort[i]->set_cpu(spe_cpu);
+            if (s->bsort[i]) s->bsort[i]=0;
+            s->bsort[i] = task_array_b[i]->next_task_array(QUICK_SORT,s->bsort[i]);
+            s->bsort[i]->set_inData(0,(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num);
             s->bsort[i]->set_param(0,(memaddr)block_num);
         }
 
         {
             int i = half_num-1;
 
-            if (s->bsort[i]) manager->free_htask(s->bsort[i]);
-            s->bsort[i] = manager->create_task(QUICK_SORT,
-                (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num,
-                (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num);
-            s->bsort[i]->flip();
-            s->bsort[i]->set_cpu(spe_cpu);
+            if (s->bsort[i]) s->bsort[i]=0;
+            s->bsort[i] = task_array_b[i]->next_task_array(QUICK_SORT,s->bsort[i]);
+            s->bsort[i]->set_inData(0,(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num);
             s->bsort[i]->set_param(0,(memaddr)last_half_block_num);
         }
 
         for (int i = 0; i < half_num; i++) {
-            s->bsort[i]->wait_for(s->fsort[i]);
-            s->bsort[i]->wait_for(s->fsort[i+1]);
-            s->bsort[i]->no_auto_free();
-            s->bsort[i]->spawn();
+            task_array_b[i]->wait_for(task_array_f[i]);
+            task_array_b[i]->wait_for(task_array_f[i+1]);
+            task_array_b[i]->no_auto_free();
+            task_array_b[i]->spawn_task_array(s->bsort[i]->next());
+            task_array_b[i]->set_cpu(spe_cpu);
+            task_array_b[i]->flip();
+            task_array_b[i]->spawn();
         }
     }
 
     HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0);
     restart->set_param(0,(memaddr)s);
-    if (!all) restart->wait_for(s->fsort[0]);
+    if (!all) restart->wait_for(task_array_f[0]);
     for (int i = 0; i < s->split_num; i++) {
-        s->fsort[i]->spawn();
+        task_array_f[i]->spawn_task_array(s->fsort[i]->next());
+        task_array_f[i]->set_cpu(spe_cpu);
+        task_array_f[i]->flip();
+        task_array_f[i]->spawn();
     }
     if (sort_count == 1) {
         // last loop wait for all task
         // we should not need this?
         for (int i = 0; i < half_num; i++) {
-            restart->wait_for(s->bsort[i]);
-            s->bsort[i]->auto_free();
+            restart->wait_for(task_array_b[i]);
+            task_array_b[i]->auto_free();
         }
     }
     restart->spawn();
--- a/example/many_task/sort.h	Sun Jan 13 22:53:50 2013 +0900
+++ b/example/many_task/sort.h	Thu Jan 31 18:47:13 2013 +0900
@@ -10,8 +10,8 @@
     int data_length;
     int split_num;
     long cpuNum;
-    HTaskPtr *fsort;
-    HTaskPtr *bsort;
+    Task **fsort;
+    Task **bsort;
     DataPtr data;
 } Sort, *SortPtr;
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/many_task/sort_ta.cc	Thu Jan 31 18:47:13 2013 +0900
@@ -0,0 +1,232 @@
+#include "TaskManager.h"
+#include "SchedTask.h"
+#include "sort.h"
+#include "Func.h"
+#include <string.h>
+
+extern int get_split_num(int len, int num);
+extern int all;  // allocate task at once
+extern CPU_TYPE spe_cpu ;
+extern int task_array_num;
+
+/**
+ * 一つの block にある data の数が MAX_BLOCK_SIZE 超えないような
+ * len の分割数を返す
+ *
+ * @param  len  sort する data の総数
+ * @param  num  使用する SPE の数
+ *
+ * @return data の分割数
+ *
+ * TODO:
+ *   len が num 以下とか考えてません
+ */
+int
+get_split_num(int len, int num)
+{
+    if (len / num < MAX_BLOCK_SIZE) {
+        return num;
+    } else {
+        // 切り上げ
+        return (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE;
+    }
+}
+
+
+/**
+ * btask が全て終了したら、再び sort_start を実行する
+ * @param d 生成された btask の数
+ */
+
+SchedDefineTask1(SortSimple, sort_start );
+
+static int
+sort_start(SchedTask *manager, void *d, void *e)
+{
+    Sort *s =  (Sort*)manager->get_param(0);
+    int half_num = s->split_num-1;
+    static int sort_count = s->split_num; // sort 完了に必要な回数
+
+    // 一つのタスクで sort する data 数
+    int block_num = (s->data_length + s->split_num -1)/s->split_num;
+    int half_block_num = block_num/2;
+
+    int last_block_num = s->data_length - (s->split_num-1)*block_num;
+    int last_half_block_num = half_block_num+(last_block_num/2);
+
+    if (--sort_count < 0) {
+        return 0;
+    }
+
+    if (task_array_num > 0) {
+        // run task array
+        HTask **task_array_f = (HTask**)manager->allocate(sizeof(HTask*)*s->split_num);
+        HTask **task_array_b = (HTask**)manager->allocate(sizeof(HTask*)*s->split_num);
+
+        for (int i = 0; i < s->split_num;i++) {
+            task_array_f[i] = manager->create_task_array(QUICK_SORT, task_array_num,1,1,1);
+            s->fsort[i]=0;
+        }
+        for (int i = 0; i<half_num;i++) {
+            task_array_b[i] = manager->create_task_array(QUICK_SORT, task_array_num,1,1,1);
+            s->bsort[i]=0;
+        }
+        for (int i = 0; i < s->split_num-1; i++) {
+            s->fsort[i] = task_array_f[i]->next_task_array(QUICK_SORT,s->fsort[i]);
+            s->fsort[i]->set_param(0,(memaddr)block_num);
+            s->fsort[i]->set_inData(0,(memaddr)&s->data[i*block_num], sizeof(Data)*block_num);
+            if (i>0 && s->bsort[i-1]) {
+                task_array_f[i]->wait_for(task_array_b[i-1]);
+            }
+            if (i<s->split_num-2 && s->bsort[i]) {
+                task_array_f[i]->wait_for(task_array_b[i]);
+            }
+        }
+
+        // 最後の block は端数なので last_block_num を使う
+        {
+
+            int i = s->split_num-1;
+
+            s->fsort[i] = task_array_f[i]->next_task_array(QUICK_SORT,s->fsort[i]);
+            s->fsort[i]->set_param(0,(memaddr)last_block_num);
+            s->fsort[i]->set_inData(0,(memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num);
+            if (i>0 && s->bsort[i-1]) {
+                task_array_f[i]->wait_for(task_array_b[i-1]);
+            }
+        }
+
+        if (s->split_num > 1) {
+
+            for (int i = 0; i < half_num-1; i++) {
+                if (s->bsort[i]) s->bsort[i]=0;
+                s->bsort[i] = task_array_b[i]->next_task_array(QUICK_SORT,s->bsort[i]);
+                s->bsort[i]->set_inData(0,(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num);
+                s->bsort[i]->set_param(0,(memaddr)block_num);
+            }
+
+            {
+                int i = half_num-1;
+
+                if (s->bsort[i]) s->bsort[i]=0;
+                s->bsort[i] = task_array_b[i]->next_task_array(QUICK_SORT,s->bsort[i]);
+                s->bsort[i]->set_inData(0,(memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num);
+                s->bsort[i]->set_param(0,(memaddr)last_half_block_num);
+            }
+
+            for (int i = 0; i < half_num; i++) {
+                task_array_b[i]->wait_for(task_array_f[i]);
+                task_array_b[i]->wait_for(task_array_f[i+1]);
+                task_array_b[i]->no_auto_free();
+                task_array_b[i]->spawn_task_array(s->bsort[i]->next());
+                task_array_b[i]->set_cpu(spe_cpu);
+                task_array_b[i]->flip();
+                task_array_b[i]->spawn();
+            }
+        }
+
+        HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0);
+        restart->set_param(0,(memaddr)s);
+        if (!all) restart->wait_for(task_array_f[0]);
+        for (int i = 0; i < s->split_num; i++) {
+            task_array_f[i]->spawn_task_array(s->fsort[i]->next());
+            task_array_f[i]->set_cpu(spe_cpu);
+            task_array_f[i]->flip();
+            task_array_f[i]->spawn();
+        }
+        if (sort_count == 1) {
+            // last loop wait for all task
+            // we should not need this?
+            for (int i = 0; i < half_num; i++) {
+                restart->wait_for(task_array_b[i]);
+                task_array_b[i]->auto_free();
+            }
+        }
+        restart->spawn();
+    } else {
+
+        for (int i = 0; i < s->split_num-1; i++) {
+            s->fsort[i] = manager->create_task(QUICK_SORT,
+                                               (memaddr)&s->data[i*block_num], sizeof(Data)*block_num,
+                                               (memaddr)&s->data[i*block_num], sizeof(Data)*block_num);
+
+            s->fsort[i]->flip();
+
+            if (i>0 && s->bsort[i-1]) {
+                s->fsort[i]->wait_for(s->bsort[i-1]);
+            }
+            if (i<s->split_num-2 && s->bsort[i]) {
+                s->fsort[i]->wait_for(s->bsort[i]);
+            }
+            s->fsort[i]->set_cpu(spe_cpu);
+            s->fsort[i]->set_param(0,(memaddr)block_num);
+        }
+
+        // 最後の block は端数なので last_block_num を使う
+        {
+            int i = s->split_num-1;
+
+            s->fsort[i] = manager->create_task(QUICK_SORT,
+                                               (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num,
+                                               (memaddr)&s->data[i*block_num], sizeof(Data)*last_block_num);
+            s->fsort[i]->flip();
+            if (i>0 && s->bsort[i-1]) {
+                s->fsort[i]->wait_for(s->bsort[i-1]);
+            }
+            s->fsort[i]->set_cpu(spe_cpu);
+            s->fsort[i]->set_param(0,(memaddr)last_block_num);
+        }
+
+        if (s->split_num > 1) {
+
+            for (int i = 0; i < half_num-1; i++) {
+                if (s->bsort[i]) manager->free_htask(s->bsort[i]);
+                s->bsort[i] = manager->create_task(QUICK_SORT,
+                                                   (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num,
+                                                   (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*block_num);
+                s->bsort[i]->flip();
+                s->bsort[i]->set_cpu(spe_cpu);
+                s->bsort[i]->set_param(0,(memaddr)block_num);
+            }
+
+            {
+                int i = half_num-1;
+
+                if (s->bsort[i]) manager->free_htask(s->bsort[i]);
+                s->bsort[i] = manager->create_task(QUICK_SORT,
+                                                   (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num,
+                                                   (memaddr)&s->data[i*block_num+half_block_num], sizeof(Data)*last_half_block_num);
+                s->bsort[i]->flip();
+                s->bsort[i]->set_cpu(spe_cpu);
+                s->bsort[i]->set_param(0,(memaddr)last_half_block_num);
+            }
+
+            for (int i = 0; i < half_num; i++) {
+                s->bsort[i]->wait_for(s->fsort[i]);
+                s->bsort[i]->wait_for(s->fsort[i+1]);
+                s->bsort[i]->no_auto_free();
+                s->bsort[i]->spawn();
+            }
+        }
+
+        HTaskPtr restart = manager->create_task(SortSimple,0,0,0,0);
+        restart->set_param(0,(memaddr)s);
+        if (!all) restart->wait_for(s->fsort[0]);
+        for (int i = 0; i < s->split_num; i++) {
+            s->fsort[i]->spawn();
+        }
+        if (sort_count == 1) {
+            // last loop wait for all task
+            // we should not need this?
+            for (int i = 0; i < half_num; i++) {
+                restart->wait_for(s->bsort[i]);
+                s->bsort[i]->auto_free();
+            }
+        }
+        restart->spawn();
+    }
+    return 0;
+}
+
+
+/* end */
--- a/example/many_task/task_init.cc	Sun Jan 13 22:53:50 2013 +0900
+++ b/example/many_task/task_init.cc	Thu Jan 31 18:47:13 2013 +0900
@@ -19,5 +19,5 @@
 
 
     SchedRegister(SortSimple);
-    SchedRegister(SortCompat);
+    //    SchedRegister(SortCompat);
 }
--- a/example/word_count/a.txt	Sun Jan 13 22:53:50 2013 +0900
+++ b/example/word_count/a.txt	Thu Jan 31 18:47:13 2013 +0900
@@ -1,5 +1,6 @@
 aaa bbb
-aaa bbb ccc
+aaa bbb cc
+aaa dd
 aaa
 aaa
 aaa
--- a/example/word_count/gpu/Exec.cl	Sun Jan 13 22:53:50 2013 +0900
+++ b/example/word_count/gpu/Exec.cl	Thu Jan 31 18:47:13 2013 +0900
@@ -1,15 +1,13 @@
 __kernel void
 run(__global int *data_count,
-    __global char *i_data,
-    __global unsigned long long  *o_data)
+    __global void *rbuf,
+    __global void *wbuf)
 {
     /*
      * Todo:ここの書式もCerium側に合わせる。
-     * 第2、第3引数をvoid *r_buf, void *w_bufにして、以下のような感じ
-     * __global char *i_data =  (char *)r_buf;
-     * __global unsigned long long *o_data = (unsigned long long*)w_buf;
-     * __global unsigned long long *head_tail_flag = o_data +2;
-     */
+     * 第2、第3引数をvoid *r_buf, void *w_bufにして、以下のような感じ*/
+    __global char *i_data =  (__global char *)rbuf;
+    __global unsigned long long *o_data = (__global unsigned long long*)wbuf;
     __global  unsigned long long *head_tail_flag = o_data +2;
     int length = data_count[0];
     int word_flag = 0;
--- a/example/word_count/main.cc	Sun Jan 13 22:53:50 2013 +0900
+++ b/example/word_count/main.cc	Thu Jan 31 18:47:13 2013 +0900
@@ -6,6 +6,7 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
+#include <sys/time.h>
 #include "TaskManager.h"
 #include "SchedTask.h"
 #include "Func.h"
@@ -16,7 +17,9 @@
  */
 
 extern void task_init();
-
+void TMend(TaskManager *);
+static double st_time;
+static double ed_time;
 int all = 0;
 int use_task_array = 1;
 int use_task_creater = 0;
@@ -26,6 +29,13 @@
 CPU_TYPE spe_cpu = SPE_ANY;
 const char *usr_help_str = "Usage: ./word_count [-a -c -s] [-cpu spe_num] [-file filename]\n";
 
+static double
+getTime() {
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return tv.tv_sec + (double)tv.tv_usec*1e-6;
+}
+
 typedef struct {
     caddr_t file_mmap;
     off_t size;
@@ -35,10 +45,8 @@
                                 int command, int in_data_size, int out_data_size,
                                 void *in_data, void *out_data, SchedTask *manager,
                                 HTask *wait_i, HTask *wait_me) {
-
     int in_task_size = 0;
     int out_task_size = 0;
-    int length = in_data_size/sizeof(char);
 
     if (in_total_size != 0) {
         in_task_size = in_total_size / in_data_size;
@@ -89,7 +97,7 @@
 
     }
 
-
+    int length = in_data_size/sizeof(char);
     for (int j = 0; j < array_length; j++) {
         for (int k = 0; k < array_num; k++) {
 
@@ -235,7 +243,7 @@
                 }
             }
 
-            //ここから
+            // ここから
             HTask **task_array = (HTask**)manager->allocate(sizeof(HTask*)*spe_num);
             Task **t_exec = (Task**)manager->allocate(sizeof(Task*)*spe_num);
 
@@ -249,15 +257,14 @@
                 }
             }
 
-
             for (int j = 0; j < array_task_num; j++) {
                 for (int k = 0; k < spe_num; k++) {
 
                     int a = w->task_spwaned++;
-
+                    
                     if (w->size < size) size = w->size;
+                    int length = size/sizeof(char);
 
-                    int length = size/sizeof(char);
                     t_exec[k] = task_array[k]->next_task_array(TASK_EXEC,t_exec[k]);
                     t_exec[k]->set_param(0,(memaddr)length);
                     t_exec[k]->set_inData(0,w->file_mmap + a*w->division_size, size);
@@ -494,8 +501,16 @@
 
     task_init();
     run_start(manager, filename);
-
+    st_time = getTime();
+    manager->set_TMend(TMend);
     return 0;
 }
 
+void
+TMend(TaskManager *manager)
+{
+    ed_time = getTime();
+    printf("Time: %0.6f\n",ed_time-st_time);
+}
+
 /* end */
--- a/example/word_count/ppe/Print.cc	Sun Jan 13 22:53:50 2013 +0900
+++ b/example/word_count/ppe/Print.cc	Thu Jan 31 18:47:13 2013 +0900
@@ -63,24 +63,24 @@
     s->printf("start sum\n");
 
     for (int i = 0; i < status_num; i++) {
-	word_data[i] = 0;
+        word_data[i] = 0;
     }
 
     for (int i = 0; i < out_task_num ; i++) {
-	word_data[0] += idata[i*w->out_size+0];
-	word_data[1] += idata[i*w->out_size+1];
-	unsigned long long *head_tail_flag = 
-		       &idata[i*w->out_size+2];
+        word_data[0] += idata[i*w->out_size+0];
+        word_data[1] += idata[i*w->out_size+1];
+        unsigned long long *head_tail_flag = 
+            &idata[i*w->out_size+2];
         if((i!=out_task_num-1)&&
            (head_tail_flag[1] == 1) && (head_tail_flag[4] == 0)) {
-	    flag_cal_sum++;
+            flag_cal_sum++;
         }
     }
 
     word_data[0] += flag_cal_sum;
 
     for (int i = status_num-1; i >=0; i--) {
-	s->printf("%llu ",word_data[i]);
+        s->printf("%llu ",word_data[i]);
     }
 
     //printf("j = %d\n",j);