changeset 1541:99c9ed2932a0 draft

change task_init WordCount example
author Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date Tue, 18 Dec 2012 15:32:43 +0900
parents f3d0870648ff
children 9ccfdc408d51
files example/many_task/gpu/sort_test.h example/many_task/main.cc example/many_task/sort.cc example/word_count/Makefile.def example/word_count/Makefile.gpu example/word_count/gpu/Exec.cl example/word_count/gpu/Print.cc example/word_count/gpu/Print.h example/word_count/gpu/task_init.cc example/word_count/ppe/task_init.cc example/word_count/task_init.cc
diffstat 11 files changed, 52 insertions(+), 165 deletions(-) [+]
line wrap: on
line diff
--- a/example/many_task/gpu/sort_test.h	Sun Dec 16 21:13:10 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-    cl_platform_id platform_id;
-    cl_uint ret_num_platforms;
-    cl_device_id device_id;
-    cl_uint ret_num_devices;
-    cl_int ret;
-
-    cl_context context;
-    cl_command_queue command_queue;
-    cl_program program;
-    cl_kernel kernel;
-    cl_event ev;
--- a/example/many_task/main.cc	Sun Dec 16 21:13:10 2012 +0900
+++ b/example/many_task/main.cc	Tue Dec 18 15:32:43 2012 +0900
@@ -26,7 +26,7 @@
 
 static int length = 1200;
 CPU_TYPE spe_cpu = SPE_ANY;
-
+int task_array_num=0;
 // prototype
 void TMend(TaskManager *);
 
@@ -63,7 +63,9 @@
         if (strcmp(argv[i], "-s") == 0 ) {
             sort_task = SortSimple;
         }
-
+        if (strcmp(argv[i], "-ta") == 0 && argv[i + 1]) {  // argv is NULL-terminated, so argv[i + 1] is readable
+            task_array_num = atoi(argv[++i]);  // a trailing "-ta" with no value is ignored instead of passing NULL to atoi
+        }
     }
 
     return 0;
--- a/example/many_task/sort.cc	Sun Dec 16 21:13:10 2012 +0900
+++ b/example/many_task/sort.cc	Tue Dec 18 15:32:43 2012 +0900
@@ -7,6 +7,7 @@
 extern int get_split_num(int len, int num);
 extern int all;  // allocate task at once
 extern CPU_TYPE spe_cpu ;
+extern int task_array_num;
 
 /**
  * 一つの block にある data の数が MAX_BLOCK_SIZE 超えないような
@@ -60,9 +61,13 @@
 
 
     for (int i = 0; i < s->split_num-1; i++) {
-        s->fsort[i] = manager->create_task(QUICK_SORT,
-            (memaddr)&s->data[i*block_num], sizeof(Data)*block_num,
-            (memaddr)&s->data[i*block_num], sizeof(Data)*block_num);
+        if (task_array_num) {
+            s->fsort[i] = manager->create_task_array(QUICK_SORT, task_array_num, block_num+1, block_num, block_num);
+        } else {
+            s->fsort[i] = manager->create_task(QUICK_SORT,
+                                               (memaddr)&s->data[i*block_num], sizeof(Data)*block_num,
+                                               (memaddr)&s->data[i*block_num], sizeof(Data)*block_num);
+        }
 
         s->fsort[i]->flip();
 
--- a/example/word_count/Makefile.def	Sun Dec 16 21:13:10 2012 +0900
+++ b/example/word_count/Makefile.def	Tue Dec 18 15:32:43 2012 +0900
@@ -11,7 +11,7 @@
 
 OPT =  -g
 
-CC      = g++
+CC      = clang++
 CFLAGS  =  -Wall $(OPT) 
 
 INCLUDE = -I${CERIUM}/include/TaskManager -I. -I..
--- a/example/word_count/Makefile.gpu	Sun Dec 16 21:13:10 2012 +0900
+++ b/example/word_count/Makefile.gpu	Tue Dec 18 15:32:43 2012 +0900
@@ -5,10 +5,12 @@
 SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
 OBJS = $(SRCS:.cc=.o)
 
-TASK_DIR  = gpu
-TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc)
-TASK_SRCS_EXCLUDE = sort_test.cc
-TASK_SRCS = $(filter-out $(TASK_DIR)/$(TASK_SRCS_EXCLUDE),$(TASK_SRCS_TMP))
+TASK_DIR1  = ppe
+#TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc)
+TASK_DIR2  = gpu
+TASK_SRCS_TMP = $(wildcard $(TASK_DIR2)/*.cc $(TASK_DIR1)/*.cc)
+TASK_SRCS_EXCLUDE = Exec.cc
+TASK_SRCS = $(filter-out $(TASK_DIR1)/$(TASK_SRCS_EXCLUDE),$(TASK_SRCS_TMP))
 TASK_OBJS = $(TASK_SRCS:.cc=.o)
 
 CC += $(ABI)
--- a/example/word_count/gpu/Exec.cl	Sun Dec 16 21:13:10 2012 +0900
+++ b/example/word_count/gpu/Exec.cl	Tue Dec 18 15:32:43 2012 +0900
@@ -1,9 +1,11 @@
 __kernel void
 run(__global int *data_count,
-    __global char *i_data,
-    __global int *o_data)
+    __global void *r_buf,
+    __global void *w_buf)
 {
-    __global int *head_tail_flag = o_data +2;
+    __global char *i_data = (__global char *)r_buf;  // casts must keep the __global address space of the kernel arguments
+    __global unsigned long long *o_data = (__global unsigned long long *)w_buf;
+    __global unsigned long long *head_tail_flag = o_data + 2;  // flags start at the third element of the output buffer
     int length = data_count[0];
     int word_flag = 0;
     int word_num = 0;
--- a/example/word_count/gpu/Print.cc	Sun Dec 16 21:13:10 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include "Print.h"
-#include "Func.h"
-#include "WordCount.h"
-
-/* これは必須 */
-SchedDefineTask1(Print,run_print);
-
-static int
-run_print(SchedTask *s, void *rbuf, void *wbuf)
-{
-    WordCount *w = *(WordCount**)rbuf;
-    unsigned long long *idata = w->o_data;
-    // long task_num = w->task_num;
-    long status_num = w->status_num;
-    int out_task_num = w->out_task_num;
-
-    /*
-     *  head_flag
-     *  o_data[0]
-     *  o_data[1]
-     *
-     */
-    unsigned long long word_data[2];
-
-    int flag_cal_sum = 0;
-    //printf("pad %d\n",pad);
-
-
-    /* head_tail_flag : task = 2 : 1
-     *
-     *    head_tail_flag[2]
-     *  ___________ __________
-     * |           |          |
-     * | head_flag | tail_fag | 
-     * |___________|__________|
-     *      [0]         [1] 
-     * 
-     * |----------------------|
-     *    unsigned long long 
-     *          16byte
-     *
-     * 配列先頭の head_flag はみない
-     * 配列末尾の tail_flag はみない
-     * 担当範囲前の末尾文字が「改行と、スペース以外」(tail_flag = 1)で、
-     * 担当範囲の先頭文字が「改行とスペース」(head_flag = 0)の場合
-     * 単語数を +1  することで、整合性がとれる。
-     *
-     * ex.
-     *    task_num 4
-     *    head_tail_flag[8]
-     *
-     *      0   1   2   3   4   5   6   7
-     *     ___ ___ ___ ___ ___ ___ ___ ___ 
-     *    | h | t | h | t | h | t | h | t |
-     *    |___|___|___|___|___|___|___|___|
-     *        |-------|-------|-------| 
-     *           比較    比較    比較
-     *
-     */
-
-    s->printf("start sum\n");
-
-    for (int i = 0; i < status_num; i++) {
-	word_data[i] = 0;
-    }
-
-    for (int i = 0; i < out_task_num ; i++) {
-	word_data[0] += idata[i*w->out_size+0];
-	word_data[1] += idata[i*w->out_size+1];
-	unsigned long long *head_tail_flag = 
-		       &idata[i*w->out_size+2];
-        if((i!=out_task_num-1)&&
-           (head_tail_flag[1] == 1) && (head_tail_flag[4] == 0)) {
-	    flag_cal_sum++;
-        }
-    }
-
-    word_data[0] += flag_cal_sum;
-
-    for (int i = status_num-1; i >=0; i--) {
-	s->printf("%llu ",word_data[i]);
-    }
-
-    //printf("j = %d\n",j);
-
-
-    s->printf("\n");
-
-    return 0;
-}
--- a/example/word_count/gpu/Print.h	Sun Dec 16 21:13:10 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-#ifndef INCLUDED_TASK_PRINT
-#define INCLUDED_TASK_PRINT
-
-#ifndef INCLUDED_SCHED_TASK
-#  include "SchedTask.h"
-#endif
-
-
-#endif
--- a/example/word_count/gpu/task_init.cc	Sun Dec 16 21:13:10 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-#include "Func.h"
-#include "Scheduler.h"
-#include "GpuScheduler.h"
-
-/* 必ずこの位置に書いて */
-SchedExternTask(Print);
-SchedExternTask(RUN_TASK_BLOCKS);
-
-/**
- * この関数は ../spe/spe-main と違って
- * 自分で呼び出せばいい関数なので
- * 好きな関数名でおk (SchedRegisterTask は必須)
- */
-void
-task_init(void)
-{
-    SchedRegisterTask(TASK_PRINT, Print);
-    GpuSchedRegister(TASK_EXEC, "gpu/Exec.cl", "run");
-    SchedRegister(RUN_TASK_BLOCKS);
-}
--- a/example/word_count/ppe/task_init.cc	Sun Dec 16 21:13:10 2012 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-#include "Func.h"
-#include "Scheduler.h"
-
-/* 必ずこの位置に書いて */
-SchedExternTask(Print);
-SchedExternTask(Exec);
-SchedExternTask(RUN_TASK_BLOCKS);
-
-/**
- * この関数は ../spe/spe-main と違って
- * 自分で呼び出せばいい関数なので
- * 好きな関数名でおk (SchedRegisterTask は必須)
- */
-void
-task_init(void)
-{
-    SchedRegisterTask(TASK_PRINT, Print);
-    SchedRegisterTask(TASK_EXEC, Exec);
-    SchedRegister(RUN_TASK_BLOCKS);
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/word_count/task_init.cc	Tue Dec 18 15:32:43 2012 +0900
@@ -0,0 +1,28 @@
+#include "Func.h"
+#include "Scheduler.h"
+#include "GpuScheduler.h"
+
+/* These declarations must be written at this position. */
+#ifndef __CERIUM_GPU__
+SchedExternTask(Exec);
+#endif
+SchedExternTask(Print);
+SchedExternTask(RUN_TASK_BLOCKS);
+
+/**
+ * Unlike ../spe/spe-main, this function is called by the application
+ * itself, so it may be given any name
+ * (the SchedRegisterTask calls are required).
+ */
+void
+task_init(void)
+{
+#ifdef __CERIUM_GPU__
+    GpuSchedRegister(TASK_EXEC, "gpu/Exec.cl", "run");
+#else
+    SchedRegisterTask(TASK_EXEC, Exec);
+#endif
+
+    SchedRegisterTask(TASK_PRINT, Print);
+    SchedRegister(RUN_TASK_BLOCKS);
+}