changeset 1520:031f26b15ae6 draft

add many_task/gpu
author Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date Sat, 10 Nov 2012 19:42:22 +0900
parents 9a5f87f4b60f
children 9ae6eedd3ee3
files example/many_task/Makefile.gpu example/many_task/gpu/Makefile example/many_task/gpu/QuickSort.cl example/many_task/gpu/gpu_task_init.cc example/many_task/gpu/sort_test.cc example/many_task/gpu/sort_test.h
diffstat 6 files changed, 306 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/example/many_task/Makefile.gpu	Sat Nov 10 18:21:16 2012 +0900
+++ b/example/many_task/Makefile.gpu	Sat Nov 10 19:42:22 2012 +0900
@@ -1,26 +1,30 @@
 include ./Makefile.def
 
+
 SRCS_TMP = $(wildcard *.cc)
-SRCS_EXCLUDE =  # 除外するファイルを書く
+SRCS_EXCLUDE = sort_test.cc task_init.cc # list the files to exclude from the build here
 SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
 OBJS = $(SRCS:.cc=.o)
 
-TASK_DIR  = ppe
+TASK_DIR  = gpu
 TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc)
-TASK_SRCS_EXCLUDE = sort_test.cc 
+TASK_SRCS_EXCLUDE = sort_test.cc
 TASK_SRCS = $(filter-out $(TASK_DIR)/$(TASK_SRCS_EXCLUDE),$(TASK_SRCS_TMP))
 TASK_OBJS = $(TASK_SRCS:.cc=.o)
 
-CFLAGS += -D__CERIUM_GPU__
+CC += $(ABI)
+# CFLAGS  = -g -Wall# -O9 #-DDEBUG
 
-LIBS += -lGpuManager -framework opencl `sdl-config --libs`
+INCLUDE = -I${CERIUM}/include/TaskManager -I. -I..
+# NOTE(review): -DUSE_SIMPLE_TASK is a preprocessor flag, but LIBS is only used on the link lines below, not by the .cc.o rule -- confirm whether it belongs in CFLAGS.
+LIBS = -L${CERIUM}/TaskManager -DUSE_SIMPLE_TASK -lGpuManager -framework OpenCL `sdl-config --libs`
 
 .SUFFIXES: .cc .o
 
 .cc.o:
 	$(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@
 
-all: $(TARGET) 
+all: $(TARGET)
 
 $(TARGET): $(OBJS) $(TASK_OBJS)
 	$(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS)
@@ -28,8 +32,12 @@
 link:
 	$(CC) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(LIBS)
 
+debug: $(TARGET)
+	sudo gdb ./$(TARGET)
 
 clean:
 	rm -f $(TARGET) $(OBJS) $(TASK_OBJS)
 	rm -f *~ \#*
 	rm -f ppe/*~ ppe/\#*
+	rm -f spe/*~ spe/\#*
+	rm -f gpu/*~ gpu/\#*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/many_task/gpu/Makefile	Sat Nov 10 19:42:22 2012 +0900
@@ -0,0 +1,40 @@
+# Stand-alone build of sort_test from the .cc files in this directory (gpu_task_init.cc excluded).
+TARGET = sort_test
+CERIUM = ../../../../Cerium
+
+CC      = g++
+CFLAGS  = -g -Wall
+
+INCLUDE = -I${CERIUM}/include/TaskManager -I. -I../
+LIBS = -L${CERIUM}/TaskManager
+
+
+SRCS_TMP = $(wildcard *.cc)
+SRCS_EXCLUDE = gpu_task_init.cc # list the files to exclude from the build here
+SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
+OBJS = $(SRCS:.cc=.o)
+
+# The framework is named OpenCL; a lowercase spelling may fail to link on a case-sensitive file system.
+LIBS += -framework OpenCL
+
+.SUFFIXES: .cc .o
+.PHONY: all gpu link debug clean
+
+.cc.o:
+	$(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@
+
+all: $(TARGET)
+gpu: all
+
+$(TARGET): $(OBJS)
+	$(CC) -o $@ $(OBJS) $(LIBS)
+
+link:
+	$(CC) -o $(TARGET) $(OBJS) $(LIBS)
+
+debug: $(TARGET)
+	sudo gdb ./$(TARGET)
+
+clean:
+	rm -f $(TARGET) $(OBJS)
+	rm -f *~ \#*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/many_task/gpu/QuickSort.cl	Sat Nov 10 19:42:22 2012 +0900
@@ -0,0 +1,66 @@
+// Record sorted by the kernel: four ints, ordered by `index`.
+typedef struct Data {
+    int index;
+    int ptr;
+    int pad[2];
+} Data, *DataPtr;
+
+// Exchange the whole records at positions `left` and `right`.
+inline void
+swap(__global Data *data, int left, int right )
+{
+    Data tmp    = data[left];
+    data[left]  = data[right];
+    data[right] = tmp;
+}
+
+// Sort data[0 .. count[0]-1] in ascending order of `index`.
+// Non-recursive quicksort: pending sub-ranges are kept on a local stack of
+// (begin, end) pairs, and both bounds of every range are inclusive.
+__kernel void
+quick_sort(__constant int *count,
+           __global Data *data)
+{
+    int begin = 0;
+    // count[0] is the number of elements, so the last valid index is one less.
+    int end = count[0] - 1;
+
+    int stack[1024];
+    int sp = 0;
+
+    while (1) {
+        while (begin < end) {
+            // Move the middle element to the front and use it as the pivot.
+            // Whole records are swapped so `ptr` stays attached to its `index`.
+            int where = begin + (end - begin) / 2;
+            swap(data, where, begin);
+            int pivot = data[begin].index;
+
+            // After the loop: data[begin+1 .. p] < pivot <= data[p+1 .. end].
+            int p = begin;
+            for (int i = begin + 1; i <= end; i++) {
+                if (data[i].index < pivot) {
+                    p++;
+                    swap(data, p, i);
+                }
+            }
+            swap(data, begin, p);
+
+            // Defer the larger side and continue with the smaller one, which
+            // keeps the stack depth at roughly 2*log2(n) entries.
+            if (p - begin < end - p) {
+                stack[sp++] = p + 1;
+                stack[sp++] = end;
+                end = p - 1;
+            } else {
+                stack[sp++] = begin;
+                stack[sp++] = p - 1;
+                begin = p + 1;
+            }
+        }
+        if (sp == 0) return;
+        // Resume the most recently deferred sub-range.
+        end = stack[--sp];
+        begin = stack[--sp];
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/many_task/gpu/gpu_task_init.cc	Sat Nov 10 19:42:22 2012 +0900
@@ -0,0 +1,12 @@
+#include "Func.h"
+#include "GpuScheduler.h"
+#include "Scheduler.h"
+
+SchedExternTask(SortSimple);
+// Registers the tasks of the many_task GPU example; see the per-line notes in the body.
+void
+task_init(void)
+{
+    GpuSchedRegister(QUICK_SORT, "gpu/QuickSort.cl", "quick_sort"); // task id, OpenCL source path, kernel function name (NOTE(review): path looks relative to the working directory -- confirm)
+    SchedRegister(SortSimple); // SortSimple is declared with SchedExternTask above; presumably implemented in another file
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/many_task/gpu/sort_test.cc	Sat Nov 10 19:42:22 2012 +0900
@@ -0,0 +1,255 @@
+#include <stdlib.h>
+#include <OpenCL/opencl.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/stat.h>
+#include "sort.h"
+#include "sort_test.h"
+#define DEFAULT 432
+
+extern int data_length;
+extern DataPtr data;
+
+// Timestamps (seconds, from getTime()) taken around the GPU sort.
+static double st_time;
+static double ed_time;
+// Number of elements to sort; overridden by --length / -l.
+static int length = DEFAULT;
+
+// Unless err is CL_SUCCESS, print `what` with the OpenCL status code and exit.
+static void
+check_cl(cl_int err, const char *what)
+{
+    if (err != CL_SUCCESS) {
+        fprintf(stderr, "%s failed (OpenCL error %d)\n", what, (int)err);
+        exit(1);
+    }
+}
+
+// Parse the command line.
+//   --length N, -l N : number of elements to sort
+// A missing or non-positive N leaves the current length (DEFAULT) unchanged.
+// Always returns 0.
+int
+init(int argc, char **argv)
+{
+    (void)argc; // argv is walked up to its NULL terminator, so argc is not needed
+
+    for (int i = 1; argv[i]; ++i) {
+        if (strcmp(argv[i], "--length") == 0 || strcmp(argv[i], "-l") == 0) {
+            if (argv[i + 1] == NULL) {
+                // Option given without a value: nothing left to parse.
+                break;
+            }
+            int requested = atoi(argv[++i]);
+            if (requested > 0) {
+                length = requested;
+            }
+        }
+    }
+
+    return 0;
+}
+
+// Return `num` when len/num is below MAX_BLOCK_SIZE; otherwise return the number
+// of MAX_BLOCK_SIZE-sized blocks needed to cover `len`, rounded up.
+// `num` must be non-zero.
+int
+get_split_num(int len, int num)
+{
+    if (len / num < MAX_BLOCK_SIZE) {
+        return num;
+    } else {
+        // round up
+        return (len + MAX_BLOCK_SIZE - 1) / MAX_BLOCK_SIZE;
+    }
+}
+
+
+// Current time of day in seconds, including the microsecond fraction.
+static double
+getTime()
+{
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return tv.tv_sec + (double)tv.tv_usec*1e-6;
+}
+
+// Print data[0] .. data[size]; note that `size` is the LAST index, not a count.
+void
+show( Data *data, int size )
+{
+    puts("-----------------------------------------------");
+    for(int i=0; i<=size; i++) printf("data[%02d].index = %d\n", i, data[i].index);
+    puts("-----------------------------------------------");
+    return;
+}
+
+Sort sorter;
+
+// Report whether the first `count` records are in non-decreasing `index` order.
+static void
+check_data(const Data *records, int count)
+{
+    for(int i=0; i< count-1;i++) {
+        if (records[i].index>records[i+1].index)  {
+            printf("Data are not sorted at %d. %d > %d \n",i, records[i].index,records[i+1].index);
+            return;
+        }
+    }
+    printf("Data are sorted\n");
+}
+
+// Fill the global OpenCL state from sort_test.h: platform, device, context,
+// command queue, and the "quick_sort" kernel built from QuickSort.cl. Exits on any failure.
+void
+gpu_init()
+{
+    ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
+    check_cl(ret, "clGetPlatformIDs");
+    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id,
+                         &ret_num_devices);
+    check_cl(ret, "clGetDeviceIDs");
+
+    context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
+    check_cl(ret, "clCreateContext");
+    command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
+    check_cl(ret, "clCreateCommandQueue");
+
+    // Load the kernel source; the path is resolved against the current directory.
+    const char* filename = "QuickSort.cl";
+    const char* functionname = "quick_sort";
+
+    int fd = open(filename, O_RDONLY);
+    // open() reports failure with -1; 0 is a valid descriptor.
+    if (fd < 0) {
+        fprintf(stderr, "Failed to load kernel.\n");
+        exit(1);
+    }
+
+    struct stat stats;
+    if (fstat(fd, &stats) != 0 || stats.st_size <= 0) {
+        fprintf(stderr, "Failed to load kernel.\n");
+        close(fd);
+        exit(1);
+    }
+    size_t size = (size_t)stats.st_size;
+
+    char *kernel_src_str = new char[size];
+    ssize_t nread = read(fd, kernel_src_str, size);
+    close(fd);
+    if (nread < 0 || (size_t)nread != size) {
+        fprintf(stderr, "Failed to load kernel.\n");
+        delete[] kernel_src_str;
+        exit(1);
+    }
+
+    const char *sources[] = { kernel_src_str };
+    size_t kernel_code_size = size;
+    program = clCreateProgramWithSource(context, 1, sources,
+                                        &kernel_code_size, &ret);
+    // The source text has been loaded into the program object; the host copy can go.
+    delete[] kernel_src_str;
+    check_cl(ret, "clCreateProgramWithSource");
+
+    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
+    if (ret != CL_SUCCESS) {
+        // Show the compiler output before giving up.
+        size_t log_size = 0;
+        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
+        if (log_size > 0) {
+            char *log = new char[log_size];
+            clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);
+            fprintf(stderr, "%s\n", log);
+            delete[] log;
+        }
+        check_cl(ret, "clBuildProgram");
+    }
+    kernel = clCreateKernel(program,functionname, &ret);
+    check_cl(ret, "clCreateKernel");
+}
+
+// Sort s.data (s.data_length records) with the kernel, print the result, the
+// sortedness check and the elapsed time, then release the global OpenCL objects
+// (so this can run only once per gpu_init()).
+void
+sort_start(Sort s){
+
+    Data *host_data = s.data;
+    int count = s.data_length;
+    size_t data_bytes = sizeof(Data) * (size_t)count; // NOTE(review): assumes the host Data matches the struct in QuickSort.cl -- confirm
+
+    // Create the device buffers.
+    cl_mem mem_count = clCreateBuffer(context, CL_MEM_READ_ONLY,sizeof(int),NULL, &ret);
+    check_cl(ret, "clCreateBuffer(count)");
+    cl_mem mem_data = clCreateBuffer(context, CL_MEM_READ_WRITE, data_bytes, NULL, &ret);
+    check_cl(ret, "clCreateBuffer(data)");
+
+    st_time = getTime();
+
+    // Copy the input into the device buffers.
+    ret = clEnqueueWriteBuffer(command_queue, mem_count, CL_TRUE, 0,
+                               sizeof(int), &count, 0, NULL, NULL);
+    check_cl(ret, "clEnqueueWriteBuffer(count)");
+    ret = clEnqueueWriteBuffer(command_queue, mem_data, CL_TRUE, 0,
+                               data_bytes, host_data, 0, NULL, NULL);
+    check_cl(ret, "clEnqueueWriteBuffer(data)");
+
+    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&mem_count);
+    check_cl(ret, "clSetKernelArg(0)");
+    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&mem_data);
+    check_cl(ret, "clSetKernelArg(1)");
+
+    ev = NULL;
+    ret = clEnqueueTask(command_queue, kernel, 0, NULL, &ev);
+    check_cl(ret, "clEnqueueTask");
+
+    // Blocking read that waits on the kernel event, so no extra flush is needed.
+    ret = clEnqueueReadBuffer(command_queue, mem_data, CL_TRUE, 0,
+                              data_bytes, host_data, 1, &ev, NULL);
+    check_cl(ret, "clEnqueueReadBuffer");
+    ed_time = getTime();
+
+    show(host_data, count-1);
+    check_data(host_data, count);
+    printf("Time: %0.6f\n",ed_time-st_time);
+
+    clReleaseKernel(kernel);
+    clReleaseProgram(program);
+    clReleaseMemObject(mem_count);
+    clReleaseMemObject(mem_data);
+    clReleaseEvent(ev);
+    clReleaseCommandQueue(command_queue);
+    clReleaseContext(context);
+}
+
+int main(int argc, char *argv[]) {
+
+    // An invalid or missing length argument leaves the default of 432 in place.
+
+    if (argc>1) {
+        if (init(argc,argv) < 0) {
+            return -1;
+        }
+    }
+
+    gpu_init();
+
+    sorter.data = new Data[length];
+    sorter.data_length = length;
+    sorter.split_num = get_split_num(sorter.data_length, 1); // (length, cpu_num)
+
+    for (int i = 0; i < length; i++) {
+        sorter.data[i].index = rand()%10000;
+        sorter.data[i].ptr   = i;
+    }
+
+    sort_start(sorter);
+
+    delete[] sorter.data;
+    sorter.data = NULL;
+    return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/many_task/gpu/sort_test.h	Sat Nov 10 19:42:22 2012 +0900
@@ -0,0 +1,20 @@
+// OpenCL state used by gpu_init() and sort_start() in sort_test.cc.
+// NOTE: these are definitions, not extern declarations; include this header from one source file only.
+#ifndef INCLUDED_SORT_TEST_H
+#define INCLUDED_SORT_TEST_H
+
+#include <OpenCL/opencl.h>
+
+    cl_platform_id platform_id;
+    cl_uint ret_num_platforms;
+    cl_device_id device_id;
+    cl_uint ret_num_devices;
+    cl_int ret;
+
+    cl_context context;
+    cl_command_queue command_queue;
+    cl_program program;
+    cl_kernel kernel;
+    cl_event ev;
+
+#endif /* INCLUDED_SORT_TEST_H */