Mercurial > hg > Game > Cerium
changeset 1538:fac06524090b draft
add gpu task wordcount. But not work print
author | Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 16 Dec 2012 20:52:55 +0900 |
parents | b3adb72d906c |
children | 6840fa213308 |
files | TaskManager/Gpu/GpuScheduler.cc example/many_task/gpu/QuickSort.cl example/many_task/gpu/sort_test.cc example/many_task/ppe/QuickSort.cc example/word_count/Makefile example/word_count/Makefile.def example/word_count/main.cc |
diffstat | 7 files changed, 247 insertions(+), 193 deletions(-) [+] |
line wrap: on
line diff
--- a/TaskManager/Gpu/GpuScheduler.cc Mon Dec 10 15:49:18 2012 +0900 +++ b/TaskManager/Gpu/GpuScheduler.cc Sun Dec 16 20:52:55 2012 +0900 @@ -230,8 +230,12 @@ clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret); - clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); + ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); + char *log = new char[1024]; + size_t s = 1024; + ret = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, s,log,NULL); + printf("\n%s\n",log); cl_kernel *kernel = new cl_kernel; *kernel = clCreateKernel(program, functionname, &ret); task_list[cmd].gputask->kernel = kernel;
--- a/example/many_task/gpu/QuickSort.cl Mon Dec 10 15:49:18 2012 +0900 +++ b/example/many_task/gpu/QuickSort.cl Sun Dec 16 20:52:55 2012 +0900 @@ -12,6 +12,17 @@ data[right] = tmp; } +void +bubble_sort(__global Data *data, int begin, int end) +{ + for (int count=0;count<end;count++) { + for (int c=end;c>count;c--) { + if (data[c].index<data[c-1].index)swap(data,c-1,c); + } + } +} + + __kernel void quick_sort(__constant int *count, __global Data *data) @@ -22,8 +33,14 @@ int stack[1024]; int sp = 0; int p; + // bubble_sort(data,begin,end); + while (1) { while (begin < end) { + if (end-begin <= 50) { + bubble_sort(data,begin,end); + break; + } int where = (begin + end) / 2; int pivot = data[where].index; data[where].index = data[begin].index; @@ -47,4 +64,5 @@ begin = stack[--sp]; begin = p + 1; } + }
--- a/example/many_task/gpu/sort_test.cc Mon Dec 10 15:49:18 2012 +0900 +++ b/example/many_task/gpu/sort_test.cc Sun Dec 16 20:52:55 2012 +0900 @@ -6,7 +6,6 @@ #include <sys/time.h> #include <sys/stat.h> #include "sort.h" -#include "sort_test.h" #define DEFAULT 432 extern int data_length; @@ -17,6 +16,7 @@ static double ed_time; static int length = DEFAULT; + int init(int argc, char **argv) { @@ -75,12 +75,26 @@ void gpu_init() { + +} + +void +sort_start(Sort s){ + + Sort sorter = s; + int length = sorter.data_length; + cl_platform_id platform_id = NULL; + cl_uint ret_num_platforms = NULL; + cl_device_id device_id = NULL; + cl_uint ret_num_devices = NULL; + cl_int ret = NULL; + clGetPlatformIDs(1, &platform_id, &ret_num_platforms); clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices); - context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); - command_queue = clCreateCommandQueue(context, device_id, 0, &ret); + cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret); + cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret); //ファイルオープン @@ -107,17 +121,10 @@ size_t kernel_code_size = read(fp, kernel_src_str, size); close(fp); - program = clCreateProgramWithSource(context, 1, (const char **)&kernel_src_str, + cl_program program = clCreateProgramWithSource(context, 1, (const char **)&kernel_src_str, (const size_t *)&kernel_code_size, &ret); clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); - kernel = clCreateKernel(program,functionname, &ret); -} - -void -sort_start(Sort s){ - - Sort sorter = s; - int length = sorter.data_length; + cl_kernel kernel = clCreateKernel(program,functionname, &ret); //メモリバッファの作成 cl_mem mem_count = clCreateBuffer(context, CL_MEM_READ_ONLY,sizeof(int),NULL, &ret); @@ -135,8 +142,8 @@ clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&mem_count); clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&mem_data); - ev = NULL; - + cl_event ev = NULL; + ret = clEnqueueTask(command_queue, kernel, 0, NULL, &ev); //メモリバッファから結果を取得
--- a/example/many_task/ppe/QuickSort.cc Mon Dec 10 15:49:18 2012 +0900 +++ b/example/many_task/ppe/QuickSort.cc Sun Dec 16 20:52:55 2012 +0900 @@ -16,6 +16,15 @@ //#define USE_MEMCPY +void +bubble_sort(Data *data, int begin, int end) +{ + for (int count=0;count<end;count++) { + for (int c=end;c>count;c--) { + if (data[c].index<data[c-1].index)swap(data,c-1,c); + } + } +} static int run(SchedTask *s, void* rbuff, void* wbuff) { // copy value @@ -58,8 +67,13 @@ int stack[1024]; int sp = 0; int p; + while (1) { while (begin < end) { + if (end-begin <= 50) { + bubble_sort(data, begin, end); + break; + } int where = (begin + end) / 2; int pivot = data[where].index; data[where].index = data[begin].index; @@ -82,6 +96,6 @@ end = stack[--sp]; begin = stack[--sp]; begin = p + 1; - } + } } /* end */
--- a/example/word_count/Makefile Mon Dec 10 15:49:18 2012 +0900 +++ b/example/word_count/Makefile Sun Dec 16 20:52:55 2012 +0900 @@ -12,6 +12,10 @@ @echo "Make for CELL (Cell)" @$(MAKE) -f Makefile.cell +gpu: FORCE + @echo "Make for OpenCL" + @$(MAKE) -f Makefile.gpu + FORCE: clean:
--- a/example/word_count/Makefile.def Mon Dec 10 15:49:18 2012 +0900 +++ b/example/word_count/Makefile.def Sun Dec 16 20:52:55 2012 +0900 @@ -9,8 +9,7 @@ CERIUM = ../../../Cerium -OPT = -O9 -# OPT = -g +OPT = -g CC = g++ CFLAGS = -Wall $(OPT)
--- a/example/word_count/main.cc Mon Dec 10 15:49:18 2012 +0900 +++ b/example/word_count/main.cc Sun Dec 16 20:52:55 2012 +0900 @@ -23,23 +23,23 @@ int use_compat = 0; int array_task_num = 8; int spe_num = 1; - +CPU_TYPE spe_cpu = SPE_ANY; const char *usr_help_str = "Usage: ./word_count [-a -c -s] [-cpu spe_num] [-file filename]\n"; - typedef struct { caddr_t file_mmap; off_t size; } st_mmap_t; -static void simple_task_creater(int in_total_size, int out_total_size, - int command, int in_data_size, int out_data_size, - void *in_data, void *out_data, SchedTask *manager, - HTask *wait_i, HTask *wait_me) { +static void simple_task_creater(int in_total_size, int out_total_size, + int command, int in_data_size, int out_data_size, + void *in_data, void *out_data, SchedTask *manager, + HTask *wait_i, HTask *wait_me) { int in_task_size = 0; int out_task_size = 0; + int length = in_data_size/sizeof(char); if (in_total_size != 0) { in_task_size = in_total_size / in_data_size; @@ -54,7 +54,7 @@ printf("mismatch of out_total_size and out_data_size\n"); } } - + /*in, out の大きい方に合わせるのがいいかな? Taskの数は1Task分に使うデータの大きいほうを取るような仕様がいいかな*/ int task_num = (in_task_size > out_task_size) ? in_task_size : out_task_size; @@ -75,25 +75,27 @@ int rest = task_num % array_num; int index = 0; - + for (int k = 0; k < array_num; k++) { task_array[k] = manager->create_task_array(command,array_length,0,1,1); t_exec[k] = 0; - + if (wait_me != 0) { wait_me->wait_for(task_array[k]); } if (wait_i != 0) { task_array[k]->wait_for(wait_i); } - + } + for (int j = 0; j < array_length; j++) { for (int k = 0; k < array_num; k++) { - + t_exec[k] = task_array[k]->next_task_array(command,t_exec[k]); + t_exec[k]->set_param(0,(memaddr)length); t_exec[k]->set_inData(0,(char*)in_data + index*in_data_size, in_data_size); t_exec[k]->set_outData(0,(char*)out_data + index*out_data_size, out_data_size); @@ -101,19 +103,19 @@ } } - + for (int k = 0; k < array_num; k++) { task_array[k]->spawn_task_array(t_exec[k]->next()); - task_array[k]->set_cpu(SPE_ANY); + task_array[k]->set_cpu(spe_cpu); task_array[k]->spawn(); - } + } for (int k = 0; k < rest; k++) { HTaskPtr t_exec = manager->create_task(command); - + t_exec->set_param(0,(memaddr)length); t_exec->set_inData(0,(char*)in_data + index*in_data_size, in_data_size); t_exec->set_outData(0,(char*)out_data + index*out_data_size, out_data_size); - + index++; if (wait_me != 0) { @@ -124,7 +126,7 @@ } t_exec->spawn(); - t_exec->set_cpu(SPE_ANY); + t_exec->set_cpu(spe_cpu); } @@ -137,7 +139,7 @@ fix_byte(int size,int fix_byte_size) { size = (size/fix_byte_size)*fix_byte_size + ((size%fix_byte_size)!= 0)*fix_byte_size; - + return size; } @@ -151,17 +153,17 @@ int map = MAP_PRIVATE; st_mmap_t st_mmap; struct stat sb; - + if ((fd=open(filename,O_RDONLY,0666))==0) { - fprintf(stderr,"can't open %s\n",filename); + fprintf(stderr,"can't open %s\n",filename); } - + if (fstat(fd,&sb)) { - fprintf(stderr,"can't fstat %s\n",filename); + fprintf(stderr,"can't fstat %s\n",filename); } printf("file size %d\n",(int)sb.st_size); - + /*sizeをページングサイズの倍数にあわせる*/ st_mmap.size = fix_byte(sb.st_size,4096); @@ -169,9 +171,9 @@ st_mmap.file_mmap = (char*)mmap(NULL,st_mmap.size,PROT_READ,map,fd,(off_t)0); if (st_mmap.file_mmap == (caddr_t)-1) { - fprintf(stderr,"Can't mmap file\n"); - perror(NULL); - exit(0); + fprintf(stderr,"Can't mmap file\n"); + perror(NULL); + exit(0); } return st_mmap; @@ -179,9 +181,9 @@ } static void -run_tasks(SchedTask *manager, WordCount *w, int task_count, HTaskPtr t_next, int size) +run_tasks(SchedTask *manager, WordCount *w, int task_count, HTaskPtr t_next, int size) { - + if (task_count < array_task_num) { array_task_num = task_count; if (task_count<=0) return; @@ -189,7 +191,7 @@ if (use_task_creater) { simple_task_creater(w->file_size, w->division_out_size * w->task_num, TASK_EXEC, w->division_size, w->division_out_size, - w->file_mmap, w->o_data, manager, w->t_print, 0); + w->file_mmap, w->o_data, manager, w->t_print, 0); } if (use_task_array) { @@ -200,124 +202,128 @@ for (int i = 0; i < loop; i += 1) { if (spl > w->task_num) { - if (w->task_num >= spe_num) { - array_task_num = w->task_num / spe_num; - } else { + if (w->task_num >= spe_num) { + array_task_num = w->task_num / spe_num; + } else { - int task_num = w->task_num; + int task_num = w->task_num; - for (int j = 0; j < task_num; j++) { - HTask *h_exec = 0; - int i = w->task_spwaned++; + for (int j = 0; j < task_num; j++) { + HTask *h_exec = 0; + int i = w->task_spwaned++; - if (w->size < size) size = w->size; + if (w->size < size) size = w->size; + + h_exec = manager->create_task(TASK_EXEC, + (memaddr)(w->file_mmap + i*w->division_size), size, + (memaddr)(w->o_data + i*w->out_size), w->division_out_size); - h_exec = manager->create_task(TASK_EXEC, - (memaddr)(w->file_mmap + i*w->division_size), size, - (memaddr)(w->o_data + i*w->out_size), w->division_out_size); - - if (all) { - w->t_print->wait_for(h_exec); - } else { - t_next->wait_for(h_exec); - } - - h_exec->set_cpu(SPE_ANY); - h_exec->spawn(); - - w->size -= size; - if (w->size == 0) break; - w->task_num--; - - } + if (all) { + w->t_print->wait_for(h_exec); + } else { + t_next->wait_for(h_exec); + } + + h_exec->set_cpu(spe_cpu); + h_exec->spawn(); - return; - } + w->size -= size; + if (w->size == 0) break; + w->task_num--; + + } + + return; + } } - - + + HTask **task_array = (HTask**)manager->allocate(sizeof(HTask*)*spe_num); Task **t_exec = (Task**)manager->allocate(sizeof(Task*)*spe_num); - + for (int k = 0; k < spe_num; k++) { - task_array[k] = manager->create_task_array(TASK_EXEC,array_task_num,0,1,1); - t_exec[k] = 0; - if (all) { - w->t_print->wait_for(task_array[k]); - } else { - t_next->wait_for(task_array[k]); - } - } - - + task_array[k] = manager->create_task_array(TASK_EXEC,array_task_num,0,1,1); + t_exec[k] = 0; + if (all) { + w->t_print->wait_for(task_array[k]); + } else { + t_next->wait_for(task_array[k]); + } + } + + for (int j = 0; j < array_task_num; j++) { - for (int k = 0; k < spe_num; k++) { - - int a = w->task_spwaned++; - - if (w->size < size) size = w->size; - - t_exec[k] = task_array[k]->next_task_array(TASK_EXEC,t_exec[k]); - t_exec[k]->set_inData(0,w->file_mmap + a*w->division_size, size); - t_exec[k]->set_outData(0,w->o_data + a*w->out_size, w->division_out_size); - - w->size -= size; - w->task_num--; - } + for (int k = 0; k < spe_num; k++) { + + int a = w->task_spwaned++; + + if (w->size < size) size = w->size; + + int length = size/sizeof(char); + t_exec[k] = task_array[k]->next_task_array(TASK_EXEC,t_exec[k]); + t_exec[k]->set_param(0,(memaddr)length); + t_exec[k]->set_inData(0,w->file_mmap + a*w->division_size, size); + t_exec[k]->set_outData(0,w->o_data + a*w->out_size, w->division_out_size); + + w->size -= size; + w->task_num--; + } } - + for (int k = 0; k < spe_num; k++) { - task_array[k]->spawn_task_array(t_exec[k]->next()); - task_array[k]->set_cpu(SPE_ANY); - task_array[k]->spawn(); - } - + task_array[k]->spawn_task_array(t_exec[k]->next()); + task_array[k]->set_cpu(spe_cpu); + task_array[k]->spawn(); + } + } return; } - - + + for (int i = 0; i < task_count; i += array_task_num) { HTask *h_exec = 0; for (int j = 0; j < array_task_num; j++) { - int i = w->task_spwaned++; - if (w->size < size) size = w->size; - if (size==0) break; + int i = w->task_spwaned++; + if (w->size < size) size = w->size; + int length = size/sizeof(char); + if (size==0) break; - if (use_compat) { - h_exec = manager->create_task(TASK_EXEC); - h_exec->set_inData(0,w->file_mmap + i*w->division_size, size); - h_exec->set_outData(0,w->o_data + i*w->out_size, w->division_out_size); + if (use_compat) { + h_exec = manager->create_task(TASK_EXEC); + h_exec->set_param(0,(memaddr)length); + h_exec->set_inData(0,w->file_mmap + i*w->division_size, size); + h_exec->set_outData(0,w->o_data + i*w->out_size, w->division_out_size); - if (all) { - w->t_print->wait_for(h_exec); - } else { - t_next->wait_for(h_exec); - } - - h_exec->set_cpu(SPE_ANY); - h_exec->spawn(); + if (all) { + w->t_print->wait_for(h_exec); + } else { + t_next->wait_for(h_exec); + } + + h_exec->set_cpu(spe_cpu); + h_exec->spawn(); - } else { - h_exec = manager->create_task(TASK_EXEC, - (memaddr)(w->file_mmap + i*w->division_size), size, - (memaddr)(w->o_data + i*w->out_size), w->division_out_size); + } else { + h_exec = manager->create_task(TASK_EXEC, + (memaddr)(w->file_mmap + i*w->division_size), size, + (memaddr)(w->o_data + i*w->out_size), w->division_out_size); - if (all) { - w->t_print->wait_for(h_exec); - } else { - t_next->wait_for(h_exec); - } + if (all) { + w->t_print->wait_for(h_exec); + } else { + t_next->wait_for(h_exec); + } - h_exec->set_cpu(SPE_ANY); - h_exec->spawn(); - } - w->size -= size; - w->task_num--; + h_exec->set_cpu(spe_cpu); + h_exec->spawn(); + } + w->size -= size; + w->task_num--; } } @@ -335,24 +341,24 @@ run16(SchedTask *manager, void *in, void *out) { WordCount *w = *(WordCount **)in; - + if (w->task_num < w->task_blocks) { - // last case - while (w->size >= w->division_size) - run_tasks(manager,w,w->task_num, w->t_print, w->division_size); - // remaining data - while (w->size>0) - run_tasks(manager,w,1, w->t_print, w->size); - // printf("run16 last %d\n",w->task_num); + // last case + while (w->size >= w->division_size) + run_tasks(manager,w,w->task_num, w->t_print, w->division_size); + // remaining data + while (w->size>0) + run_tasks(manager,w,1, w->t_print, w->size); + // printf("run16 last %d\n",w->task_num); } else { - HTaskPtr t_next = manager->create_task(RUN_TASK_BLOCKS, - (memaddr)&w->self,sizeof(memaddr),0,0); - w->t_print->wait_for(t_next); + HTaskPtr t_next = manager->create_task(RUN_TASK_BLOCKS, + (memaddr)&w->self,sizeof(memaddr),0,0); + w->t_print->wait_for(t_next); - run_tasks(manager,w, w->task_blocks, t_next, w->division_size); + run_tasks(manager,w, w->task_blocks, t_next, w->division_size); - t_next->spawn(); - // printf("run16 next %d\n",w->task_num); + t_next->spawn(); + // printf("run16 next %d\n",w->task_num); } return 0; } @@ -372,8 +378,8 @@ WordCount *w = (WordCount*)manager->allocate(sizeof(WordCount)); // bzero(w,sizeof(WordCount)); + //w->task_blocks = blocks; w->self = w; - //w->task_blocks = blocks; w->task_spwaned = 0; /*sizeはdivision_sizeの倍数にしている。*/ @@ -383,9 +389,9 @@ /* 1task分のデータサイズ(byte) */ if (w->size >= 1024*division) { - w->division_size = 1024 * division;/*16kbyte*/ + w->division_size = 1024 * division;/*16kbyte*/ } else { - w->division_size = w->size; + w->division_size = w->size; } printf("dvision_size %d\n",w->division_size); @@ -418,17 +424,17 @@ /*各SPEの結果を合計して出力するタスク*/ t_print = manager->create_task(TASK_PRINT, - (memaddr)&w->self,sizeof(memaddr),0,0); + (memaddr)&w->self,sizeof(memaddr),0,0); w->t_print = t_print; for(int i = 0;i<20;i++) { - /* Task を task_blocks ずつ起動する Task */ + /* Task を task_blocks ずつ起動する Task */ /* serialize されていると仮定する... */ - HTaskPtr t_exec = manager->create_task(RUN_TASK_BLOCKS, - (memaddr)&w->self,sizeof(memaddr),0,0); - t_print->wait_for(t_exec); - t_exec->spawn(); + HTaskPtr t_exec = manager->create_task(RUN_TASK_BLOCKS, + (memaddr)&w->self,sizeof(memaddr),0,0); + t_print->wait_for(t_exec); + t_exec->spawn(); } t_print->spawn(); @@ -437,41 +443,43 @@ static char* init(int argc, char **argv) { - + char *filename = 0; - - for (int i = 1; argv[i]; ++i) { - if (strcmp(argv[i], "-file") == 0) { - filename = argv[i+1]; - } else if (strcmp(argv[i], "-division") == 0) { - division = atoi(argv[i+1]); - } else if (strcmp(argv[i], "-block") == 0) { - blocks = atoi(argv[i+1]); - } else if (strcmp(argv[i], "-a") == 0) { - // create task all at once - all = 1; - } else if (strcmp(argv[i], "-c") == 0) { - use_task_array = 0; - use_compat = 1; - } else if (strcmp(argv[i], "-s") == 0) { - use_task_array = 0; - use_compat = 0; - } else if (strcmp(argv[i], "-t") == 0) { - use_task_creater = 1; - use_task_array = 0; - use_compat = 0; - } else if (strcmp(argv[i], "-anum") == 0) { - array_task_num = atoi(argv[i+1]); - } else if (strcmp(argv[i], "-cpu") == 0) { - spe_num = atoi(argv[i+1]); - if (spe_num==0) spe_num = 1; - } + + for (int i = 1; argv[i]; ++i) { + if (strcmp(argv[i], "-file") == 0) { + filename = argv[i+1]; + } else if (strcmp(argv[i], "-division") == 0) { + division = atoi(argv[i+1]); + } else if (strcmp(argv[i], "-block") == 0) { + blocks = atoi(argv[i+1]); + } else if (strcmp(argv[i], "-a") == 0) { + // create task all at once + all = 1; + } else if (strcmp(argv[i], "-c") == 0) { + use_task_array = 0; + use_compat = 1; + } else if (strcmp(argv[i], "-s") == 0) { + use_task_array = 0; + use_compat = 0; + } else if (strcmp(argv[i], "-t") == 0) { + use_task_creater = 1; + use_task_array = 0; + use_compat = 0; + } else if (strcmp(argv[i], "-anum") == 0) { + array_task_num = atoi(argv[i+1]); + } else if (strcmp(argv[i], "-g") == 0 ) { + spe_cpu = GPU_0; + } else if (strcmp(argv[i], "-cpu") == 0) { + spe_num = atoi(argv[i+1]); + if (spe_num==0) spe_num = 1; + } } if (filename==0) { puts(usr_help_str); - exit(1); + exit(1); } - + return filename; } @@ -482,9 +490,9 @@ char *filename = 0; filename = init(argc, argv); - + if (filename < 0) { - return -1; + return -1; } task_init();