Mercurial > hg > Game > Cerium
changeset 1870:44fa0f1320a9 draft
run wordcount with iterate
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 26 Dec 2013 17:05:49 +0900 |
parents | 5e06a8089625 |
children | c3f7ba33222d |
files | TaskManager/ChangeLog TaskManager/Gpu/GpuScheduler.cc TaskManager/kernel/ppe/TaskList.h example/multiply/gpu/Multi.cl example/multiply/main.cc example/word_count/gpu/Exec.cl example/word_count/gpu/Exec_Data_Parallel.cl example/word_count/main.cc example/word_count/ppe/Exec.cc example/word_count/ppe/Exec_Data_Parallel.cc |
diffstat | 10 files changed, 51 insertions(+), 33 deletions(-) [+] |
line wrap: on
line diff
--- a/TaskManager/ChangeLog Thu Dec 26 15:33:06 2013 +0900 +++ b/TaskManager/ChangeLog Thu Dec 26 17:05:49 2013 +0900 @@ -8,7 +8,7 @@ 現在の GpuScheduler の pipeline 実行は2並列(cur=0,1) これをn個に拡張する - + 2013-11-23 Shinji kONO <kono@ie.u-ryukyu.ac.jp> Open CL の event の扱い方が良くない @@ -20,7 +20,7 @@ memout x n read_event x n write_event x n - kernel_event + kernel_event これらを、すべて二重に持つ。必要なら n の分 extension する。 event は、上書きす前にすべて、release する必要がある。
--- a/TaskManager/Gpu/GpuScheduler.cc Thu Dec 26 15:33:06 2013 +0900 +++ b/TaskManager/Gpu/GpuScheduler.cc Thu Dec 26 17:05:49 2013 +0900 @@ -247,7 +247,7 @@ ret = clEnqueueWriteBuffer(command_queue, memparam, CL_TRUE, 0,sizeof(memaddr)*nextTask->param_count, nextTask->param(0), 0, NULL, &memin[cur].event[0]); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } - + ret = clSetKernelArg(kernel[cur], 0, sizeof(memaddr),(void *)&memin[cur].buf[0]); if (ret<0) { gpuTaskError(cur,tasklist,ret); continue; } @@ -270,7 +270,7 @@ param++; } memin[cur].size = param; // +1 means param - + for(int i = 0; i<nextTask->outData_count;i++) { // set output data ListElement *output_buf = nextTask->outData(i); if (output_buf->size==0) break;
--- a/TaskManager/kernel/ppe/TaskList.h Thu Dec 26 15:33:06 2013 +0900 +++ b/TaskManager/kernel/ppe/TaskList.h Thu Dec 26 17:05:49 2013 +0900 @@ -22,8 +22,8 @@ size_t x,y,z; // 8*3 byte unsigned long long task_start_time,task_end_time; // 8*2 byte Task tasks[TASK_MAX_SIZE]; // 32*TASK_MAX_SIZE - - + + TaskPtr last() { return (TaskPtr)(((memaddr)tasks)+lastTask); } void set_last(Task *t) { lastTask = ((memaddr)t) - ((memaddr)tasks); } void init() { lastTask = ((memaddr)&tasks[TASK_MAX_SIZE])-(memaddr)(tasks); waiter=this; dim=0;}
--- a/example/multiply/gpu/Multi.cl Thu Dec 26 15:33:06 2013 +0900
+++ b/example/multiply/gpu/Multi.cl Thu Dec 26 17:05:49 2013 +0900
@@ -1,10 +1,13 @@
 __kernel void
-multi(__global const long *params,__global const float *A, __global const float*B, __global float *C)
+multi(__global const long *params,__global const float *A, __global const float*B,__global float* C_, __global float *C)
 {
     // int i=get_global_id(0);
     long length = (long)params[0];
     long id = get_global_id(0);
     // for(int i=0;i<length;i++) {
-    C[id]=A[id]*B[id];
+    if(length)
+        C[id]=A[id]*B[id];
+    else
+        C[id] = C_[id];
     //}
 }
--- a/example/multiply/main.cc Thu Dec 26 15:33:06 2013 +0900 +++ b/example/multiply/main.cc Thu Dec 26 17:05:49 2013 +0900 @@ -70,8 +70,9 @@ void multi_init(TaskManager *manager) { - HTask *multiply; - + HTask* multiply; + HTask* hoge; + A = new float[length]; B = new float[length]; C = new float[length]; @@ -90,7 +91,6 @@ // for(int i=0;i<10;i++) { multiply = manager->create_task(MULTIPLY_TASK); multiply->set_cpu(spe_cpu); - /** * Set of Input Data * add_inData(address of input data, size of input data); @@ -98,14 +98,25 @@ */ multiply->set_inData(0,(memaddr)A, sizeof(float)*length); multiply->set_inData(1,(memaddr)B, sizeof(float)*length); + multiply->set_inData(2,(memaddr)C,sizeof(float)*length); /** * Set of OutPut area * add_outData(address of output area, size of output area); */ multiply->set_outData(0,(memaddr)C, sizeof(float)*length); // param 0に0~length-1をsetしたtaskをlength個spawnする - multiply->set_param(0,(long)length); + multiply->set_param(0,(long)1); multiply->iterate(length); + + hoge = manager->create_task(MULTIPLY_TASK); + hoge->set_cpu(spe_cpu); + hoge->set_inData(0,(memaddr)A,sizeof(float)*length); + hoge->set_inData(1,(memaddr)B,sizeof(float)*length); + hoge->set_inData(2,(memaddr)C,sizeof(float)*length); + hoge->set_outData(0,(memaddr)C,sizeof(float)*length); + hoge->set_param(0,(long)0); + hoge->wait_for(multiply); + hoge->iterate(length); // multiply->spawn(); // } }
--- a/example/word_count/gpu/Exec.cl Thu Dec 26 15:33:06 2013 +0900
+++ b/example/word_count/gpu/Exec.cl Thu Dec 26 17:05:49 2013 +0900
@@ -12,7 +12,6 @@
     head_tail_flag[0] = (i_data[0] != 0x20) && (i_data[0] != 0x0A);
     word_num -= 1-head_tail_flag[0];
 
-
     for (; i < length; i++) {
         if (i_data[i] == 0x20) {
             word_flag = 1;
--- a/example/word_count/gpu/Exec_Data_Parallel.cl Thu Dec 26 15:33:06 2013 +0900 +++ b/example/word_count/gpu/Exec_Data_Parallel.cl Thu Dec 26 17:05:49 2013 +0900 @@ -1,23 +1,22 @@ __kernel void wordcount_parallel(__constant long *param, __global char *rbuf, - __global unsigned long long *wbuf) + __global unsigned long *wbuf) { - long task_spwaned = (long)param[0]; - long division_size = (long)param[1]; - long length = (long)param[2]; - long out_size = (long)param[3]; + long task_spwaned = param[0]; + long division_size = param[1]; + long length = param[2]; + long out_size = param[3]; long allocation = task_spwaned + (long)get_global_id(0); + __global char *i_data = rbuf + allocation*division_size; + __global unsigned long *o_data = wbuf + allocation*out_size; + __global unsigned long *head_tail_flag = o_data+2; + long word_flag = 0; + long word_num = 0; + long line_num = 0; + long i = 0; - __global char *i_data = rbuf + allocation*division_size; - __global unsigned long long *o_data = wbuf + allocation*out_size; - __global unsigned long long *head_tail_flag = o_data +2; - int word_flag = 0; - int word_num = 0; - int line_num = 0; - int i = 0; - head_tail_flag[0] = (i_data[0] != 0x20) && (i_data[0] != 0x0A); word_num -= 1-head_tail_flag[0]; @@ -38,7 +37,7 @@ // s->printf("SPE word %d line %d\n",word_num,line_num); - o_data[0] = (unsigned long long)word_num; - o_data[1] = (unsigned long long)line_num; + o_data[0] = (unsigned long)word_num; + o_data[1] = (unsigned long)line_num; }
--- a/example/word_count/main.cc Thu Dec 26 15:33:06 2013 +0900 +++ b/example/word_count/main.cc Thu Dec 26 17:05:49 2013 +0900 @@ -134,8 +134,11 @@ w->size -= size*array_task_num; if(w->size < 0) array_task_num -= 1; h_exec = manager->create_task(TASK_EXEC_DATA_PARALLEL); + h_exec->flip(); h_exec->set_inData(0,w->file_mmap,w->file_size); - h_exec->set_outData(0,w->o_data,w->out_size_); + h_exec->set_inData(1,w->o_data,w->out_size_); + h_exec->set_outData(0,w->file_mmap,w->file_size); + h_exec->set_outData(1,w->o_data,w->out_size_); h_exec->set_param(0,(long)i); h_exec->set_param(1,(long)w->division_size); h_exec->set_param(2,(long)size); @@ -150,8 +153,11 @@ if(w->size < 0) { h_exec = manager->create_task(TASK_EXEC_DATA_PARALLEL); + h_exec->flip(); h_exec->set_inData(0,w->file_mmap,w->file_size); - h_exec->set_outData(0,w->o_data,w->out_size_); + h_exec->set_inData(1,w->o_data,w->out_size_); + h_exec->set_outData(0,w->file_mmap,w->file_size); + h_exec->set_outData(1,w->o_data,w->out_size_); h_exec->set_param(0,(long)w->task_spwaned); h_exec->set_param(1,(long)w->division_size); h_exec->set_param(2,(long)(size+w->size)); @@ -160,6 +166,7 @@ t_next->wait_for(h_exec); h_exec->set_cpu(spe_cpu); h_exec->iterate(1); + w->task_num -= 1; w->task_spwaned += 1; array_task_num += 1;
--- a/example/word_count/ppe/Exec.cc Thu Dec 26 15:33:06 2013 +0900 +++ b/example/word_count/ppe/Exec.cc Thu Dec 26 17:05:49 2013 +0900 @@ -17,7 +17,7 @@ int word_num = 0; int line_num = 0; int i = 0; - + s->printf("%ld\n",o_data); head_tail_flag[0] = (i_data[0] != 0x20) && (i_data[0] != 0x0A); word_num -= 1-head_tail_flag[0];
--- a/example/word_count/ppe/Exec_Data_Parallel.cc Thu Dec 26 15:33:06 2013 +0900
+++ b/example/word_count/ppe/Exec_Data_Parallel.cc Thu Dec 26 17:05:49 2013 +0900
@@ -13,9 +13,8 @@
     long division_size = (long)s->get_param(1);
     long length = (long)s->get_param(2);
     long out_size = (long)s->get_param(3);
-
+
     long allocation = task_spwaned + (long)s->x;
-
     char *i_data = (char *)rbuf + allocation*division_size;
     unsigned long long *o_data = (unsigned long long*)wbuf + allocation*out_size;
     unsigned long long *head_tail_flag = o_data +2;