# HG changeset patch # User Shohei KOKUBO # Date 1370246571 -32400 # Node ID 8b2d3ac19991111d3b09d2abf88a5fb3ac90cc79 # Parent 4fed76f4d101ac8b6beeb7f205bba1454b32c022 fix diff -r 4fed76f4d101 -r 8b2d3ac19991 TaskManager/Cell/CellTaskManagerImpl.h --- a/TaskManager/Cell/CellTaskManagerImpl.h Thu May 30 18:35:08 2013 +0900 +++ b/TaskManager/Cell/CellTaskManagerImpl.h Mon Jun 03 17:02:51 2013 +0900 @@ -12,7 +12,7 @@ class CellTaskManagerImpl : public TaskManagerImpl { public: /* constructor */ - CellTaskManagerImpl(int num, Threads *cpus) : TaskManagerImpl(num,gpu) {speThreads = cpus;} + CellTaskManagerImpl(int num, int gpu, Threads *cpus) : TaskManagerImpl(num) {gpuNum = gpu; speThreads = cpus;} ~CellTaskManagerImpl(); /* variables */ @@ -23,6 +23,7 @@ FifoTaskManagerImpl *ppeManager; int spe_running; int spuIdle; + int gpuNum; /* functions */ // system void init(int spuIdle,int useRefDma, int export_task_log); diff -r 4fed76f4d101 -r 8b2d3ac19991 TaskManager/Fifo/FifoTaskManagerImpl.cc --- a/TaskManager/Fifo/FifoTaskManagerImpl.cc Thu May 30 18:35:08 2013 +0900 +++ b/TaskManager/Fifo/FifoTaskManagerImpl.cc Mon Jun 03 17:02:51 2013 +0900 @@ -339,7 +339,7 @@ */ #ifdef __CERIUM_FIFO__ TaskManagerImpl* -create_impl(int num, int useRefDma) +create_impl(int num, int gpu, int useRefDma) { #ifdef __CERIUM_PARALLEL__ //マルチコアverでコンパイルしたのにかかわらず、 @@ -354,7 +354,7 @@ return new FifoTaskManagerImpl(num); } else { Threads *cpus = new CpuThreads(num,useRefDma); - return new CellTaskManagerImpl(num,cpus); + return new CellTaskManagerImpl(num,gpu,cpus); } } #endif // __CERIUM_FIFO__ diff -r 4fed76f4d101 -r 8b2d3ac19991 TaskManager/kernel/ppe/TaskManager.cc --- a/TaskManager/kernel/ppe/TaskManager.cc Thu May 30 18:35:08 2013 +0900 +++ b/TaskManager/kernel/ppe/TaskManager.cc Mon Jun 03 17:02:51 2013 +0900 @@ -4,7 +4,7 @@ #include "Task.h" #include "SysFunc.h" -TaskManager::TaskManager(int num, int gpu) : machineNum(num) : gpuNum(gpu) +TaskManager::TaskManager(int num, int gpu) : machineNum(num) , gpuNum(gpu) { } @@ -29,7 +29,7 @@ * * てか、普通に TaskManagerImpl に関数持たせた方が早いか・・・? */ -extern TaskManagerImpl* create_impl(int,int); +extern TaskManagerImpl* create_impl(int,int,int); void TaskManager::init(int spuIdle, int export_task_log, int useRefDma=0) diff -r 4fed76f4d101 -r 8b2d3ac19991 TaskManager/kernel/ppe/TaskManager.h --- a/TaskManager/kernel/ppe/TaskManager.h Thu May 30 18:35:08 2013 +0900 +++ b/TaskManager/kernel/ppe/TaskManager.h Mon Jun 03 17:02:51 2013 +0900 @@ -11,7 +11,7 @@ class TaskManager { public: /* constructor */ - TaskManager(int num); // The number of threads + TaskManager(int num, int gpu); // The number of threads ~TaskManager(); /* variables */ @@ -45,6 +45,7 @@ void error(const char* str); private: int machineNum; + int gpuNum; } ; #endif diff -r 4fed76f4d101 -r 8b2d3ac19991 example/fft/main.cc --- a/example/fft/main.cc Thu May 30 18:35:08 2013 +0900 +++ b/example/fft/main.cc Mon Jun 03 17:02:51 2013 +0900 @@ -85,7 +85,6 @@ size_t gws[2],lws[2]; int length_dst = n[0]*n[0]; int length_src = n[0]*n[0]; - cl_uint dimension = 2; HTask* brev = manager->create_task(BIT_REVERSE); setWorkSize(gws,lws,n[0],n[0]); @@ -106,23 +105,17 @@ bfly->set_outData(0,dst,length_dst*sizeof(cl_float2)); bfly->set_cpu(spe_cpu); bfly->wait_for(brev); - bfly->iterate(m_,gws[0],gws[1]); + bfly->iterate(gws[0],gws[1],m_); if (direction == inverse) { HTask *norm = manager->create_task(NORMALIZATION); setWorkSize(gws,lws,n[0],n[0]); - norm->set_param(0,(memaddr)length_dst); - norm->set_param(1,(memaddr)dimension); - norm->set_param(2,(memaddr)gws[0]); - norm->set_param(3,(memaddr)gws[1]); - norm->set_param(4,(memaddr)lws[0]); - norm->set_param(5,(memaddr)lws[1]); norm->set_inData(0, n,sizeof(int)); norm->set_outData(0, dst, length_dst*sizeof(cl_float2)); norm->set_cpu(spe_cpu); norm->flip(); norm->wait_for(bfly); - norm->spawn(); + norm->iterate(gws[0],lws[0]); } return 0; } @@ -150,7 +143,6 @@ void run_start(TaskManager *manager,pgm_t ipgm) { - int dimension; int *n = new int[1]; n[0] = ipgm.width; int *m = new int[1]; @@ -200,22 +192,15 @@ // Transpose matrix int length_r =n[0] * n[0]; setWorkSize(gws,lws,n[0]/2,1); - dimension = 2; for (int i=0;i<2;i++) { trns[i]= manager->create_task(TRANSEPOSE); - trns[i]->set_param(0, (memaddr)length_r); - trns[i]->set_param(1,(memaddr)dimension); - trns[i]->set_param(2,(memaddr)gws[0]); - trns[i]->set_param(3,(memaddr)gws[1]); - trns[i]->set_param(4,(memaddr)lws[0]); - trns[i]->set_param(5,(memaddr)lws[1]); trns[i]->set_inData(0, rm, length_r*sizeof(cl_float2)); trns[i]->set_inData(1, n,sizeof(int)); trns[i]->set_outData(0, xm, length_r*sizeof(cl_float2)); trns[i]->set_cpu(spe_cpu); } trns[0]->wait_for(sfac); - trns[0]->spawn(); + trns[0]->iterate(gws[0],lws[0]); // Butterfly Operation fftCore(manager, rm, xm, wm, m[0], forward); @@ -224,25 +209,20 @@ cl_int *radius = new cl_int[1]; radius[0] = n[0]/8; setWorkSize(gws,lws,n[0]/2,1); - hpfl->set_param(0, (memaddr)length_r); - hpfl->set_param(1,(memaddr)dimension); - hpfl->set_param(2,(memaddr)gws[0]); - hpfl->set_param(3,(memaddr)gws[1]); - hpfl->set_param(4,(memaddr)lws[0]); - hpfl->set_param(5,(memaddr)lws[1]); hpfl->set_inData(0, n,sizeof(int)); hpfl->set_inData(1, radius,sizeof(int)); hpfl->set_outData(0, rm, length_r*sizeof(cl_float2)); hpfl->set_cpu(spe_cpu); hpfl->wait_for(trns[0]); - hpfl->spawn(); + hpfl->iterate(gws[0],lws[0]); // Inverse FFT // Butterfly Operation fftCore(manager,xm, rm, wm, m[0], inverse); // Transpose matrix - trns[1]->spawn(); + setWorkSize(gws,lws,n[0],n[0]); + trns[1]->iterate(gws[0],lws[0]); // Butterfly Operation diff -r 4fed76f4d101 -r 8b2d3ac19991 example/fft/ppe/butterfly.cc --- a/example/fft/ppe/butterfly.cc Thu May 30 18:35:08 2013 +0900 +++ b/example/fft/ppe/butterfly.cc Mon Jun 03 17:02:51 2013 +0900 @@ -17,9 +17,9 @@ cl_float2* x_out = (cl_float2*)s->get_outputAddr(0); - long iter = (long)s->get_param(0) + 1; - unsigned long gid = (unsigned long)s->get_param(1); - unsigned long nid = (unsigned long)s->get_param(2); + unsigned long gid = (unsigned long)s->get_param(0); + unsigned long nid = (unsigned long)s->get_param(1); + long iter = (long)s->get_param(2) + 1; int butterflySize = 1 << (iter-1); int butterflyGrpDist = 1 << iter; diff -r 4fed76f4d101 -r 8b2d3ac19991 example/fft/ppe/highPassFilter.cc --- a/example/fft/ppe/highPassFilter.cc Thu May 30 18:35:08 2013 +0900 +++ b/example/fft/ppe/highPassFilter.cc Mon Jun 03 17:02:51 2013 +0900 @@ -10,13 +10,13 @@ static int highPassFilter(SchedTask* s,void* rbuf,void* wbuf) { - int* n = (int*)s->get_input(rbuf,0); - int* radius = (int*)s->get_input(rbuf,1); + int* n = (int*)s->get_inputAddr(0); + int* radius = (int*)s->get_inputAddr(1); - cl_float2* image = (cl_float2*)s->get_output(wbuf,0); + cl_float2* image = (cl_float2*)s->get_outputAddr(0); - unsigned int* xgid = (unsigned int*)s->global_get(FIRSTID); - unsigned int* ygid = (unsigned int*)s->global_get(SECONDID); + unsigned long xgid = (unsigned long)s->get_param(0); + unsigned long ygid = (unsigned long)s->get_param(1); cl_int2 n_2; n_2.x = n_2.y = n[0]>>1; @@ -25,8 +25,8 @@ mask.x = mask.y = n[0]-1; cl_int2 gid; - gid.x = (xgid[0] + n_2.x) & mask.x; - gid.y = (ygid[0] + n_2.y) & mask.y; + gid.x = (xgid + n_2.x) & mask.x; + gid.y = (ygid + n_2.y) & mask.y; cl_int2 diff; diff.x = n_2.x - gid.x; @@ -46,8 +46,8 @@ window.x = window.y = (int)-1L; } - image[ygid[0]*n[0]+xgid[0]].x = (float)((int)image[ygid[0]*n[0]+xgid[0]].x & window.x); - image[ygid[0]*n[0]+xgid[0]].y = (float)((int)image[ygid[0]*n[0]+xgid[0]].y & window.y); + image[ygid*n[0]+xgid].x = (float)((int)image[ygid*n[0]+xgid].x & window.x); + image[ygid*n[0]+xgid].y = (float)((int)image[ygid*n[0]+xgid].y & window.y); return 0; } diff -r 4fed76f4d101 -r 8b2d3ac19991 example/fft/ppe/transpose.cc --- a/example/fft/ppe/transpose.cc Thu May 30 18:35:08 2013 +0900 +++ b/example/fft/ppe/transpose.cc Mon Jun 03 17:02:51 2013 +0900 @@ -16,11 +16,11 @@ cl_float2* dst = (cl_float2*)s->get_outputAddr(0); - unsigned int* xgid = (unsigned int*)s->global_get(FIRSTID); - unsigned int* ygid = (unsigned int*)s->global_get(SECONDID); + unsigned long xgid = (unsigned long)s->get_param(0); + unsigned long ygid = (unsigned long)s->get_param(1); - unsigned int iid = ygid[0] * n[0] + xgid[0]; - unsigned int oid = xgid[0] * n[0] + ygid[0]; + unsigned int iid = ygid * n[0] + xgid; + unsigned int oid = xgid * n[0] + ygid; dst[oid] = src[iid];