Mercurial > hg > Game > Cerium
changeset 1643:6c0b6947c231 draft
fix fft
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 22 Jun 2013 18:10:55 +0900 |
parents | b975f29893c7 |
children | b9f46cc511d3 |
files | TaskManager/kernel/ppe/SynchronizedMailManager.cc TaskManager/test/SetCpuTest/Makefile.gpu example/basic/main.cc example/fft/main.cc example/fft/ppe/bitReverse.cc example/fft/ppe/butterfly.cc example/fft/ppe/highPassFilter.cc example/fft/ppe/norm.cc example/fft/ppe/setid.cc example/fft/ppe/setid.h example/fft/ppe/spinFact.cc example/fft/ppe/transpose.cc example/fft/task_init.cc |
diffstat | 13 files changed, 105 insertions(+), 131 deletions(-) [+] |
line wrap: on
line diff
--- a/TaskManager/kernel/ppe/SynchronizedMailManager.cc Tue Jun 18 21:42:58 2013 +0900 +++ b/TaskManager/kernel/ppe/SynchronizedMailManager.cc Sat Jun 22 18:10:55 2013 +0900 @@ -24,9 +24,9 @@ SynchronizedMailManager::~SynchronizedMailManager() { - free(queue); - delete queue_remain; - delete queue_count; + free(queue); + delete queue_remain; + delete queue_count; } int @@ -38,7 +38,7 @@ void SynchronizedMailManager::send(memaddr data) { - queue_remain->sem_p(); //資源-1 + queue_remain->sem_p(); //資源-1 queue[write++] = data; //maskの範囲を超えた場合、0に戻す @@ -50,13 +50,13 @@ memaddr SynchronizedMailManager::recv() { - queue_count->sem_p(); //資源-1 - + queue_count->sem_p(); //資源-1 + memaddr data; - data = queue[read++]; - read &= mask; + data = queue[read++]; + read &= mask; - queue_remain->sem_v(); //資源+1 + queue_remain->sem_v(); //資源+1 return data; }
--- a/TaskManager/test/SetCpuTest/Makefile.gpu Tue Jun 18 21:42:58 2013 +0900 +++ b/TaskManager/test/SetCpuTest/Makefile.gpu Sat Jun 22 18:10:55 2013 +0900 @@ -35,13 +35,13 @@ cpus=16;./$(TARGET) -cpu $$cpus cpus=24;./$(TARGET) -cpu $$cpus - cpus=0;./$(TARGET) -cpu $$cpus -gpu - cpus=1;./$(TARGET) -cpu $$cpus -gpu - cpus=2;./$(TARGET) -cpu $$cpus -gpu - cpus=4;./$(TARGET) -cpu $$cpus -gpu - cpus=8;./$(TARGET) -cpu $$cpus -gpu - cpus=16;./$(TARGET) -cpu $$cpus -gpu - cpus=24;./$(TARGET) -cpu $$cpus -gpu + cpus=0;./$(TARGET) -cpu $$cpus -gpu $$cpus + cpus=1;./$(TARGET) -cpu $$cpus -gpu $$cpus + cpus=2;./$(TARGET) -cpu $$cpus -gpu $$cpus + cpus=4;./$(TARGET) -cpu $$cpus -gpu $$cpus + cpus=8;./$(TARGET) -cpu $$cpus -gpu $$cpus + cpus=16;./$(TARGET) -cpu $$cpus -gpu $$cpus + cpus=24;./$(TARGET) -cpu $$cpus -gpu $$cpus debug: $(TARGET) sudo ppu-gdb ./$(TARGET)
--- a/example/basic/main.cc Tue Jun 18 21:42:58 2013 +0900 +++ b/example/basic/main.cc Sat Jun 22 18:10:55 2013 +0900 @@ -91,6 +91,7 @@ /* * set_post() で ppe task を渡せるようにしたい */ + twice->set_cpu(SPE_ANY); // add Active Queue twice->set_post(twice_result, (void*)data, 0);
--- a/example/fft/main.cc Tue Jun 18 21:42:58 2013 +0900 +++ b/example/fft/main.cc Sat Jun 22 18:10:55 2013 +0900 @@ -71,17 +71,14 @@ return 0; } -int fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, int m_, enum Mode direction) +int fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, int m, enum Mode direction) { - unsigned int* flag = new unsigned int[1]; + unsigned int direction_flag; switch (direction) { - case forward:flag[0] = 0x00000000; break; - case inverse:flag[0] = 0x80000000; break; + case forward:direction_flag = 0x00000000; break; + case inverse:direction_flag = 0x80000000; break; } - int n; - int m; - m = m_; - n = 1<<m; + int n = 1<<m; size_t gws[2],lws[2]; int length_dst = n*n; int length_src = n*n; @@ -89,33 +86,32 @@ HTask* brev = manager->create_task(BIT_REVERSE); setWorkSize(gws,lws,n,n); brev->set_inData(0, src, length_src*sizeof(cl_float2)); - brev->set_inData(1, (memaddr)m,sizeof(int)); - brev->set_inData(2, (memaddr)n,sizeof(int)); brev->set_outData(0, dst, length_dst*sizeof(cl_float2)); + brev->set_param(3,m); + brev->set_param(4,n); brev->set_cpu(spe_cpu); brev->iterate(gws[0],gws[1]); HTask* bfly = manager->create_task(BUTTERFLY); - setWorkSize(gws,lws,n[0]/2,n[0]); + setWorkSize(gws,lws,n/2,n); bfly->set_inData(0, dst, length_dst*sizeof(cl_float2)); - bfly->set_inData(1, spin, sizeof(cl_float2)*(n[0]/2)); - bfly->set_inData(2, m,sizeof(int)); - bfly->set_inData(3, n,sizeof(int)); - bfly->set_inData(4, flag,sizeof(int)); + bfly->set_inData(1, spin, sizeof(cl_float2)*(n/2)); bfly->set_outData(0,dst,length_dst*sizeof(cl_float2)); + bfly->set_param(3,n); + bfly->set_param(4,direction_flag); bfly->set_cpu(spe_cpu); bfly->wait_for(brev); - bfly->iterate(gws[0],gws[1],m_); + bfly->iterate(gws[0],gws[1],m); if (direction == inverse) { HTask *norm = manager->create_task(NORMALIZATION); - setWorkSize(gws,lws,n[0],n[0]); - norm->set_inData(0, n,sizeof(int)); + setWorkSize(gws,lws,n,n); 
norm->set_outData(0, dst, length_dst*sizeof(cl_float2)); + norm->set_param(3,n); norm->set_cpu(spe_cpu); norm->flip(); norm->wait_for(bfly); - norm->iterate(gws[0],lws[0]); + norm->iterate(gws[0],gws[0]); } return 0; } @@ -133,7 +129,7 @@ } } if ( (argc == 1)||(filename==0)) { - printf("Usage: ./fft -file [image filename] -cpu or -gpu \n"); + printf("Usage: ./fft -file [image filename] -cpu or -gpu\n"); exit(-1); } @@ -143,16 +139,14 @@ void run_start(TaskManager *manager,pgm_t ipgm) { - int *n = new int[1]; - n[0] = ipgm.width; - int *m = new int[1]; - m[0] = (cl_int)(log((double)n[0])/log(2.0)); - size_t *gws = new size_t[3]; - size_t *lws = new size_t[3]; + int n = ipgm.width; + int m = (cl_int)(log((double)n)/log(2.0)); + size_t *gws = new size_t[2]; + size_t *lws = new size_t[2]; - cl_float2 *xm = (cl_float2 *)malloc(n[0] * n[0] * sizeof(cl_float2)); - cl_float2 *rm = (cl_float2 *)malloc(n[0] * n[0] * sizeof(cl_float2)); - cl_float2 *wm = (cl_float2 *)malloc(n[0] / 2 * sizeof(cl_float2)); + cl_float2 *xm = (cl_float2 *)malloc(n * n * sizeof(cl_float2)); + cl_float2 *rm = (cl_float2 *)malloc(n * n * sizeof(cl_float2)); + cl_float2 *wm = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2)); /* * [cl_float2] * typedef union @@ -168,79 +162,78 @@ * #endif * } cl_float2; */ - for (int i=0; i<n[0]; i++) { - for (int j=0; j < n[0]; j++) { - ((float*)xm)[(2*n[0]*j)+2*i+0] = (float)ipgm.buf[n[0]*j+i]; - ((float*)xm)[(2*n[0]*j)+2*i+1] = (float)0; + for (int i=0; i<n; i++) { + for (int j=0; j < n; j++) { + ((float*)xm)[(2*n*j)+2*i+0] = (float)ipgm.buf[n*j+i]; + ((float*)xm)[(2*n*j)+2*i+1] = (float)0; } } // Create spin factor - int length_w = n[0] / 2; + int length_w = n / 2; HTask* sfac = manager->create_task(SPIN_FACT); - setWorkSize(gws,lws,n[0]/2,1); - sfac->set_inData(0, n, sizeof(int)); + setWorkSize(gws,lws,n/2,1); sfac->set_outData(0, wm, length_w*sizeof(cl_float2)); + sfac->set_param(3,n); sfac->set_cpu(spe_cpu); - sfac->iterate(gws[0]); + 
sfac->iterate(gws[0],gws[1]); // Butterfly Operation - fftCore(manager, rm, xm, wm, m[0], forward); + fftCore(manager, rm, xm, wm, m, forward); HTaskPtr *trns = (HTask**)manager->allocate(sizeof(HTask*)*2); // Transpose matrix - int length_r =n[0] * n[0]; - setWorkSize(gws,lws,n[0]/2,1); + int length_r =n*n; + setWorkSize(gws,lws,n/2,1); for (int i=0;i<2;i++) { trns[i]= manager->create_task(TRANSEPOSE); trns[i]->set_inData(0, rm, length_r*sizeof(cl_float2)); - trns[i]->set_inData(1, n,sizeof(int)); trns[i]->set_outData(0, xm, length_r*sizeof(cl_float2)); + trns[i]->set_param(3,n); trns[i]->set_cpu(spe_cpu); } trns[0]->wait_for(sfac); - trns[0]->iterate(gws[0],lws[0]); + trns[0]->iterate(gws[0],gws[1]); // Butterfly Operation - fftCore(manager, rm, xm, wm, m[0], forward); + fftCore(manager, rm, xm, wm, m, forward); // Apply high-pass filter HTask *hpfl = manager->create_task(HIGH_PASS_FILTER); - cl_int *radius = new cl_int[1]; - radius[0] = n[0]/8; - setWorkSize(gws,lws,n[0]/2,1); - hpfl->set_inData(0, n,sizeof(int)); - hpfl->set_inData(1, radius,sizeof(int)); + cl_int radius = n/8; + setWorkSize(gws,lws,n/2,1); hpfl->set_outData(0, rm, length_r*sizeof(cl_float2)); + hpfl->set_param(3,n); + hpfl->set_param(4,radius); hpfl->set_cpu(spe_cpu); hpfl->wait_for(trns[0]); - hpfl->iterate(gws[0],lws[0]); + hpfl->iterate(gws[0],gws[1]); // Inverse FFT // Butterfly Operation - fftCore(manager,xm, rm, wm, m[0], inverse); + fftCore(manager,xm, rm, wm, m, inverse); // Transpose matrix - setWorkSize(gws,lws,n[0],n[0]); - trns[1]->iterate(gws[0],lws[0]); + setWorkSize(gws,lws,n,n); + trns[1]->iterate(gws[0],gws[1]); // Butterfly Operation - fftCore(manager,xm, rm, wm, m[0], inverse); + fftCore(manager,xm, rm, wm, m, inverse); // Read data from memory buffer // spawn and wait float* ampd; - ampd = (float*)malloc(n[0]*n[0]*sizeof(float)); - for (int i=0; i < n[0]; i++) { - for (int j=0; j < n[0]; j++) { - ampd[n[0]*((i))+((j))] = (AMP(((float*)xm)[(2*n[0]*i)+2*j], 
((float*)xm)[(2*n[0]*i)+2*j+1])); + ampd = (float*)malloc(n*n*sizeof(float)); + for (int i=0; i < n; i++) { + for (int j=0; j < n; j++) { + ampd[n*((i))+((j))] = (AMP(((float*)xm)[(2*n*i)+2*j], ((float*)xm)[(2*n*i)+2*j+1])); } } pgm_t opgm; - opgm.width = n[0]; - opgm.height = n[0]; + opgm.width = n; + opgm.height = n; normalizeF2PGM(&opgm, ampd); free(ampd);
--- a/example/fft/ppe/bitReverse.cc Tue Jun 18 21:42:58 2013 +0900 +++ b/example/fft/ppe/bitReverse.cc Sat Jun 22 18:10:55 2013 +0900 @@ -12,12 +12,13 @@ bitReverse(SchedTask* s, void* rbuf, void* wbuf) { cl_float2* src = (cl_float2*)s->get_inputAddr(0); - int* m = (int*)s->get_inputAddr(1); - int* n = (int*)s->get_inputAddr(2); cl_float2* dst = (cl_float2*)s->get_outputAddr(0); unsigned long gid = (unsigned long)s->get_param(0); unsigned long nid = (unsigned long)s->get_param(1); + + unsigned long m = (unsigned long)s->get_param(3); + unsigned long n = (unsigned long)s->get_param(4); unsigned int j = gid; j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1; @@ -26,9 +27,8 @@ j = (j & 0x00FF00FF) << 8 | (j & 0xFF00FF00) >> 8; j = (j & 0x0000FFFF) << 16 | (j & 0xFFFF0000) >> 16; - j >>= (32-m[0]); + j >>= (32-m); - dst[nid*n[0]+j] = src[nid*n[0]+gid]; - + dst[nid*n+j] = src[nid*n+gid]; return 0; }
--- a/example/fft/ppe/butterfly.cc Tue Jun 18 21:42:58 2013 +0900 +++ b/example/fft/ppe/butterfly.cc Sat Jun 22 18:10:55 2013 +0900 @@ -12,8 +12,6 @@ { cl_float2* x_in = (cl_float2*)s->get_inputAddr(0); cl_float2* w = (cl_float2*)s->get_inputAddr(1); - int* n = (int*)s->get_inputAddr(3); - unsigned int* flag = (unsigned int*)s->get_inputAddr(4); cl_float2* x_out = (cl_float2*)s->get_outputAddr(0); @@ -21,13 +19,16 @@ unsigned long nid = (unsigned long)s->get_param(1); long iter = (long)s->get_param(2) + 1; + long n = (long)s->get_param(3); + long direction_flag = (long)s->get_param(4); + int butterflySize = 1 << (iter-1); int butterflyGrpDist = 1 << iter; - int butterflyGrpNum = n[0] >> iter; + int butterflyGrpNum = n >> iter; int butterflyGrpBase = (gid >> (iter-1))*(butterflyGrpDist); int butterflyGrpOffset = gid & (butterflySize-1); - int a = nid * n[0] + butterflyGrpBase + butterflyGrpOffset; + int a = nid * n + butterflyGrpBase + butterflyGrpOffset; int b = a + butterflySize; int l = butterflyGrpNum * butterflyGrpOffset; @@ -40,7 +41,7 @@ xbyy.x = xbyy.y = xb.y; wab.x = (cl_float)((cl_uint)w[l].x ^ 0x0); - wab.y = (cl_float)((cl_uint)w[l].y ^ flag[0]); + wab.y = (cl_float)((cl_uint)w[l].y ^ direction_flag); wayx.x = (cl_float)((cl_uint)wab.y ^ 0x80000000); wayx.y = (cl_float)((cl_uint)wab.x ^ 0x0);
--- a/example/fft/ppe/highPassFilter.cc Tue Jun 18 21:42:58 2013 +0900 +++ b/example/fft/ppe/highPassFilter.cc Sat Jun 22 18:10:55 2013 +0900 @@ -10,19 +10,19 @@ static int highPassFilter(SchedTask* s,void* rbuf,void* wbuf) { - int* n = (int*)s->get_inputAddr(0); - int* radius = (int*)s->get_inputAddr(1); - cl_float2* image = (cl_float2*)s->get_outputAddr(0); unsigned long xgid = (unsigned long)s->get_param(0); unsigned long ygid = (unsigned long)s->get_param(1); + long n = (long)s->get_param(3); + long radius = (long)s->get_param(4); + cl_int2 n_2; - n_2.x = n_2.y = n[0]>>1; + n_2.x = n_2.y = n>>1; cl_int2 mask; - mask.x = mask.y = n[0]-1; + mask.x = mask.y = n-1; cl_int2 gid; gid.x = (xgid + n_2.x) & mask.x; @@ -40,14 +40,14 @@ cl_int2 window; - if (dist2 < radius[0]*radius[0]) { + if (dist2 < radius*radius) { window.x = window.y = (int)0L; } else { window.x = window.y = (int)-1L; } - image[ygid*n[0]+xgid].x = (float)((int)image[ygid*n[0]+xgid].x & window.x); - image[ygid*n[0]+xgid].y = (float)((int)image[ygid*n[0]+xgid].y & window.y); + image[ygid*n+xgid].x = (float)((int)image[ygid*n+xgid].x & window.x); + image[ygid*n+xgid].y = (float)((int)image[ygid*n+xgid].y & window.y); return 0; }
--- a/example/fft/ppe/norm.cc Tue Jun 18 21:42:58 2013 +0900 +++ b/example/fft/ppe/norm.cc Sat Jun 22 18:10:55 2013 +0900 @@ -10,15 +10,15 @@ static int norm(SchedTask* s, void* rbuf,void* wbuf) { - int* n = (int*)s->get_inputAddr(0); - cl_float2* x = (cl_float2*)s->get_outputAddr(0); - unsigned int* gid = (unsigned int*)s->global_get(FIRSTID); - unsigned int* nid = (unsigned int*)s->global_get(SECONDID); - - x[nid[0]*n[0]+gid[0]].x = x[nid[0]*n[0]+gid[0]].x / (float)n[0]; - x[nid[0]*n[0]+gid[0]].y = x[nid[0]*n[0]+gid[0]].y / (float)n[0]; + unsigned long gid = (unsigned long)s->get_param(0); + unsigned long nid = (unsigned long)s->get_param(1); + + long n = (long)s->get_param(3); + + x[nid*n+gid].x = x[nid*n+gid].x / (float)n; + x[nid*n+gid].y = x[nid*n+gid].y / (float)n; return 0; }
--- a/example/fft/ppe/setid.cc Tue Jun 18 21:42:58 2013 +0900
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-#include "setid.h"
-#include "Func.h"
-
-SchedDefineTask1(setid,setid);
-
-static int
-setid(SchedTask* s,void* rbuf,void* wbuf)
-{
- s->global_alloc(FIRSTID,sizeof(int));
-
- s->global_alloc(SECONDID,sizeof(int));
-
- return 0;
-}
--- a/example/fft/ppe/setid.h Tue Jun 18 21:42:58 2013 +0900
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-#ifndef INCLUDED_TASK_SET_ID
-#define INCLUDED_TASK_SET_ID
-
-#ifndef INCLUDED_SCHED_TASK
-#include "SchedTask.h"
-#endif
-
-#endif
--- a/example/fft/ppe/spinFact.cc Tue Jun 18 21:42:58 2013 +0900 +++ b/example/fft/ppe/spinFact.cc Sat Jun 22 18:10:55 2013 +0900 @@ -15,14 +15,15 @@ static int spinFact(SchedTask* s,void* rbuf,void* wbuf) { - int* n = (int*)s->get_input(rbuf,0); - - cl_float2* w = (cl_float2*)s->get_output(wbuf,0); + cl_float2* w = (cl_float2*)s->get_outputAddr(0); unsigned long i = (unsigned long)s->get_param(0); + + long n = (long)s->get_param(3); + cl_float2 angle; - angle.x = (float)(2*i*PI/(float)n[0]); - angle.y = (float)((2*i*PI/(float)n[0]) + PI_2); + angle.x = (float)(2*i*PI/(float)n); + angle.y = (float)((2*i*PI/(float)n) + PI_2); w[i].x = cos(angle.x); w[i].y = cos(angle.y);
--- a/example/fft/ppe/transpose.cc Tue Jun 18 21:42:58 2013 +0900 +++ b/example/fft/ppe/transpose.cc Sat Jun 22 18:10:55 2013 +0900 @@ -12,15 +12,16 @@ transpose(SchedTask* s,void* rbuf,void* wbuf) { cl_float2* src = (cl_float2*)s->get_inputAddr(0); - int* n = (int*)s->get_inputAddr(1); cl_float2* dst = (cl_float2*)s->get_outputAddr(0); unsigned long xgid = (unsigned long)s->get_param(0); unsigned long ygid = (unsigned long)s->get_param(1); - unsigned int iid = ygid * n[0] + xgid; - unsigned int oid = xgid * n[0] + ygid; + long n = (long)s->get_param(3); + + unsigned int iid = ygid * n + xgid; + unsigned int oid = xgid * n + ygid; dst[oid] = src[iid];
--- a/example/fft/task_init.cc Tue Jun 18 21:42:58 2013 +0900
+++ b/example/fft/task_init.cc Sat Jun 22 18:10:55 2013 +0900
@@ -23,7 +23,6 @@
 GpuSchedRegister(TRANSEPOSE, "gpu/fft.cl", "transpose");
 GpuSchedRegister(HIGH_PASS_FILTER, "gpu/fft.cl", "highPassFilter");
 #else
- SchedRegisterTask(SET_ID,setid);
 SchedRegisterTask(SPIN_FACT,spinFact);
 SchedRegisterTask(NORMALIZATION, norm);
 SchedRegisterTask(BIT_REVERSE, bitReverse);