Mercurial > hg > Game > Cerium
changeset 1571:9832a5eb2027 draft
merge
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 18 Mar 2013 15:28:54 +0900 |
parents | b3f865f339f6 |
children | 0bd4598f5a65 |
files | example/fft/main.cc example/fft/ppe/bitReverse.cc example/fft/ppe/butterfly.cc example/fft/ppe/highPassFilter.cc example/fft/ppe/norm.cc example/fft/ppe/spinFact.cc example/fft/ppe/transpose.cc example/multiply/main.cc |
diffstat | 8 files changed, 39 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/example/fft/main.cc Mon Mar 18 08:18:21 2013 +0900 +++ b/example/fft/main.cc Mon Mar 18 15:28:54 2013 +0900 @@ -87,8 +87,9 @@ int length_dst = n[0]*n[0]; int length_src = n[0]*n[0]; HTask *brev = manager->create_task(BIT_REVERSE); - // setWorkSize(gws,lws,n[0],n[0]); + setWorkSize(gws,lws,n[0],n[0]); cl_uint dimension = 2; + brev->set_param(0,(memaddr)length_src); brev->set_param(1,(memaddr)dimension); brev->set_param(2,(memaddr)gws[0]); @@ -103,7 +104,7 @@ brev->nd_range(); HTaskPtr *bfly = (HTask**)manager->allocate(sizeof(HTask*)*m_); - // setWorkSize(gws,lws,n[0]/2,n[0]); + setWorkSize(gws,lws,n[0]/2,n[0]); for (iter[0]=1; iter[0]<=m_;iter[0]++) { bfly[iter[0]-1] = manager->create_task(BUTTERFLY); @@ -113,13 +114,13 @@ bfly[iter[0]-1]->set_param(3,(memaddr)gws[1]); bfly[iter[0]-1]->set_param(4,(memaddr)lws[0]); bfly[iter[0]-1]->set_param(5,(memaddr)lws[1]); - bfly[iter[0]-1]->set_inData(0, dst, length_dst*sizeof(cl_float2)); bfly[iter[0]-1]->set_inData(1, spin, sizeof(cl_float2)*(n[0]/2)); bfly[iter[0]-1]->set_inData(2, m,sizeof(int)); bfly[iter[0]-1]->set_inData(3, n,sizeof(int)); bfly[iter[0]-1]->set_inData(4, iter,sizeof(int)); bfly[iter[0]-1]->set_inData(5, flag,sizeof(int)); + bfly[iter[0]-1]->set_outData(0,dst,length_dst*sizeof(cl_float2)); bfly[iter[0]-1]->set_cpu(spe_cpu); bfly[iter[0]-1]->nd_range(); bfly[iter[0]-1]->flip(); @@ -136,25 +137,21 @@ if (direction == inverse) { HTask *norm = manager->create_task(NORMALIZATION); - // setWorkSize(gws,lws,n[0],n[0]); + setWorkSize(gws,lws,n[0],n[0]); norm->set_param(0,(memaddr)length_dst); norm->set_param(1,(memaddr)dimension); norm->set_param(2,(memaddr)gws[0]); norm->set_param(3,(memaddr)gws[1]); norm->set_param(4,(memaddr)lws[0]); norm->set_param(5,(memaddr)lws[1]); - norm->set_inData(0, dst, length_dst*sizeof(cl_float2)); - norm->set_inData(1, n,sizeof(int)); + norm->set_inData(0, n,sizeof(int)); + norm->set_outData(0, dst, length_dst*sizeof(cl_float2)); norm->set_cpu(spe_cpu); norm->nd_range(); norm->flip(); norm->wait_for(bfly[m[0]-1]); norm->spawn(); } - // brev->spawn(); - // for (int i=0;i<m_;i++) { - // bfly[i]->spawn(); - // } return 0; } @@ -207,7 +204,6 @@ * #endif * } cl_float2; */ - int i, j; for (i=0; i < n[0]; i++) { for (j=0; j < n[0]; j++) { @@ -231,8 +227,8 @@ ndr->gws[1] = gws[1]; ndr->lws[0] = lws[0]; ndr->lws[1] = lws[1]; + sfac->set_param(0,ndr); - sfac->set_param(1, (memaddr)length_w); sfac->set_inData(0, n, sizeof(int)); sfac->set_outData(0, wm, length_w*sizeof(cl_float2));
--- a/example/fft/ppe/bitReverse.cc Mon Mar 18 08:18:21 2013 +0900 +++ b/example/fft/ppe/bitReverse.cc Mon Mar 18 15:28:54 2013 +0900 @@ -14,9 +14,12 @@ cl_float2* src = (cl_float2*)s->get_inputAddr(0); int* m = (int*)s->get_inputAddr(1); int* n = (int*)s->get_inputAddr(2); + cl_float2* dst = (cl_float2*)s->get_outputAddr(0); + unsigned int* gid = (unsigned int*)s->global_get(FIRSTID); unsigned int* nid = (unsigned int*)s->global_get(SECONDID); + unsigned int j = gid[0]; j = (j & 0x55555555) << 1 | (j & 0xAAAAAAAA) >> 1; j = (j & 0x33333333) << 2 | (j & 0xCCCCCCCC) >> 2;
--- a/example/fft/ppe/butterfly.cc Mon Mar 18 08:18:21 2013 +0900 +++ b/example/fft/ppe/butterfly.cc Mon Mar 18 15:28:54 2013 +0900 @@ -10,12 +10,14 @@ static int butterfly(SchedTask* s,void* rbuf,void* wbuf) { - cl_float2* x = (cl_float2*)s->get_inputAddr(0); + cl_float2* x_in = (cl_float2*)s->get_inputAddr(0); cl_float2* w = (cl_float2*)s->get_inputAddr(1); int* n = (int*)s->get_inputAddr(3); int* iter = (int*)s->get_inputAddr(4); unsigned int* flag = (unsigned int*)s->get_inputAddr(5); + cl_float2* x_out = (cl_float2*)s->get_outputAddr(0); + unsigned int* gid = (unsigned int*)s->global_get(FIRSTID); unsigned int* nid = (unsigned int*)s->global_get(SECONDID); @@ -32,8 +34,8 @@ cl_float2 xa, xb, xbxx, xbyy, wab, wayx, wbyx, resa, resb; - xa = x[a]; - xb = x[b]; + xa = x_in[a]; + xb = x_in[b]; xbxx.x = xbxx.y = xb.x; xbyy.x = xbyy.y = xb.y; @@ -52,8 +54,8 @@ resb.x = xa.x - xbxx.x*wab.x + xbyy.x*wbyx.x; resb.y = xa.y - xbxx.y*wab.y + xbyy.y*wbyx.y; - x[a] = resa; - x[b] = resb; + x_out[a] = resa; + x_out[b] = resb; return 0; }
--- a/example/fft/ppe/highPassFilter.cc Mon Mar 18 08:18:21 2013 +0900 +++ b/example/fft/ppe/highPassFilter.cc Mon Mar 18 15:28:54 2013 +0900 @@ -12,7 +12,9 @@ { int* n = (int*)s->get_input(rbuf,0); int* radius = (int*)s->get_input(rbuf,1); + cl_float2* image = (cl_float2*)s->get_output(wbuf,0); + unsigned int* xgid = (unsigned int*)s->global_get(FIRSTID); unsigned int* ygid = (unsigned int*)s->global_get(SECONDID); @@ -43,6 +45,7 @@ } else { window.x = window.y = (int)-1L; } + image[ygid[0]*n[0]+xgid[0]].x = (float)((int)image[ygid[0]*n[0]+xgid[0]].x & window.x); image[ygid[0]*n[0]+xgid[0]].y = (float)((int)image[ygid[0]*n[0]+xgid[0]].y & window.y);
--- a/example/fft/ppe/norm.cc Mon Mar 18 08:18:21 2013 +0900 +++ b/example/fft/ppe/norm.cc Mon Mar 18 15:28:54 2013 +0900 @@ -10,8 +10,10 @@ static int norm(SchedTask* s, void* rbuf,void* wbuf) { - cl_float2* x = (cl_float2*)s->get_inputAddr(0); - int* n = (int*)s->get_inputAddr(1); + int* n = (int*)s->get_inputAddr(0); + + cl_float2* x = (cl_float2*)s->get_outputAddr(0); + unsigned int* gid = (unsigned int*)s->global_get(FIRSTID); unsigned int* nid = (unsigned int*)s->global_get(SECONDID);
--- a/example/fft/ppe/spinFact.cc Mon Mar 18 08:18:21 2013 +0900 +++ b/example/fft/ppe/spinFact.cc Mon Mar 18 15:28:54 2013 +0900 @@ -16,9 +16,12 @@ spinFact(SchedTask* s,void* rbuf,void* wbuf) { int* n = (int*)s->get_input(rbuf,0); + cl_float2* w = (cl_float2*)s->get_output(wbuf,0); + unsigned int* i = (unsigned int*)s->global_get(FIRSTID); cl_float2 angle; + printf("%d\n",((unsigned int*)s->global_get(SECONDID))[0]); angle.x = (float)(2*i[0]*PI/(float)n[0]); angle.y = (float)((2*i[0]*PI/(float)n[0]) + PI_2);
--- a/example/fft/ppe/transpose.cc Mon Mar 18 08:18:21 2013 +0900 +++ b/example/fft/ppe/transpose.cc Mon Mar 18 15:28:54 2013 +0900 @@ -13,7 +13,9 @@ { cl_float2* src = (cl_float2*)s->get_inputAddr(0); int* n = (int*)s->get_inputAddr(1); + cl_float2* dst = (cl_float2*)s->get_outputAddr(0); + unsigned int* xgid = (unsigned int*)s->global_get(FIRSTID); unsigned int* ygid = (unsigned int*)s->global_get(SECONDID);
--- a/example/multiply/main.cc Mon Mar 18 08:18:21 2013 +0900 +++ b/example/multiply/main.cc Mon Mar 18 15:28:54 2013 +0900 @@ -71,16 +71,24 @@ } st_time = getTime(); +<<<<<<< local + ND_RANGE_T_PTR ndr = new ND_RANGE_T; + ndr->dimension = 1; + ndr->gws[0] = length; ndr->gws[1] = 1; ndr->gws[2] = 1; + ndr->lws[0] = 1; ndr->lws[1] = 1; ndr->lws[2] = 1; + // manager->set_NDRange(ndr); +======= // set ND_RANGE parameter // 初期値は全部1になっている ndr = new ND_RANGE_T; ndr->gws[0] = length; manager->set_NDRange(ndr); +>>>>>>> other multiply = manager->create_task(MULTIPLY_TASK); multiply->nd_range(); - multiply->set_cpu(GPU_0); + multiply->set_cpu(SPE_ANY); /** * Set of Input Data