changeset 2018:433043c56a0c draft

fix fft
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Tue, 15 Jul 2014 01:41:57 +0900
parents 1d7d1e398833
children 6849865f96eb
files example/fft/cuda/butterfly.cu example/fft/cuda/norm.cu example/fft/main.cc
diffstat 3 files changed, 22 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/example/fft/cuda/butterfly.cu	Tue Jul 15 01:19:31 2014 +0900
+++ b/example/fft/cuda/butterfly.cu	Tue Jul 15 01:41:57 2014 +0900
@@ -1,6 +1,6 @@
 extern "C" {
     __global__ void
-    butterfly(long* param, float* x_in, float* w, float* x_out)
+    butterfly(long* param, float* x, float* w)
     {
         unsigned long gid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
         unsigned long nid = blockIdx.y*blockDim.y+threadIdx.y; // (unsigned long)s->get_param(1);
@@ -22,10 +22,10 @@
         
         float xa[2], xb[2], xbxx[2], xbyy[2], wab[2], wayx[2], wbyx[2], resa[2], resb[2];
         
-        xa[0] = x_in[2*a];
-        xa[1] = x_in[2*a+1];
-        xb[0] = x_in[2*b];
-        xb[1] = x_in[2*b+1];
+        xa[0] = x[2*a];
+        xa[1] = x[2*a+1];
+        xb[0] = x[2*b];
+        xb[1] = x[2*b+1];
         xbxx[0] = xbxx[1] = xb[0];
         xbyy[0] = xbyy[1] = xb[1];
         
@@ -48,9 +48,9 @@
         resb[0] = xa[0] - xbxx[0]*wab[0] + xbyy[0]*wbyx[0];
         resb[1] = xa[1] - xbxx[1]*wab[1] + xbyy[1]*wbyx[1];
 
-        x_out[2*a] = resa[0];
-        x_out[2*a+1] = resa[1];
-        x_out[2*b] = resb[0];
-        x_out[2*b+1] = resb[1];
+        x[2*a] = resa[0];
+        x[2*a+1] = resa[1];
+        x[2*b] = resb[0];
+        x[2*b+1] = resb[1];
     }
 }
--- a/example/fft/cuda/norm.cu	Tue Jul 15 01:19:31 2014 +0900
+++ b/example/fft/cuda/norm.cu	Tue Jul 15 01:41:57 2014 +0900
@@ -1,13 +1,13 @@
 extern "C" {
     __global__ void
-    norm(long* param, float* in_x,float* out_x)
+    norm(long* param, float* x)
     {
         unsigned long gid = blockIdx.x*blockDim.x+threadIdx.x; // (unsigned long)s->get_param(0);
         unsigned long nid = blockIdx.y*blockDim.y+threadIdx.y; //(unsigned long)s->get_param(1);
         
         long n = param[0];
         
-        out_x[(nid*n+gid)*2] = in_x[(nid*n+gid)*2] / (float)n;
-        out_x[(nid*n+gid)*2+1] = in_x[(nid*n+gid)*2+1] / (float)n;
+        x[(nid*n+gid)*2] = x[(nid*n+gid)*2] / (float)n;
+        x[(nid*n+gid)*2+1] = x[(nid*n+gid)*2+1] / (float)n;
     }
 }
--- a/example/fft/main.cc	Tue Jul 15 01:19:31 2014 +0900
+++ b/example/fft/main.cc	Tue Jul 15 01:41:57 2014 +0900
@@ -107,7 +107,7 @@
 }
 
 HTask*
-fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, long m, enum Mode direction,HTask* waitTask)
+fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, long m, enum Mode direction, HTask* waitTask, bool last)
 {
     long direction_flag;
     switch (direction) {
@@ -140,7 +140,7 @@
         bfly->set_param(2,(long)iter);
         bfly->set_inData(0, dst, length_dst*sizeof(cl_float2));
         bfly->set_inData(1, spin, sizeof(cl_float2)*(n/2));
-        bfly->set_outData(0,dst,length_dst*sizeof(cl_float2));
+        bfly->set_outData(0, dst,length_dst*sizeof(cl_float2));
         bfly->set_cpu(spe_cpu);
         bfly->flip();
         bfly->wait_for(waitTask);
@@ -151,7 +151,9 @@
     if (direction == inverse) { 
         setWorkSize(gws,lws,n,n);
         HTask *norm = manager->create_task(NORMALIZATION);
-        norm->set_inData(0,dst,length_dst*sizeof(cl_float2));
+        norm->set_inData(0, dst,length_dst*sizeof(cl_float2));
+        if (!last)
+            norm->flip();
         norm->set_outData(0, dst, length_dst*sizeof(cl_float2));
         norm->set_param(0,n);
         norm->set_cpu(spe_cpu);
@@ -228,10 +230,11 @@
     sfac->set_outData(0, wm, length_w*sizeof(cl_float2));
     sfac->set_param(0,n);
     sfac->set_cpu(spe_cpu);
+    sfac->flip();
     sfac->iterate(gws[0]);
 
     // Butterfly Operation
-    waitTask = fftCore(manager, rm, xm, wm, m, forward,sfac);
+    waitTask = fftCore(manager, rm, xm, wm, m, forward, sfac, false);
 
     // Transpose matrix 
     int length_r =n*n;
@@ -246,7 +249,7 @@
     first_trns->iterate(gws[0],gws[1]);
 
     // Butterfly Operation 
-    waitTask = fftCore(manager, rm, xm, wm, m, forward,first_trns);
+    waitTask = fftCore(manager, rm, xm, wm, m, forward, first_trns, false);
 
     // Apply high-pass filter
     HTask *hpfl = manager->create_task(HIGH_PASS_FILTER);
@@ -264,7 +267,7 @@
     // Inverse FFT
 
     // Butterfly Operation
-    waitTask = fftCore(manager,xm, rm, wm, m, inverse,hpfl);
+    waitTask = fftCore(manager,xm, rm, wm, m, inverse, hpfl, false);
 
     // Transpose matrix
     setWorkSize(gws,lws,n,n);
@@ -279,7 +282,7 @@
 
     // Butterfly Operation
 
-    waitTask = fftCore(manager,xm, rm, wm, m, inverse, second_trns);
+    waitTask = fftCore(manager,xm, rm, wm, m, inverse, second_trns, true);
 }
 
 int TMmain(TaskManager *manager, int argc, char** argv) {