changeset 1646:ab6b11476e02 draft

fix fft
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Mon, 24 Jun 2013 19:45:45 +0900
parents 4698c92bf2ca
children 0dcdaa4dfecb
files example/fft/main.cc example/fft/output.pgm
diffstat 2 files changed, 55 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/example/fft/main.cc	Mon Jun 24 17:28:28 2013 +0900
+++ b/example/fft/main.cc	Mon Jun 24 19:45:45 2013 +0900
@@ -25,13 +25,17 @@
 static double st_time;
 static double ed_time;
 void TMend(TaskManager *);
-int ndrange_flag;
 cl_device_id device_id = NULL;
 cl_context context = NULL;
 cl_command_queue queue = NULL;
 cl_program program = NULL;
 CPU_TYPE spe_cpu = SPE_ANY;
 
+cl_float2* xm;  // n*n elements (allocated further down in this file); src of the forward fftCore call, dst of the inverse ones; read and freed by output()
+cl_float2* rm;  // n*n elements; dst of the forward fftCore call, src of the inverse ones; freed by output()
+cl_float2* wm;  // n/2 elements; passed to fftCore as its `spin` argument; freed by output()
+pgm_t ipgm;  // input image: filled by readPGM() in TMmain, width read and image destroyed in output()
+
 enum Mode {
     forward = 0,
     inverse = 1
@@ -45,6 +49,35 @@
     return tv.tv_sec + (double)tv.tv_usec*1e-6;
 }
 
+void
+output()  // Converts the result in xm to an image, writes it to "output.pgm", then releases the global buffers; invoked from TMend().
+{
+    int n = ipgm.width;  // side length; the output image is made n x n (assumes a square input -- TODO confirm)
+    float* ampd;  // scratch buffer: one float per pixel
+    ampd = (float*)malloc(n*n*sizeof(float));  // NOTE(review): the malloc result is not checked before use
+    for (int i=0; i < n; i++) {  // i and j index rows and columns of the row-major n x n buffers
+        for (int j=0; j < n; j++) {  // xm is viewed as floats: the pair at 2*(n*i+j) and +1 goes through AMP (presumably re/im -> amplitude; AMP is defined elsewhere)
+            ampd[n*((i))+((j))] = (AMP(((float*)xm)[(2*n*i)+2*j], ((float*)xm)[(2*n*i)+2*j+1]));
+        }
+    }
+    pgm_t opgm;  // output image descriptor
+    opgm.width = n;
+    opgm.height = n;
+    normalizeF2PGM(&opgm, ampd);  // presumably scales the float data into opgm's pixel buffer (helper defined elsewhere -- verify)
+    free(ampd);  // scratch buffer is not used after this point
+
+    // Write the resulting image to a fixed file name
+    writePGM(&opgm, "output.pgm");
+
+    // Cleanup: release both images and the global work buffers
+    destroyPGM(&ipgm);
+    destroyPGM(&opgm);
+
+    free(wm);  // the three globals below are allocated elsewhere in this file and are not reset to NULL here
+    free(rm);
+    free(xm);    
+}
+
 const char *usr_help_str = "Usage: ./fft [option]\n \
 options\n\
   -cpu     Number of SPE used (default 1)\n\
@@ -71,7 +104,8 @@
     return 0;
 }
 
-int fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, int m, enum Mode direction)
+HTask*
+fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, int m, enum Mode direction,HTask* waitTask)
 {
     unsigned int direction_flag;
     switch (direction) {
@@ -90,6 +124,7 @@
     brev->set_param(3,m);
     brev->set_param(4,n);
     brev->set_cpu(spe_cpu);
+    brev->wait_for(waitTask);
     brev->iterate(gws[0],gws[1]);
 
     HTask* bfly = manager->create_task(BUTTERFLY);
@@ -102,6 +137,8 @@
     bfly->set_cpu(spe_cpu);
     bfly->wait_for(brev);
     bfly->iterate(gws[0],gws[1],m);
+
+    waitTask = bfly;
     
     if (direction == inverse) { 
         HTask *norm = manager->create_task(NORMALIZATION);
@@ -112,8 +149,10 @@
         norm->flip();
         norm->wait_for(bfly);
         norm->iterate(gws[0],gws[0]);
+        
+        waitTask = norm;
     }
-    return 0;
+    return waitTask;
 }
 
 char *
@@ -144,9 +183,11 @@
     size_t *gws = new size_t[2];
     size_t *lws = new size_t[2];
 
-    cl_float2 *xm = (cl_float2 *)malloc(n * n * sizeof(cl_float2));
-    cl_float2 *rm = (cl_float2 *)malloc(n * n * sizeof(cl_float2));
-    cl_float2 *wm = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2));
+    xm = (cl_float2 *)malloc(n * n * sizeof(cl_float2));
+    rm = (cl_float2 *)malloc(n * n * sizeof(cl_float2));
+    wm = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2));
+
+    HTask* waitTask;
     /*
      * [cl_float2]
      * typedef union
@@ -179,7 +220,7 @@
     sfac->iterate(gws[0],gws[1]);
 
     // Butterfly Operation
-    fftCore(manager, rm, xm, wm, m, forward);
+    waitTask = fftCore(manager, rm, xm, wm, m, forward,sfac);
     
     HTaskPtr *trns = (HTask**)manager->allocate(sizeof(HTask*)*2);
 
@@ -193,10 +234,11 @@
         trns[i]->set_param(3,n);
         trns[i]->set_cpu(spe_cpu);
     }
-    trns[0]->wait_for(sfac);
+    //    trns[0]->wait_for(sfac);
+    trns[0]->wait_for(waitTask);
     trns[0]->iterate(gws[0],gws[1]);
     // Butterfly Operation 
-    fftCore(manager, rm, xm, wm, m, forward);
+    //    fftCore(manager, rm, xm, wm, m, forward);
 
     // Apply high-pass filter
     HTask *hpfl = manager->create_task(HIGH_PASS_FILTER);
@@ -211,48 +253,21 @@
     // Inverse FFT
 
     // Butterfly Operation
-    fftCore(manager,xm, rm, wm, m, inverse);
+    waitTask = fftCore(manager,xm, rm, wm, m, inverse,hpfl);
 
     // Transpose matrix
     setWorkSize(gws,lws,n,n);
+    trns[1]->wait_for(waitTask);
     trns[1]->iterate(gws[0],gws[1]);
 
     // Butterfly Operation
 
-    fftCore(manager,xm, rm, wm, m, inverse);
-
-    // Read data from memory buffer
-    // spawn and wait 
-
-    float* ampd;
-    ampd = (float*)malloc(n*n*sizeof(float));
-    for (int i=0; i < n; i++) {
-        for (int j=0; j < n; j++) {
-            ampd[n*((i))+((j))] = (AMP(((float*)xm)[(2*n*i)+2*j], ((float*)xm)[(2*n*i)+2*j+1]));
-        }
-    }
-    pgm_t opgm;
-    opgm.width = n;
-    opgm.height = n;
-    normalizeF2PGM(&opgm, ampd);
-    free(ampd);
-
-    // Write out image
-    writePGM(&opgm, "output.pgm");
-
-    // Finalizations
-    destroyPGM(&ipgm);
-    destroyPGM(&opgm);
-
-    free(wm);
-    free(rm);
-    free(xm);
+    waitTask = fftCore(manager,xm, rm, wm, m, inverse,trns[1]);
 }
 
 int TMmain(TaskManager *manager, int argc, char** argv) {
     task_init();
     char * pgm_file = init(argc,argv);
-    pgm_t ipgm;
     /* Read image */
     int err = readPGM(&ipgm, pgm_file);
     if (err<0) {
@@ -268,6 +283,7 @@
 void
 TMend(TaskManager *manager)
 {
+    output();
     ed_time = getTime();
     fprintf(stdout, "image out put succeeded.\n");
     printf("Time: %0.6f\n",ed_time-st_time);
Binary file example/fft/output.pgm has changed