changeset 10:e38bef2012bc

fix sync bug. and NDrange option
author Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
date Mon, 11 Feb 2013 06:52:24 +0900
parents ed3d4a769bf3
children 53e22a570937
files fft_fixstart/main.cc
diffstat 1 files changed, 62 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/fft_fixstart/main.cc	Tue Feb 05 17:21:18 2013 +0900
+++ b/fft_fixstart/main.cc	Mon Feb 11 06:52:24 2013 +0900
@@ -21,6 +21,7 @@
 
 static double st_time;
 static double ed_time;
+int ndrange_flag;
 
 cl_device_id device_id = NULL;
 cl_context context = NULL;
@@ -102,21 +103,33 @@
     ret = clSetKernelArg(norm, 0, sizeof(cl_mem), (void *)&dst);
     ret = clSetKernelArg(norm, 1, sizeof(cl_int), (void *)&n);
 
-    /* Reverse bit ordering */
-    setWorkSize(gws, lws, n, n);
-    ret = clEnqueueTask(queue, brev, 0, NULL, NULL);
+    /* Reversee bit ordering */
+    if (ndrange_flag == 1) {
+        setWorkSize(gws, lws, n, n);
+        ret = clEnqueueNDRangeKernel(queue, brev, 2, NULL, gws, lws, 0, NULL, NULL);
+    } else {
+        ret = clEnqueueTask(queue, brev, 0, NULL, NULL);
+    }
 
     /* Perform Butterfly Operations*/
     setWorkSize(gws, lws, n/2, n);
     for (iter=1; iter <= m; iter++) {
         ret = clSetKernelArg(bfly, 4, sizeof(cl_int), (void *)&iter);
-        ret = clEnqueueTask(queue, bfly, 0, NULL, NULL);
+        if (ndrange_flag == 1) {
+            ret = clEnqueueNDRangeKernel(queue, bfly, 2, NULL, gws, lws, 0, NULL, &kernelDone);
+        } else {
+            ret = clEnqueueTask(queue, bfly, 0, NULL, &kernelDone);
+        }
         ret = clWaitForEvents(1, &kernelDone);
     }
 
     if (direction == inverse) {
-        setWorkSize(gws, lws, n, n);
-        ret = clEnqueueTask(queue, norm, 0, NULL, NULL);
+        if (ndrange_flag == 1) { 
+            setWorkSize(gws, lws, n, n);
+            ret = clEnqueueNDRangeKernel(queue, norm, 2, NULL, gws, lws, 0, NULL, &kernelDone);
+        } else {
+            ret = clEnqueueTask(queue, norm, 0, NULL, &kernelDone);
+        }
         ret = clWaitForEvents(1, &kernelDone);
     }
 
@@ -141,6 +154,8 @@
             device_type = CL_DEVICE_TYPE_GPU;
         } else if (strcmp(argv[i], "-all") == 0) {
             device_type = CL_DEVICE_TYPE_ALL;
+        } else if (strcmp(argv[i], "-nd") == 0 ) {
+            ndrange_flag = 1;
         }
     }
     if ( (argc == 1)||(filename==0)) {
@@ -166,10 +181,26 @@
 
     cl_int ret;
 
+    
     cl_float2 *xm;
     cl_float2 *rm;
     cl_float2 *wm;
 
+    /*
+     * typedef union
+     * {
+     * cl_float  CL_ALIGNED(8) s[2];
+     * #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
+     * __extension__ struct{ cl_float  x, y; };
+     * __extension__ struct{ cl_float  s0, s1; };
+     * __extension__ struct{ cl_float  lo, hi; };
+     * #endif
+     * #if defined( __CL_FLOAT2__) 
+     * __cl_float2     v2;
+     * #endif
+     * }cl_float2;
+     */
+
     pgm_t ipgm;
     pgm_t opgm;
 
@@ -240,7 +271,7 @@
     xmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret);
     rmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret);
     wmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, (n/2)*sizeof(cl_float2), NULL, &ret);
-
+    
     /* Transfer data to memory buffer */
     ret = clEnqueueWriteBuffer(queue, xmobj, CL_TRUE, 0, n*n*sizeof(cl_float2), xm, 0, NULL, NULL);
 
@@ -268,8 +299,12 @@
     /* Create spin factor */
     ret = clSetKernelArg(sfac, 0, sizeof(cl_mem), (void *)&wmobj);
     ret = clSetKernelArg(sfac, 1, sizeof(cl_int), (void *)&n);
-    setWorkSize(gws, lws, n/2, 1);
-    ret = clEnqueueTask(queue, sfac, 0, NULL, NULL);
+    if (ndrange_flag == 1) { 
+        setWorkSize(gws, lws, n/2, 1); 
+        ret = clEnqueueNDRangeKernel(queue, sfac, 1, NULL, gws, lws, 0, NULL, NULL);
+    } else {
+        ret = clEnqueueTask(queue, sfac, 0, NULL, NULL);
+    }
 
     /* Butterfly Operation */
     fftCore(rmobj, xmobj, wmobj, m, forward);
@@ -278,8 +313,12 @@
     ret = clSetKernelArg(trns, 0, sizeof(cl_mem), (void *)&xmobj);
     ret = clSetKernelArg(trns, 1, sizeof(cl_mem), (void *)&rmobj);
     ret = clSetKernelArg(trns, 2, sizeof(cl_int), (void *)&n);
-    setWorkSize(gws, lws, n, n);
-    ret = clEnqueueTask(queue, trns, 0, NULL, NULL);
+    if (ndrange_flag == 1) { 
+        setWorkSize(gws, lws, n, n);
+        ret = clEnqueueNDRangeKernel(queue, trns, 2, NULL, gws, lws, 0, NULL, NULL);
+    } else {
+        ret = clEnqueueTask(queue, trns, 0, NULL, NULL);
+    }
 
     /* Butterfly Operation */
     fftCore(rmobj, xmobj, wmobj, m, forward);
@@ -289,8 +328,12 @@
     ret = clSetKernelArg(hpfl, 0, sizeof(cl_mem), (void *)&rmobj);
     ret = clSetKernelArg(hpfl, 1, sizeof(cl_int), (void *)&n);
     ret = clSetKernelArg(hpfl, 2, sizeof(cl_int), (void *)&radius);
-    setWorkSize(gws, lws, n, n);
-    ret = clEnqueueTask(queue, hpfl, 0, NULL, NULL);
+    if (ndrange_flag == 1) { 
+        setWorkSize(gws, lws, n, n);
+        ret = clEnqueueNDRangeKernel(queue, hpfl, 2, NULL, gws, lws, 0, NULL, NULL);
+    } else {
+        ret = clEnqueueTask(queue, hpfl, 0, NULL, NULL);
+    }
 
     /* Inverse FFT */
 
@@ -300,9 +343,12 @@
     /* Transpose matrix */
     ret = clSetKernelArg(trns, 0, sizeof(cl_mem), (void *)&rmobj);
     ret = clSetKernelArg(trns, 1, sizeof(cl_mem), (void *)&xmobj);
-    setWorkSize(gws, lws, n, n);
-    ret = clEnqueueTask(queue, trns, 0, NULL, NULL);
-
+    if (ndrange_flag == 1) { 
+        setWorkSize(gws, lws, n, n);
+        ret = clEnqueueNDRangeKernel(queue, trns, 2, NULL, gws, lws, 0, NULL, NULL);
+    } else {
+        ret = clEnqueueTask(queue, trns, 0, NULL, NULL);
+    }
     /* Butterfly Operation */
 
     fftCore(xmobj, rmobj, wmobj, m, inverse);