Mercurial > hg > Game > Cerium
annotate example/fft/main.cc @ 1581:8ee897303cd0 draft
fix multi_dimention
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 30 Mar 2013 18:29:54 +0900 |
parents | 7418c7aef534 |
children | 6ff0c34c8a3c |
rev | line source |
---|---|
1551 | 1 #include <stdio.h> |
2 #include <stdlib.h> | |
3 #include <math.h> | |
4 #include <sys/stat.h> | |
5 #include <fcntl.h> | |
6 #include <sys/time.h> | |
7 #include "TaskManager.h" | |
1555
096412ad80fb
add opencl example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1551
diff
changeset
|
8 #include "GpuScheduler.h" |
1560
3df1868130cb
fix fft ppe example
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1555
diff
changeset
|
9 #include "SchedTask.h" |
1551 | 10 #include "Func.h" |
11 #ifdef __APPLE__ | |
12 #include <OpenCL/opencl.h> | |
13 #else | |
14 #include <CL/cl.h> | |
15 #endif | |
16 #include "pgm.h" | |
17 extern void task_init(); | |
18 | |
19 #define PI 3.14159265358979 | |
20 | |
21 #define MAX_SOURCE_SIZE (0x100000) | |
22 | |
23 #define AMP(a, b) (sqrt((a)*(a)+(b)*(b))) | |
24 | |
25 static double st_time; | |
26 static double ed_time; | |
27 void TMend(TaskManager *); | |
28 int ndrange_flag; | |
29 cl_device_id device_id = NULL; | |
30 cl_context context = NULL; | |
31 cl_command_queue queue = NULL; | |
32 cl_program program = NULL; | |
33 CPU_TYPE spe_cpu = SPE_ANY; | |
34 | |
/*
 * Transform direction handed to fftCore(), which maps it to the flag
 * word given to the BUTTERFLY tasks.
 */
enum Mode {
    forward,    /* value 0: fftCore() uses flag 0x00000000 */
    inverse     /* value 1: fftCore() uses flag 0x80000000 and adds NORMALIZATION */
};
39 | |
/*
 * Return the time reported by gettimeofday() as seconds, with the
 * microsecond field folded in as the fractional part.
 */
static double
getTime()
{
    struct timeval now;
    gettimeofday(&now, NULL);

    double fraction = (double)now.tv_usec * 1e-6;
    return now.tv_sec + fraction;
}
/*
 * Help text for this example.  The options listed are the ones parsed by
 * init() in this file (-file, -g), plus -cpu and -h/--help carried over
 * from the previous text; the stale "-l, --length Sorted number of data"
 * entry, which nothing in this file parses, has been dropped.
 */
const char *usr_help_str =
    "Usage: ./fft -file [image filename] [option]\n"
    "  options\n"
    "    -file <name>  Input PGM image file (required)\n"
    "    -g            Run the tasks on the GPU\n"
    "    -cpu          Number of SPE used (default 1)\n"
    "    -h, --help    Print this message";
52 | |
53 int setWorkSize(size_t* gws, size_t* lws, cl_int x, cl_int y) | |
54 { | |
55 switch(y) { | |
56 case 1: | |
57 gws[0] = x; | |
58 gws[1] = 1; | |
59 lws[0] = 1; | |
60 lws[1] = 1; | |
61 break; | |
62 default: | |
63 gws[0] = x; | |
64 gws[1] = y; | |
65 lws[0] = 1; | |
66 lws[1] = 1; | |
67 break; | |
68 } | |
69 | |
70 return 0; | |
71 } | |
72 | |
73 int fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, int m_, enum Mode direction) | |
74 { | |
1579 | 75 int iter; |
1566 | 76 unsigned int* flag = new unsigned int[1]; |
1551 | 77 switch (direction) { |
1566 | 78 case forward:flag[0] = 0x00000000; break; |
79 case inverse:flag[0] = 0x80000000; break; | |
1551 | 80 } |
1566 | 81 int* n = new int[1]; |
82 int* m = new int[1]; | |
83 m[0] = m_; | |
1551 | 84 n[0] = 1<<m[0]; |
85 size_t gws[2],lws[2]; | |
86 int length_dst = n[0]*n[0]; | |
87 int length_src = n[0]*n[0]; | |
1579 | 88 cl_uint dimension = 2; |
89 HTask* brev; | |
90 int i,j; | |
1571 | 91 setWorkSize(gws,lws,n[0],n[0]); |
1579 | 92 for(i=0;i<gws[0];i++){ |
93 for(j=0;j<gws[1];j++){ | |
94 brev = manager->create_task(BIT_REVERSE); | |
95 brev->set_param(0,(memaddr)length_src); | |
96 brev->set_param(1,(memaddr)i); | |
97 brev->set_param(2,(memaddr)j); | |
98 brev->set_inData(0, src, length_src*sizeof(cl_float2)); | |
99 brev->set_inData(1, m,sizeof(int)); | |
100 brev->set_inData(2, n,sizeof(int)); | |
101 brev->set_outData(0, dst, length_dst*sizeof(cl_float2)); | |
102 brev->set_cpu(spe_cpu); | |
103 brev->spawn(); | |
104 } | |
105 } | |
1581
8ee897303cd0
fix multi_dimention
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1579
diff
changeset
|
106 exit(0); |
1579 | 107 HTask* bfly; |
1571 | 108 setWorkSize(gws,lws,n[0]/2,n[0]); |
1551 | 109 |
1579 | 110 for (iter=1; iter<=m_;iter++) { |
111 for(i=0;i<gws[0];i++){ | |
112 for(j=0;i<gws[1];j++){ | |
113 bfly = manager->create_task(BUTTERFLY); | |
114 bfly->set_param(0,(memaddr)length_dst); | |
115 bfly->set_param(1,(memaddr)iter); | |
116 bfly->set_param(2,(memaddr)i); | |
117 bfly->set_param(3,(memaddr)j); | |
118 bfly->set_inData(0, dst, length_dst*sizeof(cl_float2)); | |
119 bfly->set_inData(1, spin, sizeof(cl_float2)*(n[0]/2)); | |
120 bfly->set_inData(2, m,sizeof(int)); | |
121 bfly->set_inData(3, n,sizeof(int)); | |
122 bfly->set_inData(4, flag,sizeof(int)); | |
123 bfly->set_outData(0,dst,length_dst*sizeof(cl_float2)); | |
124 bfly->set_cpu(spe_cpu); | |
125 bfly->wait_for(brev); | |
126 bfly->spawn(); | |
127 } | |
1551 | 128 } |
129 } | |
130 | |
131 if (direction == inverse) { | |
132 HTask *norm = manager->create_task(NORMALIZATION); | |
1571 | 133 setWorkSize(gws,lws,n[0],n[0]); |
1551 | 134 norm->set_param(0,(memaddr)length_dst); |
135 norm->set_param(1,(memaddr)dimension); | |
136 norm->set_param(2,(memaddr)gws[0]); | |
137 norm->set_param(3,(memaddr)gws[1]); | |
138 norm->set_param(4,(memaddr)lws[0]); | |
139 norm->set_param(5,(memaddr)lws[1]); | |
1571 | 140 norm->set_inData(0, n,sizeof(int)); |
141 norm->set_outData(0, dst, length_dst*sizeof(cl_float2)); | |
1551 | 142 norm->set_cpu(spe_cpu); |
143 norm->nd_range(); | |
144 norm->flip(); | |
1579 | 145 norm->wait_for(bfly); |
1551 | 146 norm->spawn(); |
147 } | |
148 return 0; | |
149 } | |
150 | |
151 char * | |
152 init(int argc, char**argv){ | |
153 | |
154 char *filename = 0; | |
155 | |
156 for (int i = 1; argv[i]; ++i) { | |
157 if (strcmp(argv[i], "-file") == 0) { | |
158 filename = argv[i+1]; | |
159 } else if (strcmp(argv[i], "-g") == 0) { | |
160 spe_cpu = GPU_0; | |
161 } | |
162 } | |
163 if ( (argc == 1)||(filename==0)) { | |
164 printf("Usage: ./fft -file [image filename] -cpu or -gpu \n"); | |
165 exit(-1); | |
166 } | |
167 | |
168 return filename; | |
169 } | |
170 | |
171 void | |
172 run_start(TaskManager *manager,pgm_t ipgm) | |
173 { | |
1555
096412ad80fb
add opencl example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1551
diff
changeset
|
174 int dimension; |
1551 | 175 int *n = new int[1]; |
176 n[0] = ipgm.width; | |
177 int *m = new int[1]; | |
178 m[0] = (cl_int)(log((double)n[0])/log(2.0)); | |
1555
096412ad80fb
add opencl example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1551
diff
changeset
|
179 size_t *gws = new size_t[3]; |
096412ad80fb
add opencl example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1551
diff
changeset
|
180 size_t *lws = new size_t[3]; |
096412ad80fb
add opencl example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1551
diff
changeset
|
181 |
1551 | 182 cl_float2 *xm = (cl_float2 *)malloc(n[0] * n[0] * sizeof(cl_float2)); |
183 cl_float2 *rm = (cl_float2 *)malloc(n[0] * n[0] * sizeof(cl_float2)); | |
184 cl_float2 *wm = (cl_float2 *)malloc(n[0] / 2 * sizeof(cl_float2)); | |
1579 | 185 int i,j; |
1551 | 186 /* |
187 * [cl_float2] | |
188 * typedef union | |
189 * { | |
190 * cl_float CL_ALIGNED(8) s[2]; | |
191 * #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) | |
192 * __extension__ struct{ cl_float x, y; }; | |
193 * __extension__ struct{ cl_float s0, s1; }; | |
194 * __extension__ struct{ cl_float lo, hi; }; | |
195 * #endif | |
196 * #if defined( __CL_FLOAT2__) | |
197 * __cl_float2 v2; | |
198 * #endif | |
199 * } cl_float2; | |
200 */ | |
1579 | 201 for (i=0; i<n[0]; i++) { |
1551 | 202 for (j=0; j < n[0]; j++) { |
203 ((float*)xm)[(2*n[0]*j)+2*i+0] = (float)ipgm.buf[n[0]*j+i]; | |
204 ((float*)xm)[(2*n[0]*j)+2*i+1] = (float)0; | |
205 } | |
206 } | |
1579 | 207 |
208 // Create spin factor | |
1555
096412ad80fb
add opencl example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1551
diff
changeset
|
209 setWorkSize(gws,lws,n[0]/2,1); // Todo:setWorkSize(ndr,n[0]/2,1);でできるように |
1579 | 210 int length_w = n[0] / 2; |
211 HTask* sfac; | |
212 for(i=0;i<gws[0];i++){ | |
213 sfac = manager->create_task(SPIN_FACT); | |
214 sfac->set_param(0, (memaddr)length_w); | |
215 sfac->set_param(1,(memaddr)i); | |
216 sfac->set_inData(0, n, sizeof(int)); | |
217 sfac->set_outData(0, wm, length_w*sizeof(cl_float2)); | |
218 sfac->set_cpu(spe_cpu); | |
219 sfac->nd_range(); | |
220 sfac->spawn(); | |
221 } | |
1581
8ee897303cd0
fix multi_dimention
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1579
diff
changeset
|
222 // Butterfly Operation |
1551 | 223 fftCore(manager, rm, xm, wm, m[0], forward); |
224 | |
225 HTaskPtr *trns = (HTask**)manager->allocate(sizeof(HTask*)*2); | |
226 | |
227 // Transpose matrix | |
228 int length_r =n[0] * n[0]; | |
229 setWorkSize(gws,lws,n[0]/2,1); | |
230 dimension = 2; | |
231 for (int i=0;i<2;i++) { | |
232 trns[i]= manager->create_task(TRANSEPOSE); | |
233 trns[i]->set_param(0, (memaddr)length_r); | |
234 trns[i]->set_param(1,(memaddr)dimension); | |
235 trns[i]->set_param(2,(memaddr)gws[0]); | |
236 trns[i]->set_param(3,(memaddr)gws[1]); | |
237 trns[i]->set_param(4,(memaddr)lws[0]); | |
238 trns[i]->set_param(5,(memaddr)lws[1]); | |
239 trns[i]->set_inData(0, rm, length_r*sizeof(cl_float2)); | |
240 trns[i]->set_inData(1, n,sizeof(int)); | |
241 trns[i]->set_outData(0, xm, length_r*sizeof(cl_float2)); | |
242 trns[i]->set_cpu(spe_cpu); | |
243 trns[i]->nd_range(); | |
244 } | |
245 trns[0]->wait_for(sfac); | |
246 trns[0]->spawn(); | |
247 // Butterfly Operation | |
248 fftCore(manager, rm, xm, wm, m[0], forward); | |
249 | |
250 // Apply high-pass filter | |
251 HTask *hpfl = manager->create_task(HIGH_PASS_FILTER); | |
252 cl_int *radius = new cl_int[1]; | |
253 radius[0] = n[0]/8; | |
254 setWorkSize(gws,lws,n[0]/2,1); | |
255 hpfl->set_param(0, (memaddr)length_r); | |
256 hpfl->set_param(1,(memaddr)dimension); | |
257 hpfl->set_param(2,(memaddr)gws[0]); | |
258 hpfl->set_param(3,(memaddr)gws[1]); | |
259 hpfl->set_param(4,(memaddr)lws[0]); | |
260 hpfl->set_param(5,(memaddr)lws[1]); | |
261 hpfl->set_inData(0, n,sizeof(int)); | |
262 hpfl->set_inData(1, radius,sizeof(int)); | |
263 hpfl->set_outData(0, rm, length_r*sizeof(cl_float2)); | |
264 hpfl->set_cpu(spe_cpu); | |
265 hpfl->nd_range(); | |
266 hpfl->wait_for(trns[0]); | |
1566 | 267 hpfl->spawn(); |
1551 | 268 // Inverse FFT |
269 | |
270 // Butterfly Operation | |
271 fftCore(manager,xm, rm, wm, m[0], inverse); | |
272 | |
273 // Transpose matrix | |
274 trns[1]->spawn(); | |
275 | |
276 // Butterfly Operation | |
277 | |
278 fftCore(manager,xm, rm, wm, m[0], inverse); | |
279 | |
280 // Read data from memory buffer | |
281 // spawn and wait | |
282 | |
283 float* ampd; | |
284 ampd = (float*)malloc(n[0]*n[0]*sizeof(float)); | |
285 for (i=0; i < n[0]; i++) { | |
286 for (j=0; j < n[0]; j++) { | |
287 ampd[n[0]*((i))+((j))] = (AMP(((float*)xm)[(2*n[0]*i)+2*j], ((float*)xm)[(2*n[0]*i)+2*j+1])); | |
288 } | |
289 } | |
290 pgm_t opgm; | |
291 opgm.width = n[0]; | |
292 opgm.height = n[0]; | |
293 normalizeF2PGM(&opgm, ampd); | |
294 free(ampd); | |
295 | |
296 // Write out image | |
297 writePGM(&opgm, "output.pgm"); | |
298 | |
299 // Finalizations | |
300 destroyPGM(&ipgm); | |
301 destroyPGM(&opgm); | |
302 | |
303 free(wm); | |
304 free(rm); | |
305 free(xm); | |
306 } | |
307 | |
308 int TMmain(TaskManager *manager, int argc, char** argv) { | |
309 task_init(); | |
310 char * pgm_file = init(argc,argv); | |
311 pgm_t ipgm; | |
312 /* Read image */ | |
313 int err = readPGM(&ipgm, pgm_file); | |
314 if (err<0) { | |
315 fprintf(stderr, "Failed to read image file.\n"); | |
316 exit(1); | |
1566 | 317 } |
1551 | 318 st_time = getTime(); |
319 run_start(manager, ipgm); | |
320 manager->set_TMend(TMend); | |
321 return 0; | |
322 } | |
323 | |
324 void | |
325 TMend(TaskManager *manager) | |
326 { | |
327 ed_time = getTime(); | |
328 fprintf(stdout, "image out put succeeded.\n"); | |
329 printf("Time: %0.6f\n",ed_time-st_time); | |
330 } |