Mercurial > hg > Game > Cerium
annotate example/fft/main.cc @ 1646:ab6b11476e02 draft
fix fft
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 24 Jun 2013 19:45:45 +0900 |
parents | 6c0b6947c231 |
children | 05adaf6f7afb |
rev | line source |
---|---|
1551 | 1 #include <stdio.h> |
2 #include <stdlib.h> | |
3 #include <math.h> | |
4 #include <sys/stat.h> | |
5 #include <fcntl.h> | |
6 #include <sys/time.h> | |
7 #include "TaskManager.h" | |
1555
096412ad80fb
add opencl example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1551
diff
changeset
|
8 #include "GpuScheduler.h" |
1560
3df1868130cb
fix fft ppe example
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1555
diff
changeset
|
9 #include "SchedTask.h" |
1551 | 10 #include "Func.h" |
11 #ifdef __APPLE__ | |
12 #include <OpenCL/opencl.h> | |
13 #else | |
14 #include <CL/cl.h> | |
15 #endif | |
16 #include "pgm.h" | |
17 extern void task_init(); | |
18 | |
/* Approximation of pi. Not referenced in the part of the file shown here. */
#define PI 3.14159265358979

/* 0x100000 = 1 MiB. Not referenced in the part of the file shown here;
 * NOTE(review): presumably a size limit for kernel source text - confirm. */
#define MAX_SOURCE_SIZE (0x100000)

/* Magnitude of the pair (a, b): sqrt(a*a + b*b).
 * Each argument is expanded twice, so pass side-effect-free expressions. */
#define AMP(a, b) (sqrt((a)*(a)+(b)*(b)))
24 | |
/* Wall-clock timestamps: st_time is taken in TMmain() just before the task
 * graph is built, ed_time in TMend() after the output image was written. */
static double st_time;
static double ed_time;
/* Completion callback; registered with the manager in TMmain(). */
void TMend(TaskManager *);
/* OpenCL handles, initialised to NULL. Nothing in the visible part of this
 * file assigns or reads them. */
cl_device_id device_id = NULL;
cl_context context = NULL;
cl_command_queue queue = NULL;
cl_program program = NULL;
/* CPU type handed to every task via set_cpu(); init() switches it to GPU_0
 * when "-g" is given on the command line. */
CPU_TYPE spe_cpu = SPE_ANY;

/* Working buffers, allocated in run_start() and freed in output():
 *   xm - n*n complex values, loaded from the input image
 *   rm - n*n complex values, second work matrix
 *   wm - n/2 complex values, filled by the SPIN_FACT task */
cl_float2* xm;
cl_float2* rm;
cl_float2* wm;
/* Input image: read in TMmain(), destroyed in output(). */
pgm_t ipgm;
38 | |
/* Transform direction passed to fftCore(), which maps it to the flag word
 * given to the BUTTERFLY task and adds a NORMALIZATION task for inverse. */
enum Mode {
    forward = 0,
    inverse = 1
};
43 | |
/*
 * Current wall-clock time in seconds as a double; the microsecond field
 * reported by gettimeofday() becomes the fractional part.
 */
static double
getTime()
{
    struct timeval now;
    gettimeofday(&now, NULL);

    const double whole = now.tv_sec;
    const double fraction = (double)now.tv_usec * 1e-6;
    return whole + fraction;
}
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
51 |
1646 | 52 void |
53 output() | |
54 { | |
55 int n = ipgm.width; | |
56 float* ampd; | |
57 ampd = (float*)malloc(n*n*sizeof(float)); | |
58 for (int i=0; i < n; i++) { | |
59 for (int j=0; j < n; j++) { | |
60 ampd[n*((i))+((j))] = (AMP(((float*)xm)[(2*n*i)+2*j], ((float*)xm)[(2*n*i)+2*j+1])); | |
61 } | |
62 } | |
63 pgm_t opgm; | |
64 opgm.width = n; | |
65 opgm.height = n; | |
66 normalizeF2PGM(&opgm, ampd); | |
67 free(ampd); | |
68 | |
69 // Write out image | |
70 writePGM(&opgm, "output.pgm"); | |
71 | |
72 // Finalizations | |
73 destroyPGM(&ipgm); | |
74 destroyPGM(&opgm); | |
75 | |
76 free(wm); | |
77 free(rm); | |
78 free(xm); | |
79 } | |
80 | |
/*
 * Help text exported under the name usr_help_str; nothing in the visible
 * part of this file reads it.
 * NOTE(review): the text lists -cpu, -l/--length and -h/--help, while init()
 * in this file only parses "-file" and "-g". Confirm which options are
 * handled elsewhere and update the text accordingly.
 */
const char *usr_help_str = "Usage: ./fft [option]\n \
options\n\
-cpu Number of SPE used (default 1)\n\
-l, --length Sorted number of data (default 1200)\n\
-h, --help Print this message";
86 | |
87 int setWorkSize(size_t* gws, size_t* lws, cl_int x, cl_int y) | |
88 { | |
89 switch(y) { | |
90 case 1: | |
91 gws[0] = x; | |
92 gws[1] = 1; | |
93 lws[0] = 1; | |
94 lws[1] = 1; | |
95 break; | |
96 default: | |
97 gws[0] = x; | |
98 gws[1] = y; | |
99 lws[0] = 1; | |
100 lws[1] = 1; | |
101 break; | |
102 } | |
103 | |
104 return 0; | |
105 } | |
106 | |
1646 | 107 HTask* |
108 fftCore(TaskManager *manager,cl_float2 *dst, cl_float2 *src, cl_float2 *spin, int m, enum Mode direction,HTask* waitTask) | |
1551 | 109 { |
1643 | 110 unsigned int direction_flag; |
1551 | 111 switch (direction) { |
1643 | 112 case forward:direction_flag = 0x00000000; break; |
113 case inverse:direction_flag = 0x80000000; break; | |
1551 | 114 } |
1643 | 115 int n = 1<<m; |
1551 | 116 size_t gws[2],lws[2]; |
1633
fbb4757d82ee
refactor GpuScheduler
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1627
diff
changeset
|
117 int length_dst = n*n; |
fbb4757d82ee
refactor GpuScheduler
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1627
diff
changeset
|
118 int length_src = n*n; |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
119 |
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
120 HTask* brev = manager->create_task(BIT_REVERSE); |
1633
fbb4757d82ee
refactor GpuScheduler
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1627
diff
changeset
|
121 setWorkSize(gws,lws,n,n); |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
122 brev->set_inData(0, src, length_src*sizeof(cl_float2)); |
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
123 brev->set_outData(0, dst, length_dst*sizeof(cl_float2)); |
1643 | 124 brev->set_param(3,m); |
125 brev->set_param(4,n); | |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
126 brev->set_cpu(spe_cpu); |
1646 | 127 brev->wait_for(waitTask); |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
128 brev->iterate(gws[0],gws[1]); |
1551 | 129 |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
130 HTask* bfly = manager->create_task(BUTTERFLY); |
1643 | 131 setWorkSize(gws,lws,n/2,n); |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
132 bfly->set_inData(0, dst, length_dst*sizeof(cl_float2)); |
1643 | 133 bfly->set_inData(1, spin, sizeof(cl_float2)*(n/2)); |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
134 bfly->set_outData(0,dst,length_dst*sizeof(cl_float2)); |
1643 | 135 bfly->set_param(3,n); |
136 bfly->set_param(4,direction_flag); | |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
137 bfly->set_cpu(spe_cpu); |
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
138 bfly->wait_for(brev); |
1643 | 139 bfly->iterate(gws[0],gws[1],m); |
1646 | 140 |
141 waitTask = bfly; | |
1551 | 142 |
143 if (direction == inverse) { | |
144 HTask *norm = manager->create_task(NORMALIZATION); | |
1643 | 145 setWorkSize(gws,lws,n,n); |
1571 | 146 norm->set_outData(0, dst, length_dst*sizeof(cl_float2)); |
1643 | 147 norm->set_param(3,n); |
1551 | 148 norm->set_cpu(spe_cpu); |
149 norm->flip(); | |
1579 | 150 norm->wait_for(bfly); |
1643 | 151 norm->iterate(gws[0],gws[0]); |
1646 | 152 |
153 waitTask = norm; | |
1551 | 154 } |
1646 | 155 return waitTask; |
1551 | 156 } |
157 | |
158 char * | |
159 init(int argc, char**argv){ | |
160 | |
161 char *filename = 0; | |
162 | |
163 for (int i = 1; argv[i]; ++i) { | |
164 if (strcmp(argv[i], "-file") == 0) { | |
165 filename = argv[i+1]; | |
166 } else if (strcmp(argv[i], "-g") == 0) { | |
167 spe_cpu = GPU_0; | |
168 } | |
169 } | |
170 if ( (argc == 1)||(filename==0)) { | |
1643 | 171 printf("Usage: ./fft -file [image filename] -cpu or -gpu\n"); |
1551 | 172 exit(-1); |
173 } | |
174 | |
175 return filename; | |
176 } | |
177 | |
178 void | |
179 run_start(TaskManager *manager,pgm_t ipgm) | |
180 { | |
1643 | 181 int n = ipgm.width; |
182 int m = (cl_int)(log((double)n)/log(2.0)); | |
183 size_t *gws = new size_t[2]; | |
184 size_t *lws = new size_t[2]; | |
1555
096412ad80fb
add opencl example
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
1551
diff
changeset
|
185 |
1646 | 186 xm = (cl_float2 *)malloc(n * n * sizeof(cl_float2)); |
187 rm = (cl_float2 *)malloc(n * n * sizeof(cl_float2)); | |
188 wm = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2)); | |
189 | |
190 HTask* waitTask; | |
1551 | 191 /* |
192 * [cl_float2] | |
193 * typedef union | |
194 * { | |
195 * cl_float CL_ALIGNED(8) s[2]; | |
196 * #if defined( __GNUC__) && ! defined( __STRICT_ANSI__ ) | |
197 * __extension__ struct{ cl_float x, y; }; | |
198 * __extension__ struct{ cl_float s0, s1; }; | |
199 * __extension__ struct{ cl_float lo, hi; }; | |
200 * #endif | |
201 * #if defined( __CL_FLOAT2__) | |
202 * __cl_float2 v2; | |
203 * #endif | |
204 * } cl_float2; | |
205 */ | |
1643 | 206 for (int i=0; i<n; i++) { |
207 for (int j=0; j < n; j++) { | |
208 ((float*)xm)[(2*n*j)+2*i+0] = (float)ipgm.buf[n*j+i]; | |
209 ((float*)xm)[(2*n*j)+2*i+1] = (float)0; | |
1551 | 210 } |
211 } | |
1579 | 212 |
213 // Create spin factor | |
1643 | 214 int length_w = n / 2; |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
215 HTask* sfac = manager->create_task(SPIN_FACT); |
1643 | 216 setWorkSize(gws,lws,n/2,1); |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
217 sfac->set_outData(0, wm, length_w*sizeof(cl_float2)); |
1643 | 218 sfac->set_param(3,n); |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
219 sfac->set_cpu(spe_cpu); |
1643 | 220 sfac->iterate(gws[0],gws[1]); |
1625
6ff0c34c8a3c
fix fft , used iterate
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1581
diff
changeset
|
221 |
1581
8ee897303cd0
fix multi_dimention
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1579
diff
changeset
|
222 // Butterfly Operation |
1646 | 223 waitTask = fftCore(manager, rm, xm, wm, m, forward,sfac); |
1551 | 224 |
225 HTaskPtr *trns = (HTask**)manager->allocate(sizeof(HTask*)*2); | |
226 | |
227 // Transpose matrix | |
1643 | 228 int length_r =n*n; |
229 setWorkSize(gws,lws,n/2,1); | |
1551 | 230 for (int i=0;i<2;i++) { |
231 trns[i]= manager->create_task(TRANSEPOSE); | |
232 trns[i]->set_inData(0, rm, length_r*sizeof(cl_float2)); | |
233 trns[i]->set_outData(0, xm, length_r*sizeof(cl_float2)); | |
1643 | 234 trns[i]->set_param(3,n); |
1551 | 235 trns[i]->set_cpu(spe_cpu); |
236 } | |
1646 | 237 // trns[0]->wait_for(sfac); |
238 trns[0]->wait_for(waitTask); | |
1643 | 239 trns[0]->iterate(gws[0],gws[1]); |
1551 | 240 // Butterfly Operation |
1646 | 241 // fftCore(manager, rm, xm, wm, m, forward); |
1551 | 242 |
243 // Apply high-pass filter | |
244 HTask *hpfl = manager->create_task(HIGH_PASS_FILTER); | |
1643 | 245 cl_int radius = n/8; |
246 setWorkSize(gws,lws,n/2,1); | |
1551 | 247 hpfl->set_outData(0, rm, length_r*sizeof(cl_float2)); |
1643 | 248 hpfl->set_param(3,n); |
249 hpfl->set_param(4,radius); | |
1551 | 250 hpfl->set_cpu(spe_cpu); |
251 hpfl->wait_for(trns[0]); | |
1643 | 252 hpfl->iterate(gws[0],gws[1]); |
1551 | 253 // Inverse FFT |
254 | |
255 // Butterfly Operation | |
1646 | 256 waitTask = fftCore(manager,xm, rm, wm, m, inverse,hpfl); |
1551 | 257 |
258 // Transpose matrix | |
1643 | 259 setWorkSize(gws,lws,n,n); |
1646 | 260 trns[1]->wait_for(waitTask); |
1643 | 261 trns[1]->iterate(gws[0],gws[1]); |
1551 | 262 |
263 // Butterfly Operation | |
264 | |
1646 | 265 waitTask = fftCore(manager,xm, rm, wm, m, inverse,trns[1]); |
1551 | 266 } |
267 | |
268 int TMmain(TaskManager *manager, int argc, char** argv) { | |
269 task_init(); | |
270 char * pgm_file = init(argc,argv); | |
271 /* Read image */ | |
272 int err = readPGM(&ipgm, pgm_file); | |
273 if (err<0) { | |
274 fprintf(stderr, "Failed to read image file.\n"); | |
275 exit(1); | |
1566 | 276 } |
1551 | 277 st_time = getTime(); |
278 run_start(manager, ipgm); | |
279 manager->set_TMend(TMend); | |
280 return 0; | |
281 } | |
282 | |
283 void | |
284 TMend(TaskManager *manager) | |
285 { | |
1646 | 286 output(); |
1551 | 287 ed_time = getTime(); |
288 fprintf(stdout, "image out put succeeded.\n"); | |
289 printf("Time: %0.6f\n",ed_time-st_time); | |
290 } |