annotate example/fft/gpu/butterfly.cl @ 2044:66aa91f6f4df draft

merge
author Shin,ichi Uehara
date Wed, 25 Mar 2015 19:13:56 +0900
parents 144e573b030b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1673
2c3adce7eb40 fix fft on gpu
Yuhi TOMARI <yuhi@cr.ie.u-ryukyu.ac.jp>
parents:
diff changeset
/*
 * butterfly - apply one radix-2 butterfly stage to a batch of complex rows.
 *
 * Each work-item combines one pair of elements (a, b) of x_in with a twiddle
 * factor from w and writes the pair to the same positions of x_out:
 *
 *     x_out[a] = x_in[a] + x_in[b] * w[l]
 *     x_out[b] = x_in[a] - x_in[b] * w[l]
 *
 * param[0]  n     - row length; row `nid` starts at element nid * n.
 * param[1]  flag  - when equal to 0x80000000 the twiddle factor is conjugated
 *                   (its imaginary part is negated) before use.
 * param[2]  iter  - stage number; used as the shift count (iter - 1), so it is
 *                   expected to be >= 1.
 * x_in            - input samples, (x, y) = (real, imaginary).
 * w               - twiddle factor table, indexed by (n >> iter) * offset.
 * x_out           - output samples, written at the same indices that are read.
 *
 * Work-item dimension 0 selects the butterfly inside a row, dimension 1
 * selects the row. No bounds check is performed on either id.
 * NOTE(review): presumably the host launches n/2 work-items in dimension 0
 * and n is a power of two - confirm against the caller.
 */
__kernel
void butterfly(__constant long *param, __global float2 *x_in, __global float2 *w, __global float2 *x_out)
{
    unsigned long gid = (unsigned long)get_global_id(0);
    unsigned long nid = (unsigned long)get_global_id(1);

    long n = param[0];
    unsigned long direction_flag = (unsigned long)param[1];
    long iter = param[2];

    /*
     * All index values are kept in 64-bit variables. The expressions below
     * already evaluate in 64-bit because gid, nid and n are 64-bit; storing
     * them in `int` would truncate nid * n + ... for large batches.
     */
    unsigned long butterflySize = 1UL << (iter - 1);   /* distance between a and b   */
    unsigned long butterflyGrpDist = 1UL << iter;      /* distance between groups    */
    long butterflyGrpNum = n >> iter;                  /* twiddle stride for stage   */
    unsigned long butterflyGrpBase = (gid >> (iter - 1)) * butterflyGrpDist;
    unsigned long butterflyGrpOffset = gid & (butterflySize - 1);

    unsigned long a = nid * n + butterflyGrpBase + butterflyGrpOffset;
    unsigned long b = a + butterflySize;

    unsigned long l = butterflyGrpNum * butterflyGrpOffset;

    /* Read both inputs before any write so x_in and x_out may share indices. */
    float2 xa = x_in[a];
    float2 xb = x_in[b];

    float2 wab = w[l];
    if (direction_flag == 0x80000000) {
        wab.y = -wab.y;
    }

    /*
     * Complex multiply-add written out per component:
     *   xb * wab = (xb.x*wab.x - xb.y*wab.y) + i (xb.x*wab.y + xb.y*wab.x)
     * The terms are accumulated left to right, starting from xa.
     */
    float2 resa, resb;

    resa.x = xa.x + xb.x * wab.x - xb.y * wab.y;
    resa.y = xa.y + xb.x * wab.y + xb.y * wab.x;

    resb.x = xa.x - xb.x * wab.x + xb.y * wab.y;
    resb.y = xa.y - xb.x * wab.y - xb.y * wab.x;

    x_out[a] = resa;
    x_out[b] = resb;
}