annotate example/fft/cuda/butterfly.cu @ 2069:26aa08c9a1de draft default tip

cuda example fix
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sun, 12 Feb 2017 10:04:55 +0900
parents 433043c56a0c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1975
4cf85b48ab9e running fft with CudaScheduler, but wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff changeset
extern "C" {

/*
 * butterfly -- one radix-2 butterfly pass over rows of interleaved complex
 * floats, updating x in place.
 *
 * Parameters
 *   param  param[0] = n              elements (complex values) per row
 *          param[1] = direction_flag when equal to 0x80000000 the imaginary
 *                                    part of the factor read from w is negated
 *                                    (presumably the inverse transform -- confirm
 *                                    against the host code)
 *          param[2] = iter           1-based stage number; pair distance is
 *                                    2^(iter-1)
 *   x      complex data, element i stored as x[2*i] (re), x[2*i+1] (im);
 *          row `nid` starts at element nid*n
 *   w      complex factors in the same interleaved layout, indexed by
 *          (n >> iter) * offset-within-group
 *
 * Thread layout
 *   x dimension: one thread per butterfly pair within a row
 *   y dimension: one thread per row
 *
 * Threads whose pair would not lie entirely inside their own row do nothing,
 * so an over-sized x grid is harmless.  The number of rows is not passed to
 * the kernel, so the y extent of the launch must match the data exactly.
 *
 * NOTE(review): the stage arithmetic assumes n is a power of two and
 * iter <= log2(n) -- confirm with the caller.
 * NOTE(review): direction_flag is compared as a `long`; a host that stores a
 * sign-extended 32-bit 0x80000000 would not match on LP64 -- confirm.
 */
__global__ void
butterfly(long* param, float* x, float* w)
{
    // Widen before multiplying so very large grids cannot wrap in 32 bits.
    unsigned long gid = (unsigned long)blockIdx.x * blockDim.x + threadIdx.x;
    unsigned long nid = (unsigned long)blockIdx.y * blockDim.y + threadIdx.y;

    long n              = param[0];
    long direction_flag = param[1];
    long iter           = param[2];

    // Reject parameters that would make the shifts below undefined.
    if (n < 2 || iter < 1 || iter > 62) {
        return;
    }

    unsigned long halfShift = (unsigned long)(iter - 1);
    unsigned long pairDist  = 1UL << halfShift;          // distance between the two inputs
    unsigned long grpDist   = 1UL << iter;               // distance between butterfly groups
    unsigned long grpNum    = (unsigned long)n >> iter;  // number of groups per row
    unsigned long grpBase   = (gid >> halfShift) * grpDist;
    unsigned long grpOffset = gid & (pairDist - 1);

    // Position of the lower element inside its row; the upper one is pairDist
    // further on.  Skip threads whose pair would leave the row.
    unsigned long inRow = grpBase + grpOffset;
    if (inRow + pairDist >= (unsigned long)n) {
        return;
    }

    unsigned long a = nid * (unsigned long)n + inRow;
    unsigned long b = a + pairDist;
    unsigned long l = grpNum * grpOffset;

    float xaRe = x[2*a];
    float xaIm = x[2*a + 1];
    float xbRe = x[2*b];
    float xbIm = x[2*b + 1];

    float wRe = w[2*l];
    float wIm = w[2*l + 1];
    if (direction_flag == 0x80000000) {
        wIm = -wIm;
    }

    // (xa + xb*w) and (xa - xb*w), written out as complex arithmetic.
    x[2*a]     = xaRe + xbRe*wRe - xbIm*wIm;
    x[2*a + 1] = xaIm + xbRe*wIm + xbIm*wRe;
    x[2*b]     = xaRe - xbRe*wRe + xbIm*wIm;
    x[2*b + 1] = xaIm - xbRe*wIm - xbIm*wRe;
}

}