Mercurial > hg > Game > Cerium
annotate example/fft/cuda/butterfly.cu @ 2069:26aa08c9a1de draft default tip
cuda example fix
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 12 Feb 2017 10:04:55 +0900 |
parents | 433043c56a0c |
children |
rev | line source |
---|---|
1975
4cf85b48ab9e
running fft with CudaScheduler, but wrong result
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
extern "C" {
/*
 * butterfly -- applies one radix-2 butterfly per thread, in place on x.
 *
 * Parameters
 *   param  three longs read by the kernel:
 *            param[0] = n     row length in complex elements (each row is
 *                             addressed as nid * n)
 *            param[1] = flag  when its low 32 bits equal 0x80000000 the
 *                             imaginary part of the twiddle is negated
 *                             (complex conjugate); presumably this selects
 *                             the inverse transform -- confirm against the
 *                             host code
 *            param[2] = iter  stage number; the butterfly span is
 *                             2^(iter-1) elements
 *   x      complex data stored as interleaved floats: element k occupies
 *          x[2*k] (real) and x[2*k+1] (imaginary); updated in place
 *   w      twiddle table in the same interleaved layout; only read
 *
 * Launch layout
 *   The x-dimension global index selects the butterfly inside a row (valid
 *   range 0 .. n/2 - 1), the y-dimension global index selects the row.
 *   Threads whose x index is >= n/2 exit without touching memory.  The
 *   number of rows is not passed to the kernel, so the y extent of the
 *   grid must match the row count exactly.
 */
__global__ void
butterfly(long* param, float* x, float* w)
{
    // Widen before multiplying so the product is not computed in 32 bits.
    unsigned long gid = (unsigned long)blockIdx.x * blockDim.x + threadIdx.x;
    unsigned long nid = (unsigned long)blockIdx.y * blockDim.y + threadIdx.y;

    long n = param[0];
    long direction_flag = param[1];
    long iter = param[2];

    // A row of n elements holds n/2 butterflies.  A larger gid would index
    // past the end of this row (into the next row or out of the buffer).
    if (gid >= (unsigned long)(n >> 1)) {
        return;
    }

    // 64-bit index arithmetic: the subscripts below are doubled (2*a, 2*b),
    // which overflows int for large batches.
    const long long butterflySize      = 1LL << (iter - 1);
    const long long butterflyGrpDist   = 1LL << iter;
    const long long butterflyGrpNum    = n >> iter;
    const long long butterflyGrpBase   = (long long)(gid >> (iter - 1)) * butterflyGrpDist;
    const long long butterflyGrpOffset = (long long)gid & (butterflySize - 1);

    // a and b are the two elements combined by this butterfly.
    const long long a = (long long)nid * n + butterflyGrpBase + butterflyGrpOffset;
    const long long b = a + butterflySize;

    // Index of the twiddle factor for this butterfly.
    const long long l = butterflyGrpNum * butterflyGrpOffset;

    const float ar = x[2*a];
    const float ai = x[2*a+1];
    const float br = x[2*b];
    const float bi = x[2*b+1];

    const float wr = w[2*l];
    float wi = w[2*l+1];
    // Compare only the low 32 bits so the flag matches whether the host
    // zero-extended or sign-extended a 32-bit 0x80000000 into the long.
    if ((unsigned int)direction_flag == 0x80000000u) {
        wi = -wi;
    }

    // x[a] = x[a] + x[b]*w ; x[b] = x[a] - x[b]*w   (complex arithmetic)
    x[2*a]   = ar + br*wr - bi*wi;
    x[2*a+1] = ai + br*wi + bi*wr;
    x[2*b]   = ar - br*wr + bi*wi;
    x[2*b+1] = ai - br*wi - bi*wr;
}
}