Mercurial > hg > CbC > CbC_llvm
comparison llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll @ 173:0572611fdcc8 llvm10 llvm12
reorganization done
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 25 May 2020 11:55:54 +0900 |
parents | 1d019706d866 |
children | 2e18cbf3894f |
comparison
equal
deleted
inserted
replaced
172:9fbae9c8bf63 | 173:0572611fdcc8 |
---|---|
7 define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 { | 7 define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 { |
8 ; VARIANT0-LABEL: test_barrier: | 8 ; VARIANT0-LABEL: test_barrier: |
9 ; VARIANT0: ; %bb.0: ; %entry | 9 ; VARIANT0: ; %bb.0: ; %entry |
10 ; VARIANT0-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 | 10 ; VARIANT0-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 |
11 ; VARIANT0-NEXT: s_load_dword s2, s[0:1], 0xb | 11 ; VARIANT0-NEXT: s_load_dword s2, s[0:1], 0xb |
12 ; VARIANT0-NEXT: v_not_b32_e32 v3, v0 | |
13 ; VARIANT0-NEXT: s_mov_b32 s7, 0xf000 | 12 ; VARIANT0-NEXT: s_mov_b32 s7, 0xf000 |
14 ; VARIANT0-NEXT: s_mov_b32 s6, 0 | 13 ; VARIANT0-NEXT: s_mov_b32 s6, 0 |
15 ; VARIANT0-NEXT: v_lshlrev_b32_e32 v1, 2, v0 | 14 ; VARIANT0-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
16 ; VARIANT0-NEXT: v_mov_b32_e32 v2, 0 | 15 ; VARIANT0-NEXT: v_mov_b32_e32 v2, 0 |
| | 16 ; VARIANT0-NEXT: v_not_b32_e32 v3, v0 |
17 ; VARIANT0-NEXT: s_waitcnt lgkmcnt(0) | 17 ; VARIANT0-NEXT: s_waitcnt lgkmcnt(0) |
18 ; VARIANT0-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 | 18 ; VARIANT0-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 |
19 ; VARIANT0-NEXT: s_waitcnt vmcnt(0) expcnt(0) | 19 ; VARIANT0-NEXT: s_waitcnt vmcnt(0) expcnt(0) |
20 ; VARIANT0-NEXT: s_barrier | 20 ; VARIANT0-NEXT: s_barrier |
21 ; VARIANT0-NEXT: v_add_i32_e32 v3, vcc, s2, v3 | 21 ; VARIANT0-NEXT: v_add_i32_e32 v3, vcc, s2, v3 |
28 ; | 28 ; |
29 ; VARIANT1-LABEL: test_barrier: | 29 ; VARIANT1-LABEL: test_barrier: |
30 ; VARIANT1: ; %bb.0: ; %entry | 30 ; VARIANT1: ; %bb.0: ; %entry |
31 ; VARIANT1-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 | 31 ; VARIANT1-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 |
32 ; VARIANT1-NEXT: s_load_dword s2, s[0:1], 0xb | 32 ; VARIANT1-NEXT: s_load_dword s2, s[0:1], 0xb |
33 ; VARIANT1-NEXT: v_not_b32_e32 v3, v0 | |
34 ; VARIANT1-NEXT: s_mov_b32 s7, 0xf000 | 33 ; VARIANT1-NEXT: s_mov_b32 s7, 0xf000 |
35 ; VARIANT1-NEXT: s_mov_b32 s6, 0 | 34 ; VARIANT1-NEXT: s_mov_b32 s6, 0 |
36 ; VARIANT1-NEXT: v_lshlrev_b32_e32 v1, 2, v0 | 35 ; VARIANT1-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
37 ; VARIANT1-NEXT: v_mov_b32_e32 v2, 0 | 36 ; VARIANT1-NEXT: v_mov_b32_e32 v2, 0 |
| | 37 ; VARIANT1-NEXT: v_not_b32_e32 v3, v0 |
38 ; VARIANT1-NEXT: s_waitcnt lgkmcnt(0) | 38 ; VARIANT1-NEXT: s_waitcnt lgkmcnt(0) |
39 ; VARIANT1-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 | 39 ; VARIANT1-NEXT: buffer_store_dword v0, v[1:2], s[4:7], 0 addr64 |
40 ; VARIANT1-NEXT: s_barrier | 40 ; VARIANT1-NEXT: s_barrier |
41 ; VARIANT1-NEXT: v_add_i32_e32 v3, vcc, s2, v3 | 41 ; VARIANT1-NEXT: v_add_i32_e32 v3, vcc, s2, v3 |
42 ; VARIANT1-NEXT: v_ashrrev_i32_e32 v4, 31, v3 | 42 ; VARIANT1-NEXT: v_ashrrev_i32_e32 v4, 31, v3 |
49 ; | 49 ; |
50 ; VARIANT2-LABEL: test_barrier: | 50 ; VARIANT2-LABEL: test_barrier: |
51 ; VARIANT2: ; %bb.0: ; %entry | 51 ; VARIANT2: ; %bb.0: ; %entry |
52 ; VARIANT2-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 | 52 ; VARIANT2-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
53 ; VARIANT2-NEXT: s_load_dword s0, s[0:1], 0x2c | 53 ; VARIANT2-NEXT: s_load_dword s0, s[0:1], 0x2c |
54 ; VARIANT2-NEXT: v_lshlrev_b32_e32 v3, 2, v0 | 54 ; VARIANT2-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
55 ; VARIANT2-NEXT: s_waitcnt lgkmcnt(0) | 55 ; VARIANT2-NEXT: s_waitcnt lgkmcnt(0) |
56 ; VARIANT2-NEXT: v_mov_b32_e32 v4, s3 | 56 ; VARIANT2-NEXT: v_mov_b32_e32 v2, s3 |
57 ; VARIANT2-NEXT: v_xad_u32 v1, v0, -1, s0 | 57 ; VARIANT2-NEXT: v_xad_u32 v3, v0, -1, s0 |
58 ; VARIANT2-NEXT: v_ashrrev_i32_e32 v2, 31, v1 | 58 ; VARIANT2-NEXT: v_ashrrev_i32_e32 v4, 31, v3 |
| | 59 ; VARIANT2-NEXT: v_add_co_u32_e32 v1, vcc, s2, v1 |
| | 60 ; VARIANT2-NEXT: v_lshlrev_b64 v[3:4], 2, v[3:4] |
| | 61 ; VARIANT2-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc |
| | 62 ; VARIANT2-NEXT: global_store_dword v[1:2], v0, off |
| | 63 ; VARIANT2-NEXT: v_mov_b32_e32 v0, s3 |
59 ; VARIANT2-NEXT: v_add_co_u32_e32 v3, vcc, s2, v3 | 64 ; VARIANT2-NEXT: v_add_co_u32_e32 v3, vcc, s2, v3 |
60 ; VARIANT2-NEXT: v_lshlrev_b64 v[1:2], 2, v[1:2] | 65 ; VARIANT2-NEXT: v_addc_co_u32_e32 v4, vcc, v0, v4, vcc |
61 ; VARIANT2-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc | |
62 ; VARIANT2-NEXT: global_store_dword v[3:4], v0, off | |
63 ; VARIANT2-NEXT: v_mov_b32_e32 v5, s3 | |
64 ; VARIANT2-NEXT: v_add_co_u32_e32 v0, vcc, s2, v1 | |
65 ; VARIANT2-NEXT: v_addc_co_u32_e32 v1, vcc, v5, v2, vcc | |
66 ; VARIANT2-NEXT: s_waitcnt vmcnt(0) | 66 ; VARIANT2-NEXT: s_waitcnt vmcnt(0) |
67 ; VARIANT2-NEXT: s_barrier | 67 ; VARIANT2-NEXT: s_barrier |
68 ; VARIANT2-NEXT: global_load_dword v0, v[0:1], off | 68 ; VARIANT2-NEXT: global_load_dword v0, v[3:4], off |
69 ; VARIANT2-NEXT: s_waitcnt vmcnt(0) | 69 ; VARIANT2-NEXT: s_waitcnt vmcnt(0) |
70 ; VARIANT2-NEXT: global_store_dword v[3:4], v0, off | 70 ; VARIANT2-NEXT: global_store_dword v[1:2], v0, off |
71 ; VARIANT2-NEXT: s_endpgm | 71 ; VARIANT2-NEXT: s_endpgm |
72 ; | 72 ; |
73 ; VARIANT3-LABEL: test_barrier: | 73 ; VARIANT3-LABEL: test_barrier: |
74 ; VARIANT3: ; %bb.0: ; %entry | 74 ; VARIANT3: ; %bb.0: ; %entry |
75 ; VARIANT3-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 | 75 ; VARIANT3-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 |
76 ; VARIANT3-NEXT: s_load_dword s0, s[0:1], 0x2c | 76 ; VARIANT3-NEXT: s_load_dword s0, s[0:1], 0x2c |
77 ; VARIANT3-NEXT: v_lshlrev_b32_e32 v3, 2, v0 | 77 ; VARIANT3-NEXT: v_lshlrev_b32_e32 v1, 2, v0 |
78 ; VARIANT3-NEXT: s_waitcnt lgkmcnt(0) | 78 ; VARIANT3-NEXT: s_waitcnt lgkmcnt(0) |
79 ; VARIANT3-NEXT: v_mov_b32_e32 v4, s3 | 79 ; VARIANT3-NEXT: v_mov_b32_e32 v2, s3 |
80 ; VARIANT3-NEXT: v_xad_u32 v1, v0, -1, s0 | 80 ; VARIANT3-NEXT: v_xad_u32 v3, v0, -1, s0 |
81 ; VARIANT3-NEXT: v_ashrrev_i32_e32 v2, 31, v1 | 81 ; VARIANT3-NEXT: v_ashrrev_i32_e32 v4, 31, v3 |
| | 82 ; VARIANT3-NEXT: v_add_co_u32_e32 v1, vcc, s2, v1 |
| | 83 ; VARIANT3-NEXT: v_lshlrev_b64 v[3:4], 2, v[3:4] |
| | 84 ; VARIANT3-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc |
| | 85 ; VARIANT3-NEXT: global_store_dword v[1:2], v0, off |
| | 86 ; VARIANT3-NEXT: v_mov_b32_e32 v0, s3 |
82 ; VARIANT3-NEXT: v_add_co_u32_e32 v3, vcc, s2, v3 | 87 ; VARIANT3-NEXT: v_add_co_u32_e32 v3, vcc, s2, v3 |
83 ; VARIANT3-NEXT: v_lshlrev_b64 v[1:2], 2, v[1:2] | 88 ; VARIANT3-NEXT: v_addc_co_u32_e32 v4, vcc, v0, v4, vcc |
84 ; VARIANT3-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc | |
85 ; VARIANT3-NEXT: global_store_dword v[3:4], v0, off | |
86 ; VARIANT3-NEXT: v_mov_b32_e32 v5, s3 | |
87 ; VARIANT3-NEXT: v_add_co_u32_e32 v0, vcc, s2, v1 | |
88 ; VARIANT3-NEXT: v_addc_co_u32_e32 v1, vcc, v5, v2, vcc | |
89 ; VARIANT3-NEXT: s_barrier | 89 ; VARIANT3-NEXT: s_barrier |
90 ; VARIANT3-NEXT: global_load_dword v0, v[0:1], off | 90 ; VARIANT3-NEXT: global_load_dword v0, v[3:4], off |
91 ; VARIANT3-NEXT: s_waitcnt vmcnt(0) | 91 ; VARIANT3-NEXT: s_waitcnt vmcnt(0) |
92 ; VARIANT3-NEXT: global_store_dword v[3:4], v0, off | 92 ; VARIANT3-NEXT: global_store_dword v[1:2], v0, off |
93 ; VARIANT3-NEXT: s_endpgm | 93 ; VARIANT3-NEXT: s_endpgm |
94 entry: | 94 entry: |
95 %tmp = call i32 @llvm.amdgcn.workitem.id.x() | 95 %tmp = call i32 @llvm.amdgcn.workitem.id.x() |
96 %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp | 96 %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp |
97 store i32 %tmp, i32 addrspace(1)* %tmp1 | 97 store i32 %tmp, i32 addrspace(1)* %tmp1 |