150
|
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
173
|
2
|
|
3 ; NOTE: The checks for opt are NOT added by the update script. Those
|
|
4 ; checks are looking for the absence of specific metadata, which
|
|
5 ; cannot be expressed reliably by the generated checks.
|
150
|
6
|
173
|
7 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefix=ISA
|
|
8 ; RUN: opt --amdgpu-annotate-uniform -S %s | FileCheck %s -check-prefix=UNIFORM
|
|
9 ; RUN: opt --amdgpu-annotate-uniform --si-annotate-control-flow -S %s | FileCheck %s -check-prefix=CONTROLFLOW
|
|
10
|
|
11 ; This module creates a divergent branch in block Flow2. The branch is
|
|
12 ; marked as divergent by the divergence analysis but the condition is
|
|
13 ; not. This test ensures that the divergence of the branch is tested,
|
|
14 ; not its condition, so that the branch is correctly emitted as divergent.
|
150
|
15
|
|
16 target triple = "amdgcn-mesa-mesa3d"
|
|
17
|
173
|
18 define amdgpu_ps void @main(i32 %0, float %1) { ; both arguments are consumed only by the interp.p1 call in start
|
|
19 ; ISA-LABEL: main:
|
|
20 ; ISA: ; %bb.0: ; %start
|
|
21 ; ISA-NEXT: v_readfirstlane_b32 s0, v0
|
|
22 ; ISA-NEXT: s_mov_b32 m0, s0
|
236
|
23 ; ISA-NEXT: s_mov_b32 s8, 0
|
173
|
24 ; ISA-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x
|
|
25 ; ISA-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0
|
236
|
26 ; ISA-NEXT: s_mov_b64 s[0:1], 0
|
173
|
27 ; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5
|
236
|
28 ; ISA-NEXT: ; implicit-def: $sgpr2_sgpr3
|
|
29 ; ISA-NEXT: s_branch .LBB0_3
|
|
30 ; ISA-NEXT: .LBB0_1: ; %Flow1
|
173
|
31 ; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
236
|
32 ; ISA-NEXT: s_or_b64 exec, exec, s[6:7]
|
|
33 ; ISA-NEXT: s_mov_b64 s[6:7], 0
|
|
34 ; ISA-NEXT: .LBB0_2: ; %Flow
|
173
|
35 ; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
236
|
36 ; ISA-NEXT: s_and_b64 s[10:11], exec, s[4:5]
|
|
37 ; ISA-NEXT: s_or_b64 s[0:1], s[10:11], s[0:1]
|
|
38 ; ISA-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
|
|
39 ; ISA-NEXT: s_and_b64 s[6:7], s[6:7], exec
|
|
40 ; ISA-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
|
|
41 ; ISA-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
|
42 ; ISA-NEXT: s_cbranch_execz .LBB0_6
|
|
43 ; ISA-NEXT: .LBB0_3: ; %loop
|
173
|
44 ; ISA-NEXT: ; =>This Inner Loop Header: Depth=1
|
236
|
45 ; ISA-NEXT: s_or_b64 s[4:5], s[4:5], exec
|
|
46 ; ISA-NEXT: s_cmp_lt_u32 s8, 32
|
|
47 ; ISA-NEXT: s_mov_b64 s[6:7], -1
|
|
48 ; ISA-NEXT: s_cbranch_scc0 .LBB0_2
|
173
|
49 ; ISA-NEXT: ; %bb.4: ; %endif1
|
|
50 ; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
236
|
51 ; ISA-NEXT: s_mov_b64 s[4:5], -1
|
|
52 ; ISA-NEXT: s_and_saveexec_b64 s[6:7], vcc
|
|
53 ; ISA-NEXT: s_cbranch_execz .LBB0_1
|
173
|
54 ; ISA-NEXT: ; %bb.5: ; %endif2
|
|
55 ; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
|
236
|
56 ; ISA-NEXT: s_add_i32 s8, s8, 1
|
|
57 ; ISA-NEXT: s_xor_b64 s[4:5], exec, -1
|
|
58 ; ISA-NEXT: s_branch .LBB0_1
|
|
59 ; ISA-NEXT: .LBB0_6: ; %Flow2
|
|
60 ; ISA-NEXT: s_or_b64 exec, exec, s[0:1]
|
173
|
61 ; ISA-NEXT: v_mov_b32_e32 v1, 0
|
236
|
62 ; ISA-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
|
173
|
63 ; ISA-NEXT: ; %bb.7: ; %if1
|
|
64 ; ISA-NEXT: v_sqrt_f32_e32 v1, v0
|
|
65 ; ISA-NEXT: ; %bb.8: ; %endloop
|
|
66 ; ISA-NEXT: s_or_b64 exec, exec, s[0:1]
|
|
67 ; ISA-NEXT: exp mrt0 v1, v1, v1, v1 done vm
|
|
68 ; ISA-NEXT: s_endpgm
|
150
|
69 start:
|
|
70 %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0) ; computed once; reused by the compare in endif1 and the sqrt in if1
|
|
71 br label %loop
|
|
72
|
173
|
73 loop: ; preds = %Flow, %start
|
|
74 %v1 = phi i32 [ 0, %start ], [ %6, %Flow ] ; loop counter: 0 on entry, %6 on the back edge from Flow
|
150
|
75 %v2 = icmp ugt i32 %v1, 31 ; true once the (unsigned) counter exceeds 31
|
173
|
76 %2 = xor i1 %v2, true ; logical NOT of %v2
|
|
77 br i1 %2, label %endif1, label %Flow ; counter still <= 31: run the body (endif1); otherwise go straight to Flow
|
|
78
|
|
79 Flow1: ; preds = %endif2, %endif1
|
|
80 %3 = phi i32 [ %v5, %endif2 ], [ undef, %endif1 ] ; incremented counter from endif2; undef on the edge straight from endif1
|
|
81 %4 = phi i1 [ false, %endif2 ], [ true, %endif1 ] ; feeds %7 in Flow: true only on the edge straight from endif1
|
|
82 br label %Flow
|
150
|
83
|
173
|
84 ; UNIFORM-LABEL: Flow2:
|
|
85 ; UNIFORM-NEXT: br i1 %8, label %if1, label %endloop
|
|
86 ; UNIFORM-NOT: !amdgpu.uniform
|
|
87 ; UNIFORM: if1:
|
|
88
|
|
89 ; CONTROLFLOW-LABEL: Flow2:
|
|
90 ; CONTROLFLOW-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %{{.*}})
|
|
91 ; CONTROLFLOW-NEXT: [[IF:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %{{.*}})
|
|
92 ; CONTROLFLOW-NEXT: [[COND:%.*]] = extractvalue { i1, i64 } [[IF]], 0
|
|
93 ; CONTROLFLOW-NEXT: %{{.*}} = extractvalue { i1, i64 } [[IF]], 1
|
|
94 ; CONTROLFLOW-NEXT: br i1 [[COND]], label %if1, label %endloop
|
|
95
|
|
96 Flow2: ; preds = %Flow
|
|
97 br i1 %8, label %if1, label %endloop ; branch under test (see file header); %8 is true only when Flow was entered from loop
|
|
98
|
|
99 if1: ; preds = %Flow2
|
150
|
100 %v3 = call float @llvm.sqrt.f32(float %v0) ; the only non-zero candidate for %v6 in endloop
|
|
101 br label %endloop
|
|
102
|
173
|
103 endif1: ; preds = %loop
|
150
|
104 %v4 = fcmp ogt float %v0, 0.000000e+00 ; ordered "%v0 > 0.0" test
|
173
|
105 %5 = xor i1 %v4, true ; logical NOT of %v4
|
|
106 br i1 %5, label %endif2, label %Flow1 ; compare false: increment in endif2; compare true: skip the increment
|
150
|
107
|
173
|
108 Flow: ; preds = %Flow1, %loop
|
|
109 %6 = phi i32 [ %3, %Flow1 ], [ undef, %loop ] ; next counter value; undef on the edge from loop
|
|
110 %7 = phi i1 [ %4, %Flow1 ], [ true, %loop ] ; loop-exit flag: true sends control to Flow2
|
|
111 %8 = phi i1 [ false, %Flow1 ], [ true, %loop ] ; Flow2 condition: true only on the edge from loop
|
|
112 br i1 %7, label %Flow2, label %loop ; leave the loop when %7 is true, else take the back edge
|
|
113
|
|
114 endif2: ; preds = %endif1
|
150
|
115 %v5 = add i32 %v1, 1 ; counter + 1; reaches %v1 via %3 (Flow1) and %6 (Flow)
|
173
|
116 br label %Flow1
|
150
|
117
|
173
|
118 endloop: ; preds = %if1, %Flow2
|
|
119 %v6 = phi float [ 0.000000e+00, %Flow2 ], [ %v3, %if1 ] ; 0.0 when if1 was skipped, else the sqrt result
|
|
120 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %v6, float %v6, float %v6, float %v6, i1 true, i1 true) ; %v6 is passed as all four float operands
|
150
|
121 ret void
|
|
122 }
|
|
123
|
173
|
124 ; Function Attrs: nounwind readnone speculatable willreturn
|
|
125 declare float @llvm.sqrt.f32(float) #0
|
|
126
|
|
127 ; Function Attrs: nounwind readnone speculatable
|
|
128 declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #1
|
150
|
129
|
173
|
130 ; Function Attrs: inaccessiblememonly nounwind writeonly
|
|
131 declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #2
|
|
132
|
|
133 attributes #0 = { nounwind readnone speculatable willreturn }
|
|
134 attributes #1 = { nounwind readnone speculatable }
|
|
135 attributes #2 = { inaccessiblememonly nounwind writeonly }
|