121
|
1 ; RUN: llc -march=amdgcn < %s | FileCheck %s
|
|
2
|
|
3 ; Check we can compile this bugpoint-reduced test without an
|
|
4 ; infinite loop in TLI.SimplifyDemandedBits() due to failure
|
|
5 ; to use the return value of TLO.DAG.UpdateNodeOperands().
|
|
6
|
|
7 ; Check that code was generated; we know there will be
|
|
8 ; an s_endpgm, so check for it.
|
|
9
|
|
; Unnamed external array of 462 floats in address space 3. @foo stores to it
; in bb21 and loads from it in bb31.
10 @0 = external unnamed_addr addrspace(3) global [462 x float], align 4
|
|
11
|
|
; Intrinsics called by @foo. All three share attribute group #0.
; workitem.id.y: its result is masked with 15 in @foo's entry block (%tmp5).
12 ; Function Attrs: nounwind readnone speculatable
|
|
13 declare i32 @llvm.amdgcn.workitem.id.y() #0
|
|
14
|
|
; workitem.id.x: its result (%tmp4) is the initial value of the %tmp22 phi in bb21.
15 ; Function Attrs: nounwind readnone speculatable
|
|
16 declare i32 @llvm.amdgcn.workitem.id.x() #0
|
|
17
|
|
; fmuladd.f32: called twice in bb31; the second call consumes the first one's result.
18 ; Function Attrs: nounwind readnone speculatable
|
|
19 declare float @llvm.fmuladd.f32(float, float, float) #0
|
|
20
|
|
21 ; CHECK: s_endpgm
|
|
; Kernel @foo: bugpoint-reduced control flow, kept exactly as reduced.
; None of the four arguments (%arg, %arg1, %arg2, %arg3) is referenced in the
; body, and most branch conditions and several operands are undef.
; NOTE(review): which instructions actually drive the SimplifyDemandedBits()
; path is not visible from the IR alone -- do not "clean up" the redundant
; operations (add/sub of 0, unused values) without re-checking the original bug.
22 define amdgpu_kernel void @foo(float addrspace(1)* noalias nocapture readonly %arg, float addrspace(1)* noalias nocapture readonly %arg1, float addrspace(1)* noalias nocapture %arg2, float %arg3) local_unnamed_addr !reqd_work_group_size !0 {
|
|
; Entry: %tmp5 = workitem.id.y & 15 and %tmp6 = %tmp5 * 21. The following
; sub/add-by-zero chain leaves %tmp9 with the same value as %tmp6.
; %tmp10 is a pointer to element 0 of @0.
23 bb:
|
|
24 %tmp = tail call i32 @llvm.amdgcn.workitem.id.y()
|
|
25 %tmp4 = tail call i32 @llvm.amdgcn.workitem.id.x()
|
|
26 %tmp5 = and i32 %tmp, 15
|
|
27 %tmp6 = mul nuw nsw i32 %tmp5, 21
|
|
28 %tmp7 = sub i32 %tmp6, 0
|
|
29 %tmp8 = add i32 %tmp7, 0
|
|
30 %tmp9 = add i32 %tmp8, 0
|
|
31 %tmp10 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 0
|
|
32 br label %bb12
|
|
33
|
|
; Exit path of the outer loop: an undef condition picks bb37 or goes straight to bb38.
34 bb11: ; preds = %bb30
|
|
35 br i1 undef, label %bb37, label %bb38
|
|
36
|
|
; Outer loop header. The condition is the constant false, so control always
; takes the %.loopexit145 edge; the %.preheader edge is never taken.
37 bb12: ; preds = %bb30, %bb
|
|
38 br i1 false, label %.preheader, label %.loopexit145
|
|
39
|
|
40 .loopexit145: ; preds = %.preheader, %bb12
|
|
41 br label %bb13
|
|
42
|
|
; Middle loop header: %tmp14 starts at %tmp5 and is advanced by 16 in
; %.loopexit (%tmp20). %tmp16 = %tmp14 * 21 feeds the store index in bb21.
43 bb13: ; preds = %.loopexit, %.loopexit145
|
|
44 %tmp14 = phi i32 [ %tmp5, %.loopexit145 ], [ %tmp20, %.loopexit ]
|
|
45 %tmp15 = add nsw i32 %tmp14, -3
|
|
46 %tmp16 = mul i32 %tmp14, 21
|
|
47 br i1 undef, label %bb17, label %.loopexit
|
|
48
|
|
; %tmp18 is only used by %tmp19, and %tmp19 has no users in this function.
49 bb17: ; preds = %bb13
|
|
50 %tmp18 = mul i32 %tmp15, 224
|
|
51 %tmp19 = add i32 undef, %tmp18
|
|
52 br label %bb21
|
|
53
|
|
; Middle loop latch: steps %tmp14 by 16, then an undef condition either
; repeats bb13 or leaves towards bb26.
54 .loopexit: ; preds = %bb21, %bb13
|
|
55 %tmp20 = add nuw nsw i32 %tmp14, 16
|
|
56 br i1 undef, label %bb13, label %bb26
|
|
57
|
|
; Inner store loop: %tmp22 starts at workitem.id.x (%tmp4) and advances by 8.
; Each iteration stores an undef float to element (%tmp22 + %tmp16) of @0.
58 bb21: ; preds = %bb21, %bb17
|
|
59 %tmp22 = phi i32 [ %tmp4, %bb17 ], [ %tmp25, %bb21 ]
|
|
60 %tmp23 = add i32 %tmp22, %tmp16
|
|
61 %tmp24 = getelementptr inbounds float, float addrspace(3)* %tmp10, i32 %tmp23
|
|
62 store float undef, float addrspace(3)* %tmp24, align 4
|
|
63 %tmp25 = add nuw i32 %tmp22, 8
|
|
64 br i1 undef, label %bb21, label %.loopexit
|
|
65
|
|
66 bb26: ; preds = %.loopexit
|
|
67 br label %bb31
|
|
68
|
|
; Counting loop: %tmp27 starts as undef and grows by 128 while the new value
; is unsigned-less-than 1568. Its only outside predecessor is bb12's
; constant-false edge.
69 .preheader: ; preds = %.preheader, %bb12
|
|
70 %tmp27 = phi i32 [ %tmp28, %.preheader ], [ undef, %bb12 ]
|
|
71 %tmp28 = add nuw i32 %tmp27, 128
|
|
72 %tmp29 = icmp ult i32 %tmp28, 1568
|
|
73 br i1 %tmp29, label %.preheader, label %.loopexit145
|
|
74
|
|
; Outer loop latch: an undef condition either leaves via bb11 or re-enters bb12.
75 bb30: ; preds = %bb31
|
|
76 br i1 undef, label %bb11, label %bb12
|
|
77
|
|
; Load loop: reads element %tmp32 of @0 and chains two fmuladd calls; %tmp36
; has no users. %tmp32 is %tmp9 when entered from bb26 and undef on the back edge.
78 bb31: ; preds = %bb31, %bb26
|
|
79 %tmp32 = phi i32 [ %tmp9, %bb26 ], [ undef, %bb31 ]
|
|
80 %tmp33 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 %tmp32
|
|
81 %tmp34 = load float, float addrspace(3)* %tmp33, align 4
|
|
82 %tmp35 = tail call float @llvm.fmuladd.f32(float %tmp34, float undef, float undef)
|
|
83 %tmp36 = tail call float @llvm.fmuladd.f32(float undef, float undef, float %tmp35)
|
|
84 br i1 undef, label %bb30, label %bb31
|
|
85
|
|
86 bb37: ; preds = %bb11
|
|
87 br label %bb38
|
|
88
|
|
89 bb38: ; preds = %bb37, %bb11
|
|
90 ret void
|
|
91 }
|
|
92
|
|
; Attribute group #0 is attached to the three intrinsic declarations in this file.
93 attributes #0 = { nounwind readnone speculatable }
|
|
94
|
|
; Operand of the !reqd_work_group_size attachment on @foo.
; NOTE(review): presumably the X, Y, Z work-group dimensions -- confirm against the AMDGPU docs.
95 !0 = !{i32 8, i32 16, i32 1}
|