134
|
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
|
2
|
|
; getlod variant, single live element.
; The intrinsic returns two floats, but only element 1 survives: the shuffle
; moves source element 1 into lane 0 of a four-wide vector and lane 0 is the
; value stored. The checks below expect the image instruction to be emitted
; with dmask:0x2 writing only v0, with no other mention of v0 or v1 before the
; store of v0.
; GCN-LABEL: {{^}}adjust_writemask_crash_0_nochain:
; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
main_body:
  %lod = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
  %lod.bits = bitcast <2 x float> %lod to <2 x i32>
  ; Widen to four lanes; only lane 0 is defined and it holds source element 1.
  %wide.bits = shufflevector <2 x i32> %lod.bits, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %wide = bitcast <4 x i32> %wide.bits to <4 x float>
  %picked = extractelement <4 x float> %wide, i32 0
  ; Volatile store keeps the extracted value (and therefore the image call) live.
  store volatile float %picked, float addrspace(1)* undef
  ret void
}
|
|
18
|
|
; getlod variant, swapped elements.
; The shuffle places source element 1 in lane 0 and source element 0 in lane 1;
; lane 1 (i.e. source element 0) is the value stored. The checks below expect
; the image instruction to be emitted with dmask:0x1 writing only v0, with no
; other mention of v0 or v1 before the store of v0.
; GCN-LABEL: {{^}}adjust_writemask_crash_1_nochain:
; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
main_body:
  %lod = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
  %lod.bits = bitcast <2 x float> %lod to <2 x i32>
  ; Widen to four lanes with the two source elements swapped.
  %wide.bits = shufflevector <2 x i32> %lod.bits, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
  %wide = bitcast <4 x i32> %wide.bits to <4 x float>
  %picked = extractelement <4 x float> %wide, i32 1
  ; Volatile store keeps the extracted value (and therefore the image call) live.
  store volatile float %picked, float addrspace(1)* undef
  ret void
}
|
|
34
|
|
; sample variant, single live element.
; The intrinsic returns two floats, but only element 1 survives: the shuffle
; moves source element 1 into lane 0 of a four-wide vector and lane 0 is the
; value stored. The checks below expect the image instruction to be emitted
; with dmask:0x2 writing only v0, with no other mention of v0 or v1 before the
; store of v0.
; GCN-LABEL: {{^}}adjust_writemask_crash_0_chain:
; GCN: image_sample v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
main_body:
  %texel = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
  %texel.bits = bitcast <2 x float> %texel to <2 x i32>
  ; Widen to four lanes; only lane 0 is defined and it holds source element 1.
  %wide.bits = shufflevector <2 x i32> %texel.bits, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %wide = bitcast <4 x i32> %wide.bits to <4 x float>
  %picked = extractelement <4 x float> %wide, i32 0
  ; Volatile store keeps the extracted value (and therefore the image call) live.
  store volatile float %picked, float addrspace(1)* undef
  ret void
}
|
|
50
|
|
; sample variant, swapped elements.
; The shuffle places source element 1 in lane 0 and source element 0 in lane 1;
; lane 1 (i.e. source element 0) is the value stored. The checks below expect
; the image instruction to be emitted with dmask:0x1 writing only v0, with no
; other mention of v0 or v1 before the store of v0.
; GCN-LABEL: {{^}}adjust_writemask_crash_1_chain:
; GCN: image_sample v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
; GCN-NOT: v1
; GCN-NOT: v0
; GCN: buffer_store_dword v0
define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
main_body:
  %texel = call <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false)
  %texel.bits = bitcast <2 x float> %texel to <2 x i32>
  ; Widen to four lanes with the two source elements swapped.
  %wide.bits = shufflevector <2 x i32> %texel.bits, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
  %wide = bitcast <4 x i32> %wide.bits to <4 x float>
  %picked = extractelement <4 x float> %wide, i32 1
  ; Volatile store keeps the extracted value (and therefore the image call) live.
  store volatile float %picked, float addrspace(1)* undef
  ret void
}
|
|
66
|
|
; getlod variant with a four-element return type and i32 5 as the fourth call
; operand. Only source element 1 is used: the shuffle moves it into lane 0 and
; lane 0 is the value stored.
;
; No instruction-level expectations are placed on this function. The label
; directive below only anchors its output, so that the trailing store check of
; the preceding function cannot be satisfied by an instruction emitted here.
; GCN-LABEL: {{^}}adjust_writemask_crash_0_v4:
define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
main_body:
  %tmp = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false)
  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
  ; Only lane 0 of the shuffled vector is defined; it holds source element 1.
  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
  %tmp4 = extractelement <4 x float> %tmp3, i32 0
  ; Volatile store keeps the extracted value (and therefore the image call) live.
  store volatile float %tmp4, float addrspace(1)* undef
  ret void
}
|
|
77
|
|
78
|
|
; Image intrinsics called by the functions in this file, grouped by operation.
declare <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
declare <2 x float> @llvm.amdgcn.image.sample.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1

; #0 is attached to the test functions, #1 to the intrinsic declarations.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
|