comparison llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll @ 236:c4bab56944e8 llvm-original

LLVM 16
author kono
date Wed, 09 Nov 2022 17:45:10 +0900
parents 5f17cb93ff66
children 1f2b6ac9f198
comparison
equal deleted inserted replaced
232:70dce7da266c 236:c4bab56944e8
11 ; GFX10-NEXT: s_waitcnt vmcnt(0) 11 ; GFX10-NEXT: s_waitcnt vmcnt(0)
12 ; GFX10-NEXT: ; return to shader part epilog 12 ; GFX10-NEXT: ; return to shader part epilog
13 ; 13 ;
14 ; GFX10GISEL-LABEL: sample_d_1d: 14 ; GFX10GISEL-LABEL: sample_d_1d:
15 ; GFX10GISEL: ; %bb.0: ; %main_body 15 ; GFX10GISEL: ; %bb.0: ; %main_body
16 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
17 ; GFX10GISEL-NEXT: v_and_or_b32 v2, 0xffff, v2, s12
18 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 16 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
19 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 17 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
20 ; GFX10GISEL-NEXT: ; return to shader part epilog 18 ; GFX10GISEL-NEXT: ; return to shader part epilog
21 main_body: 19 main_body:
22 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 20 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
24 } 22 }
25 23
26 define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { 24 define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
27 ; GFX10-LABEL: sample_d_2d: 25 ; GFX10-LABEL: sample_d_2d:
28 ; GFX10: ; %bb.0: ; %main_body 26 ; GFX10: ; %bb.0: ; %main_body
29 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 27 ; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
30 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
31 ; GFX10-NEXT: image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 28 ; GFX10-NEXT: image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
32 ; GFX10-NEXT: s_waitcnt vmcnt(0) 29 ; GFX10-NEXT: s_waitcnt vmcnt(0)
33 ; GFX10-NEXT: ; return to shader part epilog 30 ; GFX10-NEXT: ; return to shader part epilog
34 ; 31 ;
35 ; GFX10GISEL-LABEL: sample_d_2d: 32 ; GFX10GISEL-LABEL: sample_d_2d:
36 ; GFX10GISEL: ; %bb.0: ; %main_body 33 ; GFX10GISEL: ; %bb.0: ; %main_body
37 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 34 ; GFX10GISEL-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
38 ; GFX10GISEL-NEXT: v_and_or_b32 v4, 0xffff, v4, v5
39 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 35 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
40 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 36 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
41 ; GFX10GISEL-NEXT: ; return to shader part epilog 37 ; GFX10GISEL-NEXT: ; return to shader part epilog
42 main_body: 38 main_body:
43 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 39 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
45 } 41 }
46 42
47 define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r) { 43 define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r) {
48 ; GFX10-LABEL: sample_d_3d: 44 ; GFX10-LABEL: sample_d_3d:
49 ; GFX10: ; %bb.0: ; %main_body 45 ; GFX10: ; %bb.0: ; %main_body
50 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6 46 ; GFX10-NEXT: v_mov_b32_e32 v15, v8
51 ; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 47 ; GFX10-NEXT: v_mov_b32_e32 v13, v5
52 ; GFX10-NEXT: image_sample_d v[0:3], [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 48 ; GFX10-NEXT: v_mov_b32_e32 v12, v4
49 ; GFX10-NEXT: v_mov_b32_e32 v11, v3
50 ; GFX10-NEXT: v_mov_b32_e32 v10, v2
51 ; GFX10-NEXT: v_mov_b32_e32 v9, v1
52 ; GFX10-NEXT: v_mov_b32_e32 v8, v0
53 ; GFX10-NEXT: v_perm_b32 v14, v7, v6, 0x5040100
54 ; GFX10-NEXT: image_sample_d v[0:3], v[8:15], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
53 ; GFX10-NEXT: s_waitcnt vmcnt(0) 55 ; GFX10-NEXT: s_waitcnt vmcnt(0)
54 ; GFX10-NEXT: ; return to shader part epilog 56 ; GFX10-NEXT: ; return to shader part epilog
55 ; 57 ;
56 ; GFX10GISEL-LABEL: sample_d_3d: 58 ; GFX10GISEL-LABEL: sample_d_3d:
57 ; GFX10GISEL: ; %bb.0: ; %main_body 59 ; GFX10GISEL: ; %bb.0: ; %main_body
58 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff 60 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v7
59 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7 61 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8
60 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 62 ; GFX10GISEL-NEXT: v_perm_b32 v6, v9, v6, 0x5040100
61 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v6, v9, v7
62 ; GFX10GISEL-NEXT: v_and_or_b32 v7, v8, v9, s12
63 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 63 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
64 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 64 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
65 ; GFX10GISEL-NEXT: ; return to shader part epilog 65 ; GFX10GISEL-NEXT: ; return to shader part epilog
66 main_body: 66 main_body:
67 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 67 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
75 ; GFX10-NEXT: s_waitcnt vmcnt(0) 75 ; GFX10-NEXT: s_waitcnt vmcnt(0)
76 ; GFX10-NEXT: ; return to shader part epilog 76 ; GFX10-NEXT: ; return to shader part epilog
77 ; 77 ;
78 ; GFX10GISEL-LABEL: sample_c_d_1d: 78 ; GFX10GISEL-LABEL: sample_c_d_1d:
79 ; GFX10GISEL: ; %bb.0: ; %main_body 79 ; GFX10GISEL: ; %bb.0: ; %main_body
80 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
81 ; GFX10GISEL-NEXT: v_and_or_b32 v3, 0xffff, v3, s12
82 ; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 80 ; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
83 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 81 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
84 ; GFX10GISEL-NEXT: ; return to shader part epilog 82 ; GFX10GISEL-NEXT: ; return to shader part epilog
85 main_body: 83 main_body:
86 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 84 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
88 } 86 }
89 87
90 define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { 88 define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
91 ; GFX10-LABEL: sample_c_d_2d: 89 ; GFX10-LABEL: sample_c_d_2d:
92 ; GFX10: ; %bb.0: ; %main_body 90 ; GFX10: ; %bb.0: ; %main_body
93 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 91 ; GFX10-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
94 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 92 ; GFX10-NEXT: image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
95 ; GFX10-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
96 ; GFX10-NEXT: s_waitcnt vmcnt(0) 93 ; GFX10-NEXT: s_waitcnt vmcnt(0)
97 ; GFX10-NEXT: ; return to shader part epilog 94 ; GFX10-NEXT: ; return to shader part epilog
98 ; 95 ;
99 ; GFX10GISEL-LABEL: sample_c_d_2d: 96 ; GFX10GISEL-LABEL: sample_c_d_2d:
100 ; GFX10GISEL: ; %bb.0: ; %main_body 97 ; GFX10GISEL: ; %bb.0: ; %main_body
101 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6 98 ; GFX10GISEL-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
102 ; GFX10GISEL-NEXT: v_and_or_b32 v5, 0xffff, v5, v6 99 ; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
103 ; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
104 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 100 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
105 ; GFX10GISEL-NEXT: ; return to shader part epilog 101 ; GFX10GISEL-NEXT: ; return to shader part epilog
106 main_body: 102 main_body:
107 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 103 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
108 ret <4 x float> %v 104 ret <4 x float> %v
109 } 105 }
110 106
111 define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) { 107 define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) {
112 ; GFX10-LABEL: sample_d_cl_1d: 108 ; GFX10-LABEL: sample_d_cl_1d:
113 ; GFX10: ; %bb.0: ; %main_body 109 ; GFX10: ; %bb.0: ; %main_body
114 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 110 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
115 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
116 ; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 111 ; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
117 ; GFX10-NEXT: s_waitcnt vmcnt(0) 112 ; GFX10-NEXT: s_waitcnt vmcnt(0)
118 ; GFX10-NEXT: ; return to shader part epilog 113 ; GFX10-NEXT: ; return to shader part epilog
119 ; 114 ;
120 ; GFX10GISEL-LABEL: sample_d_cl_1d: 115 ; GFX10GISEL-LABEL: sample_d_cl_1d:
121 ; GFX10GISEL: ; %bb.0: ; %main_body 116 ; GFX10GISEL: ; %bb.0: ; %main_body
122 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 117 ; GFX10GISEL-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
123 ; GFX10GISEL-NEXT: v_and_or_b32 v2, 0xffff, v2, v3
124 ; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 118 ; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
125 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 119 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
126 ; GFX10GISEL-NEXT: ; return to shader part epilog 120 ; GFX10GISEL-NEXT: ; return to shader part epilog
127 main_body: 121 main_body:
128 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 122 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
130 } 124 }
131 125
132 define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { 126 define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
133 ; GFX10-LABEL: sample_d_cl_2d: 127 ; GFX10-LABEL: sample_d_cl_2d:
134 ; GFX10: ; %bb.0: ; %main_body 128 ; GFX10: ; %bb.0: ; %main_body
135 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 129 ; GFX10-NEXT: v_mov_b32_e32 v11, v6
136 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 130 ; GFX10-NEXT: v_mov_b32_e32 v9, v3
137 ; GFX10-NEXT: image_sample_d_cl v[0:3], [v0, v1, v2, v3, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 131 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
132 ; GFX10-NEXT: v_mov_b32_e32 v7, v1
133 ; GFX10-NEXT: v_mov_b32_e32 v6, v0
134 ; GFX10-NEXT: v_perm_b32 v10, v5, v4, 0x5040100
135 ; GFX10-NEXT: image_sample_d_cl v[0:3], v[6:11], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
138 ; GFX10-NEXT: s_waitcnt vmcnt(0) 136 ; GFX10-NEXT: s_waitcnt vmcnt(0)
139 ; GFX10-NEXT: ; return to shader part epilog 137 ; GFX10-NEXT: ; return to shader part epilog
140 ; 138 ;
141 ; GFX10GISEL-LABEL: sample_d_cl_2d: 139 ; GFX10GISEL-LABEL: sample_d_cl_2d:
142 ; GFX10GISEL: ; %bb.0: ; %main_body 140 ; GFX10GISEL: ; %bb.0: ; %main_body
143 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff 141 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v5
144 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 142 ; GFX10GISEL-NEXT: v_mov_b32_e32 v5, v6
145 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 143 ; GFX10GISEL-NEXT: v_perm_b32 v4, v7, v4, 0x5040100
146 ; GFX10GISEL-NEXT: v_and_or_b32 v4, v4, v7, v5 144 ; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
147 ; GFX10GISEL-NEXT: v_and_or_b32 v5, v6, v7, s12
148 ; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
149 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 145 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
150 ; GFX10GISEL-NEXT: ; return to shader part epilog 146 ; GFX10GISEL-NEXT: ; return to shader part epilog
151 main_body: 147 main_body:
152 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 148 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
153 ret <4 x float> %v 149 ret <4 x float> %v
154 } 150 }
155 151
156 define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) { 152 define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) {
157 ; GFX10-LABEL: sample_c_d_cl_1d: 153 ; GFX10-LABEL: sample_c_d_cl_1d:
158 ; GFX10: ; %bb.0: ; %main_body 154 ; GFX10: ; %bb.0: ; %main_body
159 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3 155 ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
160 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
161 ; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 156 ; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
162 ; GFX10-NEXT: s_waitcnt vmcnt(0) 157 ; GFX10-NEXT: s_waitcnt vmcnt(0)
163 ; GFX10-NEXT: ; return to shader part epilog 158 ; GFX10-NEXT: ; return to shader part epilog
164 ; 159 ;
165 ; GFX10GISEL-LABEL: sample_c_d_cl_1d: 160 ; GFX10GISEL-LABEL: sample_c_d_cl_1d:
166 ; GFX10GISEL: ; %bb.0: ; %main_body 161 ; GFX10GISEL: ; %bb.0: ; %main_body
167 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 162 ; GFX10GISEL-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
168 ; GFX10GISEL-NEXT: v_and_or_b32 v3, 0xffff, v3, v4
169 ; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 163 ; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
170 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 164 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
171 ; GFX10GISEL-NEXT: ; return to shader part epilog 165 ; GFX10GISEL-NEXT: ; return to shader part epilog
172 main_body: 166 main_body:
173 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 167 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
175 } 169 }
176 170
177 define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { 171 define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
178 ; GFX10-LABEL: sample_c_d_cl_2d: 172 ; GFX10-LABEL: sample_c_d_cl_2d:
179 ; GFX10: ; %bb.0: ; %main_body 173 ; GFX10: ; %bb.0: ; %main_body
180 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 174 ; GFX10-NEXT: v_mov_b32_e32 v13, v7
181 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 175 ; GFX10-NEXT: v_mov_b32_e32 v11, v4
182 ; GFX10-NEXT: image_sample_c_d_cl v[0:3], [v0, v1, v2, v3, v4, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 176 ; GFX10-NEXT: v_mov_b32_e32 v10, v3
177 ; GFX10-NEXT: v_mov_b32_e32 v9, v2
178 ; GFX10-NEXT: v_mov_b32_e32 v8, v1
179 ; GFX10-NEXT: v_mov_b32_e32 v7, v0
180 ; GFX10-NEXT: v_perm_b32 v12, v6, v5, 0x5040100
181 ; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[7:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
183 ; GFX10-NEXT: s_waitcnt vmcnt(0) 182 ; GFX10-NEXT: s_waitcnt vmcnt(0)
184 ; GFX10-NEXT: ; return to shader part epilog 183 ; GFX10-NEXT: ; return to shader part epilog
185 ; 184 ;
186 ; GFX10GISEL-LABEL: sample_c_d_cl_2d: 185 ; GFX10GISEL-LABEL: sample_c_d_cl_2d:
187 ; GFX10GISEL: ; %bb.0: ; %main_body 186 ; GFX10GISEL: ; %bb.0: ; %main_body
188 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff 187 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v6
189 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6 188 ; GFX10GISEL-NEXT: v_mov_b32_e32 v6, v7
190 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 189 ; GFX10GISEL-NEXT: v_perm_b32 v5, v8, v5, 0x5040100
191 ; GFX10GISEL-NEXT: v_and_or_b32 v5, v5, v8, v6 190 ; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
192 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v7, v8, s12
193 ; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
194 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 191 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
195 ; GFX10GISEL-NEXT: ; return to shader part epilog 192 ; GFX10GISEL-NEXT: ; return to shader part epilog
196 main_body: 193 main_body:
197 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 194 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
198 ret <4 x float> %v 195 ret <4 x float> %v
205 ; GFX10-NEXT: s_waitcnt vmcnt(0) 202 ; GFX10-NEXT: s_waitcnt vmcnt(0)
206 ; GFX10-NEXT: ; return to shader part epilog 203 ; GFX10-NEXT: ; return to shader part epilog
207 ; 204 ;
208 ; GFX10GISEL-LABEL: sample_cd_1d: 205 ; GFX10GISEL-LABEL: sample_cd_1d:
209 ; GFX10GISEL: ; %bb.0: ; %main_body 206 ; GFX10GISEL: ; %bb.0: ; %main_body
210 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
211 ; GFX10GISEL-NEXT: v_and_or_b32 v2, 0xffff, v2, s12
212 ; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 207 ; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
213 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 208 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
214 ; GFX10GISEL-NEXT: ; return to shader part epilog 209 ; GFX10GISEL-NEXT: ; return to shader part epilog
215 main_body: 210 main_body:
216 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 211 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
218 } 213 }
219 214
220 define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { 215 define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
221 ; GFX10-LABEL: sample_cd_2d: 216 ; GFX10-LABEL: sample_cd_2d:
222 ; GFX10: ; %bb.0: ; %main_body 217 ; GFX10: ; %bb.0: ; %main_body
223 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 218 ; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
224 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4
225 ; GFX10-NEXT: image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 219 ; GFX10-NEXT: image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
226 ; GFX10-NEXT: s_waitcnt vmcnt(0) 220 ; GFX10-NEXT: s_waitcnt vmcnt(0)
227 ; GFX10-NEXT: ; return to shader part epilog 221 ; GFX10-NEXT: ; return to shader part epilog
228 ; 222 ;
229 ; GFX10GISEL-LABEL: sample_cd_2d: 223 ; GFX10GISEL-LABEL: sample_cd_2d:
230 ; GFX10GISEL: ; %bb.0: ; %main_body 224 ; GFX10GISEL: ; %bb.0: ; %main_body
231 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 225 ; GFX10GISEL-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
232 ; GFX10GISEL-NEXT: v_and_or_b32 v4, 0xffff, v4, v5
233 ; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 226 ; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
234 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 227 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
235 ; GFX10GISEL-NEXT: ; return to shader part epilog 228 ; GFX10GISEL-NEXT: ; return to shader part epilog
236 main_body: 229 main_body:
237 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 230 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
245 ; GFX10-NEXT: s_waitcnt vmcnt(0) 238 ; GFX10-NEXT: s_waitcnt vmcnt(0)
246 ; GFX10-NEXT: ; return to shader part epilog 239 ; GFX10-NEXT: ; return to shader part epilog
247 ; 240 ;
248 ; GFX10GISEL-LABEL: sample_c_cd_1d: 241 ; GFX10GISEL-LABEL: sample_c_cd_1d:
249 ; GFX10GISEL: ; %bb.0: ; %main_body 242 ; GFX10GISEL: ; %bb.0: ; %main_body
250 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
251 ; GFX10GISEL-NEXT: v_and_or_b32 v3, 0xffff, v3, s12
252 ; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 243 ; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
253 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 244 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
254 ; GFX10GISEL-NEXT: ; return to shader part epilog 245 ; GFX10GISEL-NEXT: ; return to shader part epilog
255 main_body: 246 main_body:
256 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 247 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
258 } 249 }
259 250
260 define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { 251 define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
261 ; GFX10-LABEL: sample_c_cd_2d: 252 ; GFX10-LABEL: sample_c_cd_2d:
262 ; GFX10: ; %bb.0: ; %main_body 253 ; GFX10: ; %bb.0: ; %main_body
263 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 254 ; GFX10-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
264 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 255 ; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
265 ; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
266 ; GFX10-NEXT: s_waitcnt vmcnt(0) 256 ; GFX10-NEXT: s_waitcnt vmcnt(0)
267 ; GFX10-NEXT: ; return to shader part epilog 257 ; GFX10-NEXT: ; return to shader part epilog
268 ; 258 ;
269 ; GFX10GISEL-LABEL: sample_c_cd_2d: 259 ; GFX10GISEL-LABEL: sample_c_cd_2d:
270 ; GFX10GISEL: ; %bb.0: ; %main_body 260 ; GFX10GISEL: ; %bb.0: ; %main_body
271 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6 261 ; GFX10GISEL-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
272 ; GFX10GISEL-NEXT: v_and_or_b32 v5, 0xffff, v5, v6 262 ; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
273 ; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
274 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 263 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
275 ; GFX10GISEL-NEXT: ; return to shader part epilog 264 ; GFX10GISEL-NEXT: ; return to shader part epilog
276 main_body: 265 main_body:
277 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 266 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
278 ret <4 x float> %v 267 ret <4 x float> %v
279 } 268 }
280 269
281 define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) { 270 define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) {
282 ; GFX10-LABEL: sample_cd_cl_1d: 271 ; GFX10-LABEL: sample_cd_cl_1d:
283 ; GFX10: ; %bb.0: ; %main_body 272 ; GFX10: ; %bb.0: ; %main_body
284 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 273 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
285 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
286 ; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 274 ; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
287 ; GFX10-NEXT: s_waitcnt vmcnt(0) 275 ; GFX10-NEXT: s_waitcnt vmcnt(0)
288 ; GFX10-NEXT: ; return to shader part epilog 276 ; GFX10-NEXT: ; return to shader part epilog
289 ; 277 ;
290 ; GFX10GISEL-LABEL: sample_cd_cl_1d: 278 ; GFX10GISEL-LABEL: sample_cd_cl_1d:
291 ; GFX10GISEL: ; %bb.0: ; %main_body 279 ; GFX10GISEL: ; %bb.0: ; %main_body
292 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 280 ; GFX10GISEL-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
293 ; GFX10GISEL-NEXT: v_and_or_b32 v2, 0xffff, v2, v3
294 ; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 281 ; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
295 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 282 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
296 ; GFX10GISEL-NEXT: ; return to shader part epilog 283 ; GFX10GISEL-NEXT: ; return to shader part epilog
297 main_body: 284 main_body:
298 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 285 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
300 } 287 }
301 288
302 define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { 289 define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
303 ; GFX10-LABEL: sample_cd_cl_2d: 290 ; GFX10-LABEL: sample_cd_cl_2d:
304 ; GFX10: ; %bb.0: ; %main_body 291 ; GFX10: ; %bb.0: ; %main_body
305 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 292 ; GFX10-NEXT: v_mov_b32_e32 v11, v6
306 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 293 ; GFX10-NEXT: v_mov_b32_e32 v9, v3
307 ; GFX10-NEXT: image_sample_cd_cl v[0:3], [v0, v1, v2, v3, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 294 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
295 ; GFX10-NEXT: v_mov_b32_e32 v7, v1
296 ; GFX10-NEXT: v_mov_b32_e32 v6, v0
297 ; GFX10-NEXT: v_perm_b32 v10, v5, v4, 0x5040100
298 ; GFX10-NEXT: image_sample_cd_cl v[0:3], v[6:11], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
308 ; GFX10-NEXT: s_waitcnt vmcnt(0) 299 ; GFX10-NEXT: s_waitcnt vmcnt(0)
309 ; GFX10-NEXT: ; return to shader part epilog 300 ; GFX10-NEXT: ; return to shader part epilog
310 ; 301 ;
311 ; GFX10GISEL-LABEL: sample_cd_cl_2d: 302 ; GFX10GISEL-LABEL: sample_cd_cl_2d:
312 ; GFX10GISEL: ; %bb.0: ; %main_body 303 ; GFX10GISEL: ; %bb.0: ; %main_body
313 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff 304 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v5
314 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 305 ; GFX10GISEL-NEXT: v_mov_b32_e32 v5, v6
315 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 306 ; GFX10GISEL-NEXT: v_perm_b32 v4, v7, v4, 0x5040100
316 ; GFX10GISEL-NEXT: v_and_or_b32 v4, v4, v7, v5 307 ; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
317 ; GFX10GISEL-NEXT: v_and_or_b32 v5, v6, v7, s12
318 ; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
319 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 308 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
320 ; GFX10GISEL-NEXT: ; return to shader part epilog 309 ; GFX10GISEL-NEXT: ; return to shader part epilog
321 main_body: 310 main_body:
322 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 311 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
323 ret <4 x float> %v 312 ret <4 x float> %v
324 } 313 }
325 314
; Comparison rows: left-revision line number and text, then right-revision line
; number and text. A row with a single number exists in only one revision.
; sample_c_cd_cl_1d calls llvm.amdgcn.image.sample.c.cd.cl.1d with float
; %zcompare/%dsdh/%dsdv and half %s/%clamp; the check rows record the
; instruction sequence expected under the GFX10 and GFX10GISEL prefixes.
326 define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) { 315 define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) {
327 ; GFX10-LABEL: sample_c_cd_cl_1d: 316 ; GFX10-LABEL: sample_c_cd_cl_1d:
328 ; GFX10: ; %bb.0: ; %main_body 317 ; GFX10: ; %bb.0: ; %main_body
; Left: v_and_b32 with 0xffff followed by v_lshl_or_b32 forms
; v3 = (v4 << 16) | (v3 & 0xffff). Right: a single v_perm_b32 with selector
; 0x5040100 writes v3 instead (presumably the same 16-bit pack; confirm against
; the ISA's V_PERM_B32 byte-select rules).
329 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3 318 ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
330 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
331 ; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 319 ; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
332 ; GFX10-NEXT: s_waitcnt vmcnt(0) 320 ; GFX10-NEXT: s_waitcnt vmcnt(0)
333 ; GFX10-NEXT: ; return to shader part epilog 321 ; GFX10-NEXT: ; return to shader part epilog
334 ; 322 ;
335 ; GFX10GISEL-LABEL: sample_c_cd_cl_1d: 323 ; GFX10GISEL-LABEL: sample_c_cd_cl_1d:
336 ; GFX10GISEL: ; %bb.0: ; %main_body 324 ; GFX10GISEL: ; %bb.0: ; %main_body
; Left: shift v4 left by 16, then v_and_or_b32 gives v3 = (0xffff & v3) | v4.
; Right: the same v_perm_b32 row as under the GFX10 prefix, so both prefixes
; now record identical code for this function.
337 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 325 ; GFX10GISEL-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
338 ; GFX10GISEL-NEXT: v_and_or_b32 v3, 0xffff, v3, v4
339 ; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 326 ; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
340 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 327 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
341 ; GFX10GISEL-NEXT: ; return to shader part epilog 328 ; GFX10GISEL-NEXT: ; return to shader part epilog
342 main_body: 329 main_body:
343 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 330 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; NOTE(review): row 344/331 (between the call and the closing brace) is not
; shown in this comparison view.
345 } 332 }
346 333
; Comparison rows: left-revision line number and text, then right-revision line
; number and text. A row with a single number exists in only one revision.
; sample_c_cd_cl_2d calls llvm.amdgcn.image.sample.c.cd.cl.2d with float
; %zcompare and gradients and half %s/%t/%clamp, and returns the <4 x float>
; result; the check rows record the code expected under each prefix.
347 define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { 334 define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
348 ; GFX10-LABEL: sample_c_cd_cl_2d: 335 ; GFX10-LABEL: sample_c_cd_cl_2d:
349 ; GFX10: ; %bb.0: ; %main_body 336 ; GFX10: ; %bb.0: ; %main_body
; Left: packs v5/v6 in place (and + lshl_or) and passes the address as the
; bracketed register list [v0, v1, v2, v3, v4, v5, v7].
; Right: copies v0..v4 and v7 into v7..v11 and v13, writes the v_perm_b32
; result to v12, and passes the consecutive range v[7:13].
350 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 337 ; GFX10-NEXT: v_mov_b32_e32 v13, v7
351 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 338 ; GFX10-NEXT: v_mov_b32_e32 v11, v4
352 ; GFX10-NEXT: image_sample_c_cd_cl v[0:3], [v0, v1, v2, v3, v4, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 339 ; GFX10-NEXT: v_mov_b32_e32 v10, v3
340 ; GFX10-NEXT: v_mov_b32_e32 v9, v2
341 ; GFX10-NEXT: v_mov_b32_e32 v8, v1
342 ; GFX10-NEXT: v_mov_b32_e32 v7, v0
343 ; GFX10-NEXT: v_perm_b32 v12, v6, v5, 0x5040100
344 ; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[7:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
353 ; GFX10-NEXT: s_waitcnt vmcnt(0) 345 ; GFX10-NEXT: s_waitcnt vmcnt(0)
354 ; GFX10-NEXT: ; return to shader part epilog 346 ; GFX10-NEXT: ; return to shader part epilog
355 ; 347 ;
356 ; GFX10GISEL-LABEL: sample_c_cd_cl_2d: 348 ; GFX10GISEL-LABEL: sample_c_cd_cl_2d:
357 ; GFX10GISEL: ; %bb.0: ; %main_body
; Left: materialises 0xffff in v8, computes s12 = s0 << 16, builds v5 and v6
; with v_and_or_b32 (v6 takes s12 as its upper half), and passes v[0:7].
; Right: two register moves plus one v_perm_b32 into v5; the address range
; shrinks to v[0:6] and the s_lshl_b32 row is gone.
358 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff 350 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v6
359 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6 351 ; GFX10GISEL-NEXT: v_mov_b32_e32 v6, v7
360 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 352 ; GFX10GISEL-NEXT: v_perm_b32 v5, v8, v5, 0x5040100
361 ; GFX10GISEL-NEXT: v_and_or_b32 v5, v5, v8, v6 353 ; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
362 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v7, v8, s12
363 ; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
364 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 354 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
365 ; GFX10GISEL-NEXT: ; return to shader part epilog 355 ; GFX10GISEL-NEXT: ; return to shader part epilog
366 main_body: 356 main_body:
367 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 357 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
368 ret <4 x float> %v 358 ret <4 x float> %v
369 } 359 }
370 360
; Comparison rows: left-revision line number and text, then right-revision line
; number and text. A row with a single number exists in only one revision.
; sample_c_d_o_2darray_V1 calls llvm.amdgcn.image.sample.c.d.o.2darray with
; dmask argument 4 and a scalar float result; %offset is i32, %zcompare and
; the gradients are float, and %s/%t/%slice are half.
371 define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) { 361 define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
372 ; GFX10-LABEL: sample_c_d_o_2darray_V1: 362 ; GFX10-LABEL: sample_c_d_o_2darray_V1:
373 ; GFX10: ; %bb.0: ; %main_body 363 ; GFX10: ; %bb.0: ; %main_body
; Left: packs v6/v7 in place (and + lshl_or) and passes the bracketed list
; [v0, v1, v2, v3, v4, v5, v6, v8].
; Right: copies v0..v5 and v8 into v8..v13 and v15, writes the v_perm_b32
; result to v14, and passes the consecutive range v[8:15].
374 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6 364 ; GFX10-NEXT: v_mov_b32_e32 v15, v8
375 ; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 365 ; GFX10-NEXT: v_mov_b32_e32 v13, v5
376 ; GFX10-NEXT: image_sample_c_d_o v0, [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 366 ; GFX10-NEXT: v_mov_b32_e32 v12, v4
367 ; GFX10-NEXT: v_mov_b32_e32 v11, v3
368 ; GFX10-NEXT: v_mov_b32_e32 v10, v2
369 ; GFX10-NEXT: v_mov_b32_e32 v9, v1
370 ; GFX10-NEXT: v_mov_b32_e32 v8, v0
371 ; GFX10-NEXT: v_perm_b32 v14, v7, v6, 0x5040100
372 ; GFX10-NEXT: image_sample_c_d_o v0, v[8:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
377 ; GFX10-NEXT: s_waitcnt vmcnt(0) 373 ; GFX10-NEXT: s_waitcnt vmcnt(0)
378 ; GFX10-NEXT: ; return to shader part epilog 374 ; GFX10-NEXT: ; return to shader part epilog
379 ; 375 ;
380 ; GFX10GISEL-LABEL: sample_c_d_o_2darray_V1: 376 ; GFX10GISEL-LABEL: sample_c_d_o_2darray_V1:
381 ; GFX10GISEL: ; %bb.0: ; %main_body 377 ; GFX10GISEL: ; %bb.0: ; %main_body
; Left: 0xffff mask in v9, s12 = s0 << 16, and two v_and_or_b32 rows build
; v6 and v7. Right: v7 is a plain copy of v8 and v6 comes from one
; v_perm_b32; the image_sample row itself (v[0:7]) is unchanged.
382 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff 378 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v7
383 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7 379 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8
384 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 380 ; GFX10GISEL-NEXT: v_perm_b32 v6, v9, v6, 0x5040100
385 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v6, v9, v7
386 ; GFX10GISEL-NEXT: v_and_or_b32 v7, v8, v9, s12
387 ; GFX10GISEL-NEXT: image_sample_c_d_o v0, v[0:7], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 381 ; GFX10GISEL-NEXT: image_sample_c_d_o v0, v[0:7], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
388 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 382 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
389 ; GFX10GISEL-NEXT: ; return to shader part epilog 383 ; GFX10GISEL-NEXT: ; return to shader part epilog
390 main_body: 384 main_body:
391 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 385 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; NOTE(review): row 392/386 (between the call and the closing brace) is not
; shown in this comparison view.
393 } 387 }
394 388
395 define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) { 389 define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
396 ; GFX10-LABEL: sample_c_d_o_2darray_V2: 390 ; GFX10-LABEL: sample_c_d_o_2darray_V2:
397 ; GFX10: ; %bb.0: ; %main_body 391 ; GFX10: ; %bb.0: ; %main_body
398 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6 392 ; GFX10-NEXT: v_mov_b32_e32 v15, v8
399 ; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 393 ; GFX10-NEXT: v_mov_b32_e32 v13, v5
400 ; GFX10-NEXT: image_sample_c_d_o v[0:1], [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 394 ; GFX10-NEXT: v_mov_b32_e32 v12, v4
395 ; GFX10-NEXT: v_mov_b32_e32 v11, v3
396 ; GFX10-NEXT: v_mov_b32_e32 v10, v2
397 ; GFX10-NEXT: v_mov_b32_e32 v9, v1
398 ; GFX10-NEXT: v_mov_b32_e32 v8, v0
399 ; GFX10-NEXT: v_perm_b32 v14, v7, v6, 0x5040100
400 ; GFX10-NEXT: image_sample_c_d_o v[0:1], v[8:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
401 ; GFX10-NEXT: s_waitcnt vmcnt(0) 401 ; GFX10-NEXT: s_waitcnt vmcnt(0)
402 ; GFX10-NEXT: ; return to shader part epilog 402 ; GFX10-NEXT: ; return to shader part epilog
403 ; 403 ;
404 ; GFX10GISEL-LABEL: sample_c_d_o_2darray_V2: 404 ; GFX10GISEL-LABEL: sample_c_d_o_2darray_V2:
405 ; GFX10GISEL: ; %bb.0: ; %main_body 405 ; GFX10GISEL: ; %bb.0: ; %main_body
406 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff 406 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v7
407 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7 407 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8
408 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 408 ; GFX10GISEL-NEXT: v_perm_b32 v6, v9, v6, 0x5040100
409 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v6, v9, v7
410 ; GFX10GISEL-NEXT: v_and_or_b32 v7, v8, v9, s12
411 ; GFX10GISEL-NEXT: image_sample_c_d_o v[0:1], v[0:7], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 409 ; GFX10GISEL-NEXT: image_sample_c_d_o v[0:1], v[0:7], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
412 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 410 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
413 ; GFX10GISEL-NEXT: ; return to shader part epilog 411 ; GFX10GISEL-NEXT: ; return to shader part epilog
414 main_body: 412 main_body:
415 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 413 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
445 ; GFX10-NEXT: s_waitcnt vmcnt(0) 443 ; GFX10-NEXT: s_waitcnt vmcnt(0)
446 ; GFX10-NEXT: ; return to shader part epilog 444 ; GFX10-NEXT: ; return to shader part epilog
447 ; 445 ;
448 ; GFX10GISEL-LABEL: sample_g16_noa16_d_1d: 446 ; GFX10GISEL-LABEL: sample_g16_noa16_d_1d:
449 ; GFX10GISEL: ; %bb.0: ; %main_body 447 ; GFX10GISEL: ; %bb.0: ; %main_body
450 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, 0xffff
451 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
452 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v3, s12
453 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v3, s12
454 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 448 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
455 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 449 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
456 ; GFX10GISEL-NEXT: ; return to shader part epilog 450 ; GFX10GISEL-NEXT: ; return to shader part epilog
457 main_body: 451 main_body:
458 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 452 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
460 } 454 }
461 455
; Comparison rows: left-revision line number and text, then right-revision line
; number and text. A row with a single number exists in only one revision.
; sample_g16_noa16_d_2d calls llvm.amdgcn.image.sample.d.2d with half gradients
; (%dsdh, %dtdh, %dsdv, %dtdv) and float coordinates (%s, %t); the expected
; instruction is image_sample_d_g16 with no a16 modifier.
462 define amdgpu_ps <4 x float> @sample_g16_noa16_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { 456 define amdgpu_ps <4 x float> @sample_g16_noa16_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
463 ; GFX10-LABEL: sample_g16_noa16_d_2d: 457 ; GFX10-LABEL: sample_g16_noa16_d_2d:
464 ; GFX10: ; %bb.0: ; %main_body 458 ; GFX10: ; %bb.0: ; %main_body
; Left: loads 0xffff into v6, masks v2 and v0, then v_lshl_or_b32 merges
; v3 into v2 and v1 into v0 (five rows). Right: two v_perm_b32 rows write the
; same destinations v2 and v0; the image_sample row is unchanged.
465 ; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff 459 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
466 ; GFX10-NEXT: v_and_b32_e32 v2, v6, v2 460 ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
467 ; GFX10-NEXT: v_and_b32_e32 v0, v6, v0
468 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
469 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
470 ; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 461 ; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
471 ; GFX10-NEXT: s_waitcnt vmcnt(0) 462 ; GFX10-NEXT: s_waitcnt vmcnt(0)
472 ; GFX10-NEXT: ; return to shader part epilog 463 ; GFX10-NEXT: ; return to shader part epilog
473 ; 464 ;
474 ; GFX10GISEL-LABEL: sample_g16_noa16_d_2d: 465 ; GFX10GISEL-LABEL: sample_g16_noa16_d_2d:
475 ; GFX10GISEL: ; %bb.0: ; %main_body 466 ; GFX10GISEL: ; %bb.0: ; %main_body
; Left: 0xffff mask in v6, two shifts by 16, two v_and_or_b32 rows producing
; v0 and v1. Right: two v_perm_b32 rows producing v0 and v1; the address list
; [v0, v1, v4, v5] is unchanged.
476 ; GFX10GISEL-NEXT: v_mov_b32_e32 v6, 0xffff 467 ; GFX10GISEL-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
477 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 468 ; GFX10GISEL-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
478 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
479 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v6, v1
480 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v6, v3
481 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 469 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
482 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 470 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
483 ; GFX10GISEL-NEXT: ; return to shader part epilog 471 ; GFX10GISEL-NEXT: ; return to shader part epilog
484 main_body: 472 main_body:
485 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 473 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
; NOTE(review): row 486/474 (between the call and the closing brace) is not
; shown in this comparison view.
487 } 475 }
488 476
489 define amdgpu_ps <4 x float> @sample_g16_noa16_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) { 477 define amdgpu_ps <4 x float> @sample_g16_noa16_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
490 ; GFX10-LABEL: sample_g16_noa16_d_3d: 478 ; GFX10-LABEL: sample_g16_noa16_d_3d:
491 ; GFX10: ; %bb.0: ; %main_body 479 ; GFX10: ; %bb.0: ; %main_body
492 ; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff 480 ; GFX10-NEXT: v_mov_b32_e32 v9, v3
493 ; GFX10-NEXT: v_and_b32_e32 v3, v9, v3 481 ; GFX10-NEXT: v_mov_b32_e32 v3, v2
494 ; GFX10-NEXT: v_and_b32_e32 v0, v9, v0 482 ; GFX10-NEXT: v_perm_b32 v2, v1, v0, 0x5040100
495 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 483 ; GFX10-NEXT: v_perm_b32 v4, v4, v9, 0x5040100
496 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 484 ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
497 ; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
498 ; GFX10-NEXT: s_waitcnt vmcnt(0) 485 ; GFX10-NEXT: s_waitcnt vmcnt(0)
499 ; GFX10-NEXT: ; return to shader part epilog 486 ; GFX10-NEXT: ; return to shader part epilog
500 ; 487 ;
501 ; GFX10GISEL-LABEL: sample_g16_noa16_d_3d: 488 ; GFX10GISEL-LABEL: sample_g16_noa16_d_3d:
502 ; GFX10GISEL: ; %bb.0: ; %main_body 489 ; GFX10GISEL: ; %bb.0: ; %main_body
503 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff 490 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
504 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 491 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v2
505 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 492 ; GFX10GISEL-NEXT: v_perm_b32 v2, v1, v0, 0x5040100
506 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 493 ; GFX10GISEL-NEXT: v_perm_b32 v4, v4, v9, 0x5040100
507 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v9, v1 494 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
508 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v9, s12
509 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v9, v4
510 ; GFX10GISEL-NEXT: v_and_or_b32 v3, v5, v9, s12
511 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
512 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 495 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
513 ; GFX10GISEL-NEXT: ; return to shader part epilog 496 ; GFX10GISEL-NEXT: ; return to shader part epilog
514 main_body: 497 main_body:
515 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 498 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
516 ret <4 x float> %v 499 ret <4 x float> %v
523 ; GFX10-NEXT: s_waitcnt vmcnt(0) 506 ; GFX10-NEXT: s_waitcnt vmcnt(0)
524 ; GFX10-NEXT: ; return to shader part epilog 507 ; GFX10-NEXT: ; return to shader part epilog
525 ; 508 ;
526 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_1d: 509 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_1d:
527 ; GFX10GISEL: ; %bb.0: ; %main_body 510 ; GFX10GISEL: ; %bb.0: ; %main_body
528 ; GFX10GISEL-NEXT: v_mov_b32_e32 v4, 0xffff
529 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
530 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v4, s12
531 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v4, s12
532 ; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 511 ; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
533 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 512 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
534 ; GFX10GISEL-NEXT: ; return to shader part epilog 513 ; GFX10GISEL-NEXT: ; return to shader part epilog
535 main_body: 514 main_body:
536 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 515 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
538 } 517 }
539 518
540 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { 519 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
541 ; GFX10-LABEL: sample_g16_noa16_c_d_2d: 520 ; GFX10-LABEL: sample_g16_noa16_c_d_2d:
542 ; GFX10: ; %bb.0: ; %main_body 521 ; GFX10: ; %bb.0: ; %main_body
543 ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff 522 ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
544 ; GFX10-NEXT: v_and_b32_e32 v3, v7, v3 523 ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
545 ; GFX10-NEXT: v_and_b32_e32 v1, v7, v1
546 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
547 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
548 ; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 524 ; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
549 ; GFX10-NEXT: s_waitcnt vmcnt(0) 525 ; GFX10-NEXT: s_waitcnt vmcnt(0)
550 ; GFX10-NEXT: ; return to shader part epilog 526 ; GFX10-NEXT: ; return to shader part epilog
551 ; 527 ;
552 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_2d: 528 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_2d:
553 ; GFX10GISEL: ; %bb.0: ; %main_body 529 ; GFX10GISEL: ; %bb.0: ; %main_body
554 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff 530 ; GFX10GISEL-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
555 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 531 ; GFX10GISEL-NEXT: v_perm_b32 v2, v4, v3, 0x5040100
556 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
557 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v7, v2
558 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v7, v4
559 ; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 532 ; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
560 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 533 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
561 ; GFX10GISEL-NEXT: ; return to shader part epilog 534 ; GFX10GISEL-NEXT: ; return to shader part epilog
562 main_body: 535 main_body:
563 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 536 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
571 ; GFX10-NEXT: s_waitcnt vmcnt(0) 544 ; GFX10-NEXT: s_waitcnt vmcnt(0)
572 ; GFX10-NEXT: ; return to shader part epilog 545 ; GFX10-NEXT: ; return to shader part epilog
573 ; 546 ;
574 ; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_1d: 547 ; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_1d:
575 ; GFX10GISEL: ; %bb.0: ; %main_body 548 ; GFX10GISEL: ; %bb.0: ; %main_body
576 ; GFX10GISEL-NEXT: v_mov_b32_e32 v4, 0xffff
577 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
578 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v4, s12
579 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v4, s12
580 ; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 549 ; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
581 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 550 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
582 ; GFX10GISEL-NEXT: ; return to shader part epilog 551 ; GFX10GISEL-NEXT: ; return to shader part epilog
583 main_body: 552 main_body:
584 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 553 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
586 } 555 }
587 556
588 define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { 557 define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
589 ; GFX10-LABEL: sample_g16_noa16_d_cl_2d: 558 ; GFX10-LABEL: sample_g16_noa16_d_cl_2d:
590 ; GFX10: ; %bb.0: ; %main_body 559 ; GFX10: ; %bb.0: ; %main_body
591 ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff 560 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
592 ; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 561 ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
593 ; GFX10-NEXT: v_and_b32_e32 v0, v7, v0
594 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
595 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
596 ; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 562 ; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
597 ; GFX10-NEXT: s_waitcnt vmcnt(0) 563 ; GFX10-NEXT: s_waitcnt vmcnt(0)
598 ; GFX10-NEXT: ; return to shader part epilog 564 ; GFX10-NEXT: ; return to shader part epilog
599 ; 565 ;
600 ; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_2d: 566 ; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_2d:
601 ; GFX10GISEL: ; %bb.0: ; %main_body 567 ; GFX10GISEL: ; %bb.0: ; %main_body
602 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff 568 ; GFX10GISEL-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
603 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 569 ; GFX10GISEL-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
604 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
605 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v7, v1
606 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v7, v3
607 ; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 570 ; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
608 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 571 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
609 ; GFX10GISEL-NEXT: ; return to shader part epilog 572 ; GFX10GISEL-NEXT: ; return to shader part epilog
610 main_body: 573 main_body:
611 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 574 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
619 ; GFX10-NEXT: s_waitcnt vmcnt(0) 582 ; GFX10-NEXT: s_waitcnt vmcnt(0)
620 ; GFX10-NEXT: ; return to shader part epilog 583 ; GFX10-NEXT: ; return to shader part epilog
621 ; 584 ;
622 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_1d: 585 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_1d:
623 ; GFX10GISEL: ; %bb.0: ; %main_body 586 ; GFX10GISEL: ; %bb.0: ; %main_body
624 ; GFX10GISEL-NEXT: v_mov_b32_e32 v5, 0xffff
625 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
626 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v5, s12
627 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v5, s12
628 ; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 587 ; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
629 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 588 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
630 ; GFX10GISEL-NEXT: ; return to shader part epilog 589 ; GFX10GISEL-NEXT: ; return to shader part epilog
631 main_body: 590 main_body:
632 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 591 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
634 } 593 }
635 594
636 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { 595 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
637 ; GFX10-LABEL: sample_g16_noa16_c_d_cl_2d: 596 ; GFX10-LABEL: sample_g16_noa16_c_d_cl_2d:
638 ; GFX10: ; %bb.0: ; %main_body 597 ; GFX10: ; %bb.0: ; %main_body
639 ; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff 598 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
640 ; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 599 ; GFX10-NEXT: v_mov_b32_e32 v2, v0
641 ; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 600 ; GFX10-NEXT: v_perm_b32 v4, v4, v3, 0x5040100
642 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 601 ; GFX10-NEXT: v_perm_b32 v3, v8, v1, 0x5040100
643 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 602 ; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
644 ; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
645 ; GFX10-NEXT: s_waitcnt vmcnt(0) 603 ; GFX10-NEXT: s_waitcnt vmcnt(0)
646 ; GFX10-NEXT: ; return to shader part epilog 604 ; GFX10-NEXT: ; return to shader part epilog
647 ; 605 ;
648 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_2d: 606 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_2d:
649 ; GFX10GISEL: ; %bb.0: ; %main_body 607 ; GFX10GISEL: ; %bb.0: ; %main_body
650 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff 608 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v2
651 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 609 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
652 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 610 ; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
653 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v8, v2 611 ; GFX10GISEL-NEXT: v_perm_b32 v3, v8, v1, 0x5040100
654 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v8, v4 612 ; GFX10GISEL-NEXT: v_perm_b32 v4, v4, v9, 0x5040100
655 ; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 613 ; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
656 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 614 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
657 ; GFX10GISEL-NEXT: ; return to shader part epilog 615 ; GFX10GISEL-NEXT: ; return to shader part epilog
658 main_body: 616 main_body:
659 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 617 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
660 ret <4 x float> %v 618 ret <4 x float> %v
667 ; GFX10-NEXT: s_waitcnt vmcnt(0) 625 ; GFX10-NEXT: s_waitcnt vmcnt(0)
668 ; GFX10-NEXT: ; return to shader part epilog 626 ; GFX10-NEXT: ; return to shader part epilog
669 ; 627 ;
670 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_1d: 628 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_1d:
671 ; GFX10GISEL: ; %bb.0: ; %main_body 629 ; GFX10GISEL: ; %bb.0: ; %main_body
672 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, 0xffff
673 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
674 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v3, s12
675 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v3, s12
676 ; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 630 ; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
677 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 631 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
678 ; GFX10GISEL-NEXT: ; return to shader part epilog 632 ; GFX10GISEL-NEXT: ; return to shader part epilog
679 main_body: 633 main_body:
680 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 634 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
682 } 636 }
683 637
684 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { 638 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
685 ; GFX10-LABEL: sample_g16_noa16_cd_2d: 639 ; GFX10-LABEL: sample_g16_noa16_cd_2d:
686 ; GFX10: ; %bb.0: ; %main_body 640 ; GFX10: ; %bb.0: ; %main_body
687 ; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff 641 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
688 ; GFX10-NEXT: v_and_b32_e32 v2, v6, v2 642 ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
689 ; GFX10-NEXT: v_and_b32_e32 v0, v6, v0
690 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
691 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
692 ; GFX10-NEXT: image_sample_cd_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 643 ; GFX10-NEXT: image_sample_cd_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
693 ; GFX10-NEXT: s_waitcnt vmcnt(0) 644 ; GFX10-NEXT: s_waitcnt vmcnt(0)
694 ; GFX10-NEXT: ; return to shader part epilog 645 ; GFX10-NEXT: ; return to shader part epilog
695 ; 646 ;
696 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_2d: 647 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_2d:
697 ; GFX10GISEL: ; %bb.0: ; %main_body 648 ; GFX10GISEL: ; %bb.0: ; %main_body
698 ; GFX10GISEL-NEXT: v_mov_b32_e32 v6, 0xffff 649 ; GFX10GISEL-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
699 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 650 ; GFX10GISEL-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
700 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
701 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v6, v1
702 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v6, v3
703 ; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 651 ; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
704 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 652 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
705 ; GFX10GISEL-NEXT: ; return to shader part epilog 653 ; GFX10GISEL-NEXT: ; return to shader part epilog
706 main_body: 654 main_body:
707 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 655 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
715 ; GFX10-NEXT: s_waitcnt vmcnt(0) 663 ; GFX10-NEXT: s_waitcnt vmcnt(0)
716 ; GFX10-NEXT: ; return to shader part epilog 664 ; GFX10-NEXT: ; return to shader part epilog
717 ; 665 ;
718 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_1d: 666 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_1d:
719 ; GFX10GISEL: ; %bb.0: ; %main_body 667 ; GFX10GISEL: ; %bb.0: ; %main_body
720 ; GFX10GISEL-NEXT: v_mov_b32_e32 v4, 0xffff
721 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
722 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v4, s12
723 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v4, s12
724 ; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 668 ; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
725 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 669 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
726 ; GFX10GISEL-NEXT: ; return to shader part epilog 670 ; GFX10GISEL-NEXT: ; return to shader part epilog
727 main_body: 671 main_body:
728 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 672 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
730 } 674 }
731 675
732 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { 676 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
733 ; GFX10-LABEL: sample_g16_noa16_c_cd_2d: 677 ; GFX10-LABEL: sample_g16_noa16_c_cd_2d:
734 ; GFX10: ; %bb.0: ; %main_body 678 ; GFX10: ; %bb.0: ; %main_body
735 ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff 679 ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
736 ; GFX10-NEXT: v_and_b32_e32 v3, v7, v3 680 ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
737 ; GFX10-NEXT: v_and_b32_e32 v1, v7, v1
738 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3
739 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1
740 ; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 681 ; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
741 ; GFX10-NEXT: s_waitcnt vmcnt(0) 682 ; GFX10-NEXT: s_waitcnt vmcnt(0)
742 ; GFX10-NEXT: ; return to shader part epilog 683 ; GFX10-NEXT: ; return to shader part epilog
743 ; 684 ;
744 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_2d: 685 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_2d:
745 ; GFX10GISEL: ; %bb.0: ; %main_body 686 ; GFX10GISEL: ; %bb.0: ; %main_body
746 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff 687 ; GFX10GISEL-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
747 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 688 ; GFX10GISEL-NEXT: v_perm_b32 v2, v4, v3, 0x5040100
748 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
749 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v7, v2
750 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v7, v4
751 ; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 689 ; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
752 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 690 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
753 ; GFX10GISEL-NEXT: ; return to shader part epilog 691 ; GFX10GISEL-NEXT: ; return to shader part epilog
754 main_body: 692 main_body:
755 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 693 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
763 ; GFX10-NEXT: s_waitcnt vmcnt(0) 701 ; GFX10-NEXT: s_waitcnt vmcnt(0)
764 ; GFX10-NEXT: ; return to shader part epilog 702 ; GFX10-NEXT: ; return to shader part epilog
765 ; 703 ;
766 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_1d: 704 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_1d:
767 ; GFX10GISEL: ; %bb.0: ; %main_body 705 ; GFX10GISEL: ; %bb.0: ; %main_body
768 ; GFX10GISEL-NEXT: v_mov_b32_e32 v4, 0xffff
769 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
770 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v4, s12
771 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v4, s12
772 ; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 706 ; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
773 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 707 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
774 ; GFX10GISEL-NEXT: ; return to shader part epilog 708 ; GFX10GISEL-NEXT: ; return to shader part epilog
775 main_body: 709 main_body:
776 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 710 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
778 } 712 }
779 713
780 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { 714 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
781 ; GFX10-LABEL: sample_g16_noa16_cd_cl_2d: 715 ; GFX10-LABEL: sample_g16_noa16_cd_cl_2d:
782 ; GFX10: ; %bb.0: ; %main_body 716 ; GFX10: ; %bb.0: ; %main_body
783 ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff 717 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
784 ; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 718 ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
785 ; GFX10-NEXT: v_and_b32_e32 v0, v7, v0
786 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2
787 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0
788 ; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 719 ; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
789 ; GFX10-NEXT: s_waitcnt vmcnt(0) 720 ; GFX10-NEXT: s_waitcnt vmcnt(0)
790 ; GFX10-NEXT: ; return to shader part epilog 721 ; GFX10-NEXT: ; return to shader part epilog
791 ; 722 ;
792 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_2d: 723 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_2d:
793 ; GFX10GISEL: ; %bb.0: ; %main_body 724 ; GFX10GISEL: ; %bb.0: ; %main_body
794 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff 725 ; GFX10GISEL-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
795 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 726 ; GFX10GISEL-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
796 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
797 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v7, v1
798 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v7, v3
799 ; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 727 ; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
800 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 728 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
801 ; GFX10GISEL-NEXT: ; return to shader part epilog 729 ; GFX10GISEL-NEXT: ; return to shader part epilog
802 main_body: 730 main_body:
803 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 731 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
811 ; GFX10-NEXT: s_waitcnt vmcnt(0) 739 ; GFX10-NEXT: s_waitcnt vmcnt(0)
812 ; GFX10-NEXT: ; return to shader part epilog 740 ; GFX10-NEXT: ; return to shader part epilog
813 ; 741 ;
814 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_1d: 742 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_1d:
815 ; GFX10GISEL: ; %bb.0: ; %main_body 743 ; GFX10GISEL: ; %bb.0: ; %main_body
816 ; GFX10GISEL-NEXT: v_mov_b32_e32 v5, 0xffff
817 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16
818 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v5, s12
819 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v5, s12
820 ; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 744 ; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
821 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 745 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
822 ; GFX10GISEL-NEXT: ; return to shader part epilog 746 ; GFX10GISEL-NEXT: ; return to shader part epilog
823 main_body: 747 main_body:
824 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 748 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
826 } 750 }
827 751
828 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { 752 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
829 ; GFX10-LABEL: sample_g16_noa16_c_cd_cl_2d: 753 ; GFX10-LABEL: sample_g16_noa16_c_cd_cl_2d:
830 ; GFX10: ; %bb.0: ; %main_body 754 ; GFX10: ; %bb.0: ; %main_body
831 ; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff 755 ; GFX10-NEXT: v_mov_b32_e32 v8, v2
832 ; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 756 ; GFX10-NEXT: v_mov_b32_e32 v2, v0
833 ; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 757 ; GFX10-NEXT: v_perm_b32 v4, v4, v3, 0x5040100
834 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 758 ; GFX10-NEXT: v_perm_b32 v3, v8, v1, 0x5040100
835 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 759 ; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
836 ; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
837 ; GFX10-NEXT: s_waitcnt vmcnt(0) 760 ; GFX10-NEXT: s_waitcnt vmcnt(0)
838 ; GFX10-NEXT: ; return to shader part epilog 761 ; GFX10-NEXT: ; return to shader part epilog
839 ; 762 ;
840 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_2d: 763 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_2d:
841 ; GFX10GISEL: ; %bb.0: ; %main_body 764 ; GFX10GISEL: ; %bb.0: ; %main_body
842 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff 765 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v2
843 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 766 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
844 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 767 ; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
845 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v8, v2 768 ; GFX10GISEL-NEXT: v_perm_b32 v3, v8, v1, 0x5040100
846 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v8, v4 769 ; GFX10GISEL-NEXT: v_perm_b32 v4, v4, v9, 0x5040100
847 ; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 770 ; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
848 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 771 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
849 ; GFX10GISEL-NEXT: ; return to shader part epilog 772 ; GFX10GISEL-NEXT: ; return to shader part epilog
850 main_body: 773 main_body:
851 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 774 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
852 ret <4 x float> %v 775 ret <4 x float> %v
853 } 776 }
854 777
855 define amdgpu_ps float @sample_g16_noa16_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { 778 define amdgpu_ps float @sample_g16_noa16_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
856 ; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V1: 779 ; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V1:
857 ; GFX10: ; %bb.0: ; %main_body 780 ; GFX10: ; %bb.0: ; %main_body
858 ; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff 781 ; GFX10-NEXT: v_mov_b32_e32 v9, v3
859 ; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 782 ; GFX10-NEXT: v_mov_b32_e32 v10, v2
860 ; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 783 ; GFX10-NEXT: v_mov_b32_e32 v3, v1
861 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 784 ; GFX10-NEXT: v_mov_b32_e32 v2, v0
862 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 785 ; GFX10-NEXT: v_perm_b32 v5, v5, v4, 0x5040100
863 ; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY 786 ; GFX10-NEXT: v_perm_b32 v4, v9, v10, 0x5040100
787 ; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
864 ; GFX10-NEXT: s_waitcnt vmcnt(0) 788 ; GFX10-NEXT: s_waitcnt vmcnt(0)
865 ; GFX10-NEXT: ; return to shader part epilog 789 ; GFX10-NEXT: ; return to shader part epilog
866 ; 790 ;
867 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V1: 791 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V1:
868 ; GFX10GISEL: ; %bb.0: ; %main_body 792 ; GFX10GISEL: ; %bb.0: ; %main_body
869 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff 793 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v2
870 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 794 ; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v3
871 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 795 ; GFX10GISEL-NEXT: v_mov_b32_e32 v11, v4
872 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v9, v3 796 ; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
873 ; GFX10GISEL-NEXT: v_and_or_b32 v3, v4, v9, v5 797 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v1
874 ; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY 798 ; GFX10GISEL-NEXT: v_perm_b32 v4, v10, v9, 0x5040100
799 ; GFX10GISEL-NEXT: v_perm_b32 v5, v5, v11, 0x5040100
800 ; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
875 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 801 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
876 ; GFX10GISEL-NEXT: ; return to shader part epilog 802 ; GFX10GISEL-NEXT: ; return to shader part epilog
877 main_body: 803 main_body:
878 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 804 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
879 ret float %v 805 ret float %v
880 } 806 }
881 807
882 define amdgpu_ps <2 x float> @sample_g16_noa16_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { 808 define amdgpu_ps <2 x float> @sample_g16_noa16_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
883 ; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V2: 809 ; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V2:
884 ; GFX10: ; %bb.0: ; %main_body 810 ; GFX10: ; %bb.0: ; %main_body
885 ; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff 811 ; GFX10-NEXT: v_mov_b32_e32 v9, v3
886 ; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 812 ; GFX10-NEXT: v_mov_b32_e32 v10, v2
887 ; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 813 ; GFX10-NEXT: v_mov_b32_e32 v3, v1
888 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 814 ; GFX10-NEXT: v_mov_b32_e32 v2, v0
889 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 815 ; GFX10-NEXT: v_perm_b32 v5, v5, v4, 0x5040100
890 ; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY 816 ; GFX10-NEXT: v_perm_b32 v4, v9, v10, 0x5040100
817 ; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
891 ; GFX10-NEXT: s_waitcnt vmcnt(0) 818 ; GFX10-NEXT: s_waitcnt vmcnt(0)
892 ; GFX10-NEXT: ; return to shader part epilog 819 ; GFX10-NEXT: ; return to shader part epilog
893 ; 820 ;
894 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V2: 821 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V2:
895 ; GFX10GISEL: ; %bb.0: ; %main_body 822 ; GFX10GISEL: ; %bb.0: ; %main_body
896 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff 823 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v2
897 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 824 ; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v3
898 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 825 ; GFX10GISEL-NEXT: v_mov_b32_e32 v11, v4
899 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v9, v3 826 ; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0
900 ; GFX10GISEL-NEXT: v_and_or_b32 v3, v4, v9, v5 827 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v1
901 ; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY 828 ; GFX10GISEL-NEXT: v_perm_b32 v4, v10, v9, 0x5040100
829 ; GFX10GISEL-NEXT: v_perm_b32 v5, v5, v11, 0x5040100
830 ; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
902 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 831 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
903 ; GFX10GISEL-NEXT: ; return to shader part epilog 832 ; GFX10GISEL-NEXT: ; return to shader part epilog
904 main_body: 833 main_body:
905 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 834 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
906 ret <2 x float> %v 835 ret <2 x float> %v