Mercurial > hg > CbC > CbC_llvm
comparison llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll @ 236:c4bab56944e8 llvm-original
LLVM 16
author | kono |
---|---|
date | Wed, 09 Nov 2022 17:45:10 +0900 |
parents | 5f17cb93ff66 |
children | 1f2b6ac9f198 |
comparison
equal
deleted
inserted
replaced
232:70dce7da266c | 236:c4bab56944e8 |
---|---|
11 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 11 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
12 ; GFX10-NEXT: ; return to shader part epilog | 12 ; GFX10-NEXT: ; return to shader part epilog |
13 ; | 13 ; |
14 ; GFX10GISEL-LABEL: sample_d_1d: | 14 ; GFX10GISEL-LABEL: sample_d_1d: |
15 ; GFX10GISEL: ; %bb.0: ; %main_body | 15 ; GFX10GISEL: ; %bb.0: ; %main_body |
16 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
17 ; GFX10GISEL-NEXT: v_and_or_b32 v2, 0xffff, v2, s12 | |
18 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | 16 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 |
19 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 17 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
20 ; GFX10GISEL-NEXT: ; return to shader part epilog | 18 ; GFX10GISEL-NEXT: ; return to shader part epilog |
21 main_body: | 19 main_body: |
22 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 20 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
24 } | 22 } |
25 | 23 |
26 define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { | 24 define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { |
27 ; GFX10-LABEL: sample_d_2d: | 25 ; GFX10-LABEL: sample_d_2d: |
28 ; GFX10: ; %bb.0: ; %main_body | 26 ; GFX10: ; %bb.0: ; %main_body |
29 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 | 27 ; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100 |
30 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 | |
31 ; GFX10-NEXT: image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | 28 ; GFX10-NEXT: image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 |
32 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 29 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
33 ; GFX10-NEXT: ; return to shader part epilog | 30 ; GFX10-NEXT: ; return to shader part epilog |
34 ; | 31 ; |
35 ; GFX10GISEL-LABEL: sample_d_2d: | 32 ; GFX10GISEL-LABEL: sample_d_2d: |
36 ; GFX10GISEL: ; %bb.0: ; %main_body | 33 ; GFX10GISEL: ; %bb.0: ; %main_body |
37 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 | 34 ; GFX10GISEL-NEXT: v_perm_b32 v4, v5, v4, 0x5040100 |
38 ; GFX10GISEL-NEXT: v_and_or_b32 v4, 0xffff, v4, v5 | |
39 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | 35 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 |
40 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 36 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
41 ; GFX10GISEL-NEXT: ; return to shader part epilog | 37 ; GFX10GISEL-NEXT: ; return to shader part epilog |
42 main_body: | 38 main_body: |
43 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 39 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
45 } | 41 } |
46 | 42 |
47 define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r) { | 43 define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r) { |
48 ; GFX10-LABEL: sample_d_3d: | 44 ; GFX10-LABEL: sample_d_3d: |
49 ; GFX10: ; %bb.0: ; %main_body | 45 ; GFX10: ; %bb.0: ; %main_body |
50 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6 | 46 ; GFX10-NEXT: v_mov_b32_e32 v15, v8 |
51 ; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 | 47 ; GFX10-NEXT: v_mov_b32_e32 v13, v5 |
52 ; GFX10-NEXT: image_sample_d v[0:3], [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 | 48 ; GFX10-NEXT: v_mov_b32_e32 v12, v4 |
49 ; GFX10-NEXT: v_mov_b32_e32 v11, v3 | |
50 ; GFX10-NEXT: v_mov_b32_e32 v10, v2 | |
51 ; GFX10-NEXT: v_mov_b32_e32 v9, v1 | |
52 ; GFX10-NEXT: v_mov_b32_e32 v8, v0 | |
53 ; GFX10-NEXT: v_perm_b32 v14, v7, v6, 0x5040100 | |
54 ; GFX10-NEXT: image_sample_d v[0:3], v[8:15], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 | |
53 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 55 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
54 ; GFX10-NEXT: ; return to shader part epilog | 56 ; GFX10-NEXT: ; return to shader part epilog |
55 ; | 57 ; |
56 ; GFX10GISEL-LABEL: sample_d_3d: | 58 ; GFX10GISEL-LABEL: sample_d_3d: |
57 ; GFX10GISEL: ; %bb.0: ; %main_body | 59 ; GFX10GISEL: ; %bb.0: ; %main_body |
58 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff | 60 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v7 |
59 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7 | 61 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8 |
60 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | 62 ; GFX10GISEL-NEXT: v_perm_b32 v6, v9, v6, 0x5040100 |
61 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v6, v9, v7 | |
62 ; GFX10GISEL-NEXT: v_and_or_b32 v7, v8, v9, s12 | |
63 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 | 63 ; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 |
64 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 64 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
65 ; GFX10GISEL-NEXT: ; return to shader part epilog | 65 ; GFX10GISEL-NEXT: ; return to shader part epilog |
66 main_body: | 66 main_body: |
67 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 67 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
75 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 75 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
76 ; GFX10-NEXT: ; return to shader part epilog | 76 ; GFX10-NEXT: ; return to shader part epilog |
77 ; | 77 ; |
78 ; GFX10GISEL-LABEL: sample_c_d_1d: | 78 ; GFX10GISEL-LABEL: sample_c_d_1d: |
79 ; GFX10GISEL: ; %bb.0: ; %main_body | 79 ; GFX10GISEL: ; %bb.0: ; %main_body |
80 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
81 ; GFX10GISEL-NEXT: v_and_or_b32 v3, 0xffff, v3, s12 | |
82 ; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | 80 ; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 |
83 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 81 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
84 ; GFX10GISEL-NEXT: ; return to shader part epilog | 82 ; GFX10GISEL-NEXT: ; return to shader part epilog |
85 main_body: | 83 main_body: |
86 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 84 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
88 } | 86 } |
89 | 87 |
90 define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { | 88 define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { |
91 ; GFX10-LABEL: sample_c_d_2d: | 89 ; GFX10-LABEL: sample_c_d_2d: |
92 ; GFX10: ; %bb.0: ; %main_body | 90 ; GFX10: ; %bb.0: ; %main_body |
93 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 | 91 ; GFX10-NEXT: v_perm_b32 v5, v6, v5, 0x5040100 |
94 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 | 92 ; GFX10-NEXT: image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 |
95 ; GFX10-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
96 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 93 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
97 ; GFX10-NEXT: ; return to shader part epilog | 94 ; GFX10-NEXT: ; return to shader part epilog |
98 ; | 95 ; |
99 ; GFX10GISEL-LABEL: sample_c_d_2d: | 96 ; GFX10GISEL-LABEL: sample_c_d_2d: |
100 ; GFX10GISEL: ; %bb.0: ; %main_body | 97 ; GFX10GISEL: ; %bb.0: ; %main_body |
101 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6 | 98 ; GFX10GISEL-NEXT: v_perm_b32 v5, v6, v5, 0x5040100 |
102 ; GFX10GISEL-NEXT: v_and_or_b32 v5, 0xffff, v5, v6 | 99 ; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 |
103 ; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
104 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 100 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
105 ; GFX10GISEL-NEXT: ; return to shader part epilog | 101 ; GFX10GISEL-NEXT: ; return to shader part epilog |
106 main_body: | 102 main_body: |
107 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 103 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
108 ret <4 x float> %v | 104 ret <4 x float> %v |
109 } | 105 } |
110 | 106 |
111 define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) { | 107 define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) { |
112 ; GFX10-LABEL: sample_d_cl_1d: | 108 ; GFX10-LABEL: sample_d_cl_1d: |
113 ; GFX10: ; %bb.0: ; %main_body | 109 ; GFX10: ; %bb.0: ; %main_body |
114 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 | 110 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 |
115 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 | |
116 ; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | 111 ; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 |
117 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 112 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
118 ; GFX10-NEXT: ; return to shader part epilog | 113 ; GFX10-NEXT: ; return to shader part epilog |
119 ; | 114 ; |
120 ; GFX10GISEL-LABEL: sample_d_cl_1d: | 115 ; GFX10GISEL-LABEL: sample_d_cl_1d: |
121 ; GFX10GISEL: ; %bb.0: ; %main_body | 116 ; GFX10GISEL: ; %bb.0: ; %main_body |
122 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | 117 ; GFX10GISEL-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 |
123 ; GFX10GISEL-NEXT: v_and_or_b32 v2, 0xffff, v2, v3 | |
124 ; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | 118 ; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 |
125 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 119 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
126 ; GFX10GISEL-NEXT: ; return to shader part epilog | 120 ; GFX10GISEL-NEXT: ; return to shader part epilog |
127 main_body: | 121 main_body: |
128 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 122 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
130 } | 124 } |
131 | 125 |
132 define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { | 126 define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { |
133 ; GFX10-LABEL: sample_d_cl_2d: | 127 ; GFX10-LABEL: sample_d_cl_2d: |
134 ; GFX10: ; %bb.0: ; %main_body | 128 ; GFX10: ; %bb.0: ; %main_body |
135 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 | 129 ; GFX10-NEXT: v_mov_b32_e32 v11, v6 |
136 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 | 130 ; GFX10-NEXT: v_mov_b32_e32 v9, v3 |
137 ; GFX10-NEXT: image_sample_d_cl v[0:3], [v0, v1, v2, v3, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | 131 ; GFX10-NEXT: v_mov_b32_e32 v8, v2 |
132 ; GFX10-NEXT: v_mov_b32_e32 v7, v1 | |
133 ; GFX10-NEXT: v_mov_b32_e32 v6, v0 | |
134 ; GFX10-NEXT: v_perm_b32 v10, v5, v4, 0x5040100 | |
135 ; GFX10-NEXT: image_sample_d_cl v[0:3], v[6:11], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
138 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 136 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
139 ; GFX10-NEXT: ; return to shader part epilog | 137 ; GFX10-NEXT: ; return to shader part epilog |
140 ; | 138 ; |
141 ; GFX10GISEL-LABEL: sample_d_cl_2d: | 139 ; GFX10GISEL-LABEL: sample_d_cl_2d: |
142 ; GFX10GISEL: ; %bb.0: ; %main_body | 140 ; GFX10GISEL: ; %bb.0: ; %main_body |
143 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff | 141 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v5 |
144 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 | 142 ; GFX10GISEL-NEXT: v_mov_b32_e32 v5, v6 |
145 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | 143 ; GFX10GISEL-NEXT: v_perm_b32 v4, v7, v4, 0x5040100 |
146 ; GFX10GISEL-NEXT: v_and_or_b32 v4, v4, v7, v5 | 144 ; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 |
147 ; GFX10GISEL-NEXT: v_and_or_b32 v5, v6, v7, s12 | |
148 ; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
149 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 145 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
150 ; GFX10GISEL-NEXT: ; return to shader part epilog | 146 ; GFX10GISEL-NEXT: ; return to shader part epilog |
151 main_body: | 147 main_body: |
152 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 148 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
153 ret <4 x float> %v | 149 ret <4 x float> %v |
154 } | 150 } |
155 | 151 |
156 define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) { | 152 define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) { |
157 ; GFX10-LABEL: sample_c_d_cl_1d: | 153 ; GFX10-LABEL: sample_c_d_cl_1d: |
158 ; GFX10: ; %bb.0: ; %main_body | 154 ; GFX10: ; %bb.0: ; %main_body |
159 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3 | 155 ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100 |
160 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 | |
161 ; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | 156 ; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 |
162 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 157 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
163 ; GFX10-NEXT: ; return to shader part epilog | 158 ; GFX10-NEXT: ; return to shader part epilog |
164 ; | 159 ; |
165 ; GFX10GISEL-LABEL: sample_c_d_cl_1d: | 160 ; GFX10GISEL-LABEL: sample_c_d_cl_1d: |
166 ; GFX10GISEL: ; %bb.0: ; %main_body | 161 ; GFX10GISEL: ; %bb.0: ; %main_body |
167 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 | 162 ; GFX10GISEL-NEXT: v_perm_b32 v3, v4, v3, 0x5040100 |
168 ; GFX10GISEL-NEXT: v_and_or_b32 v3, 0xffff, v3, v4 | |
169 ; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | 163 ; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 |
170 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 164 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
171 ; GFX10GISEL-NEXT: ; return to shader part epilog | 165 ; GFX10GISEL-NEXT: ; return to shader part epilog |
172 main_body: | 166 main_body: |
173 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 167 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
175 } | 169 } |
176 | 170 |
177 define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { | 171 define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { |
178 ; GFX10-LABEL: sample_c_d_cl_2d: | 172 ; GFX10-LABEL: sample_c_d_cl_2d: |
179 ; GFX10: ; %bb.0: ; %main_body | 173 ; GFX10: ; %bb.0: ; %main_body |
180 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 | 174 ; GFX10-NEXT: v_mov_b32_e32 v13, v7 |
181 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 | 175 ; GFX10-NEXT: v_mov_b32_e32 v11, v4 |
182 ; GFX10-NEXT: image_sample_c_d_cl v[0:3], [v0, v1, v2, v3, v4, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | 176 ; GFX10-NEXT: v_mov_b32_e32 v10, v3 |
177 ; GFX10-NEXT: v_mov_b32_e32 v9, v2 | |
178 ; GFX10-NEXT: v_mov_b32_e32 v8, v1 | |
179 ; GFX10-NEXT: v_mov_b32_e32 v7, v0 | |
180 ; GFX10-NEXT: v_perm_b32 v12, v6, v5, 0x5040100 | |
181 ; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[7:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
183 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 182 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
184 ; GFX10-NEXT: ; return to shader part epilog | 183 ; GFX10-NEXT: ; return to shader part epilog |
185 ; | 184 ; |
186 ; GFX10GISEL-LABEL: sample_c_d_cl_2d: | 185 ; GFX10GISEL-LABEL: sample_c_d_cl_2d: |
187 ; GFX10GISEL: ; %bb.0: ; %main_body | 186 ; GFX10GISEL: ; %bb.0: ; %main_body |
188 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff | 187 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v6 |
189 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6 | 188 ; GFX10GISEL-NEXT: v_mov_b32_e32 v6, v7 |
190 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | 189 ; GFX10GISEL-NEXT: v_perm_b32 v5, v8, v5, 0x5040100 |
191 ; GFX10GISEL-NEXT: v_and_or_b32 v5, v5, v8, v6 | 190 ; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 |
192 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v7, v8, s12 | |
193 ; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
194 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 191 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
195 ; GFX10GISEL-NEXT: ; return to shader part epilog | 192 ; GFX10GISEL-NEXT: ; return to shader part epilog |
196 main_body: | 193 main_body: |
197 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 194 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
198 ret <4 x float> %v | 195 ret <4 x float> %v |
205 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 202 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
206 ; GFX10-NEXT: ; return to shader part epilog | 203 ; GFX10-NEXT: ; return to shader part epilog |
207 ; | 204 ; |
208 ; GFX10GISEL-LABEL: sample_cd_1d: | 205 ; GFX10GISEL-LABEL: sample_cd_1d: |
209 ; GFX10GISEL: ; %bb.0: ; %main_body | 206 ; GFX10GISEL: ; %bb.0: ; %main_body |
210 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
211 ; GFX10GISEL-NEXT: v_and_or_b32 v2, 0xffff, v2, s12 | |
212 ; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | 207 ; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 |
213 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 208 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
214 ; GFX10GISEL-NEXT: ; return to shader part epilog | 209 ; GFX10GISEL-NEXT: ; return to shader part epilog |
215 main_body: | 210 main_body: |
216 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 211 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
218 } | 213 } |
219 | 214 |
220 define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { | 215 define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { |
221 ; GFX10-LABEL: sample_cd_2d: | 216 ; GFX10-LABEL: sample_cd_2d: |
222 ; GFX10: ; %bb.0: ; %main_body | 217 ; GFX10: ; %bb.0: ; %main_body |
223 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 | 218 ; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100 |
224 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 | |
225 ; GFX10-NEXT: image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | 219 ; GFX10-NEXT: image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 |
226 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 220 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
227 ; GFX10-NEXT: ; return to shader part epilog | 221 ; GFX10-NEXT: ; return to shader part epilog |
228 ; | 222 ; |
229 ; GFX10GISEL-LABEL: sample_cd_2d: | 223 ; GFX10GISEL-LABEL: sample_cd_2d: |
230 ; GFX10GISEL: ; %bb.0: ; %main_body | 224 ; GFX10GISEL: ; %bb.0: ; %main_body |
231 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 | 225 ; GFX10GISEL-NEXT: v_perm_b32 v4, v5, v4, 0x5040100 |
232 ; GFX10GISEL-NEXT: v_and_or_b32 v4, 0xffff, v4, v5 | |
233 ; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | 226 ; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 |
234 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 227 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
235 ; GFX10GISEL-NEXT: ; return to shader part epilog | 228 ; GFX10GISEL-NEXT: ; return to shader part epilog |
236 main_body: | 229 main_body: |
237 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 230 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
245 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 238 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
246 ; GFX10-NEXT: ; return to shader part epilog | 239 ; GFX10-NEXT: ; return to shader part epilog |
247 ; | 240 ; |
248 ; GFX10GISEL-LABEL: sample_c_cd_1d: | 241 ; GFX10GISEL-LABEL: sample_c_cd_1d: |
249 ; GFX10GISEL: ; %bb.0: ; %main_body | 242 ; GFX10GISEL: ; %bb.0: ; %main_body |
250 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
251 ; GFX10GISEL-NEXT: v_and_or_b32 v3, 0xffff, v3, s12 | |
252 ; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | 243 ; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 |
253 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 244 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
254 ; GFX10GISEL-NEXT: ; return to shader part epilog | 245 ; GFX10GISEL-NEXT: ; return to shader part epilog |
255 main_body: | 246 main_body: |
256 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 247 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
258 } | 249 } |
259 | 250 |
260 define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { | 251 define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) { |
261 ; GFX10-LABEL: sample_c_cd_2d: | 252 ; GFX10-LABEL: sample_c_cd_2d: |
262 ; GFX10: ; %bb.0: ; %main_body | 253 ; GFX10: ; %bb.0: ; %main_body |
263 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 | 254 ; GFX10-NEXT: v_perm_b32 v5, v6, v5, 0x5040100 |
264 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 | 255 ; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 |
265 ; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
266 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 256 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
267 ; GFX10-NEXT: ; return to shader part epilog | 257 ; GFX10-NEXT: ; return to shader part epilog |
268 ; | 258 ; |
269 ; GFX10GISEL-LABEL: sample_c_cd_2d: | 259 ; GFX10GISEL-LABEL: sample_c_cd_2d: |
270 ; GFX10GISEL: ; %bb.0: ; %main_body | 260 ; GFX10GISEL: ; %bb.0: ; %main_body |
271 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6 | 261 ; GFX10GISEL-NEXT: v_perm_b32 v5, v6, v5, 0x5040100 |
272 ; GFX10GISEL-NEXT: v_and_or_b32 v5, 0xffff, v5, v6 | 262 ; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 |
273 ; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
274 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 263 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
275 ; GFX10GISEL-NEXT: ; return to shader part epilog | 264 ; GFX10GISEL-NEXT: ; return to shader part epilog |
276 main_body: | 265 main_body: |
277 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 266 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
278 ret <4 x float> %v | 267 ret <4 x float> %v |
279 } | 268 } |
280 | 269 |
281 define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) { | 270 define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) { |
282 ; GFX10-LABEL: sample_cd_cl_1d: | 271 ; GFX10-LABEL: sample_cd_cl_1d: |
283 ; GFX10: ; %bb.0: ; %main_body | 272 ; GFX10: ; %bb.0: ; %main_body |
284 ; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2 | 273 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 |
285 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 | |
286 ; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | 274 ; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 |
287 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 275 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
288 ; GFX10-NEXT: ; return to shader part epilog | 276 ; GFX10-NEXT: ; return to shader part epilog |
289 ; | 277 ; |
290 ; GFX10GISEL-LABEL: sample_cd_cl_1d: | 278 ; GFX10GISEL-LABEL: sample_cd_cl_1d: |
291 ; GFX10GISEL: ; %bb.0: ; %main_body | 279 ; GFX10GISEL: ; %bb.0: ; %main_body |
292 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | 280 ; GFX10GISEL-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 |
293 ; GFX10GISEL-NEXT: v_and_or_b32 v2, 0xffff, v2, v3 | |
294 ; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | 281 ; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 |
295 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 282 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
296 ; GFX10GISEL-NEXT: ; return to shader part epilog | 283 ; GFX10GISEL-NEXT: ; return to shader part epilog |
297 main_body: | 284 main_body: |
298 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 285 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
300 } | 287 } |
301 | 288 |
302 define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { | 289 define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { |
303 ; GFX10-LABEL: sample_cd_cl_2d: | 290 ; GFX10-LABEL: sample_cd_cl_2d: |
304 ; GFX10: ; %bb.0: ; %main_body | 291 ; GFX10: ; %bb.0: ; %main_body |
305 ; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 | 292 ; GFX10-NEXT: v_mov_b32_e32 v11, v6 |
306 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 | 293 ; GFX10-NEXT: v_mov_b32_e32 v9, v3 |
307 ; GFX10-NEXT: image_sample_cd_cl v[0:3], [v0, v1, v2, v3, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | 294 ; GFX10-NEXT: v_mov_b32_e32 v8, v2 |
295 ; GFX10-NEXT: v_mov_b32_e32 v7, v1 | |
296 ; GFX10-NEXT: v_mov_b32_e32 v6, v0 | |
297 ; GFX10-NEXT: v_perm_b32 v10, v5, v4, 0x5040100 | |
298 ; GFX10-NEXT: image_sample_cd_cl v[0:3], v[6:11], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
308 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 299 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
309 ; GFX10-NEXT: ; return to shader part epilog | 300 ; GFX10-NEXT: ; return to shader part epilog |
310 ; | 301 ; |
311 ; GFX10GISEL-LABEL: sample_cd_cl_2d: | 302 ; GFX10GISEL-LABEL: sample_cd_cl_2d: |
312 ; GFX10GISEL: ; %bb.0: ; %main_body | 303 ; GFX10GISEL: ; %bb.0: ; %main_body |
313 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff | 304 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v5 |
314 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 | 305 ; GFX10GISEL-NEXT: v_mov_b32_e32 v5, v6 |
315 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | 306 ; GFX10GISEL-NEXT: v_perm_b32 v4, v7, v4, 0x5040100 |
316 ; GFX10GISEL-NEXT: v_and_or_b32 v4, v4, v7, v5 | 307 ; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 |
317 ; GFX10GISEL-NEXT: v_and_or_b32 v5, v6, v7, s12 | |
318 ; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
319 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 308 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
320 ; GFX10GISEL-NEXT: ; return to shader part epilog | 309 ; GFX10GISEL-NEXT: ; return to shader part epilog |
321 main_body: | 310 main_body: |
322 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 311 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
323 ret <4 x float> %v | 312 ret <4 x float> %v |
324 } | 313 } |
325 | 314 |
326 define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) { | 315 define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) { |
327 ; GFX10-LABEL: sample_c_cd_cl_1d: | 316 ; GFX10-LABEL: sample_c_cd_cl_1d: |
328 ; GFX10: ; %bb.0: ; %main_body | 317 ; GFX10: ; %bb.0: ; %main_body |
329 ; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v3 | 318 ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100 |
330 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 | |
331 ; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | 319 ; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 |
332 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 320 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
333 ; GFX10-NEXT: ; return to shader part epilog | 321 ; GFX10-NEXT: ; return to shader part epilog |
334 ; | 322 ; |
335 ; GFX10GISEL-LABEL: sample_c_cd_cl_1d: | 323 ; GFX10GISEL-LABEL: sample_c_cd_cl_1d: |
336 ; GFX10GISEL: ; %bb.0: ; %main_body | 324 ; GFX10GISEL: ; %bb.0: ; %main_body |
337 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 | 325 ; GFX10GISEL-NEXT: v_perm_b32 v3, v4, v3, 0x5040100 |
338 ; GFX10GISEL-NEXT: v_and_or_b32 v3, 0xffff, v3, v4 | |
339 ; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 | 326 ; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 |
340 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 327 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
341 ; GFX10GISEL-NEXT: ; return to shader part epilog | 328 ; GFX10GISEL-NEXT: ; return to shader part epilog |
342 main_body: | 329 main_body: |
343 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 330 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
345 } | 332 } |
346 | 333 |
347 define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { | 334 define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) { |
348 ; GFX10-LABEL: sample_c_cd_cl_2d: | 335 ; GFX10-LABEL: sample_c_cd_cl_2d: |
349 ; GFX10: ; %bb.0: ; %main_body | 336 ; GFX10: ; %bb.0: ; %main_body |
350 ; GFX10-NEXT: v_and_b32_e32 v5, 0xffff, v5 | 337 ; GFX10-NEXT: v_mov_b32_e32 v13, v7 |
351 ; GFX10-NEXT: v_lshl_or_b32 v5, v6, 16, v5 | 338 ; GFX10-NEXT: v_mov_b32_e32 v11, v4 |
352 ; GFX10-NEXT: image_sample_c_cd_cl v[0:3], [v0, v1, v2, v3, v4, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | 339 ; GFX10-NEXT: v_mov_b32_e32 v10, v3 |
340 ; GFX10-NEXT: v_mov_b32_e32 v9, v2 | |
341 ; GFX10-NEXT: v_mov_b32_e32 v8, v1 | |
342 ; GFX10-NEXT: v_mov_b32_e32 v7, v0 | |
343 ; GFX10-NEXT: v_perm_b32 v12, v6, v5, 0x5040100 | |
344 ; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[7:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
353 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 345 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
354 ; GFX10-NEXT: ; return to shader part epilog | 346 ; GFX10-NEXT: ; return to shader part epilog |
355 ; | 347 ; |
356 ; GFX10GISEL-LABEL: sample_c_cd_cl_2d: | 348 ; GFX10GISEL-LABEL: sample_c_cd_cl_2d: |
357 ; GFX10GISEL: ; %bb.0: ; %main_body | 349 ; GFX10GISEL: ; %bb.0: ; %main_body |
358 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff | 350 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v6 |
359 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6 | 351 ; GFX10GISEL-NEXT: v_mov_b32_e32 v6, v7 |
360 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | 352 ; GFX10GISEL-NEXT: v_perm_b32 v5, v8, v5, 0x5040100 |
361 ; GFX10GISEL-NEXT: v_and_or_b32 v5, v5, v8, v6 | 353 ; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 |
362 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v7, v8, s12 | |
363 ; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 | |
364 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 354 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
365 ; GFX10GISEL-NEXT: ; return to shader part epilog | 355 ; GFX10GISEL-NEXT: ; return to shader part epilog |
366 main_body: | 356 main_body: |
367 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 357 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
368 ret <4 x float> %v | 358 ret <4 x float> %v |
369 } | 359 } |
370 | 360 |
371 define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) { | 361 define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) { |
372 ; GFX10-LABEL: sample_c_d_o_2darray_V1: | 362 ; GFX10-LABEL: sample_c_d_o_2darray_V1: |
373 ; GFX10: ; %bb.0: ; %main_body | 363 ; GFX10: ; %bb.0: ; %main_body |
374 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6 | 364 ; GFX10-NEXT: v_mov_b32_e32 v15, v8 |
375 ; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 | 365 ; GFX10-NEXT: v_mov_b32_e32 v13, v5 |
376 ; GFX10-NEXT: image_sample_c_d_o v0, [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 | 366 ; GFX10-NEXT: v_mov_b32_e32 v12, v4 |
367 ; GFX10-NEXT: v_mov_b32_e32 v11, v3 | |
368 ; GFX10-NEXT: v_mov_b32_e32 v10, v2 | |
369 ; GFX10-NEXT: v_mov_b32_e32 v9, v1 | |
370 ; GFX10-NEXT: v_mov_b32_e32 v8, v0 | |
371 ; GFX10-NEXT: v_perm_b32 v14, v7, v6, 0x5040100 | |
372 ; GFX10-NEXT: image_sample_c_d_o v0, v[8:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 | |
377 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 373 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
378 ; GFX10-NEXT: ; return to shader part epilog | 374 ; GFX10-NEXT: ; return to shader part epilog |
379 ; | 375 ; |
380 ; GFX10GISEL-LABEL: sample_c_d_o_2darray_V1: | 376 ; GFX10GISEL-LABEL: sample_c_d_o_2darray_V1: |
381 ; GFX10GISEL: ; %bb.0: ; %main_body | 377 ; GFX10GISEL: ; %bb.0: ; %main_body |
382 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff | 378 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v7 |
383 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7 | 379 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8 |
384 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | 380 ; GFX10GISEL-NEXT: v_perm_b32 v6, v9, v6, 0x5040100 |
385 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v6, v9, v7 | |
386 ; GFX10GISEL-NEXT: v_and_or_b32 v7, v8, v9, s12 | |
387 ; GFX10GISEL-NEXT: image_sample_c_d_o v0, v[0:7], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 | 381 ; GFX10GISEL-NEXT: image_sample_c_d_o v0, v[0:7], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16 |
388 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 382 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
389 ; GFX10GISEL-NEXT: ; return to shader part epilog | 383 ; GFX10GISEL-NEXT: ; return to shader part epilog |
390 main_body: | 384 main_body: |
391 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 385 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
393 } | 387 } |
394 | 388 |
395 define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) { | 389 define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) { |
396 ; GFX10-LABEL: sample_c_d_o_2darray_V2: | 390 ; GFX10-LABEL: sample_c_d_o_2darray_V2: |
397 ; GFX10: ; %bb.0: ; %main_body | 391 ; GFX10: ; %bb.0: ; %main_body |
398 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6 | 392 ; GFX10-NEXT: v_mov_b32_e32 v15, v8 |
399 ; GFX10-NEXT: v_lshl_or_b32 v6, v7, 16, v6 | 393 ; GFX10-NEXT: v_mov_b32_e32 v13, v5 |
400 ; GFX10-NEXT: image_sample_c_d_o v[0:1], [v0, v1, v2, v3, v4, v5, v6, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 | 394 ; GFX10-NEXT: v_mov_b32_e32 v12, v4 |
395 ; GFX10-NEXT: v_mov_b32_e32 v11, v3 | |
396 ; GFX10-NEXT: v_mov_b32_e32 v10, v2 | |
397 ; GFX10-NEXT: v_mov_b32_e32 v9, v1 | |
398 ; GFX10-NEXT: v_mov_b32_e32 v8, v0 | |
399 ; GFX10-NEXT: v_perm_b32 v14, v7, v6, 0x5040100 | |
400 ; GFX10-NEXT: image_sample_c_d_o v[0:1], v[8:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 | |
401 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 401 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
402 ; GFX10-NEXT: ; return to shader part epilog | 402 ; GFX10-NEXT: ; return to shader part epilog |
403 ; | 403 ; |
404 ; GFX10GISEL-LABEL: sample_c_d_o_2darray_V2: | 404 ; GFX10GISEL-LABEL: sample_c_d_o_2darray_V2: |
405 ; GFX10GISEL: ; %bb.0: ; %main_body | 405 ; GFX10GISEL: ; %bb.0: ; %main_body |
406 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff | 406 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v7 |
407 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7 | 407 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8 |
408 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | 408 ; GFX10GISEL-NEXT: v_perm_b32 v6, v9, v6, 0x5040100 |
409 ; GFX10GISEL-NEXT: v_and_or_b32 v6, v6, v9, v7 | |
410 ; GFX10GISEL-NEXT: v_and_or_b32 v7, v8, v9, s12 | |
411 ; GFX10GISEL-NEXT: image_sample_c_d_o v[0:1], v[0:7], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 | 409 ; GFX10GISEL-NEXT: image_sample_c_d_o v[0:1], v[0:7], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16 |
412 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 410 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
413 ; GFX10GISEL-NEXT: ; return to shader part epilog | 411 ; GFX10GISEL-NEXT: ; return to shader part epilog |
414 main_body: | 412 main_body: |
415 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 413 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
445 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 443 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
446 ; GFX10-NEXT: ; return to shader part epilog | 444 ; GFX10-NEXT: ; return to shader part epilog |
447 ; | 445 ; |
448 ; GFX10GISEL-LABEL: sample_g16_noa16_d_1d: | 446 ; GFX10GISEL-LABEL: sample_g16_noa16_d_1d: |
449 ; GFX10GISEL: ; %bb.0: ; %main_body | 447 ; GFX10GISEL: ; %bb.0: ; %main_body |
450 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, 0xffff | |
451 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
452 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v3, s12 | |
453 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v3, s12 | |
454 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | 448 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
455 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 449 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
456 ; GFX10GISEL-NEXT: ; return to shader part epilog | 450 ; GFX10GISEL-NEXT: ; return to shader part epilog |
457 main_body: | 451 main_body: |
458 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 452 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
460 } | 454 } |
461 | 455 |
462 define amdgpu_ps <4 x float> @sample_g16_noa16_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { | 456 define amdgpu_ps <4 x float> @sample_g16_noa16_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { |
463 ; GFX10-LABEL: sample_g16_noa16_d_2d: | 457 ; GFX10-LABEL: sample_g16_noa16_d_2d: |
464 ; GFX10: ; %bb.0: ; %main_body | 458 ; GFX10: ; %bb.0: ; %main_body |
465 ; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff | 459 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 |
466 ; GFX10-NEXT: v_and_b32_e32 v2, v6, v2 | 460 ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 |
467 ; GFX10-NEXT: v_and_b32_e32 v0, v6, v0 | |
468 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 | |
469 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 | |
470 ; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | 461 ; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
471 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 462 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
472 ; GFX10-NEXT: ; return to shader part epilog | 463 ; GFX10-NEXT: ; return to shader part epilog |
473 ; | 464 ; |
474 ; GFX10GISEL-LABEL: sample_g16_noa16_d_2d: | 465 ; GFX10GISEL-LABEL: sample_g16_noa16_d_2d: |
475 ; GFX10GISEL: ; %bb.0: ; %main_body | 466 ; GFX10GISEL: ; %bb.0: ; %main_body |
476 ; GFX10GISEL-NEXT: v_mov_b32_e32 v6, 0xffff | 467 ; GFX10GISEL-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 |
477 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 | 468 ; GFX10GISEL-NEXT: v_perm_b32 v1, v3, v2, 0x5040100 |
478 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | |
479 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v6, v1 | |
480 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v6, v3 | |
481 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | 469 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
482 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 470 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
483 ; GFX10GISEL-NEXT: ; return to shader part epilog | 471 ; GFX10GISEL-NEXT: ; return to shader part epilog |
484 main_body: | 472 main_body: |
485 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 473 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
487 } | 475 } |
488 | 476 |
489 define amdgpu_ps <4 x float> @sample_g16_noa16_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) { | 477 define amdgpu_ps <4 x float> @sample_g16_noa16_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) { |
490 ; GFX10-LABEL: sample_g16_noa16_d_3d: | 478 ; GFX10-LABEL: sample_g16_noa16_d_3d: |
491 ; GFX10: ; %bb.0: ; %main_body | 479 ; GFX10: ; %bb.0: ; %main_body |
492 ; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff | 480 ; GFX10-NEXT: v_mov_b32_e32 v9, v3 |
493 ; GFX10-NEXT: v_and_b32_e32 v3, v9, v3 | 481 ; GFX10-NEXT: v_mov_b32_e32 v3, v2 |
494 ; GFX10-NEXT: v_and_b32_e32 v0, v9, v0 | 482 ; GFX10-NEXT: v_perm_b32 v2, v1, v0, 0x5040100 |
495 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 | 483 ; GFX10-NEXT: v_perm_b32 v4, v4, v9, 0x5040100 |
496 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 | 484 ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D |
497 ; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v3, v5, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D | |
498 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 485 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
499 ; GFX10-NEXT: ; return to shader part epilog | 486 ; GFX10-NEXT: ; return to shader part epilog |
500 ; | 487 ; |
501 ; GFX10GISEL-LABEL: sample_g16_noa16_d_3d: | 488 ; GFX10GISEL-LABEL: sample_g16_noa16_d_3d: |
502 ; GFX10GISEL: ; %bb.0: ; %main_body | 489 ; GFX10GISEL: ; %bb.0: ; %main_body |
503 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff | 490 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3 |
504 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 | 491 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v2 |
505 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 | 492 ; GFX10GISEL-NEXT: v_perm_b32 v2, v1, v0, 0x5040100 |
506 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | 493 ; GFX10GISEL-NEXT: v_perm_b32 v4, v4, v9, 0x5040100 |
507 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v9, v1 | 494 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D |
508 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v9, s12 | |
509 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v9, v4 | |
510 ; GFX10GISEL-NEXT: v_and_or_b32 v3, v5, v9, s12 | |
511 ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D | |
512 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 495 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
513 ; GFX10GISEL-NEXT: ; return to shader part epilog | 496 ; GFX10GISEL-NEXT: ; return to shader part epilog |
514 main_body: | 497 main_body: |
515 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 498 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
516 ret <4 x float> %v | 499 ret <4 x float> %v |
523 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 506 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
524 ; GFX10-NEXT: ; return to shader part epilog | 507 ; GFX10-NEXT: ; return to shader part epilog |
525 ; | 508 ; |
526 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_1d: | 509 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_1d: |
527 ; GFX10GISEL: ; %bb.0: ; %main_body | 510 ; GFX10GISEL: ; %bb.0: ; %main_body |
528 ; GFX10GISEL-NEXT: v_mov_b32_e32 v4, 0xffff | |
529 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
530 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v4, s12 | |
531 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v4, s12 | |
532 ; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | 511 ; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
533 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 512 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
534 ; GFX10GISEL-NEXT: ; return to shader part epilog | 513 ; GFX10GISEL-NEXT: ; return to shader part epilog |
535 main_body: | 514 main_body: |
536 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 515 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
538 } | 517 } |
539 | 518 |
540 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { | 519 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { |
541 ; GFX10-LABEL: sample_g16_noa16_c_d_2d: | 520 ; GFX10-LABEL: sample_g16_noa16_c_d_2d: |
542 ; GFX10: ; %bb.0: ; %main_body | 521 ; GFX10: ; %bb.0: ; %main_body |
543 ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff | 522 ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100 |
544 ; GFX10-NEXT: v_and_b32_e32 v3, v7, v3 | 523 ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 |
545 ; GFX10-NEXT: v_and_b32_e32 v1, v7, v1 | |
546 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 | |
547 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 | |
548 ; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | 524 ; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
549 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 525 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
550 ; GFX10-NEXT: ; return to shader part epilog | 526 ; GFX10-NEXT: ; return to shader part epilog |
551 ; | 527 ; |
552 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_2d: | 528 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_2d: |
553 ; GFX10GISEL: ; %bb.0: ; %main_body | 529 ; GFX10GISEL: ; %bb.0: ; %main_body |
554 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff | 530 ; GFX10GISEL-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 |
555 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 | 531 ; GFX10GISEL-NEXT: v_perm_b32 v2, v4, v3, 0x5040100 |
556 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 | |
557 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v7, v2 | |
558 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v7, v4 | |
559 ; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | 532 ; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
560 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 533 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
561 ; GFX10GISEL-NEXT: ; return to shader part epilog | 534 ; GFX10GISEL-NEXT: ; return to shader part epilog |
562 main_body: | 535 main_body: |
563 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 536 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
571 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 544 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
572 ; GFX10-NEXT: ; return to shader part epilog | 545 ; GFX10-NEXT: ; return to shader part epilog |
573 ; | 546 ; |
574 ; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_1d: | 547 ; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_1d: |
575 ; GFX10GISEL: ; %bb.0: ; %main_body | 548 ; GFX10GISEL: ; %bb.0: ; %main_body |
576 ; GFX10GISEL-NEXT: v_mov_b32_e32 v4, 0xffff | |
577 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
578 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v4, s12 | |
579 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v4, s12 | |
580 ; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | 549 ; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
581 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 550 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
582 ; GFX10GISEL-NEXT: ; return to shader part epilog | 551 ; GFX10GISEL-NEXT: ; return to shader part epilog |
583 main_body: | 552 main_body: |
584 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 553 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
586 } | 555 } |
587 | 556 |
588 define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { | 557 define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { |
589 ; GFX10-LABEL: sample_g16_noa16_d_cl_2d: | 558 ; GFX10-LABEL: sample_g16_noa16_d_cl_2d: |
590 ; GFX10: ; %bb.0: ; %main_body | 559 ; GFX10: ; %bb.0: ; %main_body |
591 ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff | 560 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 |
592 ; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 | 561 ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 |
593 ; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 | |
594 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 | |
595 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 | |
596 ; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | 562 ; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
597 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 563 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
598 ; GFX10-NEXT: ; return to shader part epilog | 564 ; GFX10-NEXT: ; return to shader part epilog |
599 ; | 565 ; |
600 ; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_2d: | 566 ; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_2d: |
601 ; GFX10GISEL: ; %bb.0: ; %main_body | 567 ; GFX10GISEL: ; %bb.0: ; %main_body |
602 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff | 568 ; GFX10GISEL-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 |
603 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 | 569 ; GFX10GISEL-NEXT: v_perm_b32 v1, v3, v2, 0x5040100 |
604 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | |
605 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v7, v1 | |
606 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v7, v3 | |
607 ; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | 570 ; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
608 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 571 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
609 ; GFX10GISEL-NEXT: ; return to shader part epilog | 572 ; GFX10GISEL-NEXT: ; return to shader part epilog |
610 main_body: | 573 main_body: |
611 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 574 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
619 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 582 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
620 ; GFX10-NEXT: ; return to shader part epilog | 583 ; GFX10-NEXT: ; return to shader part epilog |
621 ; | 584 ; |
622 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_1d: | 585 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_1d: |
623 ; GFX10GISEL: ; %bb.0: ; %main_body | 586 ; GFX10GISEL: ; %bb.0: ; %main_body |
624 ; GFX10GISEL-NEXT: v_mov_b32_e32 v5, 0xffff | |
625 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
626 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v5, s12 | |
627 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v5, s12 | |
628 ; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | 587 ; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
629 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 588 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
630 ; GFX10GISEL-NEXT: ; return to shader part epilog | 589 ; GFX10GISEL-NEXT: ; return to shader part epilog |
631 main_body: | 590 main_body: |
632 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 591 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
634 } | 593 } |
635 | 594 |
636 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { | 595 define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { |
637 ; GFX10-LABEL: sample_g16_noa16_c_d_cl_2d: | 596 ; GFX10-LABEL: sample_g16_noa16_c_d_cl_2d: |
638 ; GFX10: ; %bb.0: ; %main_body | 597 ; GFX10: ; %bb.0: ; %main_body |
639 ; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff | 598 ; GFX10-NEXT: v_mov_b32_e32 v8, v2 |
640 ; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 | 599 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 |
641 ; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 | 600 ; GFX10-NEXT: v_perm_b32 v4, v4, v3, 0x5040100 |
642 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 | 601 ; GFX10-NEXT: v_perm_b32 v3, v8, v1, 0x5040100 |
643 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 | 602 ; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
644 ; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
645 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 603 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
646 ; GFX10-NEXT: ; return to shader part epilog | 604 ; GFX10-NEXT: ; return to shader part epilog |
647 ; | 605 ; |
648 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_2d: | 606 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_2d: |
649 ; GFX10GISEL: ; %bb.0: ; %main_body | 607 ; GFX10GISEL: ; %bb.0: ; %main_body |
650 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff | 608 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v2 |
651 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 | 609 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3 |
652 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 | 610 ; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0 |
653 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v8, v2 | 611 ; GFX10GISEL-NEXT: v_perm_b32 v3, v8, v1, 0x5040100 |
654 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v8, v4 | 612 ; GFX10GISEL-NEXT: v_perm_b32 v4, v4, v9, 0x5040100 |
655 ; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | 613 ; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
656 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 614 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
657 ; GFX10GISEL-NEXT: ; return to shader part epilog | 615 ; GFX10GISEL-NEXT: ; return to shader part epilog |
658 main_body: | 616 main_body: |
659 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 617 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
660 ret <4 x float> %v | 618 ret <4 x float> %v |
667 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 625 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
668 ; GFX10-NEXT: ; return to shader part epilog | 626 ; GFX10-NEXT: ; return to shader part epilog |
669 ; | 627 ; |
670 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_1d: | 628 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_1d: |
671 ; GFX10GISEL: ; %bb.0: ; %main_body | 629 ; GFX10GISEL: ; %bb.0: ; %main_body |
672 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, 0xffff | |
673 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
674 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v3, s12 | |
675 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v3, s12 | |
676 ; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | 630 ; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
677 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 631 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
678 ; GFX10GISEL-NEXT: ; return to shader part epilog | 632 ; GFX10GISEL-NEXT: ; return to shader part epilog |
679 main_body: | 633 main_body: |
680 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 634 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
682 } | 636 } |
683 | 637 |
684 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { | 638 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { |
685 ; GFX10-LABEL: sample_g16_noa16_cd_2d: | 639 ; GFX10-LABEL: sample_g16_noa16_cd_2d: |
686 ; GFX10: ; %bb.0: ; %main_body | 640 ; GFX10: ; %bb.0: ; %main_body |
687 ; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff | 641 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 |
688 ; GFX10-NEXT: v_and_b32_e32 v2, v6, v2 | 642 ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 |
689 ; GFX10-NEXT: v_and_b32_e32 v0, v6, v0 | |
690 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 | |
691 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 | |
692 ; GFX10-NEXT: image_sample_cd_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | 643 ; GFX10-NEXT: image_sample_cd_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
693 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 644 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
694 ; GFX10-NEXT: ; return to shader part epilog | 645 ; GFX10-NEXT: ; return to shader part epilog |
695 ; | 646 ; |
696 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_2d: | 647 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_2d: |
697 ; GFX10GISEL: ; %bb.0: ; %main_body | 648 ; GFX10GISEL: ; %bb.0: ; %main_body |
698 ; GFX10GISEL-NEXT: v_mov_b32_e32 v6, 0xffff | 649 ; GFX10GISEL-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 |
699 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 | 650 ; GFX10GISEL-NEXT: v_perm_b32 v1, v3, v2, 0x5040100 |
700 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | |
701 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v6, v1 | |
702 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v6, v3 | |
703 ; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | 651 ; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
704 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 652 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
705 ; GFX10GISEL-NEXT: ; return to shader part epilog | 653 ; GFX10GISEL-NEXT: ; return to shader part epilog |
706 main_body: | 654 main_body: |
707 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 655 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
715 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 663 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
716 ; GFX10-NEXT: ; return to shader part epilog | 664 ; GFX10-NEXT: ; return to shader part epilog |
717 ; | 665 ; |
718 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_1d: | 666 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_1d: |
719 ; GFX10GISEL: ; %bb.0: ; %main_body | 667 ; GFX10GISEL: ; %bb.0: ; %main_body |
720 ; GFX10GISEL-NEXT: v_mov_b32_e32 v4, 0xffff | |
721 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
722 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v4, s12 | |
723 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v4, s12 | |
724 ; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | 668 ; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
725 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 669 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
726 ; GFX10GISEL-NEXT: ; return to shader part epilog | 670 ; GFX10GISEL-NEXT: ; return to shader part epilog |
727 main_body: | 671 main_body: |
728 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 672 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
730 } | 674 } |
731 | 675 |
732 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { | 676 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { |
733 ; GFX10-LABEL: sample_g16_noa16_c_cd_2d: | 677 ; GFX10-LABEL: sample_g16_noa16_c_cd_2d: |
734 ; GFX10: ; %bb.0: ; %main_body | 678 ; GFX10: ; %bb.0: ; %main_body |
735 ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff | 679 ; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100 |
736 ; GFX10-NEXT: v_and_b32_e32 v3, v7, v3 | 680 ; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 |
737 ; GFX10-NEXT: v_and_b32_e32 v1, v7, v1 | |
738 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 | |
739 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 | |
740 ; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | 681 ; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
741 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 682 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
742 ; GFX10-NEXT: ; return to shader part epilog | 683 ; GFX10-NEXT: ; return to shader part epilog |
743 ; | 684 ; |
744 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_2d: | 685 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_2d: |
745 ; GFX10GISEL: ; %bb.0: ; %main_body | 686 ; GFX10GISEL: ; %bb.0: ; %main_body |
746 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff | 687 ; GFX10GISEL-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 |
747 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 | 688 ; GFX10GISEL-NEXT: v_perm_b32 v2, v4, v3, 0x5040100 |
748 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 | |
749 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v7, v2 | |
750 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v7, v4 | |
751 ; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | 689 ; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
752 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 690 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
753 ; GFX10GISEL-NEXT: ; return to shader part epilog | 691 ; GFX10GISEL-NEXT: ; return to shader part epilog |
754 main_body: | 692 main_body: |
755 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 693 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
763 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 701 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
764 ; GFX10-NEXT: ; return to shader part epilog | 702 ; GFX10-NEXT: ; return to shader part epilog |
765 ; | 703 ; |
766 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_1d: | 704 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_1d: |
767 ; GFX10GISEL: ; %bb.0: ; %main_body | 705 ; GFX10GISEL: ; %bb.0: ; %main_body |
768 ; GFX10GISEL-NEXT: v_mov_b32_e32 v4, 0xffff | |
769 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
770 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v4, s12 | |
771 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v4, s12 | |
772 ; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | 706 ; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
773 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 707 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
774 ; GFX10GISEL-NEXT: ; return to shader part epilog | 708 ; GFX10GISEL-NEXT: ; return to shader part epilog |
775 main_body: | 709 main_body: |
776 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 710 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
778 } | 712 } |
779 | 713 |
780 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { | 714 define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { |
781 ; GFX10-LABEL: sample_g16_noa16_cd_cl_2d: | 715 ; GFX10-LABEL: sample_g16_noa16_cd_cl_2d: |
782 ; GFX10: ; %bb.0: ; %main_body | 716 ; GFX10: ; %bb.0: ; %main_body |
783 ; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff | 717 ; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 |
784 ; GFX10-NEXT: v_and_b32_e32 v2, v7, v2 | 718 ; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 |
785 ; GFX10-NEXT: v_and_b32_e32 v0, v7, v0 | |
786 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 | |
787 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 | |
788 ; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | 719 ; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
789 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 720 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
790 ; GFX10-NEXT: ; return to shader part epilog | 721 ; GFX10-NEXT: ; return to shader part epilog |
791 ; | 722 ; |
792 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_2d: | 723 ; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_2d: |
793 ; GFX10GISEL: ; %bb.0: ; %main_body | 724 ; GFX10GISEL: ; %bb.0: ; %main_body |
794 ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, 0xffff | 725 ; GFX10GISEL-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 |
795 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 | 726 ; GFX10GISEL-NEXT: v_perm_b32 v1, v3, v2, 0x5040100 |
796 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | |
797 ; GFX10GISEL-NEXT: v_and_or_b32 v0, v0, v7, v1 | |
798 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v2, v7, v3 | |
799 ; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | 727 ; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
800 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 728 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
801 ; GFX10GISEL-NEXT: ; return to shader part epilog | 729 ; GFX10GISEL-NEXT: ; return to shader part epilog |
802 main_body: | 730 main_body: |
803 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 731 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
811 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 739 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
812 ; GFX10-NEXT: ; return to shader part epilog | 740 ; GFX10-NEXT: ; return to shader part epilog |
813 ; | 741 ; |
814 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_1d: | 742 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_1d: |
815 ; GFX10GISEL: ; %bb.0: ; %main_body | 743 ; GFX10GISEL: ; %bb.0: ; %main_body |
816 ; GFX10GISEL-NEXT: v_mov_b32_e32 v5, 0xffff | |
817 ; GFX10GISEL-NEXT: s_lshl_b32 s12, s0, 16 | |
818 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v5, s12 | |
819 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v5, s12 | |
820 ; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D | 744 ; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D |
821 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 745 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
822 ; GFX10GISEL-NEXT: ; return to shader part epilog | 746 ; GFX10GISEL-NEXT: ; return to shader part epilog |
823 main_body: | 747 main_body: |
824 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 748 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
826 } | 750 } |
827 | 751 |
828 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { | 752 define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { |
829 ; GFX10-LABEL: sample_g16_noa16_c_cd_cl_2d: | 753 ; GFX10-LABEL: sample_g16_noa16_c_cd_cl_2d: |
830 ; GFX10: ; %bb.0: ; %main_body | 754 ; GFX10: ; %bb.0: ; %main_body |
831 ; GFX10-NEXT: v_mov_b32_e32 v8, 0xffff | 755 ; GFX10-NEXT: v_mov_b32_e32 v8, v2 |
832 ; GFX10-NEXT: v_and_b32_e32 v3, v8, v3 | 756 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 |
833 ; GFX10-NEXT: v_and_b32_e32 v1, v8, v1 | 757 ; GFX10-NEXT: v_perm_b32 v4, v4, v3, 0x5040100 |
834 ; GFX10-NEXT: v_lshl_or_b32 v3, v4, 16, v3 | 758 ; GFX10-NEXT: v_perm_b32 v3, v8, v1, 0x5040100 |
835 ; GFX10-NEXT: v_lshl_or_b32 v1, v2, 16, v1 | 759 ; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
836 ; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | |
837 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 760 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
838 ; GFX10-NEXT: ; return to shader part epilog | 761 ; GFX10-NEXT: ; return to shader part epilog |
839 ; | 762 ; |
840 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_2d: | 763 ; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_2d: |
841 ; GFX10GISEL: ; %bb.0: ; %main_body | 764 ; GFX10GISEL: ; %bb.0: ; %main_body |
842 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, 0xffff | 765 ; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v2 |
843 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 | 766 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3 |
844 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 | 767 ; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0 |
845 ; GFX10GISEL-NEXT: v_and_or_b32 v1, v1, v8, v2 | 768 ; GFX10GISEL-NEXT: v_perm_b32 v3, v8, v1, 0x5040100 |
846 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v3, v8, v4 | 769 ; GFX10GISEL-NEXT: v_perm_b32 v4, v4, v9, 0x5040100 |
847 ; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v2, v5, v6, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D | 770 ; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D |
848 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 771 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
849 ; GFX10GISEL-NEXT: ; return to shader part epilog | 772 ; GFX10GISEL-NEXT: ; return to shader part epilog |
850 main_body: | 773 main_body: |
851 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 774 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
852 ret <4 x float> %v | 775 ret <4 x float> %v |
853 } | 776 } |
854 | 777 |
855 define amdgpu_ps float @sample_g16_noa16_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { | 778 define amdgpu_ps float @sample_g16_noa16_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { |
856 ; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V1: | 779 ; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V1: |
857 ; GFX10: ; %bb.0: ; %main_body | 780 ; GFX10: ; %bb.0: ; %main_body |
858 ; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff | 781 ; GFX10-NEXT: v_mov_b32_e32 v9, v3 |
859 ; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 | 782 ; GFX10-NEXT: v_mov_b32_e32 v10, v2 |
860 ; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 | 783 ; GFX10-NEXT: v_mov_b32_e32 v3, v1 |
861 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 | 784 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 |
862 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 | 785 ; GFX10-NEXT: v_perm_b32 v5, v5, v4, 0x5040100 |
863 ; GFX10-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY | 786 ; GFX10-NEXT: v_perm_b32 v4, v9, v10, 0x5040100 |
787 ; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY | |
864 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 788 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
865 ; GFX10-NEXT: ; return to shader part epilog | 789 ; GFX10-NEXT: ; return to shader part epilog |
866 ; | 790 ; |
867 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V1: | 791 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V1: |
868 ; GFX10GISEL: ; %bb.0: ; %main_body | 792 ; GFX10GISEL: ; %bb.0: ; %main_body |
869 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff | 793 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v2 |
870 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | 794 ; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v3 |
871 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 | 795 ; GFX10GISEL-NEXT: v_mov_b32_e32 v11, v4 |
872 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v9, v3 | 796 ; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0 |
873 ; GFX10GISEL-NEXT: v_and_or_b32 v3, v4, v9, v5 | 797 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v1 |
874 ; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v0, [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY | 798 ; GFX10GISEL-NEXT: v_perm_b32 v4, v10, v9, 0x5040100 |
799 ; GFX10GISEL-NEXT: v_perm_b32 v5, v5, v11, 0x5040100 | |
800 ; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY | |
875 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 801 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
876 ; GFX10GISEL-NEXT: ; return to shader part epilog | 802 ; GFX10GISEL-NEXT: ; return to shader part epilog |
877 main_body: | 803 main_body: |
878 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 804 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
879 ret float %v | 805 ret float %v |
880 } | 806 } |
881 | 807 |
882 define amdgpu_ps <2 x float> @sample_g16_noa16_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { | 808 define amdgpu_ps <2 x float> @sample_g16_noa16_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { |
883 ; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V2: | 809 ; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V2: |
884 ; GFX10: ; %bb.0: ; %main_body | 810 ; GFX10: ; %bb.0: ; %main_body |
885 ; GFX10-NEXT: v_mov_b32_e32 v9, 0xffff | 811 ; GFX10-NEXT: v_mov_b32_e32 v9, v3 |
886 ; GFX10-NEXT: v_and_b32_e32 v4, v9, v4 | 812 ; GFX10-NEXT: v_mov_b32_e32 v10, v2 |
887 ; GFX10-NEXT: v_and_b32_e32 v2, v9, v2 | 813 ; GFX10-NEXT: v_mov_b32_e32 v3, v1 |
888 ; GFX10-NEXT: v_lshl_or_b32 v4, v5, 16, v4 | 814 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 |
889 ; GFX10-NEXT: v_lshl_or_b32 v2, v3, 16, v2 | 815 ; GFX10-NEXT: v_perm_b32 v5, v5, v4, 0x5040100 |
890 ; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v4, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY | 816 ; GFX10-NEXT: v_perm_b32 v4, v9, v10, 0x5040100 |
817 ; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY | |
891 ; GFX10-NEXT: s_waitcnt vmcnt(0) | 818 ; GFX10-NEXT: s_waitcnt vmcnt(0) |
892 ; GFX10-NEXT: ; return to shader part epilog | 819 ; GFX10-NEXT: ; return to shader part epilog |
893 ; | 820 ; |
894 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V2: | 821 ; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V2: |
895 ; GFX10GISEL: ; %bb.0: ; %main_body | 822 ; GFX10GISEL: ; %bb.0: ; %main_body |
896 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, 0xffff | 823 ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v2 |
897 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 | 824 ; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v3 |
898 ; GFX10GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 | 825 ; GFX10GISEL-NEXT: v_mov_b32_e32 v11, v4 |
899 ; GFX10GISEL-NEXT: v_and_or_b32 v2, v2, v9, v3 | 826 ; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0 |
900 ; GFX10GISEL-NEXT: v_and_or_b32 v3, v4, v9, v5 | 827 ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v1 |
901 ; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v3, v6, v7, v8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY | 828 ; GFX10GISEL-NEXT: v_perm_b32 v4, v10, v9, 0x5040100 |
829 ; GFX10GISEL-NEXT: v_perm_b32 v5, v5, v11, 0x5040100 | |
830 ; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY | |
902 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) | 831 ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) |
903 ; GFX10GISEL-NEXT: ; return to shader part epilog | 832 ; GFX10GISEL-NEXT: ; return to shader part epilog |
904 main_body: | 833 main_body: |
905 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) | 834 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) |
906 ret <2 x float> %v | 835 ret <2 x float> %v |