150
|
1 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
|
2 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
|
3
|
|
4 ; SI-LABEL: {{^}}s_movk_i32_k0:
|
|
5 ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff{{$}}
|
|
6 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
|
7 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
|
8 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
|
|
9 ; SI: s_endpgm
|
|
; Loads an i64 from %a, ORs it with 0x00000001_0000ffff and stores the result
; to %out; %b is unused. The low dword 0xffff (65535) is above the signed
; 16-bit maximum, and the checks for this kernel expect it in an s_mov_b32.
; The same constant is also fed to an "s"-constrained inline asm operand.
define amdgpu_kernel void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %a.val = load i64, i64 addrspace(1)* %a, align 4
  ; 4295032831 == (1 << 32) | ((1 << 16) - 1)
  %ored = or i64 %a.val, 4295032831
  store i64 %ored, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 4295032831)
  ret void
}
|
|
17
|
|
18 ; SI-LABEL: {{^}}s_movk_i32_k1:
|
|
19 ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
|
|
20 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
|
21 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
|
22 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
|
|
23 ; SI: s_endpgm
|
|
; Loads an i64 from %a, ORs it with 0x00000001_00007fff and stores the result
; to %out; %b is unused. The low dword 0x7fff (32767) is the largest signed
; 16-bit value, and the checks for this kernel expect it in an s_movk_i32.
; The same constant is also fed to an "s"-constrained inline asm operand.
define amdgpu_kernel void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %a.val = load i64, i64 addrspace(1)* %a, align 4
  ; 4295000063 == (1 << 32) | ((1 << 15) - 1)
  %ored = or i64 %a.val, 4295000063
  store i64 %ored, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 4295000063)
  ret void
}
|
|
31
|
|
32 ; SI-LABEL: {{^}}s_movk_i32_k2:
|
|
33 ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
|
|
34 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
|
35 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
|
36 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 64, v[[HI_VREG]]
|
|
37 ; SI: s_endpgm
|
|
; Loads an i64 from %a, ORs it with 0x00000040_00007fff and stores the result
; to %out; %b is unused. Same low dword as the k1 kernel (0x7fff), but the
; high dword is 64, which the checks expect as a literal operand of the
; high-half v_or_b32.
; The same constant is also fed to an "s"-constrained inline asm operand.
define amdgpu_kernel void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %a.val = load i64, i64 addrspace(1)* %a, align 4
  ; 274877939711 == (64 << 32) | ((1 << 15) - 1)
  %ored = or i64 %a.val, 274877939711
  store i64 %ored, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 274877939711)
  ret void
}
|
|
45
|
|
46 ; SI-LABEL: {{^}}s_movk_i32_k3:
|
|
47 ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
|
|
48 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
|
49 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
|
50 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
|
|
51 ; SI: s_endpgm
|
|
; Loads an i64 from %a, ORs it with 0x00000001_00008000 and stores the result
; to %out; %b is unused. The low dword 0x8000 (32768) is one past the signed
; 16-bit maximum, and the checks for this kernel expect it in an s_mov_b32.
; The same constant is also fed to an "s"-constrained inline asm operand.
define amdgpu_kernel void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %a.val = load i64, i64 addrspace(1)* %a, align 4
  ; 4295000064 == (1 << 32) | (1 << 15)
  %ored = or i64 %a.val, 4295000064
  store i64 %ored, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 4295000064)
  ret void
}
|
|
59
|
|
60 ; SI-LABEL: {{^}}s_movk_i32_k4:
|
|
61 ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x20000{{$}}
|
|
62 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
|
63 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
|
64 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
|
|
65 ; SI: s_endpgm
|
|
; Loads an i64 from %a, ORs it with 0x00000001_00020000 and stores the result
; to %out; %b is unused. The low dword 0x20000 (131072) needs more than
; 16 bits, and the checks for this kernel expect it in an s_mov_b32.
; The same constant is also fed to an "s"-constrained inline asm operand.
define amdgpu_kernel void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %a.val = load i64, i64 addrspace(1)* %a, align 4
  ; 4295098368 == (1 << 32) | (1 << 17)
  %ored = or i64 %a.val, 4295098368
  store i64 %ored, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 4295098368)
  ret void
}
|
|
73
|
|
74 ; SI-LABEL: {{^}}s_movk_i32_k5:
|
|
75 ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0xffef{{$}}
|
|
76 ; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0xff00ffff{{$}}
|
|
77 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
|
78 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
|
79 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
|
|
80 ; SI: s_endpgm
|
|
; Loads an i64 from %a, ORs it with 0xff00ffff_ffffffef and stores the result
; to %out; %b is unused. The low dword 0xffffffef is -17 as a signed 32-bit
; value (checks expect s_movk_i32 0xffef); the high dword 0xff00ffff is
; expected in an s_mov_b32.
; The same constant is also fed to an "s"-constrained inline asm operand.
define amdgpu_kernel void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %a.val = load i64, i64 addrspace(1)* %a, align 4
  ; 18374967954648334319 == -17 & 0xff00ffffffffffff
  %ored = or i64 %a.val, 18374967954648334319
  store i64 %ored, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 18374967954648334319)
  ret void
}
|
|
88
|
|
89 ; SI-LABEL: {{^}}s_movk_i32_k6:
|
|
90 ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x41{{$}}
|
|
91 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
|
92 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
|
93 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 63, v[[HI_VREG]]
|
|
94 ; SI: s_endpgm
|
|
; Loads an i64 from %a, ORs it with 0x0000003f_00000041 and stores the result
; to %out; %b is unused. The low dword is 65 (0x41), which the checks expect
; in an s_movk_i32; the high dword is 63, expected as a literal operand of
; the high-half v_or_b32.
; The same constant is also fed to an "s"-constrained inline asm operand.
define amdgpu_kernel void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %a.val = load i64, i64 addrspace(1)* %a, align 4
  ; 270582939713 == (63 << 32) | 65
  %ored = or i64 %a.val, 270582939713
  store i64 %ored, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 270582939713)
  ret void
}
|
|
102
|
|
103 ; SI-LABEL: {{^}}s_movk_i32_k7:
|
|
104 ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x2000{{$}}
|
|
105 ; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x4000{{$}}
|
|
106 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
|
107 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
|
108 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
|
|
109 ; SI: s_endpgm
|
|
; Loads an i64 from %a, ORs it with 0x00004000_00002000 and stores the result
; to %out; %b is unused. Both halves (0x2000 low, 0x4000 high) are small
; positive values, and the checks for this kernel expect an s_movk_i32 for
; each of them.
; The same constant is also fed to an "s"-constrained inline asm operand.
define amdgpu_kernel void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %a.val = load i64, i64 addrspace(1)* %a, align 4
  ; 70368744185856 == ((1 << 14) << 32) | (1 << 13)
  %ored = or i64 %a.val, 70368744185856
  store i64 %ored, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 70368744185856)
  ret void
}
|
|
117
|
|
118 ; SI-LABEL: {{^}}s_movk_i32_k8:
|
|
119 ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
|
|
120 ; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
|
|
121 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
|
122 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
|
123 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
|
|
124 ; SI: s_endpgm
|
|
; Loads an i64 from %a, ORs it with 0x11111111_ffff8000 and stores the result
; to %out; %b is unused. The low dword 0xffff8000 is -32768 as a signed
; 32-bit value, the smallest signed 16-bit number (checks expect
; s_movk_i32 0x8000); the high dword 0x11111111 is expected in an s_mov_b32.
; The same constant is also fed to an "s"-constrained inline asm operand.
define amdgpu_kernel void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %a.val = load i64, i64 addrspace(1)* %a, align 4
  ; 1229782942255906816 == 0x11111111ffff8000
  %ored = or i64 %a.val, 1229782942255906816
  store i64 %ored, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 1229782942255906816)
  ret void
}
|
|
132
|
|
133 ; SI-LABEL: {{^}}s_movk_i32_k9:
|
|
134 ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8001{{$}}
|
|
135 ; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
|
|
136 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
|
137 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
|
138 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
|
|
139 ; SI: s_endpgm
|
|
; Loads an i64 from %a, ORs it with 0x11111111_ffff8001 and stores the result
; to %out; %b is unused. The low dword 0xffff8001 is -32767 as a signed
; 32-bit value (checks expect s_movk_i32 0x8001); the high dword 0x11111111
; is expected in an s_mov_b32.
; The same constant is also fed to an "s"-constrained inline asm operand.
define amdgpu_kernel void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %a.val = load i64, i64 addrspace(1)* %a, align 4
  ; 1229782942255906817 == 0x11111111ffff8001
  %ored = or i64 %a.val, 1229782942255906817
  store i64 %ored, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 1229782942255906817)
  ret void
}
|
|
147
|
|
148 ; SI-LABEL: {{^}}s_movk_i32_k10:
|
|
149 ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8888{{$}}
|
|
150 ; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
|
|
151 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
|
152 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
|
153 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
|
|
154 ; SI: s_endpgm
|
|
; Loads an i64 from %a, ORs it with 0x11111111_ffff8888 and stores the result
; to %out; %b is unused. The low dword 0xffff8888 is -30584 as a signed
; 32-bit value (checks expect s_movk_i32 0x8888); the high dword 0x11111111
; is expected in an s_mov_b32.
; The same constant is also fed to an "s"-constrained inline asm operand.
define amdgpu_kernel void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %a.val = load i64, i64 addrspace(1)* %a, align 4
  ; 1229782942255909000 == 0x11111111ffff8888
  %ored = or i64 %a.val, 1229782942255909000
  store i64 %ored, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 1229782942255909000)
  ret void
}
|
|
162
|
|
163 ; SI-LABEL: {{^}}s_movk_i32_k11:
|
|
164 ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8fff{{$}}
|
|
165 ; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
|
|
166 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
|
167 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
|
168 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
|
|
169 ; SI: s_endpgm
|
|
; Loads an i64 from %a, ORs it with 0x11111111_ffff8fff and stores the result
; to %out; %b is unused. The low dword 0xffff8fff is -28673 as a signed
; 32-bit value (checks expect s_movk_i32 0x8fff); the high dword 0x11111111
; is expected in an s_mov_b32.
; The same constant is also fed to an "s"-constrained inline asm operand.
define amdgpu_kernel void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %a.val = load i64, i64 addrspace(1)* %a, align 4
  ; 1229782942255910911 == 0x11111111ffff8fff
  %ored = or i64 %a.val, 1229782942255910911
  store i64 %ored, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 1229782942255910911)
  ret void
}
|
|
177
|
|
178 ; SI-LABEL: {{^}}s_movk_i32_k12:
|
|
179 ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff7001{{$}}
|
|
180 ; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
|
|
181 ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
|
|
182 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
|
|
183 ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
|
|
184 ; SI: s_endpgm
|
|
; Loads an i64 from %a, ORs it with 0x11111111_ffff7001 and stores the result
; to %out; %b is unused. The low dword 0xffff7001 is -36863 as a signed
; 32-bit value, below the signed 16-bit minimum of -32768, and the checks
; for this kernel expect full s_mov_b32 moves for both halves.
; The same constant is also fed to an "s"-constrained inline asm operand.
define amdgpu_kernel void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %a.val = load i64, i64 addrspace(1)* %a, align 4
  ; 1229782942255902721 == 0x11111111ffff7001
  %ored = or i64 %a.val, 1229782942255902721
  store i64 %ored, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 1229782942255902721)
  ret void
}
|