comparison lib/IR/AutoUpgrade.cpp @ 148:63bd29f05246

merged
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Wed, 14 Aug 2019 19:46:37 +0900
parents c2174574ed3a
children
comparison
equal deleted inserted replaced
146:3fc4d5c3e21e 148:63bd29f05246
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===// 1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2 // 2 //
3 // The LLVM Compiler Infrastructure 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // 4 // See https://llvm.org/LICENSE.txt for license information.
5 // This file is distributed under the University of Illinois Open Source 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 // License. See LICENSE.TXT for details.
7 // 6 //
8 //===----------------------------------------------------------------------===// 7 //===----------------------------------------------------------------------===//
9 // 8 //
10 // This file implements the auto-upgrade helper functions. 9 // This file implements the auto-upgrade helper functions.
11 // This is where deprecated IR intrinsics and other IR features are updated to 10 // This is where deprecated IR intrinsics and other IR features are updated to
63 rename(F); 62 rename(F);
64 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); 63 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
65 return true; 64 return true;
66 } 65 }
67 66
68 // Upgrade the declaration of fp compare intrinsics that change return type
69 // from scalar to vXi1 mask.
70 static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
71 Function *&NewFn) {
72 // Check if the return type is a vector.
73 if (F->getReturnType()->isVectorTy())
74 return false;
75
76 rename(F);
77 NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
78 return true;
79 }
80
81 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { 67 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
82 // All of the intrinsics matches below should be marked with which llvm 68 // All of the intrinsics matches below should be marked with which llvm
83 // version started autoupgrading them. At some point in the future we would 69 // version started autoupgrading them. At some point in the future we would
84 // like to use this information to remove upgrade code for some older 70 // like to use this information to remove upgrade code for some older
85 // intrinsics. It is currently undecided how we will determine that future 71 // intrinsics. It is currently undecided how we will determine that future
86 // point. 72 // point.
87 if (Name=="ssse3.pabs.b.128" || // Added in 6.0 73 if (Name == "addcarryx.u32" || // Added in 8.0
74 Name == "addcarryx.u64" || // Added in 8.0
75 Name == "addcarry.u32" || // Added in 8.0
76 Name == "addcarry.u64" || // Added in 8.0
77 Name == "subborrow.u32" || // Added in 8.0
78 Name == "subborrow.u64" || // Added in 8.0
79 Name.startswith("sse2.padds.") || // Added in 8.0
80 Name.startswith("sse2.psubs.") || // Added in 8.0
81 Name.startswith("sse2.paddus.") || // Added in 8.0
82 Name.startswith("sse2.psubus.") || // Added in 8.0
83 Name.startswith("avx2.padds.") || // Added in 8.0
84 Name.startswith("avx2.psubs.") || // Added in 8.0
85 Name.startswith("avx2.paddus.") || // Added in 8.0
86 Name.startswith("avx2.psubus.") || // Added in 8.0
87 Name.startswith("avx512.padds.") || // Added in 8.0
88 Name.startswith("avx512.psubs.") || // Added in 8.0
89 Name.startswith("avx512.mask.padds.") || // Added in 8.0
90 Name.startswith("avx512.mask.psubs.") || // Added in 8.0
91 Name.startswith("avx512.mask.paddus.") || // Added in 8.0
92 Name.startswith("avx512.mask.psubus.") || // Added in 8.0
93 Name=="ssse3.pabs.b.128" || // Added in 6.0
88 Name=="ssse3.pabs.w.128" || // Added in 6.0 94 Name=="ssse3.pabs.w.128" || // Added in 6.0
89 Name=="ssse3.pabs.d.128" || // Added in 6.0 95 Name=="ssse3.pabs.d.128" || // Added in 6.0
96 Name.startswith("fma4.vfmadd.s") || // Added in 7.0
97 Name.startswith("fma.vfmadd.") || // Added in 7.0
98 Name.startswith("fma.vfmsub.") || // Added in 7.0
99 Name.startswith("fma.vfmaddsub.") || // Added in 7.0
100 Name.startswith("fma.vfmsubadd.") || // Added in 7.0
101 Name.startswith("fma.vfnmadd.") || // Added in 7.0
102 Name.startswith("fma.vfnmsub.") || // Added in 7.0
103 Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0
104 Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0
105 Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0
106 Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0
107 Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0
108 Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0
109 Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0
110 Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0
111 Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0
112 Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0
113 Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0
90 Name.startswith("avx512.mask.shuf.i") || // Added in 6.0 114 Name.startswith("avx512.mask.shuf.i") || // Added in 6.0
91 Name.startswith("avx512.mask.shuf.f") || // Added in 6.0 115 Name.startswith("avx512.mask.shuf.f") || // Added in 6.0
92 Name.startswith("avx512.kunpck") || //added in 6.0 116 Name.startswith("avx512.kunpck") || //added in 6.0
93 Name.startswith("avx2.pabs.") || // Added in 6.0 117 Name.startswith("avx2.pabs.") || // Added in 6.0
94 Name.startswith("avx512.mask.pabs.") || // Added in 6.0 118 Name.startswith("avx512.mask.pabs.") || // Added in 6.0
95 Name.startswith("avx512.broadcastm") || // Added in 6.0 119 Name.startswith("avx512.broadcastm") || // Added in 6.0
120 Name == "sse.sqrt.ss" || // Added in 7.0
121 Name == "sse2.sqrt.sd" || // Added in 7.0
122 Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0
123 Name.startswith("avx.sqrt.p") || // Added in 7.0
124 Name.startswith("sse2.sqrt.p") || // Added in 7.0
125 Name.startswith("sse.sqrt.p") || // Added in 7.0
96 Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0 126 Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0
97 Name.startswith("sse2.pcmpeq.") || // Added in 3.1 127 Name.startswith("sse2.pcmpeq.") || // Added in 3.1
98 Name.startswith("sse2.pcmpgt.") || // Added in 3.1 128 Name.startswith("sse2.pcmpgt.") || // Added in 3.1
99 Name.startswith("avx2.pcmpeq.") || // Added in 3.1 129 Name.startswith("avx2.pcmpeq.") || // Added in 3.1
100 Name.startswith("avx2.pcmpgt.") || // Added in 3.1 130 Name.startswith("avx2.pcmpgt.") || // Added in 3.1
166 Name.startswith("avx512.mask.padd.") || // Added in 4.0 196 Name.startswith("avx512.mask.padd.") || // Added in 4.0
167 Name.startswith("avx512.mask.psub.") || // Added in 4.0 197 Name.startswith("avx512.mask.psub.") || // Added in 4.0
168 Name.startswith("avx512.mask.pmull.") || // Added in 4.0 198 Name.startswith("avx512.mask.pmull.") || // Added in 4.0
169 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0 199 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
170 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0 200 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
201 Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0
202 Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0
203 Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0
204 Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0
205 Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0
206 Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0
207 Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0
208 Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0
209 Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0
210 Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0
211 Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0
212 Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0
213 Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0
214 Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0
215 Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0
216 Name == "avx512.cvtusi2sd" || // Added in 7.0
217 Name.startswith("avx512.mask.permvar.") || // Added in 7.0
218 Name == "sse2.pmulu.dq" || // Added in 7.0
219 Name == "sse41.pmuldq" || // Added in 7.0
220 Name == "avx2.pmulu.dq" || // Added in 7.0
221 Name == "avx2.pmul.dq" || // Added in 7.0
222 Name == "avx512.pmulu.dq.512" || // Added in 7.0
223 Name == "avx512.pmul.dq.512" || // Added in 7.0
171 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0 224 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
172 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0 225 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
226 Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0
227 Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0
228 Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0
229 Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0
230 Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0
173 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0 231 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0
174 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0 232 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0
175 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0 233 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0
176 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0 234 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0
177 Name.startswith("avx512.mask.cmp.b") || // Added in 5.0 235 Name.startswith("avx512.mask.cmp.b") || // Added in 5.0
178 Name.startswith("avx512.mask.cmp.d") || // Added in 5.0 236 Name.startswith("avx512.mask.cmp.d") || // Added in 5.0
179 Name.startswith("avx512.mask.cmp.q") || // Added in 5.0 237 Name.startswith("avx512.mask.cmp.q") || // Added in 5.0
180 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0 238 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0
239 Name.startswith("avx512.mask.cmp.p") || // Added in 7.0
181 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0 240 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0
182 Name.startswith("avx512.cvtb2mask.") || // Added in 7.0 241 Name.startswith("avx512.cvtb2mask.") || // Added in 7.0
183 Name.startswith("avx512.cvtw2mask.") || // Added in 7.0 242 Name.startswith("avx512.cvtw2mask.") || // Added in 7.0
184 Name.startswith("avx512.cvtd2mask.") || // Added in 7.0 243 Name.startswith("avx512.cvtd2mask.") || // Added in 7.0
185 Name.startswith("avx512.cvtq2mask.") || // Added in 7.0 244 Name.startswith("avx512.cvtq2mask.") || // Added in 7.0
186 Name == "avx512.mask.add.pd.128" || // Added in 4.0
187 Name == "avx512.mask.add.pd.256" || // Added in 4.0
188 Name == "avx512.mask.add.ps.128" || // Added in 4.0
189 Name == "avx512.mask.add.ps.256" || // Added in 4.0
190 Name == "avx512.mask.div.pd.128" || // Added in 4.0
191 Name == "avx512.mask.div.pd.256" || // Added in 4.0
192 Name == "avx512.mask.div.ps.128" || // Added in 4.0
193 Name == "avx512.mask.div.ps.256" || // Added in 4.0
194 Name == "avx512.mask.mul.pd.128" || // Added in 4.0
195 Name == "avx512.mask.mul.pd.256" || // Added in 4.0
196 Name == "avx512.mask.mul.ps.128" || // Added in 4.0
197 Name == "avx512.mask.mul.ps.256" || // Added in 4.0
198 Name == "avx512.mask.sub.pd.128" || // Added in 4.0
199 Name == "avx512.mask.sub.pd.256" || // Added in 4.0
200 Name == "avx512.mask.sub.ps.128" || // Added in 4.0
201 Name == "avx512.mask.sub.ps.256" || // Added in 4.0
202 Name == "avx512.mask.max.pd.128" || // Added in 5.0
203 Name == "avx512.mask.max.pd.256" || // Added in 5.0
204 Name == "avx512.mask.max.ps.128" || // Added in 5.0
205 Name == "avx512.mask.max.ps.256" || // Added in 5.0
206 Name == "avx512.mask.min.pd.128" || // Added in 5.0
207 Name == "avx512.mask.min.pd.256" || // Added in 5.0
208 Name == "avx512.mask.min.ps.128" || // Added in 5.0
209 Name == "avx512.mask.min.ps.256" || // Added in 5.0
210 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0 245 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
211 Name.startswith("avx512.mask.psll.d") || // Added in 4.0 246 Name.startswith("avx512.mask.psll.d") || // Added in 4.0
212 Name.startswith("avx512.mask.psll.q") || // Added in 4.0 247 Name.startswith("avx512.mask.psll.q") || // Added in 4.0
213 Name.startswith("avx512.mask.psll.w") || // Added in 4.0 248 Name.startswith("avx512.mask.psll.w") || // Added in 4.0
214 Name.startswith("avx512.mask.psra.d") || // Added in 4.0 249 Name.startswith("avx512.mask.psra.d") || // Added in 4.0
228 Name.startswith("avx2.pmovsx") || // Added in 3.9 263 Name.startswith("avx2.pmovsx") || // Added in 3.9
229 Name.startswith("avx2.pmovzx") || // Added in 3.9 264 Name.startswith("avx2.pmovzx") || // Added in 3.9
230 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0 265 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
231 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0 266 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
232 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0 267 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0
268 Name.startswith("avx512.mask.pternlog.") || // Added in 7.0
269 Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0
270 Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0
271 Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0
272 Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0
273 Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0
274 Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0
275 Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0
276 Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0
277 Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0
278 Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0
279 Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0
280 Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0
281 Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0
282 Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0
283 Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0
284 Name.startswith("avx512.mask.vpshld.") || // Added in 7.0
285 Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0
286 Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0
287 Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0
288 Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0
289 Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0
290 Name.startswith("avx512.vpshld.") || // Added in 8.0
291 Name.startswith("avx512.vpshrd.") || // Added in 8.0
292 Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0
293 Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0
294 Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0
295 Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0
296 Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
297 Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
298 Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
299 Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0
300 Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0
301 Name.startswith("avx512.mask.conflict.") || // Added in 9.0
302 Name == "avx512.mask.pmov.qd.256" || // Added in 9.0
303 Name == "avx512.mask.pmov.qd.512" || // Added in 9.0
304 Name == "avx512.mask.pmov.wb.256" || // Added in 9.0
305 Name == "avx512.mask.pmov.wb.512" || // Added in 9.0
306 Name == "sse.cvtsi2ss" || // Added in 7.0
307 Name == "sse.cvtsi642ss" || // Added in 7.0
308 Name == "sse2.cvtsi2sd" || // Added in 7.0
309 Name == "sse2.cvtsi642sd" || // Added in 7.0
310 Name == "sse2.cvtss2sd" || // Added in 7.0
233 Name == "sse2.cvtdq2pd" || // Added in 3.9 311 Name == "sse2.cvtdq2pd" || // Added in 3.9
312 Name == "sse2.cvtdq2ps" || // Added in 7.0
234 Name == "sse2.cvtps2pd" || // Added in 3.9 313 Name == "sse2.cvtps2pd" || // Added in 3.9
235 Name == "avx.cvtdq2.pd.256" || // Added in 3.9 314 Name == "avx.cvtdq2.pd.256" || // Added in 3.9
315 Name == "avx.cvtdq2.ps.256" || // Added in 7.0
236 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9 316 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
237 Name.startswith("avx.vinsertf128.") || // Added in 3.7 317 Name.startswith("avx.vinsertf128.") || // Added in 3.7
238 Name == "avx2.vinserti128" || // Added in 3.7 318 Name == "avx2.vinserti128" || // Added in 3.7
239 Name.startswith("avx512.mask.insert") || // Added in 4.0 319 Name.startswith("avx512.mask.insert") || // Added in 4.0
240 Name.startswith("avx.vextractf128.") || // Added in 3.7 320 Name.startswith("avx.vextractf128.") || // Added in 3.7
254 Name.startswith("avx512.mask.store.p") || // Added in 3.9 334 Name.startswith("avx512.mask.store.p") || // Added in 3.9
255 Name.startswith("avx512.mask.store.b.") || // Added in 3.9 335 Name.startswith("avx512.mask.store.b.") || // Added in 3.9
256 Name.startswith("avx512.mask.store.w.") || // Added in 3.9 336 Name.startswith("avx512.mask.store.w.") || // Added in 3.9
257 Name.startswith("avx512.mask.store.d.") || // Added in 3.9 337 Name.startswith("avx512.mask.store.d.") || // Added in 3.9
258 Name.startswith("avx512.mask.store.q.") || // Added in 3.9 338 Name.startswith("avx512.mask.store.q.") || // Added in 3.9
339 Name == "avx512.mask.store.ss" || // Added in 7.0
259 Name.startswith("avx512.mask.loadu.") || // Added in 3.9 340 Name.startswith("avx512.mask.loadu.") || // Added in 3.9
260 Name.startswith("avx512.mask.load.") || // Added in 3.9 341 Name.startswith("avx512.mask.load.") || // Added in 3.9
342 Name.startswith("avx512.mask.expand.load.") || // Added in 7.0
343 Name.startswith("avx512.mask.compress.store.") || // Added in 7.0
344 Name.startswith("avx512.mask.expand.b") || // Added in 9.0
345 Name.startswith("avx512.mask.expand.w") || // Added in 9.0
346 Name.startswith("avx512.mask.expand.d") || // Added in 9.0
347 Name.startswith("avx512.mask.expand.q") || // Added in 9.0
348 Name.startswith("avx512.mask.expand.p") || // Added in 9.0
349 Name.startswith("avx512.mask.compress.b") || // Added in 9.0
350 Name.startswith("avx512.mask.compress.w") || // Added in 9.0
351 Name.startswith("avx512.mask.compress.d") || // Added in 9.0
352 Name.startswith("avx512.mask.compress.q") || // Added in 9.0
353 Name.startswith("avx512.mask.compress.p") || // Added in 9.0
261 Name == "sse42.crc32.64.8" || // Added in 3.4 354 Name == "sse42.crc32.64.8" || // Added in 3.4
262 Name.startswith("avx.vbroadcast.s") || // Added in 3.5 355 Name.startswith("avx.vbroadcast.s") || // Added in 3.5
356 Name.startswith("avx512.vbroadcast.s") || // Added in 7.0
263 Name.startswith("avx512.mask.palignr.") || // Added in 3.9 357 Name.startswith("avx512.mask.palignr.") || // Added in 3.9
264 Name.startswith("avx512.mask.valign.") || // Added in 4.0 358 Name.startswith("avx512.mask.valign.") || // Added in 4.0
265 Name.startswith("sse2.psll.dq") || // Added in 3.7 359 Name.startswith("sse2.psll.dq") || // Added in 3.7
266 Name.startswith("sse2.psrl.dq") || // Added in 3.7 360 Name.startswith("sse2.psrl.dq") || // Added in 3.7
267 Name.startswith("avx2.psll.dq") || // Added in 3.7 361 Name.startswith("avx2.psll.dq") || // Added in 3.7
279 Name.startswith("avx512.mask.broadcasti") || // Added in 6.0 373 Name.startswith("avx512.mask.broadcasti") || // Added in 6.0
280 Name == "xop.vpcmov" || // Added in 3.8 374 Name == "xop.vpcmov" || // Added in 3.8
281 Name == "xop.vpcmov.256" || // Added in 5.0 375 Name == "xop.vpcmov.256" || // Added in 5.0
282 Name.startswith("avx512.mask.move.s") || // Added in 4.0 376 Name.startswith("avx512.mask.move.s") || // Added in 4.0
283 Name.startswith("avx512.cvtmask2") || // Added in 5.0 377 Name.startswith("avx512.cvtmask2") || // Added in 5.0
284 (Name.startswith("xop.vpcom") && // Added in 3.2 378 Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0
285 F->arg_size() == 2) || 379 Name.startswith("xop.vprot") || // Added in 8.0
380 Name.startswith("avx512.prol") || // Added in 8.0
381 Name.startswith("avx512.pror") || // Added in 8.0
382 Name.startswith("avx512.mask.prorv.") || // Added in 8.0
383 Name.startswith("avx512.mask.pror.") || // Added in 8.0
384 Name.startswith("avx512.mask.prolv.") || // Added in 8.0
385 Name.startswith("avx512.mask.prol.") || // Added in 8.0
286 Name.startswith("avx512.ptestm") || //Added in 6.0 386 Name.startswith("avx512.ptestm") || //Added in 6.0
287 Name.startswith("avx512.ptestnm") || //Added in 6.0 387 Name.startswith("avx512.ptestnm") || //Added in 6.0
288 Name.startswith("sse2.pavg") || // Added in 6.0
289 Name.startswith("avx2.pavg") || // Added in 6.0
290 Name.startswith("avx512.mask.pavg")) // Added in 6.0 388 Name.startswith("avx512.mask.pavg")) // Added in 6.0
291 return true; 389 return true;
292 390
293 return false; 391 return false;
294 } 392 }
301 // Remove "x86." prefix. 399 // Remove "x86." prefix.
302 Name = Name.substr(4); 400 Name = Name.substr(4);
303 401
304 if (ShouldUpgradeX86Intrinsic(F, Name)) { 402 if (ShouldUpgradeX86Intrinsic(F, Name)) {
305 NewFn = nullptr; 403 NewFn = nullptr;
404 return true;
405 }
406
407 if (Name == "rdtscp") { // Added in 8.0
408 // If this intrinsic has 0 operands, it's the new version.
409 if (F->getFunctionType()->getNumParams() == 0)
410 return false;
411
412 rename(F);
413 NewFn = Intrinsic::getDeclaration(F->getParent(),
414 Intrinsic::x86_rdtscp);
306 return true; 415 return true;
307 } 416 }
308 417
309 // SSE4.1 ptest functions may have an old signature. 418 // SSE4.1 ptest functions may have an old signature.
310 if (Name.startswith("sse41.ptest")) { // Added in 3.2 419 if (Name.startswith("sse41.ptest")) { // Added in 3.2
333 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256, 442 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
334 NewFn); 443 NewFn);
335 if (Name == "avx2.mpsadbw") // Added in 3.6 444 if (Name == "avx2.mpsadbw") // Added in 3.6
336 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, 445 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
337 NewFn); 446 NewFn);
338 if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0
339 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128,
340 NewFn);
341 if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0
342 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256,
343 NewFn);
344 if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0
345 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512,
346 NewFn);
347 if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0
348 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128,
349 NewFn);
350 if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0
351 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256,
352 NewFn);
353 if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0
354 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512,
355 NewFn);
356 447
357 // frcz.ss/sd may need to have an argument dropped. Added in 3.2 448 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
358 if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) { 449 if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
359 rename(F); 450 rename(F);
360 NewFn = Intrinsic::getDeclaration(F->getParent(), 451 NewFn = Intrinsic::getDeclaration(F->getParent(),
386 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID); 477 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID);
387 return true; 478 return true;
388 } 479 }
389 } 480 }
390 481
482 if (Name == "seh.recoverfp") {
483 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp);
484 return true;
485 }
486
391 return false; 487 return false;
392 } 488 }
393 489
394 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { 490 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
395 assert(F && "Illegal to upgrade a non-existent Function."); 491 assert(F && "Illegal to upgrade a non-existent Function.");
415 }; 511 };
416 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to 512 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to
417 // the end of the name. Change name from llvm.arm.neon.vclz.* to 513 // the end of the name. Change name from llvm.arm.neon.vclz.* to
418 // llvm.ctlz.* 514 // llvm.ctlz.*
419 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false); 515 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false);
420 NewFn = Function::Create(fType, F->getLinkage(), 516 NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
421 "llvm.ctlz." + Name.substr(14), F->getParent()); 517 "llvm.ctlz." + Name.substr(14), F->getParent());
422 return true; 518 return true;
423 } 519 }
424 if (Name.startswith("arm.neon.vcnt")) { 520 if (Name.startswith("arm.neon.vcnt")) {
425 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, 521 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop,
431 auto fArgs = F->getFunctionType()->params(); 527 auto fArgs = F->getFunctionType()->params();
432 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end()); 528 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
433 // Can't use Intrinsic::getDeclaration here as the return types might 529 // Can't use Intrinsic::getDeclaration here as the return types might
434 // then only be structurally equal. 530 // then only be structurally equal.
435 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false); 531 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
436 NewFn = Function::Create(fType, F->getLinkage(), 532 NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(),
437 "llvm." + Name + ".p0i8", F->getParent()); 533 "llvm." + Name + ".p0i8", F->getParent());
438 return true; 534 return true;
439 } 535 }
440 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); 536 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
441 if (vstRegex.match(Name)) { 537 if (vstRegex.match(Name)) {
461 } 557 }
462 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") { 558 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") {
463 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer); 559 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer);
464 return true; 560 return true;
465 } 561 }
562 if (Name.startswith("aarch64.neon.addp")) {
563 if (F->arg_size() != 2)
564 break; // Invalid IR.
565 auto fArgs = F->getFunctionType()->params();
566 VectorType *ArgTy = dyn_cast<VectorType>(fArgs[0]);
567 if (ArgTy && ArgTy->getElementType()->isFloatingPointTy()) {
568 NewFn = Intrinsic::getDeclaration(F->getParent(),
569 Intrinsic::aarch64_neon_faddp, fArgs);
570 return true;
571 }
572 }
466 break; 573 break;
467 } 574 }
468 575
469 case 'c': { 576 case 'c': {
470 if (Name.startswith("ctlz.") && F->arg_size() == 1) { 577 if (Name.startswith("ctlz.") && F->arg_size() == 1) {
487 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value); 594 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
488 return true; 595 return true;
489 } 596 }
490 break; 597 break;
491 } 598 }
599 case 'e': {
600 SmallVector<StringRef, 2> Groups;
601 Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+");
602 if (R.match(Name, &Groups)) {
603 Intrinsic::ID ID = Intrinsic::not_intrinsic;
604 if (Groups[1] == "fadd")
605 ID = Intrinsic::experimental_vector_reduce_v2_fadd;
606 if (Groups[1] == "fmul")
607 ID = Intrinsic::experimental_vector_reduce_v2_fmul;
608
609 if (ID != Intrinsic::not_intrinsic) {
610 rename(F);
611 auto Args = F->getFunctionType()->params();
612 Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]};
613 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
614 return true;
615 }
616 }
617 break;
618 }
492 case 'i': 619 case 'i':
493 case 'l': { 620 case 'l': {
494 bool IsLifetimeStart = Name.startswith("lifetime.start"); 621 bool IsLifetimeStart = Name.startswith("lifetime.start");
495 if (IsLifetimeStart || Name.startswith("invariant.start")) { 622 if (IsLifetimeStart || Name.startswith("invariant.start")) {
496 Intrinsic::ID ID = IsLifetimeStart ? 623 Intrinsic::ID ID = IsLifetimeStart ?
515 rename(F); 642 rename(F);
516 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr); 643 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr);
517 return true; 644 return true;
518 } 645 }
519 } 646 }
647 if (Name.startswith("invariant.group.barrier")) {
648 // Rename invariant.group.barrier to launder.invariant.group
649 auto Args = F->getFunctionType()->params();
650 Type* ObjectPtr[1] = {Args[0]};
651 rename(F);
652 NewFn = Intrinsic::getDeclaration(F->getParent(),
653 Intrinsic::launder_invariant_group, ObjectPtr);
654 return true;
655
656 }
657
520 break; 658 break;
521 } 659 }
522 case 'm': { 660 case 'm': {
523 if (Name.startswith("masked.load.")) { 661 if (Name.startswith("masked.load.")) {
524 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() }; 662 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
618 bool Expand = StringSwitch<bool>(Name) 756 bool Expand = StringSwitch<bool>(Name)
619 .Cases("abs.i", "abs.ll", true) 757 .Cases("abs.i", "abs.ll", true)
620 .Cases("clz.ll", "popc.ll", "h2f", true) 758 .Cases("clz.ll", "popc.ll", "h2f", true)
621 .Cases("max.i", "max.ll", "max.ui", "max.ull", true) 759 .Cases("max.i", "max.ll", "max.ui", "max.ull", true)
622 .Cases("min.i", "min.ll", "min.ui", "min.ull", true) 760 .Cases("min.i", "min.ll", "min.ui", "min.ull", true)
761 .StartsWith("atomic.load.add.f32.p", true)
762 .StartsWith("atomic.load.add.f64.p", true)
623 .Default(false); 763 .Default(false);
624 if (Expand) { 764 if (Expand) {
625 NewFn = nullptr; 765 NewFn = nullptr;
626 return true; 766 return true;
627 } 767 }
631 case 'o': 771 case 'o':
632 // We only need to change the name to match the mangling including the 772 // We only need to change the name to match the mangling including the
633 // address space. 773 // address space.
634 if (Name.startswith("objectsize.")) { 774 if (Name.startswith("objectsize.")) {
635 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() }; 775 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
636 if (F->arg_size() == 2 || 776 if (F->arg_size() == 2 || F->arg_size() == 3 ||
637 F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) { 777 F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
638 rename(F); 778 rename(F);
639 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize, 779 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize,
640 Tys); 780 Tys);
641 return true; 781 return true;
642 } 782 }
643 } 783 }
644 break; 784 break;
645 785
786 case 'p':
787 if (Name == "prefetch") {
788 // Handle address space overloading.
789 Type *Tys[] = {F->arg_begin()->getType()};
790 if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) {
791 rename(F);
792 NewFn =
793 Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys);
794 return true;
795 }
796 }
797 break;
798
646 case 's': 799 case 's':
647 if (Name == "stackprotectorcheck") { 800 if (Name == "stackprotectorcheck") {
648 NewFn = nullptr; 801 NewFn = nullptr;
649 return true; 802 return true;
650 } 803 }
679 if (Intrinsic::ID id = F->getIntrinsicID()) 832 if (Intrinsic::ID id = F->getIntrinsicID())
680 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id)); 833 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
681 return Upgraded; 834 return Upgraded;
682 } 835 }
683 836
684 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) { 837 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
685 // Nothing to do yet. 838 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
686 return false; 839 GV->getName() == "llvm.global_dtors")) ||
840 !GV->hasInitializer())
841 return nullptr;
842 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
843 if (!ATy)
844 return nullptr;
845 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
846 if (!STy || STy->getNumElements() != 2)
847 return nullptr;
848
849 LLVMContext &C = GV->getContext();
850 IRBuilder<> IRB(C);
851 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
852 IRB.getInt8PtrTy());
853 Constant *Init = GV->getInitializer();
854 unsigned N = Init->getNumOperands();
855 std::vector<Constant *> NewCtors(N);
856 for (unsigned i = 0; i != N; ++i) {
857 auto Ctor = cast<Constant>(Init->getOperand(i));
858 NewCtors[i] = ConstantStruct::get(
859 EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1),
860 Constant::getNullValue(IRB.getInt8PtrTy()));
861 }
862 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
863
864 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
865 NewInit, GV->getName());
687 } 866 }
688 867
689 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them 868 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
690 // to byte shuffles. 869 // to byte shuffles.
691 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, 870 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
774 return Mask; 953 return Mask;
775 } 954 }
776 955
777 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask, 956 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
778 Value *Op0, Value *Op1) { 957 Value *Op0, Value *Op1) {
779 // If the mask is all ones just emit the align operation. 958 // If the mask is all ones just emit the first operation.
780 if (const auto *C = dyn_cast<Constant>(Mask)) 959 if (const auto *C = dyn_cast<Constant>(Mask))
781 if (C->isAllOnesValue()) 960 if (C->isAllOnesValue())
782 return Op0; 961 return Op0;
783 962
784 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements()); 963 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements());
964 return Builder.CreateSelect(Mask, Op0, Op1);
965 }
966
967 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask,
968 Value *Op0, Value *Op1) {
969 // If the mask is all ones just emit the first operation.
970 if (const auto *C = dyn_cast<Constant>(Mask))
971 if (C->isAllOnesValue())
972 return Op0;
973
974 llvm::VectorType *MaskTy =
975 llvm::VectorType::get(Builder.getInt1Ty(),
976 Mask->getType()->getIntegerBitWidth());
977 Mask = Builder.CreateBitCast(Mask, MaskTy);
978 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
785 return Builder.CreateSelect(Mask, Op0, Op1); 979 return Builder.CreateSelect(Mask, Op0, Op1);
786 } 980 }
787 981
788 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics. 982 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
789 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate 983 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate
832 "palignr"); 1026 "palignr");
833 1027
834 return EmitX86Select(Builder, Mask, Align, Passthru); 1028 return EmitX86Select(Builder, Mask, Align, Passthru);
835 } 1029 }
836 1030
1031 static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI,
1032 bool ZeroMask, bool IndexForm) {
1033 Type *Ty = CI.getType();
1034 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
1035 unsigned EltWidth = Ty->getScalarSizeInBits();
1036 bool IsFloat = Ty->isFPOrFPVectorTy();
1037 Intrinsic::ID IID;
1038 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
1039 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
1040 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
1041 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
1042 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
1043 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
1044 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
1045 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
1046 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1047 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
1048 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1049 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
1050 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1051 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
1052 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1053 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
1054 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1055 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
1056 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1057 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
1058 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1059 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
1060 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1061 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
1062 else if (VecWidth == 128 && EltWidth == 16)
1063 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
1064 else if (VecWidth == 256 && EltWidth == 16)
1065 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
1066 else if (VecWidth == 512 && EltWidth == 16)
1067 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
1068 else if (VecWidth == 128 && EltWidth == 8)
1069 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
1070 else if (VecWidth == 256 && EltWidth == 8)
1071 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
1072 else if (VecWidth == 512 && EltWidth == 8)
1073 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
1074 else
1075 llvm_unreachable("Unexpected intrinsic");
1076
1077 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
1078 CI.getArgOperand(2) };
1079
1080 // If this isn't index form we need to swap operand 0 and 1.
1081 if (!IndexForm)
1082 std::swap(Args[0], Args[1]);
1083
1084 Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1085 Args);
1086 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
1087 : Builder.CreateBitCast(CI.getArgOperand(1),
1088 Ty);
1089 return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
1090 }
1091
1092 static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
1093 bool IsSigned, bool IsAddition) {
1094 Type *Ty = CI.getType();
1095 Value *Op0 = CI.getOperand(0);
1096 Value *Op1 = CI.getOperand(1);
1097
1098 Intrinsic::ID IID =
1099 IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
1100 : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
1101 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1102 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
1103
1104 if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1105 Value *VecSrc = CI.getOperand(2);
1106 Value *Mask = CI.getOperand(3);
1107 Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1108 }
1109 return Res;
1110 }
1111
1112 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI,
1113 bool IsRotateRight) {
1114 Type *Ty = CI.getType();
1115 Value *Src = CI.getArgOperand(0);
1116 Value *Amt = CI.getArgOperand(1);
1117
1118 // Amount may be scalar immediate, in which case create a splat vector.
1119 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1120 // we only care about the lowest log2 bits anyway.
1121 if (Amt->getType() != Ty) {
1122 unsigned NumElts = Ty->getVectorNumElements();
1123 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1124 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1125 }
1126
1127 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1128 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1129 Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt});
1130
1131 if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
1132 Value *VecSrc = CI.getOperand(2);
1133 Value *Mask = CI.getOperand(3);
1134 Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1135 }
1136 return Res;
1137 }
1138
1139 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm,
1140 bool IsSigned) {
1141 Type *Ty = CI.getType();
1142 Value *LHS = CI.getArgOperand(0);
1143 Value *RHS = CI.getArgOperand(1);
1144
1145 CmpInst::Predicate Pred;
1146 switch (Imm) {
1147 case 0x0:
1148 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1149 break;
1150 case 0x1:
1151 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1152 break;
1153 case 0x2:
1154 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1155 break;
1156 case 0x3:
1157 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1158 break;
1159 case 0x4:
1160 Pred = ICmpInst::ICMP_EQ;
1161 break;
1162 case 0x5:
1163 Pred = ICmpInst::ICMP_NE;
1164 break;
1165 case 0x6:
1166 return Constant::getNullValue(Ty); // FALSE
1167 case 0x7:
1168 return Constant::getAllOnesValue(Ty); // TRUE
1169 default:
1170 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
1171 }
1172
1173 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
1174 Value *Ext = Builder.CreateSExt(Cmp, Ty);
1175 return Ext;
1176 }
1177
1178 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI,
1179 bool IsShiftRight, bool ZeroMask) {
1180 Type *Ty = CI.getType();
1181 Value *Op0 = CI.getArgOperand(0);
1182 Value *Op1 = CI.getArgOperand(1);
1183 Value *Amt = CI.getArgOperand(2);
1184
1185 if (IsShiftRight)
1186 std::swap(Op0, Op1);
1187
1188 // Amount may be scalar immediate, in which case create a splat vector.
1189 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
1190 // we only care about the lowest log2 bits anyway.
1191 if (Amt->getType() != Ty) {
1192 unsigned NumElts = Ty->getVectorNumElements();
1193 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
1194 Amt = Builder.CreateVectorSplat(NumElts, Amt);
1195 }
1196
1197 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
1198 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
1199 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt});
1200
1201 unsigned NumArgs = CI.getNumArgOperands();
1202 if (NumArgs >= 4) { // For masked intrinsics.
1203 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
1204 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
1205 CI.getArgOperand(0);
1206 Value *Mask = CI.getOperand(NumArgs - 1);
1207 Res = EmitX86Select(Builder, Mask, Res, VecSrc);
1208 }
1209 return Res;
1210 }
1211
837 static Value *UpgradeMaskedStore(IRBuilder<> &Builder, 1212 static Value *UpgradeMaskedStore(IRBuilder<> &Builder,
838 Value *Ptr, Value *Data, Value *Mask, 1213 Value *Ptr, Value *Data, Value *Mask,
839 bool Aligned) { 1214 bool Aligned) {
840 // Cast the pointer to the right type. 1215 // Cast the pointer to the right type.
841 Ptr = Builder.CreateBitCast(Ptr, 1216 Ptr = Builder.CreateBitCast(Ptr,
855 } 1230 }
856 1231
857 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder, 1232 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder,
858 Value *Ptr, Value *Passthru, Value *Mask, 1233 Value *Ptr, Value *Passthru, Value *Mask,
859 bool Aligned) { 1234 bool Aligned) {
1235 Type *ValTy = Passthru->getType();
860 // Cast the pointer to the right type. 1236 // Cast the pointer to the right type.
861 Ptr = Builder.CreateBitCast(Ptr, 1237 Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy));
862 llvm::PointerType::getUnqual(Passthru->getType()));
863 unsigned Align = 1238 unsigned Align =
864 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1; 1239 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
865 1240
866 // If the mask is all ones just emit a regular store. 1241 // If the mask is all ones just emit a regular store.
867 if (const auto *C = dyn_cast<Constant>(Mask)) 1242 if (const auto *C = dyn_cast<Constant>(Mask))
868 if (C->isAllOnesValue()) 1243 if (C->isAllOnesValue())
869 return Builder.CreateAlignedLoad(Ptr, Align); 1244 return Builder.CreateAlignedLoad(ValTy, Ptr, Align);
870 1245
871 // Convert the mask from an integer type to a vector of i1. 1246 // Convert the mask from an integer type to a vector of i1.
872 unsigned NumElts = Passthru->getType()->getVectorNumElements(); 1247 unsigned NumElts = Passthru->getType()->getVectorNumElements();
873 Mask = getX86MaskVec(Builder, Mask, NumElts); 1248 Mask = getX86MaskVec(Builder, Mask, NumElts);
874 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru); 1249 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
899 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2)); 1274 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
900 1275
901 return Res; 1276 return Res;
902 } 1277 }
903 1278
1279 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) {
1280 Type *Ty = CI.getType();
1281
1282 // Arguments have a vXi32 type so cast to vXi64.
1283 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
1284 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
1285
1286 if (IsSigned) {
1287 // Shift left then arithmetic shift right.
1288 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
1289 LHS = Builder.CreateShl(LHS, ShiftAmt);
1290 LHS = Builder.CreateAShr(LHS, ShiftAmt);
1291 RHS = Builder.CreateShl(RHS, ShiftAmt);
1292 RHS = Builder.CreateAShr(RHS, ShiftAmt);
1293 } else {
1294 // Clear the upper bits.
1295 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
1296 LHS = Builder.CreateAnd(LHS, Mask);
1297 RHS = Builder.CreateAnd(RHS, Mask);
1298 }
1299
1300 Value *Res = Builder.CreateMul(LHS, RHS);
1301
1302 if (CI.getNumArgOperands() == 4)
1303 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
1304
1305 return Res;
1306 }
1307
904 // Applying mask on vector of i1's and make sure result is at least 8 bits wide. 1308 // Applying mask on vector of i1's and make sure result is at least 8 bits wide.
905 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder,Value *Vec, Value *Mask, 1309 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
906 unsigned NumElts) { 1310 Value *Mask) {
1311 unsigned NumElts = Vec->getType()->getVectorNumElements();
907 if (Mask) { 1312 if (Mask) {
908 const auto *C = dyn_cast<Constant>(Mask); 1313 const auto *C = dyn_cast<Constant>(Mask);
909 if (!C || !C->isAllOnesValue()) 1314 if (!C || !C->isAllOnesValue())
910 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts)); 1315 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
911 } 1316 }
947 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1)); 1352 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
948 } 1353 }
949 1354
950 Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1); 1355 Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
951 1356
952 return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask, NumElts); 1357 return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask);
953 } 1358 }
954 1359
955 // Replace a masked intrinsic with an older unmasked intrinsic. 1360 // Replace a masked intrinsic with an older unmasked intrinsic.
956 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI, 1361 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
957 Intrinsic::ID IID) { 1362 Intrinsic::ID IID) {
958 Function *F = CI.getCalledFunction(); 1363 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID);
959 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
960 Value *Rep = Builder.CreateCall(Intrin, 1364 Value *Rep = Builder.CreateCall(Intrin,
961 { CI.getArgOperand(0), CI.getArgOperand(1) }); 1365 { CI.getArgOperand(0), CI.getArgOperand(1) });
962 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2)); 1366 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
963 } 1367 }
964 1368
981 Value* Op = CI.getArgOperand(0); 1385 Value* Op = CI.getArgOperand(0);
982 Type* ReturnOp = CI.getType(); 1386 Type* ReturnOp = CI.getType();
983 unsigned NumElts = CI.getType()->getVectorNumElements(); 1387 unsigned NumElts = CI.getType()->getVectorNumElements();
984 Value *Mask = getX86MaskVec(Builder, Op, NumElts); 1388 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
985 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2"); 1389 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
1390 }
1391
1392 // Replace intrinsic with unmasked version and a select.
1393 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
1394 CallInst &CI, Value *&Rep) {
1395 Name = Name.substr(12); // Remove avx512.mask.
1396
1397 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
1398 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
1399 Intrinsic::ID IID;
1400 if (Name.startswith("max.p")) {
1401 if (VecWidth == 128 && EltWidth == 32)
1402 IID = Intrinsic::x86_sse_max_ps;
1403 else if (VecWidth == 128 && EltWidth == 64)
1404 IID = Intrinsic::x86_sse2_max_pd;
1405 else if (VecWidth == 256 && EltWidth == 32)
1406 IID = Intrinsic::x86_avx_max_ps_256;
1407 else if (VecWidth == 256 && EltWidth == 64)
1408 IID = Intrinsic::x86_avx_max_pd_256;
1409 else
1410 llvm_unreachable("Unexpected intrinsic");
1411 } else if (Name.startswith("min.p")) {
1412 if (VecWidth == 128 && EltWidth == 32)
1413 IID = Intrinsic::x86_sse_min_ps;
1414 else if (VecWidth == 128 && EltWidth == 64)
1415 IID = Intrinsic::x86_sse2_min_pd;
1416 else if (VecWidth == 256 && EltWidth == 32)
1417 IID = Intrinsic::x86_avx_min_ps_256;
1418 else if (VecWidth == 256 && EltWidth == 64)
1419 IID = Intrinsic::x86_avx_min_pd_256;
1420 else
1421 llvm_unreachable("Unexpected intrinsic");
1422 } else if (Name.startswith("pshuf.b.")) {
1423 if (VecWidth == 128)
1424 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1425 else if (VecWidth == 256)
1426 IID = Intrinsic::x86_avx2_pshuf_b;
1427 else if (VecWidth == 512)
1428 IID = Intrinsic::x86_avx512_pshuf_b_512;
1429 else
1430 llvm_unreachable("Unexpected intrinsic");
1431 } else if (Name.startswith("pmul.hr.sw.")) {
1432 if (VecWidth == 128)
1433 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
1434 else if (VecWidth == 256)
1435 IID = Intrinsic::x86_avx2_pmul_hr_sw;
1436 else if (VecWidth == 512)
1437 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
1438 else
1439 llvm_unreachable("Unexpected intrinsic");
1440 } else if (Name.startswith("pmulh.w.")) {
1441 if (VecWidth == 128)
1442 IID = Intrinsic::x86_sse2_pmulh_w;
1443 else if (VecWidth == 256)
1444 IID = Intrinsic::x86_avx2_pmulh_w;
1445 else if (VecWidth == 512)
1446 IID = Intrinsic::x86_avx512_pmulh_w_512;
1447 else
1448 llvm_unreachable("Unexpected intrinsic");
1449 } else if (Name.startswith("pmulhu.w.")) {
1450 if (VecWidth == 128)
1451 IID = Intrinsic::x86_sse2_pmulhu_w;
1452 else if (VecWidth == 256)
1453 IID = Intrinsic::x86_avx2_pmulhu_w;
1454 else if (VecWidth == 512)
1455 IID = Intrinsic::x86_avx512_pmulhu_w_512;
1456 else
1457 llvm_unreachable("Unexpected intrinsic");
1458 } else if (Name.startswith("pmaddw.d.")) {
1459 if (VecWidth == 128)
1460 IID = Intrinsic::x86_sse2_pmadd_wd;
1461 else if (VecWidth == 256)
1462 IID = Intrinsic::x86_avx2_pmadd_wd;
1463 else if (VecWidth == 512)
1464 IID = Intrinsic::x86_avx512_pmaddw_d_512;
1465 else
1466 llvm_unreachable("Unexpected intrinsic");
1467 } else if (Name.startswith("pmaddubs.w.")) {
1468 if (VecWidth == 128)
1469 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
1470 else if (VecWidth == 256)
1471 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
1472 else if (VecWidth == 512)
1473 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
1474 else
1475 llvm_unreachable("Unexpected intrinsic");
1476 } else if (Name.startswith("packsswb.")) {
1477 if (VecWidth == 128)
1478 IID = Intrinsic::x86_sse2_packsswb_128;
1479 else if (VecWidth == 256)
1480 IID = Intrinsic::x86_avx2_packsswb;
1481 else if (VecWidth == 512)
1482 IID = Intrinsic::x86_avx512_packsswb_512;
1483 else
1484 llvm_unreachable("Unexpected intrinsic");
1485 } else if (Name.startswith("packssdw.")) {
1486 if (VecWidth == 128)
1487 IID = Intrinsic::x86_sse2_packssdw_128;
1488 else if (VecWidth == 256)
1489 IID = Intrinsic::x86_avx2_packssdw;
1490 else if (VecWidth == 512)
1491 IID = Intrinsic::x86_avx512_packssdw_512;
1492 else
1493 llvm_unreachable("Unexpected intrinsic");
1494 } else if (Name.startswith("packuswb.")) {
1495 if (VecWidth == 128)
1496 IID = Intrinsic::x86_sse2_packuswb_128;
1497 else if (VecWidth == 256)
1498 IID = Intrinsic::x86_avx2_packuswb;
1499 else if (VecWidth == 512)
1500 IID = Intrinsic::x86_avx512_packuswb_512;
1501 else
1502 llvm_unreachable("Unexpected intrinsic");
1503 } else if (Name.startswith("packusdw.")) {
1504 if (VecWidth == 128)
1505 IID = Intrinsic::x86_sse41_packusdw;
1506 else if (VecWidth == 256)
1507 IID = Intrinsic::x86_avx2_packusdw;
1508 else if (VecWidth == 512)
1509 IID = Intrinsic::x86_avx512_packusdw_512;
1510 else
1511 llvm_unreachable("Unexpected intrinsic");
1512 } else if (Name.startswith("vpermilvar.")) {
1513 if (VecWidth == 128 && EltWidth == 32)
1514 IID = Intrinsic::x86_avx_vpermilvar_ps;
1515 else if (VecWidth == 128 && EltWidth == 64)
1516 IID = Intrinsic::x86_avx_vpermilvar_pd;
1517 else if (VecWidth == 256 && EltWidth == 32)
1518 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
1519 else if (VecWidth == 256 && EltWidth == 64)
1520 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
1521 else if (VecWidth == 512 && EltWidth == 32)
1522 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
1523 else if (VecWidth == 512 && EltWidth == 64)
1524 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
1525 else
1526 llvm_unreachable("Unexpected intrinsic");
1527 } else if (Name == "cvtpd2dq.256") {
1528 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
1529 } else if (Name == "cvtpd2ps.256") {
1530 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
1531 } else if (Name == "cvttpd2dq.256") {
1532 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
1533 } else if (Name == "cvttps2dq.128") {
1534 IID = Intrinsic::x86_sse2_cvttps2dq;
1535 } else if (Name == "cvttps2dq.256") {
1536 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
1537 } else if (Name.startswith("permvar.")) {
1538 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
1539 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
1540 IID = Intrinsic::x86_avx2_permps;
1541 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
1542 IID = Intrinsic::x86_avx2_permd;
1543 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
1544 IID = Intrinsic::x86_avx512_permvar_df_256;
1545 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
1546 IID = Intrinsic::x86_avx512_permvar_di_256;
1547 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
1548 IID = Intrinsic::x86_avx512_permvar_sf_512;
1549 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
1550 IID = Intrinsic::x86_avx512_permvar_si_512;
1551 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
1552 IID = Intrinsic::x86_avx512_permvar_df_512;
1553 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
1554 IID = Intrinsic::x86_avx512_permvar_di_512;
1555 else if (VecWidth == 128 && EltWidth == 16)
1556 IID = Intrinsic::x86_avx512_permvar_hi_128;
1557 else if (VecWidth == 256 && EltWidth == 16)
1558 IID = Intrinsic::x86_avx512_permvar_hi_256;
1559 else if (VecWidth == 512 && EltWidth == 16)
1560 IID = Intrinsic::x86_avx512_permvar_hi_512;
1561 else if (VecWidth == 128 && EltWidth == 8)
1562 IID = Intrinsic::x86_avx512_permvar_qi_128;
1563 else if (VecWidth == 256 && EltWidth == 8)
1564 IID = Intrinsic::x86_avx512_permvar_qi_256;
1565 else if (VecWidth == 512 && EltWidth == 8)
1566 IID = Intrinsic::x86_avx512_permvar_qi_512;
1567 else
1568 llvm_unreachable("Unexpected intrinsic");
1569 } else if (Name.startswith("dbpsadbw.")) {
1570 if (VecWidth == 128)
1571 IID = Intrinsic::x86_avx512_dbpsadbw_128;
1572 else if (VecWidth == 256)
1573 IID = Intrinsic::x86_avx512_dbpsadbw_256;
1574 else if (VecWidth == 512)
1575 IID = Intrinsic::x86_avx512_dbpsadbw_512;
1576 else
1577 llvm_unreachable("Unexpected intrinsic");
1578 } else if (Name.startswith("pmultishift.qb.")) {
1579 if (VecWidth == 128)
1580 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
1581 else if (VecWidth == 256)
1582 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
1583 else if (VecWidth == 512)
1584 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
1585 else
1586 llvm_unreachable("Unexpected intrinsic");
1587 } else if (Name.startswith("conflict.")) {
1588 if (Name[9] == 'd' && VecWidth == 128)
1589 IID = Intrinsic::x86_avx512_conflict_d_128;
1590 else if (Name[9] == 'd' && VecWidth == 256)
1591 IID = Intrinsic::x86_avx512_conflict_d_256;
1592 else if (Name[9] == 'd' && VecWidth == 512)
1593 IID = Intrinsic::x86_avx512_conflict_d_512;
1594 else if (Name[9] == 'q' && VecWidth == 128)
1595 IID = Intrinsic::x86_avx512_conflict_q_128;
1596 else if (Name[9] == 'q' && VecWidth == 256)
1597 IID = Intrinsic::x86_avx512_conflict_q_256;
1598 else if (Name[9] == 'q' && VecWidth == 512)
1599 IID = Intrinsic::x86_avx512_conflict_q_512;
1600 else
1601 llvm_unreachable("Unexpected intrinsic");
1602 } else if (Name.startswith("pavg.")) {
1603 if (Name[5] == 'b' && VecWidth == 128)
1604 IID = Intrinsic::x86_sse2_pavg_b;
1605 else if (Name[5] == 'b' && VecWidth == 256)
1606 IID = Intrinsic::x86_avx2_pavg_b;
1607 else if (Name[5] == 'b' && VecWidth == 512)
1608 IID = Intrinsic::x86_avx512_pavg_b_512;
1609 else if (Name[5] == 'w' && VecWidth == 128)
1610 IID = Intrinsic::x86_sse2_pavg_w;
1611 else if (Name[5] == 'w' && VecWidth == 256)
1612 IID = Intrinsic::x86_avx2_pavg_w;
1613 else if (Name[5] == 'w' && VecWidth == 512)
1614 IID = Intrinsic::x86_avx512_pavg_w_512;
1615 else
1616 llvm_unreachable("Unexpected intrinsic");
1617 } else
1618 return false;
1619
1620 SmallVector<Value *, 4> Args(CI.arg_operands().begin(),
1621 CI.arg_operands().end());
1622 Args.pop_back();
1623 Args.pop_back();
1624 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID),
1625 Args);
1626 unsigned NumArgs = CI.getNumArgOperands();
1627 Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
1628 CI.getArgOperand(NumArgs - 2));
1629 return true;
1630 }
1631
/// Upgrade comment in call to inline asm that represents an objc retain release
/// marker.
///
/// If \p AsmStr begins with "mov\tfp", contains
/// "objc_retainAutoreleaseReturnValue" and contains "# marker", the '#' that
/// starts the first "# marker" is replaced in place with ';'. Any other string
/// is left unchanged.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // rfind with a start position of 0 can only match at the very beginning, so
  // this is a pure prefix test that never scans the rest of the string.
  if (AsmStr->rfind("mov\tfp", 0) != 0)
    return;
  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const size_t Pos = AsmStr->find("# marker");
  if (Pos != std::string::npos)
    AsmStr->replace(Pos, 1, ";");
}
987 1643
988 /// Upgrade a call to an old intrinsic. All argument and return casting must be 1644 /// Upgrade a call to an old intrinsic. All argument and return casting must be
989 /// provided to seamlessly integrate with existing context. 1645 /// provided to seamlessly integrate with existing context.
990 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { 1646 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
1085 1741
1086 Arg0 = Builder.CreateBitCast(Arg0, 1742 Arg0 = Builder.CreateBitCast(Arg0,
1087 PointerType::getUnqual(Arg1->getType()), 1743 PointerType::getUnqual(Arg1->getType()),
1088 "cast"); 1744 "cast");
1089 Builder.CreateAlignedStore(Arg1, Arg0, 1); 1745 Builder.CreateAlignedStore(Arg1, Arg0, 1);
1746
1747 // Remove intrinsic.
1748 CI->eraseFromParent();
1749 return;
1750 }
1751
1752 if (IsX86 && Name == "avx512.mask.store.ss") {
1753 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
1754 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
1755 Mask, false);
1090 1756
1091 // Remove intrinsic. 1757 // Remove intrinsic.
1092 CI->eraseFromParent(); 1758 CI->eraseFromParent();
1093 return; 1759 return;
1094 } 1760 }
1119 ExtTy = Type::getInt64Ty(C); 1785 ExtTy = Type::getInt64Ty(C);
1120 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 1786 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
1121 ExtTy->getPrimitiveSizeInBits(); 1787 ExtTy->getPrimitiveSizeInBits();
1122 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy); 1788 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
1123 Rep = Builder.CreateVectorSplat(NumElts, Rep); 1789 Rep = Builder.CreateVectorSplat(NumElts, Rep);
1790 } else if (IsX86 && (Name == "sse.sqrt.ss" ||
1791 Name == "sse2.sqrt.sd")) {
1792 Value *Vec = CI->getArgOperand(0);
1793 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
1794 Function *Intr = Intrinsic::getDeclaration(F->getParent(),
1795 Intrinsic::sqrt, Elt0->getType());
1796 Elt0 = Builder.CreateCall(Intr, Elt0);
1797 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
1798 } else if (IsX86 && (Name.startswith("avx.sqrt.p") ||
1799 Name.startswith("sse2.sqrt.p") ||
1800 Name.startswith("sse.sqrt.p"))) {
1801 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1802 Intrinsic::sqrt,
1803 CI->getType()),
1804 {CI->getArgOperand(0)});
1805 } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) {
1806 if (CI->getNumArgOperands() == 4 &&
1807 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
1808 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
1809 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
1810 : Intrinsic::x86_avx512_sqrt_pd_512;
1811
1812 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) };
1813 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
1814 IID), Args);
1815 } else {
1816 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1817 Intrinsic::sqrt,
1818 CI->getType()),
1819 {CI->getArgOperand(0)});
1820 }
1821 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1822 CI->getArgOperand(1));
1124 } else if (IsX86 && (Name.startswith("avx512.ptestm") || 1823 } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
1125 Name.startswith("avx512.ptestnm"))) { 1824 Name.startswith("avx512.ptestnm"))) {
1126 Value *Op0 = CI->getArgOperand(0); 1825 Value *Op0 = CI->getArgOperand(0);
1127 Value *Op1 = CI->getArgOperand(1); 1826 Value *Op1 = CI->getArgOperand(1);
1128 Value *Mask = CI->getArgOperand(2); 1827 Value *Mask = CI->getArgOperand(2);
1130 llvm::Type *Ty = Op0->getType(); 1829 llvm::Type *Ty = Op0->getType();
1131 Value *Zero = llvm::Constant::getNullValue(Ty); 1830 Value *Zero = llvm::Constant::getNullValue(Ty);
1132 ICmpInst::Predicate Pred = 1831 ICmpInst::Predicate Pred =
1133 Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; 1832 Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
1134 Rep = Builder.CreateICmp(Pred, Rep, Zero); 1833 Rep = Builder.CreateICmp(Pred, Rep, Zero);
1135 unsigned NumElts = Op0->getType()->getVectorNumElements(); 1834 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask);
1136 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask, NumElts);
1137 } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){ 1835 } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){
1138 unsigned NumElts = 1836 unsigned NumElts =
1139 CI->getArgOperand(1)->getType()->getVectorNumElements(); 1837 CI->getArgOperand(1)->getType()->getVectorNumElements();
1140 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0)); 1838 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
1141 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 1839 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1201 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty()); 1899 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
1202 else 1900 else
1203 C = ConstantInt::getNullValue(Builder.getInt16Ty()); 1901 C = ConstantInt::getNullValue(Builder.getInt16Ty());
1204 Rep = Builder.CreateICmpEQ(Rep, C); 1902 Rep = Builder.CreateICmpEQ(Rep, C);
1205 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty()); 1903 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
1206 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) { 1904 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
1905 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
1906 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
1907 Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1207 Type *I32Ty = Type::getInt32Ty(C); 1908 Type *I32Ty = Type::getInt32Ty(C);
1208 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), 1909 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1209 ConstantInt::get(I32Ty, 0)); 1910 ConstantInt::get(I32Ty, 0));
1210 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), 1911 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1211 ConstantInt::get(I32Ty, 0)); 1912 ConstantInt::get(I32Ty, 0));
1212 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), 1913 Value *EltOp;
1213 Builder.CreateFAdd(Elt0, Elt1), 1914 if (Name.contains(".add."))
1214 ConstantInt::get(I32Ty, 0)); 1915 EltOp = Builder.CreateFAdd(Elt0, Elt1);
1215 } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) { 1916 else if (Name.contains(".sub."))
1216 Type *I32Ty = Type::getInt32Ty(C); 1917 EltOp = Builder.CreateFSub(Elt0, Elt1);
1217 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), 1918 else if (Name.contains(".mul."))
1218 ConstantInt::get(I32Ty, 0)); 1919 EltOp = Builder.CreateFMul(Elt0, Elt1);
1219 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), 1920 else
1220 ConstantInt::get(I32Ty, 0)); 1921 EltOp = Builder.CreateFDiv(Elt0, Elt1);
1221 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), 1922 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
1222 Builder.CreateFSub(Elt0, Elt1),
1223 ConstantInt::get(I32Ty, 0));
1224 } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
1225 Type *I32Ty = Type::getInt32Ty(C);
1226 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1227 ConstantInt::get(I32Ty, 0));
1228 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1229 ConstantInt::get(I32Ty, 0));
1230 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1231 Builder.CreateFMul(Elt0, Elt1),
1232 ConstantInt::get(I32Ty, 0));
1233 } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
1234 Type *I32Ty = Type::getInt32Ty(C);
1235 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
1236 ConstantInt::get(I32Ty, 0));
1237 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
1238 ConstantInt::get(I32Ty, 0));
1239 Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
1240 Builder.CreateFDiv(Elt0, Elt1),
1241 ConstantInt::get(I32Ty, 0)); 1923 ConstantInt::get(I32Ty, 0));
1242 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) { 1924 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) {
1243 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt." 1925 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
1244 bool CmpEq = Name[16] == 'e'; 1926 bool CmpEq = Name[16] == 'e';
1245 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true); 1927 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
1246 } else if (IsX86 && Name.startswith("avx512.mask.cmp")) { 1928 } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) {
1929 Type *OpTy = CI->getArgOperand(0)->getType();
1930 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1931 Intrinsic::ID IID;
1932 switch (VecWidth) {
1933 default: llvm_unreachable("Unexpected intrinsic");
1934 case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break;
1935 case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break;
1936 case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break;
1937 }
1938
1939 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1940 { CI->getOperand(0), CI->getArgOperand(1) });
1941 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1942 } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) {
1943 Type *OpTy = CI->getArgOperand(0)->getType();
1944 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1945 unsigned EltWidth = OpTy->getScalarSizeInBits();
1946 Intrinsic::ID IID;
1947 if (VecWidth == 128 && EltWidth == 32)
1948 IID = Intrinsic::x86_avx512_fpclass_ps_128;
1949 else if (VecWidth == 256 && EltWidth == 32)
1950 IID = Intrinsic::x86_avx512_fpclass_ps_256;
1951 else if (VecWidth == 512 && EltWidth == 32)
1952 IID = Intrinsic::x86_avx512_fpclass_ps_512;
1953 else if (VecWidth == 128 && EltWidth == 64)
1954 IID = Intrinsic::x86_avx512_fpclass_pd_128;
1955 else if (VecWidth == 256 && EltWidth == 64)
1956 IID = Intrinsic::x86_avx512_fpclass_pd_256;
1957 else if (VecWidth == 512 && EltWidth == 64)
1958 IID = Intrinsic::x86_avx512_fpclass_pd_512;
1959 else
1960 llvm_unreachable("Unexpected intrinsic");
1961
1962 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1963 { CI->getOperand(0), CI->getArgOperand(1) });
1964 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
1965 } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) {
1966 Type *OpTy = CI->getArgOperand(0)->getType();
1967 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
1968 unsigned EltWidth = OpTy->getScalarSizeInBits();
1969 Intrinsic::ID IID;
1970 if (VecWidth == 128 && EltWidth == 32)
1971 IID = Intrinsic::x86_avx512_cmp_ps_128;
1972 else if (VecWidth == 256 && EltWidth == 32)
1973 IID = Intrinsic::x86_avx512_cmp_ps_256;
1974 else if (VecWidth == 512 && EltWidth == 32)
1975 IID = Intrinsic::x86_avx512_cmp_ps_512;
1976 else if (VecWidth == 128 && EltWidth == 64)
1977 IID = Intrinsic::x86_avx512_cmp_pd_128;
1978 else if (VecWidth == 256 && EltWidth == 64)
1979 IID = Intrinsic::x86_avx512_cmp_pd_256;
1980 else if (VecWidth == 512 && EltWidth == 64)
1981 IID = Intrinsic::x86_avx512_cmp_pd_512;
1982 else
1983 llvm_unreachable("Unexpected intrinsic");
1984
1985 SmallVector<Value *, 4> Args;
1986 Args.push_back(CI->getArgOperand(0));
1987 Args.push_back(CI->getArgOperand(1));
1988 Args.push_back(CI->getArgOperand(2));
1989 if (CI->getNumArgOperands() == 5)
1990 Args.push_back(CI->getArgOperand(4));
1991
1992 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1993 Args);
1994 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3));
1995 } else if (IsX86 && Name.startswith("avx512.mask.cmp.") &&
1996 Name[16] != 'p') {
1997 // Integer compare intrinsics.
1247 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 1998 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1248 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true); 1999 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
1249 } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) { 2000 } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) {
1250 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2001 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1251 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false); 2002 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
1252 } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") || 2003 } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") ||
1253 Name.startswith("avx512.cvtw2mask.") || 2004 Name.startswith("avx512.cvtw2mask.") ||
1254 Name.startswith("avx512.cvtd2mask.") || 2005 Name.startswith("avx512.cvtd2mask.") ||
1255 Name.startswith("avx512.cvtq2mask."))) { 2006 Name.startswith("avx512.cvtq2mask."))) {
1256 Value *Op = CI->getArgOperand(0); 2007 Value *Op = CI->getArgOperand(0);
1257 Value *Zero = llvm::Constant::getNullValue(Op->getType()); 2008 Value *Zero = llvm::Constant::getNullValue(Op->getType());
1258 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero); 2009 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
1259 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr, 2010 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr);
1260 Op->getType()->getVectorNumElements());
1261 } else if(IsX86 && (Name == "ssse3.pabs.b.128" || 2011 } else if(IsX86 && (Name == "ssse3.pabs.b.128" ||
1262 Name == "ssse3.pabs.w.128" || 2012 Name == "ssse3.pabs.w.128" ||
1263 Name == "ssse3.pabs.d.128" || 2013 Name == "ssse3.pabs.d.128" ||
1264 Name.startswith("avx2.pabs") || 2014 Name.startswith("avx2.pabs") ||
1265 Name.startswith("avx512.mask.pabs"))) { 2015 Name.startswith("avx512.mask.pabs"))) {
1286 Name == "sse41.pminuw" || 2036 Name == "sse41.pminuw" ||
1287 Name == "sse41.pminud" || 2037 Name == "sse41.pminud" ||
1288 Name.startswith("avx2.pminu") || 2038 Name.startswith("avx2.pminu") ||
1289 Name.startswith("avx512.mask.pminu"))) { 2039 Name.startswith("avx512.mask.pminu"))) {
1290 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT); 2040 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
2041 } else if (IsX86 && (Name == "sse2.pmulu.dq" ||
2042 Name == "avx2.pmulu.dq" ||
2043 Name == "avx512.pmulu.dq.512" ||
2044 Name.startswith("avx512.mask.pmulu.dq."))) {
2045 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false);
2046 } else if (IsX86 && (Name == "sse41.pmuldq" ||
2047 Name == "avx2.pmul.dq" ||
2048 Name == "avx512.pmul.dq.512" ||
2049 Name.startswith("avx512.mask.pmul.dq."))) {
2050 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true);
2051 } else if (IsX86 && (Name == "sse.cvtsi2ss" ||
2052 Name == "sse2.cvtsi2sd" ||
2053 Name == "sse.cvtsi642ss" ||
2054 Name == "sse2.cvtsi642sd")) {
2055 Rep = Builder.CreateSIToFP(CI->getArgOperand(1),
2056 CI->getType()->getVectorElementType());
2057 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2058 } else if (IsX86 && Name == "avx512.cvtusi2sd") {
2059 Rep = Builder.CreateUIToFP(CI->getArgOperand(1),
2060 CI->getType()->getVectorElementType());
2061 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
2062 } else if (IsX86 && Name == "sse2.cvtss2sd") {
2063 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
2064 Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType());
2065 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
1291 } else if (IsX86 && (Name == "sse2.cvtdq2pd" || 2066 } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
2067 Name == "sse2.cvtdq2ps" ||
2068 Name == "avx.cvtdq2.pd.256" ||
2069 Name == "avx.cvtdq2.ps.256" ||
2070 Name.startswith("avx512.mask.cvtdq2pd.") ||
2071 Name.startswith("avx512.mask.cvtudq2pd.") ||
2072 Name.startswith("avx512.mask.cvtdq2ps.") ||
2073 Name.startswith("avx512.mask.cvtudq2ps.") ||
2074 Name.startswith("avx512.mask.cvtqq2pd.") ||
2075 Name.startswith("avx512.mask.cvtuqq2pd.") ||
2076 Name == "avx512.mask.cvtqq2ps.256" ||
2077 Name == "avx512.mask.cvtqq2ps.512" ||
2078 Name == "avx512.mask.cvtuqq2ps.256" ||
2079 Name == "avx512.mask.cvtuqq2ps.512" ||
1292 Name == "sse2.cvtps2pd" || 2080 Name == "sse2.cvtps2pd" ||
1293 Name == "avx.cvtdq2.pd.256" ||
1294 Name == "avx.cvt.ps2.pd.256" || 2081 Name == "avx.cvt.ps2.pd.256" ||
1295 Name.startswith("avx512.mask.cvtdq2pd.") || 2082 Name == "avx512.mask.cvtps2pd.128" ||
1296 Name.startswith("avx512.mask.cvtudq2pd."))) { 2083 Name == "avx512.mask.cvtps2pd.256")) {
1297 // Lossless i32/float to double conversion. 2084 Type *DstTy = CI->getType();
1298 // Extract the bottom elements if necessary and convert to double vector.
1299 Value *Src = CI->getArgOperand(0);
1300 VectorType *SrcTy = cast<VectorType>(Src->getType());
1301 VectorType *DstTy = cast<VectorType>(CI->getType());
1302 Rep = CI->getArgOperand(0); 2085 Rep = CI->getArgOperand(0);
1303 2086 Type *SrcTy = Rep->getType();
1304 unsigned NumDstElts = DstTy->getNumElements(); 2087
1305 if (NumDstElts < SrcTy->getNumElements()) { 2088 unsigned NumDstElts = DstTy->getVectorNumElements();
2089 if (NumDstElts < SrcTy->getVectorNumElements()) {
1306 assert(NumDstElts == 2 && "Unexpected vector size"); 2090 assert(NumDstElts == 2 && "Unexpected vector size");
1307 uint32_t ShuffleMask[2] = { 0, 1 }; 2091 uint32_t ShuffleMask[2] = { 0, 1 };
1308 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy), 2092 Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask);
1309 ShuffleMask);
1310 } 2093 }
1311 2094
1312 bool SInt2Double = (StringRef::npos != Name.find("cvtdq2")); 2095 bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy();
1313 bool UInt2Double = (StringRef::npos != Name.find("cvtudq2")); 2096 bool IsUnsigned = (StringRef::npos != Name.find("cvtu"));
1314 if (SInt2Double) 2097 if (IsPS2PD)
1315 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
1316 else if (UInt2Double)
1317 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
1318 else
1319 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd"); 2098 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
1320 2099 else if (CI->getNumArgOperands() == 4 &&
1321 if (CI->getNumArgOperands() == 3) 2100 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2101 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2102 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
2103 : Intrinsic::x86_avx512_sitofp_round;
2104 Function *F = Intrinsic::getDeclaration(CI->getModule(), IID,
2105 { DstTy, SrcTy });
2106 Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) });
2107 } else {
2108 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
2109 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
2110 }
2111
2112 if (CI->getNumArgOperands() >= 3)
1322 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 2113 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1323 CI->getArgOperand(1)); 2114 CI->getArgOperand(1));
1324 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) { 2115 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
1325 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0), 2116 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1326 CI->getArgOperand(1), CI->getArgOperand(2), 2117 CI->getArgOperand(1), CI->getArgOperand(2),
1327 /*Aligned*/false); 2118 /*Aligned*/false);
1328 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) { 2119 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
1329 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0), 2120 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
1330 CI->getArgOperand(1),CI->getArgOperand(2), 2121 CI->getArgOperand(1),CI->getArgOperand(2),
1331 /*Aligned*/true); 2122 /*Aligned*/true);
2123 } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) {
2124 Type *ResultTy = CI->getType();
2125 Type *PtrTy = ResultTy->getVectorElementType();
2126
2127 // Cast the pointer to element type.
2128 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2129 llvm::PointerType::getUnqual(PtrTy));
2130
2131 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2132 ResultTy->getVectorNumElements());
2133
2134 Function *ELd = Intrinsic::getDeclaration(F->getParent(),
2135 Intrinsic::masked_expandload,
2136 ResultTy);
2137 Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) });
2138 } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) {
2139 Type *ResultTy = CI->getArgOperand(1)->getType();
2140 Type *PtrTy = ResultTy->getVectorElementType();
2141
2142 // Cast the pointer to element type.
2143 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0),
2144 llvm::PointerType::getUnqual(PtrTy));
2145
2146 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2147 ResultTy->getVectorNumElements());
2148
2149 Function *CSt = Intrinsic::getDeclaration(F->getParent(),
2150 Intrinsic::masked_compressstore,
2151 ResultTy);
2152 Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec });
2153 } else if (IsX86 && (Name.startswith("avx512.mask.compress.") ||
2154 Name.startswith("avx512.mask.expand."))) {
2155 Type *ResultTy = CI->getType();
2156
2157 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
2158 ResultTy->getVectorNumElements());
2159
2160 bool IsCompress = Name[12] == 'c';
2161 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
2162 : Intrinsic::x86_avx512_mask_expand;
2163 Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy);
2164 Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1),
2165 MaskVec });
1332 } else if (IsX86 && Name.startswith("xop.vpcom")) { 2166 } else if (IsX86 && Name.startswith("xop.vpcom")) {
1333 Intrinsic::ID intID; 2167 bool IsSigned;
1334 if (Name.endswith("ub")) 2168 if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") ||
1335 intID = Intrinsic::x86_xop_vpcomub; 2169 Name.endswith("uq"))
1336 else if (Name.endswith("uw")) 2170 IsSigned = false;
1337 intID = Intrinsic::x86_xop_vpcomuw; 2171 else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") ||
1338 else if (Name.endswith("ud")) 2172 Name.endswith("q"))
1339 intID = Intrinsic::x86_xop_vpcomud; 2173 IsSigned = true;
1340 else if (Name.endswith("uq"))
1341 intID = Intrinsic::x86_xop_vpcomuq;
1342 else if (Name.endswith("b"))
1343 intID = Intrinsic::x86_xop_vpcomb;
1344 else if (Name.endswith("w"))
1345 intID = Intrinsic::x86_xop_vpcomw;
1346 else if (Name.endswith("d"))
1347 intID = Intrinsic::x86_xop_vpcomd;
1348 else if (Name.endswith("q"))
1349 intID = Intrinsic::x86_xop_vpcomq;
1350 else 2174 else
1351 llvm_unreachable("Unknown suffix"); 2175 llvm_unreachable("Unknown suffix");
1352 2176
1353 Name = Name.substr(9); // strip off "xop.vpcom"
1354 unsigned Imm; 2177 unsigned Imm;
1355 if (Name.startswith("lt")) 2178 if (CI->getNumArgOperands() == 3) {
1356 Imm = 0; 2179 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
1357 else if (Name.startswith("le")) 2180 } else {
1358 Imm = 1; 2181 Name = Name.substr(9); // strip off "xop.vpcom"
1359 else if (Name.startswith("gt")) 2182 if (Name.startswith("lt"))
1360 Imm = 2; 2183 Imm = 0;
1361 else if (Name.startswith("ge")) 2184 else if (Name.startswith("le"))
1362 Imm = 3; 2185 Imm = 1;
1363 else if (Name.startswith("eq")) 2186 else if (Name.startswith("gt"))
1364 Imm = 4; 2187 Imm = 2;
1365 else if (Name.startswith("ne")) 2188 else if (Name.startswith("ge"))
1366 Imm = 5; 2189 Imm = 3;
1367 else if (Name.startswith("false")) 2190 else if (Name.startswith("eq"))
1368 Imm = 6; 2191 Imm = 4;
1369 else if (Name.startswith("true")) 2192 else if (Name.startswith("ne"))
1370 Imm = 7; 2193 Imm = 5;
1371 else 2194 else if (Name.startswith("false"))
1372 llvm_unreachable("Unknown condition"); 2195 Imm = 6;
1373 2196 else if (Name.startswith("true"))
1374 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID); 2197 Imm = 7;
1375 Rep = 2198 else
1376 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1), 2199 llvm_unreachable("Unknown condition");
1377 Builder.getInt8(Imm)}); 2200 }
2201
2202 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
1378 } else if (IsX86 && Name.startswith("xop.vpcmov")) { 2203 } else if (IsX86 && Name.startswith("xop.vpcmov")) {
1379 Value *Sel = CI->getArgOperand(2); 2204 Value *Sel = CI->getArgOperand(2);
1380 Value *NotSel = Builder.CreateNot(Sel); 2205 Value *NotSel = Builder.CreateNot(Sel);
1381 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel); 2206 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
1382 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel); 2207 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
1383 Rep = Builder.CreateOr(Sel0, Sel1); 2208 Rep = Builder.CreateOr(Sel0, Sel1);
2209 } else if (IsX86 && (Name.startswith("xop.vprot") ||
2210 Name.startswith("avx512.prol") ||
2211 Name.startswith("avx512.mask.prol"))) {
2212 Rep = upgradeX86Rotate(Builder, *CI, false);
2213 } else if (IsX86 && (Name.startswith("avx512.pror") ||
2214 Name.startswith("avx512.mask.pror"))) {
2215 Rep = upgradeX86Rotate(Builder, *CI, true);
2216 } else if (IsX86 && (Name.startswith("avx512.vpshld.") ||
2217 Name.startswith("avx512.mask.vpshld") ||
2218 Name.startswith("avx512.maskz.vpshld"))) {
2219 bool ZeroMask = Name[11] == 'z';
2220 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
2221 } else if (IsX86 && (Name.startswith("avx512.vpshrd.") ||
2222 Name.startswith("avx512.mask.vpshrd") ||
2223 Name.startswith("avx512.maskz.vpshrd"))) {
2224 bool ZeroMask = Name[11] == 'z';
2225 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
1384 } else if (IsX86 && Name == "sse42.crc32.64.8") { 2226 } else if (IsX86 && Name == "sse42.crc32.64.8") {
1385 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(), 2227 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
1386 Intrinsic::x86_sse42_crc32_32_8); 2228 Intrinsic::x86_sse42_crc32_32_8);
1387 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); 2229 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
1388 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)}); 2230 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
1389 Rep = Builder.CreateZExt(Rep, CI->getType(), ""); 2231 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
1390 } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) { 2232 } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") ||
2233 Name.startswith("avx512.vbroadcast.s"))) {
1391 // Replace broadcasts with a series of insertelements. 2234 // Replace broadcasts with a series of insertelements.
1392 Type *VecTy = CI->getType(); 2235 Type *VecTy = CI->getType();
1393 Type *EltTy = VecTy->getVectorElementType(); 2236 Type *EltTy = VecTy->getVectorElementType();
1394 unsigned EltNum = VecTy->getVectorNumElements(); 2237 unsigned EltNum = VecTy->getVectorNumElements();
1395 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0), 2238 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0),
1423 : Builder.CreateZExt(SV, DstTy); 2266 : Builder.CreateZExt(SV, DstTy);
1424 // If there are 3 arguments, it's a masked intrinsic so we need a select. 2267 // If there are 3 arguments, it's a masked intrinsic so we need a select.
1425 if (CI->getNumArgOperands() == 3) 2268 if (CI->getNumArgOperands() == 3)
1426 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 2269 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1427 CI->getArgOperand(1)); 2270 CI->getArgOperand(1));
2271 } else if (Name == "avx512.mask.pmov.qd.256" ||
2272 Name == "avx512.mask.pmov.qd.512" ||
2273 Name == "avx512.mask.pmov.wb.256" ||
2274 Name == "avx512.mask.pmov.wb.512") {
2275 Type *Ty = CI->getArgOperand(1)->getType();
2276 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
2277 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
2278 CI->getArgOperand(1));
1428 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") || 2279 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
1429 Name == "avx2.vbroadcasti128")) { 2280 Name == "avx2.vbroadcasti128")) {
1430 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle. 2281 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
1431 Type *EltTy = CI->getType()->getVectorElementType(); 2282 Type *EltTy = CI->getType()->getVectorElementType();
1432 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits(); 2283 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
1433 Type *VT = VectorType::get(EltTy, NumSrcElts); 2284 Type *VT = VectorType::get(EltTy, NumSrcElts);
1434 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0), 2285 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
1435 PointerType::getUnqual(VT)); 2286 PointerType::getUnqual(VT));
1436 Value *Load = Builder.CreateAlignedLoad(Op, 1); 2287 Value *Load = Builder.CreateAlignedLoad(VT, Op, 1);
1437 if (NumSrcElts == 2) 2288 if (NumSrcElts == 2)
1438 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), 2289 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1439 { 0, 1, 0, 1 }); 2290 { 0, 1, 0, 1 });
1440 else 2291 else
1441 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), 2292 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
1489 Constant::getNullValue(MaskTy)); 2340 Constant::getNullValue(MaskTy));
1490 2341
1491 if (CI->getNumArgOperands() == 3) 2342 if (CI->getNumArgOperands() == 3)
1492 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 2343 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1493 CI->getArgOperand(1)); 2344 CI->getArgOperand(1));
2345 } else if (IsX86 && (Name.startswith("sse2.padds.") ||
2346 Name.startswith("sse2.psubs.") ||
2347 Name.startswith("avx2.padds.") ||
2348 Name.startswith("avx2.psubs.") ||
2349 Name.startswith("avx512.padds.") ||
2350 Name.startswith("avx512.psubs.") ||
2351 Name.startswith("avx512.mask.padds.") ||
2352 Name.startswith("avx512.mask.psubs."))) {
2353 bool IsAdd = Name.contains(".padds");
2354 Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
2355 } else if (IsX86 && (Name.startswith("sse2.paddus.") ||
2356 Name.startswith("sse2.psubus.") ||
2357 Name.startswith("avx2.paddus.") ||
2358 Name.startswith("avx2.psubus.") ||
2359 Name.startswith("avx512.mask.paddus.") ||
2360 Name.startswith("avx512.mask.psubus."))) {
2361 bool IsAdd = Name.contains(".paddus");
2362 Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
1494 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) { 2363 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
1495 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0), 2364 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
1496 CI->getArgOperand(1), 2365 CI->getArgOperand(1),
1497 CI->getArgOperand(2), 2366 CI->getArgOperand(2),
1498 CI->getArgOperand(3), 2367 CI->getArgOperand(3),
1806 2675
1807 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 2676 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
1808 2677
1809 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2678 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1810 CI->getArgOperand(2)); 2679 CI->getArgOperand(2));
1811 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) { 2680 } else if (IsX86 && (Name.startswith("avx512.mask.and.") ||
1812 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1)); 2681 Name.startswith("avx512.mask.pand."))) {
1813 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1814 CI->getArgOperand(2));
1815 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) {
1816 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)),
1817 CI->getArgOperand(1));
1818 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1819 CI->getArgOperand(2));
1820 } else if (IsX86 && Name.startswith("avx512.mask.por.")) {
1821 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1));
1822 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1823 CI->getArgOperand(2));
1824 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) {
1825 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
1826 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1827 CI->getArgOperand(2));
1828 } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
1829 VectorType *FTy = cast<VectorType>(CI->getType()); 2682 VectorType *FTy = cast<VectorType>(CI->getType());
1830 VectorType *ITy = VectorType::getInteger(FTy); 2683 VectorType *ITy = VectorType::getInteger(FTy);
1831 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 2684 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1832 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 2685 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1833 Rep = Builder.CreateBitCast(Rep, FTy); 2686 Rep = Builder.CreateBitCast(Rep, FTy);
1834 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2687 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1835 CI->getArgOperand(2)); 2688 CI->getArgOperand(2));
1836 } else if (IsX86 && Name.startswith("avx512.mask.andn.")) { 2689 } else if (IsX86 && (Name.startswith("avx512.mask.andn.") ||
2690 Name.startswith("avx512.mask.pandn."))) {
1837 VectorType *FTy = cast<VectorType>(CI->getType()); 2691 VectorType *FTy = cast<VectorType>(CI->getType());
1838 VectorType *ITy = VectorType::getInteger(FTy); 2692 VectorType *ITy = VectorType::getInteger(FTy);
1839 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy)); 2693 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
1840 Rep = Builder.CreateAnd(Rep, 2694 Rep = Builder.CreateAnd(Rep,
1841 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 2695 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1842 Rep = Builder.CreateBitCast(Rep, FTy); 2696 Rep = Builder.CreateBitCast(Rep, FTy);
1843 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2697 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1844 CI->getArgOperand(2)); 2698 CI->getArgOperand(2));
1845 } else if (IsX86 && Name.startswith("avx512.mask.or.")) { 2699 } else if (IsX86 && (Name.startswith("avx512.mask.or.") ||
2700 Name.startswith("avx512.mask.por."))) {
1846 VectorType *FTy = cast<VectorType>(CI->getType()); 2701 VectorType *FTy = cast<VectorType>(CI->getType());
1847 VectorType *ITy = VectorType::getInteger(FTy); 2702 VectorType *ITy = VectorType::getInteger(FTy);
1848 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 2703 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1849 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 2704 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1850 Rep = Builder.CreateBitCast(Rep, FTy); 2705 Rep = Builder.CreateBitCast(Rep, FTy);
1851 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2706 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1852 CI->getArgOperand(2)); 2707 CI->getArgOperand(2));
1853 } else if (IsX86 && Name.startswith("avx512.mask.xor.")) { 2708 } else if (IsX86 && (Name.startswith("avx512.mask.xor.") ||
2709 Name.startswith("avx512.mask.pxor."))) {
1854 VectorType *FTy = cast<VectorType>(CI->getType()); 2710 VectorType *FTy = cast<VectorType>(CI->getType());
1855 VectorType *ITy = VectorType::getInteger(FTy); 2711 VectorType *ITy = VectorType::getInteger(FTy);
1856 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 2712 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
1857 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 2713 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
1858 Rep = Builder.CreateBitCast(Rep, FTy); 2714 Rep = Builder.CreateBitCast(Rep, FTy);
1868 CI->getArgOperand(2)); 2724 CI->getArgOperand(2));
1869 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) { 2725 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
1870 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1)); 2726 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
1871 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2727 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1872 CI->getArgOperand(2)); 2728 CI->getArgOperand(2));
1873 } else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) { 2729 } else if (IsX86 && Name.startswith("avx512.mask.add.p")) {
1874 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1)); 2730 if (Name.endswith(".512")) {
2731 Intrinsic::ID IID;
2732 if (Name[17] == 's')
2733 IID = Intrinsic::x86_avx512_add_ps_512;
2734 else
2735 IID = Intrinsic::x86_avx512_add_pd_512;
2736
2737 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2738 { CI->getArgOperand(0), CI->getArgOperand(1),
2739 CI->getArgOperand(4) });
2740 } else {
2741 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
2742 }
1875 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2743 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1876 CI->getArgOperand(2)); 2744 CI->getArgOperand(2));
1877 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) { 2745 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
1878 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1)); 2746 if (Name.endswith(".512")) {
2747 Intrinsic::ID IID;
2748 if (Name[17] == 's')
2749 IID = Intrinsic::x86_avx512_div_ps_512;
2750 else
2751 IID = Intrinsic::x86_avx512_div_pd_512;
2752
2753 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2754 { CI->getArgOperand(0), CI->getArgOperand(1),
2755 CI->getArgOperand(4) });
2756 } else {
2757 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
2758 }
1879 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2759 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1880 CI->getArgOperand(2)); 2760 CI->getArgOperand(2));
1881 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) { 2761 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
1882 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1)); 2762 if (Name.endswith(".512")) {
2763 Intrinsic::ID IID;
2764 if (Name[17] == 's')
2765 IID = Intrinsic::x86_avx512_mul_ps_512;
2766 else
2767 IID = Intrinsic::x86_avx512_mul_pd_512;
2768
2769 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2770 { CI->getArgOperand(0), CI->getArgOperand(1),
2771 CI->getArgOperand(4) });
2772 } else {
2773 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
2774 }
1883 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2775 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1884 CI->getArgOperand(2)); 2776 CI->getArgOperand(2));
1885 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) { 2777 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
1886 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1)); 2778 if (Name.endswith(".512")) {
2779 Intrinsic::ID IID;
2780 if (Name[17] == 's')
2781 IID = Intrinsic::x86_avx512_sub_ps_512;
2782 else
2783 IID = Intrinsic::x86_avx512_sub_pd_512;
2784
2785 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2786 { CI->getArgOperand(0), CI->getArgOperand(1),
2787 CI->getArgOperand(4) });
2788 } else {
2789 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
2790 }
2791 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2792 CI->getArgOperand(2));
2793 } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
2794 Name.startswith("avx512.mask.min.p")) &&
2795 Name.drop_front(18) == ".512") {
2796 bool IsDouble = Name[17] == 'd';
2797 bool IsMin = Name[13] == 'i';
2798 static const Intrinsic::ID MinMaxTbl[2][2] = {
2799 { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 },
2800 { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 }
2801 };
2802 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
2803
2804 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
2805 { CI->getArgOperand(0), CI->getArgOperand(1),
2806 CI->getArgOperand(4) });
1887 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 2807 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1888 CI->getArgOperand(2)); 2808 CI->getArgOperand(2));
1889 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) { 2809 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) {
1890 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), 2810 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(),
1891 Intrinsic::ctlz, 2811 Intrinsic::ctlz,
1892 CI->getType()), 2812 CI->getType()),
1893 { CI->getArgOperand(0), Builder.getInt1(false) }); 2813 { CI->getArgOperand(0), Builder.getInt1(false) });
1894 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, 2814 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
1895 CI->getArgOperand(1)); 2815 CI->getArgOperand(1));
1896 } else if (IsX86 && (Name.startswith("avx512.mask.max.p") ||
1897 Name.startswith("avx512.mask.min.p"))) {
1898 bool IsMin = Name[13] == 'i';
1899 VectorType *VecTy = cast<VectorType>(CI->getType());
1900 unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
1901 unsigned EltWidth = VecTy->getScalarSizeInBits();
1902 Intrinsic::ID IID;
1903 if (!IsMin && VecWidth == 128 && EltWidth == 32)
1904 IID = Intrinsic::x86_sse_max_ps;
1905 else if (!IsMin && VecWidth == 128 && EltWidth == 64)
1906 IID = Intrinsic::x86_sse2_max_pd;
1907 else if (!IsMin && VecWidth == 256 && EltWidth == 32)
1908 IID = Intrinsic::x86_avx_max_ps_256;
1909 else if (!IsMin && VecWidth == 256 && EltWidth == 64)
1910 IID = Intrinsic::x86_avx_max_pd_256;
1911 else if (IsMin && VecWidth == 128 && EltWidth == 32)
1912 IID = Intrinsic::x86_sse_min_ps;
1913 else if (IsMin && VecWidth == 128 && EltWidth == 64)
1914 IID = Intrinsic::x86_sse2_min_pd;
1915 else if (IsMin && VecWidth == 256 && EltWidth == 32)
1916 IID = Intrinsic::x86_avx_min_ps_256;
1917 else if (IsMin && VecWidth == 256 && EltWidth == 64)
1918 IID = Intrinsic::x86_avx_min_pd_256;
1919 else
1920 llvm_unreachable("Unexpected intrinsic");
1921
1922 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1923 { CI->getArgOperand(0), CI->getArgOperand(1) });
1924 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1925 CI->getArgOperand(2));
1926 } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
1927 VectorType *VecTy = cast<VectorType>(CI->getType());
1928 Intrinsic::ID IID;
1929 if (VecTy->getPrimitiveSizeInBits() == 128)
1930 IID = Intrinsic::x86_ssse3_pshuf_b_128;
1931 else if (VecTy->getPrimitiveSizeInBits() == 256)
1932 IID = Intrinsic::x86_avx2_pshuf_b;
1933 else if (VecTy->getPrimitiveSizeInBits() == 512)
1934 IID = Intrinsic::x86_avx512_pshuf_b_512;
1935 else
1936 llvm_unreachable("Unexpected intrinsic");
1937
1938 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1939 { CI->getArgOperand(0), CI->getArgOperand(1) });
1940 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1941 CI->getArgOperand(2));
1942 } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
1943 Name.startswith("avx512.mask.pmulu.dq."))) {
1944 bool IsUnsigned = Name[16] == 'u';
1945 VectorType *VecTy = cast<VectorType>(CI->getType());
1946 Intrinsic::ID IID;
1947 if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1948 IID = Intrinsic::x86_sse41_pmuldq;
1949 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1950 IID = Intrinsic::x86_avx2_pmul_dq;
1951 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1952 IID = Intrinsic::x86_avx512_pmul_dq_512;
1953 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
1954 IID = Intrinsic::x86_sse2_pmulu_dq;
1955 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
1956 IID = Intrinsic::x86_avx2_pmulu_dq;
1957 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
1958 IID = Intrinsic::x86_avx512_pmulu_dq_512;
1959 else
1960 llvm_unreachable("Unexpected intrinsic");
1961
1962 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1963 { CI->getArgOperand(0), CI->getArgOperand(1) });
1964 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
1965 CI->getArgOperand(2));
1966 } else if (IsX86 && Name.startswith("avx512.mask.pack")) {
1967 bool IsUnsigned = Name[16] == 'u';
1968 bool IsDW = Name[18] == 'd';
1969 VectorType *VecTy = cast<VectorType>(CI->getType());
1970 Intrinsic::ID IID;
1971 if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1972 IID = Intrinsic::x86_sse2_packsswb_128;
1973 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1974 IID = Intrinsic::x86_avx2_packsswb;
1975 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1976 IID = Intrinsic::x86_avx512_packsswb_512;
1977 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1978 IID = Intrinsic::x86_sse2_packssdw_128;
1979 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1980 IID = Intrinsic::x86_avx2_packssdw;
1981 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1982 IID = Intrinsic::x86_avx512_packssdw_512;
1983 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1984 IID = Intrinsic::x86_sse2_packuswb_128;
1985 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1986 IID = Intrinsic::x86_avx2_packuswb;
1987 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1988 IID = Intrinsic::x86_avx512_packuswb_512;
1989 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128)
1990 IID = Intrinsic::x86_sse41_packusdw;
1991 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256)
1992 IID = Intrinsic::x86_avx2_packusdw;
1993 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512)
1994 IID = Intrinsic::x86_avx512_packusdw_512;
1995 else
1996 llvm_unreachable("Unexpected intrinsic");
1997
1998 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
1999 { CI->getArgOperand(0), CI->getArgOperand(1) });
2000 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2001 CI->getArgOperand(2));
2002 } else if (IsX86 && Name.startswith("avx512.mask.psll")) { 2816 } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
2003 bool IsImmediate = Name[16] == 'i' || 2817 bool IsImmediate = Name[16] == 'i' ||
2004 (Name.size() > 18 && Name[18] == 'i'); 2818 (Name.size() > 18 && Name[18] == 'i');
2005 bool IsVariable = Name[16] == 'v'; 2819 bool IsVariable = Name[16] == 'v';
2006 char Size = Name[16] == '.' ? Name[17] : 2820 char Size = Name[16] == '.' ? Name[17] :
2203 Rep = UpgradeX86MaskedShift(Builder, *CI, IID); 3017 Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
2204 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) { 3018 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
2205 Rep = upgradeMaskedMove(Builder, *CI); 3019 Rep = upgradeMaskedMove(Builder, *CI);
2206 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) { 3020 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
2207 Rep = UpgradeMaskToInt(Builder, *CI); 3021 Rep = UpgradeMaskToInt(Builder, *CI);
2208 } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
2209 Intrinsic::ID IID;
2210 if (Name.endswith("ps.128"))
2211 IID = Intrinsic::x86_avx_vpermilvar_ps;
2212 else if (Name.endswith("pd.128"))
2213 IID = Intrinsic::x86_avx_vpermilvar_pd;
2214 else if (Name.endswith("ps.256"))
2215 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2216 else if (Name.endswith("pd.256"))
2217 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2218 else if (Name.endswith("ps.512"))
2219 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2220 else if (Name.endswith("pd.512"))
2221 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2222 else
2223 llvm_unreachable("Unexpected vpermilvar intrinsic");
2224
2225 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
2226 Rep = Builder.CreateCall(Intrin,
2227 { CI->getArgOperand(0), CI->getArgOperand(1) });
2228 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
2229 CI->getArgOperand(2));
2230 } else if (IsX86 && Name.endswith(".movntdqa")) { 3022 } else if (IsX86 && Name.endswith(".movntdqa")) {
2231 Module *M = F->getParent(); 3023 Module *M = F->getParent();
2232 MDNode *Node = MDNode::get( 3024 MDNode *Node = MDNode::get(
2233 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 3025 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2234 3026
2236 VectorType *VTy = cast<VectorType>(CI->getType()); 3028 VectorType *VTy = cast<VectorType>(CI->getType());
2237 3029
2238 // Convert the type of the pointer to a pointer to the stored type. 3030 // Convert the type of the pointer to a pointer to the stored type.
2239 Value *BC = 3031 Value *BC =
2240 Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast"); 3032 Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast");
2241 LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8); 3033 LoadInst *LI = Builder.CreateAlignedLoad(VTy, BC, VTy->getBitWidth() / 8);
2242 LI->setMetadata(M->getMDKindID("nontemporal"), Node); 3034 LI->setMetadata(M->getMDKindID("nontemporal"), Node);
2243 Rep = LI; 3035 Rep = LI;
2244 } else if (IsX86 && 3036 } else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
2245 (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") || 3037 Name.startswith("fma.vfmsub.") ||
2246 Name.startswith("avx512.mask.pavg"))) { 3038 Name.startswith("fma.vfnmadd.") ||
2247 // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w, 3039 Name.startswith("fma.vfnmsub."))) {
2248 // llvm.x86.avx512.mask.pavg.b/w 3040 bool NegMul = Name[6] == 'n';
3041 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
3042 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
3043
3044 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3045 CI->getArgOperand(2) };
3046
3047 if (IsScalar) {
3048 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3049 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3050 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3051 }
3052
3053 if (NegMul && !IsScalar)
3054 Ops[0] = Builder.CreateFNeg(Ops[0]);
3055 if (NegMul && IsScalar)
3056 Ops[1] = Builder.CreateFNeg(Ops[1]);
3057 if (NegAcc)
3058 Ops[2] = Builder.CreateFNeg(Ops[2]);
3059
3060 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3061 Intrinsic::fma,
3062 Ops[0]->getType()),
3063 Ops);
3064
3065 if (IsScalar)
3066 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep,
3067 (uint64_t)0);
3068 } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) {
3069 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3070 CI->getArgOperand(2) };
3071
3072 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
3073 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
3074 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
3075
3076 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(),
3077 Intrinsic::fma,
3078 Ops[0]->getType()),
3079 Ops);
3080
3081 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
3082 Rep, (uint64_t)0);
3083 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") ||
3084 Name.startswith("avx512.maskz.vfmadd.s") ||
3085 Name.startswith("avx512.mask3.vfmadd.s") ||
3086 Name.startswith("avx512.mask3.vfmsub.s") ||
3087 Name.startswith("avx512.mask3.vfnmsub.s"))) {
3088 bool IsMask3 = Name[11] == '3';
3089 bool IsMaskZ = Name[11] == 'z';
3090 // Drop the "avx512.mask." to make it easier.
3091 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3092 bool NegMul = Name[2] == 'n';
3093 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3094
2249 Value *A = CI->getArgOperand(0); 3095 Value *A = CI->getArgOperand(0);
2250 Value *B = CI->getArgOperand(1); 3096 Value *B = CI->getArgOperand(1);
2251 VectorType *ZextType = VectorType::getExtendedElementVectorType( 3097 Value *C = CI->getArgOperand(2);
2252 cast<VectorType>(A->getType())); 3098
2253 Value *ExtendedA = Builder.CreateZExt(A, ZextType); 3099 if (NegMul && (IsMask3 || IsMaskZ))
2254 Value *ExtendedB = Builder.CreateZExt(B, ZextType); 3100 A = Builder.CreateFNeg(A);
2255 Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB); 3101 if (NegMul && !(IsMask3 || IsMaskZ))
2256 Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1)); 3102 B = Builder.CreateFNeg(B);
2257 Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1)); 3103 if (NegAcc)
2258 Rep = Builder.CreateTrunc(ShiftR, A->getType()); 3104 C = Builder.CreateFNeg(C);
2259 if (CI->getNumArgOperands() > 2) { 3105
2260 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, 3106 A = Builder.CreateExtractElement(A, (uint64_t)0);
2261 CI->getArgOperand(2)); 3107 B = Builder.CreateExtractElement(B, (uint64_t)0);
3108 C = Builder.CreateExtractElement(C, (uint64_t)0);
3109
3110 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3111 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
3112 Value *Ops[] = { A, B, C, CI->getArgOperand(4) };
3113
3114 Intrinsic::ID IID;
3115 if (Name.back() == 'd')
3116 IID = Intrinsic::x86_avx512_vfmadd_f64;
3117 else
3118 IID = Intrinsic::x86_avx512_vfmadd_f32;
3119 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID);
3120 Rep = Builder.CreateCall(FMA, Ops);
3121 } else {
3122 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3123 Intrinsic::fma,
3124 A->getType());
3125 Rep = Builder.CreateCall(FMA, { A, B, C });
2262 } 3126 }
3127
3128 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) :
3129 IsMask3 ? C : A;
3130
3131 // For Mask3 with NegAcc, we need to create a new extractelement that
3132 // avoids the negation above.
3133 if (NegAcc && IsMask3)
3134 PassThru = Builder.CreateExtractElement(CI->getArgOperand(2),
3135 (uint64_t)0);
3136
3137 Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3),
3138 Rep, PassThru);
3139 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0),
3140 Rep, (uint64_t)0);
3141 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") ||
3142 Name.startswith("avx512.mask.vfnmadd.p") ||
3143 Name.startswith("avx512.mask.vfnmsub.p") ||
3144 Name.startswith("avx512.mask3.vfmadd.p") ||
3145 Name.startswith("avx512.mask3.vfmsub.p") ||
3146 Name.startswith("avx512.mask3.vfnmsub.p") ||
3147 Name.startswith("avx512.maskz.vfmadd.p"))) {
3148 bool IsMask3 = Name[11] == '3';
3149 bool IsMaskZ = Name[11] == 'z';
3150 // Drop the "avx512.mask." to make it easier.
3151 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3152 bool NegMul = Name[2] == 'n';
3153 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
3154
3155 Value *A = CI->getArgOperand(0);
3156 Value *B = CI->getArgOperand(1);
3157 Value *C = CI->getArgOperand(2);
3158
3159 if (NegMul && (IsMask3 || IsMaskZ))
3160 A = Builder.CreateFNeg(A);
3161 if (NegMul && !(IsMask3 || IsMaskZ))
3162 B = Builder.CreateFNeg(B);
3163 if (NegAcc)
3164 C = Builder.CreateFNeg(C);
3165
3166 if (CI->getNumArgOperands() == 5 &&
3167 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3168 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3169 Intrinsic::ID IID;
3170 // Check the character before ".512" in string.
3171 if (Name[Name.size()-5] == 's')
3172 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
3173 else
3174 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
3175
3176 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3177 { A, B, C, CI->getArgOperand(4) });
3178 } else {
3179 Function *FMA = Intrinsic::getDeclaration(CI->getModule(),
3180 Intrinsic::fma,
3181 A->getType());
3182 Rep = Builder.CreateCall(FMA, { A, B, C });
3183 }
3184
3185 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3186 IsMask3 ? CI->getArgOperand(2) :
3187 CI->getArgOperand(0);
3188
3189 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3190 } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") ||
3191 Name.startswith("fma.vfmsubadd.p"))) {
3192 bool IsSubAdd = Name[7] == 's';
3193 int NumElts = CI->getType()->getVectorNumElements();
3194
3195 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3196 CI->getArgOperand(2) };
3197
3198 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3199 Ops[0]->getType());
3200 Value *Odd = Builder.CreateCall(FMA, Ops);
3201 Ops[2] = Builder.CreateFNeg(Ops[2]);
3202 Value *Even = Builder.CreateCall(FMA, Ops);
3203
3204 if (IsSubAdd)
3205 std::swap(Even, Odd);
3206
3207 SmallVector<uint32_t, 32> Idxs(NumElts);
3208 for (int i = 0; i != NumElts; ++i)
3209 Idxs[i] = i + (i % 2) * NumElts;
3210
3211 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3212 } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") ||
3213 Name.startswith("avx512.mask3.vfmaddsub.p") ||
3214 Name.startswith("avx512.maskz.vfmaddsub.p") ||
3215 Name.startswith("avx512.mask3.vfmsubadd.p"))) {
3216 bool IsMask3 = Name[11] == '3';
3217 bool IsMaskZ = Name[11] == 'z';
3218 // Drop the "avx512.mask." to make it easier.
3219 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
3220 bool IsSubAdd = Name[3] == 's';
3221 if (CI->getNumArgOperands() == 5 &&
3222 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
3223 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
3224 Intrinsic::ID IID;
3225 // Check the character before ".512" in string.
3226 if (Name[Name.size()-5] == 's')
3227 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
3228 else
3229 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
3230
3231 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3232 CI->getArgOperand(2), CI->getArgOperand(4) };
3233 if (IsSubAdd)
3234 Ops[2] = Builder.CreateFNeg(Ops[2]);
3235
3236 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
3237 {CI->getArgOperand(0), CI->getArgOperand(1),
3238 CI->getArgOperand(2), CI->getArgOperand(4)});
3239 } else {
3240 int NumElts = CI->getType()->getVectorNumElements();
3241
3242 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3243 CI->getArgOperand(2) };
3244
3245 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma,
3246 Ops[0]->getType());
3247 Value *Odd = Builder.CreateCall(FMA, Ops);
3248 Ops[2] = Builder.CreateFNeg(Ops[2]);
3249 Value *Even = Builder.CreateCall(FMA, Ops);
3250
3251 if (IsSubAdd)
3252 std::swap(Even, Odd);
3253
3254 SmallVector<uint32_t, 32> Idxs(NumElts);
3255 for (int i = 0; i != NumElts; ++i)
3256 Idxs[i] = i + (i % 2) * NumElts;
3257
3258 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
3259 }
3260
3261 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) :
3262 IsMask3 ? CI->getArgOperand(2) :
3263 CI->getArgOperand(0);
3264
3265 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3266 } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") ||
3267 Name.startswith("avx512.maskz.pternlog."))) {
3268 bool ZeroMask = Name[11] == 'z';
3269 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3270 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
3271 Intrinsic::ID IID;
3272 if (VecWidth == 128 && EltWidth == 32)
3273 IID = Intrinsic::x86_avx512_pternlog_d_128;
3274 else if (VecWidth == 256 && EltWidth == 32)
3275 IID = Intrinsic::x86_avx512_pternlog_d_256;
3276 else if (VecWidth == 512 && EltWidth == 32)
3277 IID = Intrinsic::x86_avx512_pternlog_d_512;
3278 else if (VecWidth == 128 && EltWidth == 64)
3279 IID = Intrinsic::x86_avx512_pternlog_q_128;
3280 else if (VecWidth == 256 && EltWidth == 64)
3281 IID = Intrinsic::x86_avx512_pternlog_q_256;
3282 else if (VecWidth == 512 && EltWidth == 64)
3283 IID = Intrinsic::x86_avx512_pternlog_q_512;
3284 else
3285 llvm_unreachable("Unexpected intrinsic");
3286
3287 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3288 CI->getArgOperand(2), CI->getArgOperand(3) };
3289 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3290 Args);
3291 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3292 : CI->getArgOperand(0);
3293 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
3294 } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") ||
3295 Name.startswith("avx512.maskz.vpmadd52"))) {
3296 bool ZeroMask = Name[11] == 'z';
3297 bool High = Name[20] == 'h' || Name[21] == 'h';
3298 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3299 Intrinsic::ID IID;
3300 if (VecWidth == 128 && !High)
3301 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
3302 else if (VecWidth == 256 && !High)
3303 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
3304 else if (VecWidth == 512 && !High)
3305 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
3306 else if (VecWidth == 128 && High)
3307 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
3308 else if (VecWidth == 256 && High)
3309 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
3310 else if (VecWidth == 512 && High)
3311 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
3312 else
3313 llvm_unreachable("Unexpected intrinsic");
3314
3315 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1),
3316 CI->getArgOperand(2) };
3317 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3318 Args);
3319 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3320 : CI->getArgOperand(0);
3321 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3322 } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") ||
3323 Name.startswith("avx512.mask.vpermt2var.") ||
3324 Name.startswith("avx512.maskz.vpermt2var."))) {
3325 bool ZeroMask = Name[11] == 'z';
3326 bool IndexForm = Name[17] == 'i';
3327 Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
3328 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") ||
3329 Name.startswith("avx512.maskz.vpdpbusd.") ||
3330 Name.startswith("avx512.mask.vpdpbusds.") ||
3331 Name.startswith("avx512.maskz.vpdpbusds."))) {
3332 bool ZeroMask = Name[11] == 'z';
3333 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3334 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3335 Intrinsic::ID IID;
3336 if (VecWidth == 128 && !IsSaturating)
3337 IID = Intrinsic::x86_avx512_vpdpbusd_128;
3338 else if (VecWidth == 256 && !IsSaturating)
3339 IID = Intrinsic::x86_avx512_vpdpbusd_256;
3340 else if (VecWidth == 512 && !IsSaturating)
3341 IID = Intrinsic::x86_avx512_vpdpbusd_512;
3342 else if (VecWidth == 128 && IsSaturating)
3343 IID = Intrinsic::x86_avx512_vpdpbusds_128;
3344 else if (VecWidth == 256 && IsSaturating)
3345 IID = Intrinsic::x86_avx512_vpdpbusds_256;
3346 else if (VecWidth == 512 && IsSaturating)
3347 IID = Intrinsic::x86_avx512_vpdpbusds_512;
3348 else
3349 llvm_unreachable("Unexpected intrinsic");
3350
3351 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3352 CI->getArgOperand(2) };
3353 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3354 Args);
3355 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3356 : CI->getArgOperand(0);
3357 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3358 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") ||
3359 Name.startswith("avx512.maskz.vpdpwssd.") ||
3360 Name.startswith("avx512.mask.vpdpwssds.") ||
3361 Name.startswith("avx512.maskz.vpdpwssds."))) {
3362 bool ZeroMask = Name[11] == 'z';
3363 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
3364 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
3365 Intrinsic::ID IID;
3366 if (VecWidth == 128 && !IsSaturating)
3367 IID = Intrinsic::x86_avx512_vpdpwssd_128;
3368 else if (VecWidth == 256 && !IsSaturating)
3369 IID = Intrinsic::x86_avx512_vpdpwssd_256;
3370 else if (VecWidth == 512 && !IsSaturating)
3371 IID = Intrinsic::x86_avx512_vpdpwssd_512;
3372 else if (VecWidth == 128 && IsSaturating)
3373 IID = Intrinsic::x86_avx512_vpdpwssds_128;
3374 else if (VecWidth == 256 && IsSaturating)
3375 IID = Intrinsic::x86_avx512_vpdpwssds_256;
3376 else if (VecWidth == 512 && IsSaturating)
3377 IID = Intrinsic::x86_avx512_vpdpwssds_512;
3378 else
3379 llvm_unreachable("Unexpected intrinsic");
3380
3381 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3382 CI->getArgOperand(2) };
3383 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID),
3384 Args);
3385 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
3386 : CI->getArgOperand(0);
3387 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
3388 } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
3389 Name == "addcarry.u32" || Name == "addcarry.u64" ||
3390 Name == "subborrow.u32" || Name == "subborrow.u64")) {
3391 Intrinsic::ID IID;
3392 if (Name[0] == 'a' && Name.back() == '2')
3393 IID = Intrinsic::x86_addcarry_32;
3394 else if (Name[0] == 'a' && Name.back() == '4')
3395 IID = Intrinsic::x86_addcarry_64;
3396 else if (Name[0] == 's' && Name.back() == '2')
3397 IID = Intrinsic::x86_subborrow_32;
3398 else if (Name[0] == 's' && Name.back() == '4')
3399 IID = Intrinsic::x86_subborrow_64;
3400 else
3401 llvm_unreachable("Unexpected intrinsic");
3402
3403 // Make a call with 3 operands.
3404 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1),
3405 CI->getArgOperand(2)};
3406 Value *NewCall = Builder.CreateCall(
3407 Intrinsic::getDeclaration(CI->getModule(), IID),
3408 Args);
3409
3410 // Extract the second result and store it.
3411 Value *Data = Builder.CreateExtractValue(NewCall, 1);
3412 // Cast the pointer to the right type.
3413 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3),
3414 llvm::PointerType::getUnqual(Data->getType()));
3415 Builder.CreateAlignedStore(Data, Ptr, 1);
3416 // Replace the original call result with the first result of the new call.
3417 Value *CF = Builder.CreateExtractValue(NewCall, 0);
3418
3419 CI->replaceAllUsesWith(CF);
3420 Rep = nullptr;
3421 } else if (IsX86 && Name.startswith("avx512.mask.") &&
3422 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
3423 // Rep will be updated by the call in the condition.
2263 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) { 3424 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) {
2264 Value *Arg = CI->getArgOperand(0); 3425 Value *Arg = CI->getArgOperand(0);
2265 Value *Neg = Builder.CreateNeg(Arg, "neg"); 3426 Value *Neg = Builder.CreateNeg(Arg, "neg");
2266 Value *Cmp = Builder.CreateICmpSGE( 3427 Value *Cmp = Builder.CreateICmpSGE(
2267 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond"); 3428 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2268 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs"); 3429 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
3430 } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") ||
3431 Name.startswith("atomic.load.add.f64.p"))) {
3432 Value *Ptr = CI->getArgOperand(0);
3433 Value *Val = CI->getArgOperand(1);
3434 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val,
3435 AtomicOrdering::SequentiallyConsistent);
2269 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" || 3436 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" ||
2270 Name == "max.ui" || Name == "max.ull")) { 3437 Name == "max.ui" || Name == "max.ull")) {
2271 Value *Arg0 = CI->getArgOperand(0); 3438 Value *Arg0 = CI->getArgOperand(0);
2272 Value *Arg1 = CI->getArgOperand(1); 3439 Value *Arg1 = CI->getArgOperand(1);
2273 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull") 3440 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull")
2325 switch (NewFn->getIntrinsicID()) { 3492 switch (NewFn->getIntrinsicID()) {
2326 default: { 3493 default: {
2327 DefaultCase(); 3494 DefaultCase();
2328 return; 3495 return;
2329 } 3496 }
2330 3497 case Intrinsic::experimental_vector_reduce_v2_fmul: {
3498 SmallVector<Value *, 2> Args;
3499 if (CI->isFast())
3500 Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0));
3501 else
3502 Args.push_back(CI->getOperand(0));
3503 Args.push_back(CI->getOperand(1));
3504 NewCall = Builder.CreateCall(NewFn, Args);
3505 cast<Instruction>(NewCall)->copyFastMathFlags(CI);
3506 break;
3507 }
3508 case Intrinsic::experimental_vector_reduce_v2_fadd: {
3509 SmallVector<Value *, 2> Args;
3510 if (CI->isFast())
3511 Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType()));
3512 else
3513 Args.push_back(CI->getOperand(0));
3514 Args.push_back(CI->getOperand(1));
3515 NewCall = Builder.CreateCall(NewFn, Args);
3516 cast<Instruction>(NewCall)->copyFastMathFlags(CI);
3517 break;
3518 }
2331 case Intrinsic::arm_neon_vld1: 3519 case Intrinsic::arm_neon_vld1:
2332 case Intrinsic::arm_neon_vld2: 3520 case Intrinsic::arm_neon_vld2:
2333 case Intrinsic::arm_neon_vld3: 3521 case Intrinsic::arm_neon_vld3:
2334 case Intrinsic::arm_neon_vld4: 3522 case Intrinsic::arm_neon_vld4:
2335 case Intrinsic::arm_neon_vld2lane: 3523 case Intrinsic::arm_neon_vld2lane:
2362 3550
2363 case Intrinsic::objectsize: { 3551 case Intrinsic::objectsize: {
2364 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2 3552 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2
2365 ? Builder.getFalse() 3553 ? Builder.getFalse()
2366 : CI->getArgOperand(2); 3554 : CI->getArgOperand(2);
3555 Value *Dynamic =
3556 CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
2367 NewCall = Builder.CreateCall( 3557 NewCall = Builder.CreateCall(
2368 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize}); 3558 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
2369 break; 3559 break;
2370 } 3560 }
2371 3561
2372 case Intrinsic::ctpop: 3562 case Intrinsic::ctpop:
2373 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); 3563 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
2430 3620
2431 NewCall = Builder.CreateCall(NewFn, {BC0, BC1}); 3621 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
2432 break; 3622 break;
2433 } 3623 }
2434 3624
3625 case Intrinsic::x86_rdtscp: {
3626 // This used to take 1 argument. If we have no arguments, it is already
3627 // upgraded.
3628 if (CI->getNumOperands() == 0)
3629 return;
3630
3631 NewCall = Builder.CreateCall(NewFn);
3632 // Extract the second result and store it.
3633 Value *Data = Builder.CreateExtractValue(NewCall, 1);
3634 // Cast the pointer to the right type.
3635 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0),
3636 llvm::PointerType::getUnqual(Data->getType()));
3637 Builder.CreateAlignedStore(Data, Ptr, 1);
3638 // Replace the original call result with the first result of the new call.
3639 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
3640
3641 std::string Name = CI->getName();
3642 if (!Name.empty()) {
3643 CI->setName(Name + ".old");
3644 NewCall->setName(Name);
3645 }
3646 CI->replaceAllUsesWith(TSC);
3647 CI->eraseFromParent();
3648 return;
3649 }
3650
2435 case Intrinsic::x86_sse41_insertps: 3651 case Intrinsic::x86_sse41_insertps:
2436 case Intrinsic::x86_sse41_dppd: 3652 case Intrinsic::x86_sse41_dppd:
2437 case Intrinsic::x86_sse41_dpps: 3653 case Intrinsic::x86_sse41_dpps:
2438 case Intrinsic::x86_sse41_mpsadbw: 3654 case Intrinsic::x86_sse41_mpsadbw:
2439 case Intrinsic::x86_avx_dp_ps_256: 3655 case Intrinsic::x86_avx_dp_ps_256:
2445 3661
2446 // Replace the last argument with a trunc. 3662 // Replace the last argument with a trunc.
2447 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc"); 3663 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
2448 NewCall = Builder.CreateCall(NewFn, Args); 3664 NewCall = Builder.CreateCall(NewFn, Args);
2449 break; 3665 break;
2450 }
2451
2452 case Intrinsic::x86_avx512_mask_cmp_pd_128:
2453 case Intrinsic::x86_avx512_mask_cmp_pd_256:
2454 case Intrinsic::x86_avx512_mask_cmp_pd_512:
2455 case Intrinsic::x86_avx512_mask_cmp_ps_128:
2456 case Intrinsic::x86_avx512_mask_cmp_ps_256:
2457 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
2458 SmallVector<Value *, 4> Args;
2459 Args.push_back(CI->getArgOperand(0));
2460 Args.push_back(CI->getArgOperand(1));
2461 Args.push_back(CI->getArgOperand(2));
2462 if (CI->getNumArgOperands() == 5)
2463 Args.push_back(CI->getArgOperand(4));
2464
2465 NewCall = Builder.CreateCall(NewFn, Args);
2466 unsigned NumElts = Args[0]->getType()->getVectorNumElements();
2467 Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, CI->getArgOperand(3),
2468 NumElts);
2469
2470 std::string Name = CI->getName();
2471 if (!Name.empty()) {
2472 CI->setName(Name + ".old");
2473 NewCall->setName(Name);
2474 }
2475 CI->replaceAllUsesWith(Res);
2476 CI->eraseFromParent();
2477 return;
2478 } 3666 }
2479 3667
2480 case Intrinsic::thread_pointer: { 3668 case Intrinsic::thread_pointer: {
2481 NewCall = Builder.CreateCall(NewFn, {}); 3669 NewCall = Builder.CreateCall(NewFn, {});
2482 break; 3670 break;
2638 // Diagnose a version mismatch. 3826 // Diagnose a version mismatch.
2639 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); 3827 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
2640 M.getContext().diagnose(DiagVersion); 3828 M.getContext().diagnose(DiagVersion);
2641 } 3829 }
2642 return Modified; 3830 return Modified;
3831 }
3832
3833 /// This checks for objc retain release marker which should be upgraded. It
3834 /// returns true if module is modified.
3835 static bool UpgradeRetainReleaseMarker(Module &M) {
3836 bool Changed = false;
3837 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
3838 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
3839 if (ModRetainReleaseMarker) {
3840 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
3841 if (Op) {
3842 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
3843 if (ID) {
3844 SmallVector<StringRef, 4> ValueComp;
3845 ID->getString().split(ValueComp, "#");
3846 if (ValueComp.size() == 2) {
3847 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
3848 ID = MDString::get(M.getContext(), NewValue);
3849 }
3850 M.addModuleFlag(Module::Error, MarkerKey, ID);
3851 M.eraseNamedMetadata(ModRetainReleaseMarker);
3852 Changed = true;
3853 }
3854 }
3855 }
3856 return Changed;
3857 }
3858
3859 void llvm::UpgradeARCRuntime(Module &M) {
3860 // This lambda converts normal function calls to ARC runtime functions to
3861 // intrinsic calls.
3862 auto UpgradeToIntrinsic = [&](const char *OldFunc,
3863 llvm::Intrinsic::ID IntrinsicFunc) {
3864 Function *Fn = M.getFunction(OldFunc);
3865
3866 if (!Fn)
3867 return;
3868
3869 Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc);
3870
3871 for (auto I = Fn->user_begin(), E = Fn->user_end(); I != E;) {
3872 CallInst *CI = dyn_cast<CallInst>(*I++);
3873 if (!CI || CI->getCalledFunction() != Fn)
3874 continue;
3875
3876 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
3877 FunctionType *NewFuncTy = NewFn->getFunctionType();
3878 SmallVector<Value *, 2> Args;
3879
3880 for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
3881 Value *Arg = CI->getArgOperand(I);
3882 // Bitcast argument to the parameter type of the new function if it's
3883 // not a variadic argument.
3884 if (I < NewFuncTy->getNumParams())
3885 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
3886 Args.push_back(Arg);
3887 }
3888
3889 // Create a call instruction that calls the new function.
3890 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
3891 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
3892 NewCall->setName(CI->getName());
3893
3894 // Bitcast the return value back to the type of the old call.
3895 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
3896
3897 if (!CI->use_empty())
3898 CI->replaceAllUsesWith(NewRetVal);
3899 CI->eraseFromParent();
3900 }
3901
3902 if (Fn->use_empty())
3903 Fn->eraseFromParent();
3904 };
3905
3906 // Unconditionally convert a call to "clang.arc.use" to a call to
3907 // "llvm.objc.clang.arc.use".
3908 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
3909
3910 // Upgrade the retain release marker. If there is no need to upgrade
3911 // the marker, that means either the module is already new enough to contain
3912 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
3913 if (!UpgradeRetainReleaseMarker(M))
3914 return;
3915
3916 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
3917 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
3918 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
3919 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
3920 {"objc_autoreleaseReturnValue",
3921 llvm::Intrinsic::objc_autoreleaseReturnValue},
3922 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
3923 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
3924 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
3925 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
3926 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
3927 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
3928 {"objc_release", llvm::Intrinsic::objc_release},
3929 {"objc_retain", llvm::Intrinsic::objc_retain},
3930 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
3931 {"objc_retainAutoreleaseReturnValue",
3932 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
3933 {"objc_retainAutoreleasedReturnValue",
3934 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
3935 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
3936 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
3937 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
3938 {"objc_unsafeClaimAutoreleasedReturnValue",
3939 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
3940 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
3941 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
3942 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
3943 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
3944 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
3945 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
3946 {"objc_arc_annotation_topdown_bbstart",
3947 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
3948 {"objc_arc_annotation_topdown_bbend",
3949 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
3950 {"objc_arc_annotation_bottomup_bbstart",
3951 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
3952 {"objc_arc_annotation_bottomup_bbend",
3953 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
3954
3955 for (auto &I : RuntimeFuncs)
3956 UpgradeToIntrinsic(I.first, I.second);
2643 } 3957 }
2644 3958
2645 bool llvm::UpgradeModuleFlags(Module &M) { 3959 bool llvm::UpgradeModuleFlags(Module &M) {
2646 NamedMDNode *ModFlags = M.getModuleFlagsMetadata(); 3960 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
2647 if (!ModFlags) 3961 if (!ModFlags)