Mercurial > hg > CbC > CbC_llvm
comparison lib/IR/AutoUpgrade.cpp @ 148:63bd29f05246
merged
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Wed, 14 Aug 2019 19:46:37 +0900 |
parents | c2174574ed3a |
children |
comparison
equal
deleted
inserted
replaced
146:3fc4d5c3e21e | 148:63bd29f05246 |
---|---|
1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===// | 1 //===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===// |
2 // | 2 // |
3 // The LLVM Compiler Infrastructure | 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 // | 4 // See https://llvm.org/LICENSE.txt for license information. |
5 // This file is distributed under the University of Illinois Open Source | 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 // License. See LICENSE.TXT for details. | |
7 // | 6 // |
8 //===----------------------------------------------------------------------===// | 7 //===----------------------------------------------------------------------===// |
9 // | 8 // |
10 // This file implements the auto-upgrade helper functions. | 9 // This file implements the auto-upgrade helper functions. |
11 // This is where deprecated IR intrinsics and other IR features are updated to | 10 // This is where deprecated IR intrinsics and other IR features are updated to |
63 rename(F); | 62 rename(F); |
64 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); | 63 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); |
65 return true; | 64 return true; |
66 } | 65 } |
67 | 66 |
68 // Upgrade the declaration of fp compare intrinsics that change return type | |
69 // from scalar to vXi1 mask. | |
70 static bool UpgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, | |
71 Function *&NewFn) { | |
72 // Check if the return type is a vector. | |
73 if (F->getReturnType()->isVectorTy()) | |
74 return false; | |
75 | |
76 rename(F); | |
77 NewFn = Intrinsic::getDeclaration(F->getParent(), IID); | |
78 return true; | |
79 } | |
80 | |
81 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { | 67 static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) { |
82 // All of the intrinsics matches below should be marked with which llvm | 68 // All of the intrinsics matches below should be marked with which llvm |
83 // version started autoupgrading them. At some point in the future we would | 69 // version started autoupgrading them. At some point in the future we would |
84 // like to use this information to remove upgrade code for some older | 70 // like to use this information to remove upgrade code for some older |
85 // intrinsics. It is currently undecided how we will determine that future | 71 // intrinsics. It is currently undecided how we will determine that future |
86 // point. | 72 // point. |
87 if (Name=="ssse3.pabs.b.128" || // Added in 6.0 | 73 if (Name == "addcarryx.u32" || // Added in 8.0 |
74 Name == "addcarryx.u64" || // Added in 8.0 | |
75 Name == "addcarry.u32" || // Added in 8.0 | |
76 Name == "addcarry.u64" || // Added in 8.0 | |
77 Name == "subborrow.u32" || // Added in 8.0 | |
78 Name == "subborrow.u64" || // Added in 8.0 | |
79 Name.startswith("sse2.padds.") || // Added in 8.0 | |
80 Name.startswith("sse2.psubs.") || // Added in 8.0 | |
81 Name.startswith("sse2.paddus.") || // Added in 8.0 | |
82 Name.startswith("sse2.psubus.") || // Added in 8.0 | |
83 Name.startswith("avx2.padds.") || // Added in 8.0 | |
84 Name.startswith("avx2.psubs.") || // Added in 8.0 | |
85 Name.startswith("avx2.paddus.") || // Added in 8.0 | |
86 Name.startswith("avx2.psubus.") || // Added in 8.0 | |
87 Name.startswith("avx512.padds.") || // Added in 8.0 | |
88 Name.startswith("avx512.psubs.") || // Added in 8.0 | |
89 Name.startswith("avx512.mask.padds.") || // Added in 8.0 | |
90 Name.startswith("avx512.mask.psubs.") || // Added in 8.0 | |
91 Name.startswith("avx512.mask.paddus.") || // Added in 8.0 | |
92 Name.startswith("avx512.mask.psubus.") || // Added in 8.0 | |
93 Name=="ssse3.pabs.b.128" || // Added in 6.0 | |
88 Name=="ssse3.pabs.w.128" || // Added in 6.0 | 94 Name=="ssse3.pabs.w.128" || // Added in 6.0 |
89 Name=="ssse3.pabs.d.128" || // Added in 6.0 | 95 Name=="ssse3.pabs.d.128" || // Added in 6.0 |
96 Name.startswith("fma4.vfmadd.s") || // Added in 7.0 | |
97 Name.startswith("fma.vfmadd.") || // Added in 7.0 | |
98 Name.startswith("fma.vfmsub.") || // Added in 7.0 | |
99 Name.startswith("fma.vfmaddsub.") || // Added in 7.0 | |
100 Name.startswith("fma.vfmsubadd.") || // Added in 7.0 | |
101 Name.startswith("fma.vfnmadd.") || // Added in 7.0 | |
102 Name.startswith("fma.vfnmsub.") || // Added in 7.0 | |
103 Name.startswith("avx512.mask.vfmadd.") || // Added in 7.0 | |
104 Name.startswith("avx512.mask.vfnmadd.") || // Added in 7.0 | |
105 Name.startswith("avx512.mask.vfnmsub.") || // Added in 7.0 | |
106 Name.startswith("avx512.mask3.vfmadd.") || // Added in 7.0 | |
107 Name.startswith("avx512.maskz.vfmadd.") || // Added in 7.0 | |
108 Name.startswith("avx512.mask3.vfmsub.") || // Added in 7.0 | |
109 Name.startswith("avx512.mask3.vfnmsub.") || // Added in 7.0 | |
110 Name.startswith("avx512.mask.vfmaddsub.") || // Added in 7.0 | |
111 Name.startswith("avx512.maskz.vfmaddsub.") || // Added in 7.0 | |
112 Name.startswith("avx512.mask3.vfmaddsub.") || // Added in 7.0 | |
113 Name.startswith("avx512.mask3.vfmsubadd.") || // Added in 7.0 | |
90 Name.startswith("avx512.mask.shuf.i") || // Added in 6.0 | 114 Name.startswith("avx512.mask.shuf.i") || // Added in 6.0 |
91 Name.startswith("avx512.mask.shuf.f") || // Added in 6.0 | 115 Name.startswith("avx512.mask.shuf.f") || // Added in 6.0 |
92 Name.startswith("avx512.kunpck") || //added in 6.0 | 116 Name.startswith("avx512.kunpck") || //added in 6.0 |
93 Name.startswith("avx2.pabs.") || // Added in 6.0 | 117 Name.startswith("avx2.pabs.") || // Added in 6.0 |
94 Name.startswith("avx512.mask.pabs.") || // Added in 6.0 | 118 Name.startswith("avx512.mask.pabs.") || // Added in 6.0 |
95 Name.startswith("avx512.broadcastm") || // Added in 6.0 | 119 Name.startswith("avx512.broadcastm") || // Added in 6.0 |
120 Name == "sse.sqrt.ss" || // Added in 7.0 | |
121 Name == "sse2.sqrt.sd" || // Added in 7.0 | |
122 Name.startswith("avx512.mask.sqrt.p") || // Added in 7.0 | |
123 Name.startswith("avx.sqrt.p") || // Added in 7.0 | |
124 Name.startswith("sse2.sqrt.p") || // Added in 7.0 | |
125 Name.startswith("sse.sqrt.p") || // Added in 7.0 | |
96 Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0 | 126 Name.startswith("avx512.mask.pbroadcast") || // Added in 6.0 |
97 Name.startswith("sse2.pcmpeq.") || // Added in 3.1 | 127 Name.startswith("sse2.pcmpeq.") || // Added in 3.1 |
98 Name.startswith("sse2.pcmpgt.") || // Added in 3.1 | 128 Name.startswith("sse2.pcmpgt.") || // Added in 3.1 |
99 Name.startswith("avx2.pcmpeq.") || // Added in 3.1 | 129 Name.startswith("avx2.pcmpeq.") || // Added in 3.1 |
100 Name.startswith("avx2.pcmpgt.") || // Added in 3.1 | 130 Name.startswith("avx2.pcmpgt.") || // Added in 3.1 |
166 Name.startswith("avx512.mask.padd.") || // Added in 4.0 | 196 Name.startswith("avx512.mask.padd.") || // Added in 4.0 |
167 Name.startswith("avx512.mask.psub.") || // Added in 4.0 | 197 Name.startswith("avx512.mask.psub.") || // Added in 4.0 |
168 Name.startswith("avx512.mask.pmull.") || // Added in 4.0 | 198 Name.startswith("avx512.mask.pmull.") || // Added in 4.0 |
169 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0 | 199 Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0 |
170 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0 | 200 Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0 |
201 Name.startswith("avx512.mask.cvtudq2ps.") || // Added in 7.0 updated 9.0 | |
202 Name.startswith("avx512.mask.cvtqq2pd.") || // Added in 7.0 updated 9.0 | |
203 Name.startswith("avx512.mask.cvtuqq2pd.") || // Added in 7.0 updated 9.0 | |
204 Name.startswith("avx512.mask.cvtdq2ps.") || // Added in 7.0 updated 9.0 | |
205 Name == "avx512.mask.cvtqq2ps.256" || // Added in 9.0 | |
206 Name == "avx512.mask.cvtqq2ps.512" || // Added in 9.0 | |
207 Name == "avx512.mask.cvtuqq2ps.256" || // Added in 9.0 | |
208 Name == "avx512.mask.cvtuqq2ps.512" || // Added in 9.0 | |
209 Name == "avx512.mask.cvtpd2dq.256" || // Added in 7.0 | |
210 Name == "avx512.mask.cvtpd2ps.256" || // Added in 7.0 | |
211 Name == "avx512.mask.cvttpd2dq.256" || // Added in 7.0 | |
212 Name == "avx512.mask.cvttps2dq.128" || // Added in 7.0 | |
213 Name == "avx512.mask.cvttps2dq.256" || // Added in 7.0 | |
214 Name == "avx512.mask.cvtps2pd.128" || // Added in 7.0 | |
215 Name == "avx512.mask.cvtps2pd.256" || // Added in 7.0 | |
216 Name == "avx512.cvtusi2sd" || // Added in 7.0 | |
217 Name.startswith("avx512.mask.permvar.") || // Added in 7.0 | |
218 Name == "sse2.pmulu.dq" || // Added in 7.0 | |
219 Name == "sse41.pmuldq" || // Added in 7.0 | |
220 Name == "avx2.pmulu.dq" || // Added in 7.0 | |
221 Name == "avx2.pmul.dq" || // Added in 7.0 | |
222 Name == "avx512.pmulu.dq.512" || // Added in 7.0 | |
223 Name == "avx512.pmul.dq.512" || // Added in 7.0 | |
171 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0 | 224 Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0 |
172 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0 | 225 Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0 |
226 Name.startswith("avx512.mask.pmul.hr.sw.") || // Added in 7.0 | |
227 Name.startswith("avx512.mask.pmulh.w.") || // Added in 7.0 | |
228 Name.startswith("avx512.mask.pmulhu.w.") || // Added in 7.0 | |
229 Name.startswith("avx512.mask.pmaddw.d.") || // Added in 7.0 | |
230 Name.startswith("avx512.mask.pmaddubs.w.") || // Added in 7.0 | |
173 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0 | 231 Name.startswith("avx512.mask.packsswb.") || // Added in 5.0 |
174 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0 | 232 Name.startswith("avx512.mask.packssdw.") || // Added in 5.0 |
175 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0 | 233 Name.startswith("avx512.mask.packuswb.") || // Added in 5.0 |
176 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0 | 234 Name.startswith("avx512.mask.packusdw.") || // Added in 5.0 |
177 Name.startswith("avx512.mask.cmp.b") || // Added in 5.0 | 235 Name.startswith("avx512.mask.cmp.b") || // Added in 5.0 |
178 Name.startswith("avx512.mask.cmp.d") || // Added in 5.0 | 236 Name.startswith("avx512.mask.cmp.d") || // Added in 5.0 |
179 Name.startswith("avx512.mask.cmp.q") || // Added in 5.0 | 237 Name.startswith("avx512.mask.cmp.q") || // Added in 5.0 |
180 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0 | 238 Name.startswith("avx512.mask.cmp.w") || // Added in 5.0 |
239 Name.startswith("avx512.mask.cmp.p") || // Added in 7.0 | |
181 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0 | 240 Name.startswith("avx512.mask.ucmp.") || // Added in 5.0 |
182 Name.startswith("avx512.cvtb2mask.") || // Added in 7.0 | 241 Name.startswith("avx512.cvtb2mask.") || // Added in 7.0 |
183 Name.startswith("avx512.cvtw2mask.") || // Added in 7.0 | 242 Name.startswith("avx512.cvtw2mask.") || // Added in 7.0 |
184 Name.startswith("avx512.cvtd2mask.") || // Added in 7.0 | 243 Name.startswith("avx512.cvtd2mask.") || // Added in 7.0 |
185 Name.startswith("avx512.cvtq2mask.") || // Added in 7.0 | 244 Name.startswith("avx512.cvtq2mask.") || // Added in 7.0 |
186 Name == "avx512.mask.add.pd.128" || // Added in 4.0 | |
187 Name == "avx512.mask.add.pd.256" || // Added in 4.0 | |
188 Name == "avx512.mask.add.ps.128" || // Added in 4.0 | |
189 Name == "avx512.mask.add.ps.256" || // Added in 4.0 | |
190 Name == "avx512.mask.div.pd.128" || // Added in 4.0 | |
191 Name == "avx512.mask.div.pd.256" || // Added in 4.0 | |
192 Name == "avx512.mask.div.ps.128" || // Added in 4.0 | |
193 Name == "avx512.mask.div.ps.256" || // Added in 4.0 | |
194 Name == "avx512.mask.mul.pd.128" || // Added in 4.0 | |
195 Name == "avx512.mask.mul.pd.256" || // Added in 4.0 | |
196 Name == "avx512.mask.mul.ps.128" || // Added in 4.0 | |
197 Name == "avx512.mask.mul.ps.256" || // Added in 4.0 | |
198 Name == "avx512.mask.sub.pd.128" || // Added in 4.0 | |
199 Name == "avx512.mask.sub.pd.256" || // Added in 4.0 | |
200 Name == "avx512.mask.sub.ps.128" || // Added in 4.0 | |
201 Name == "avx512.mask.sub.ps.256" || // Added in 4.0 | |
202 Name == "avx512.mask.max.pd.128" || // Added in 5.0 | |
203 Name == "avx512.mask.max.pd.256" || // Added in 5.0 | |
204 Name == "avx512.mask.max.ps.128" || // Added in 5.0 | |
205 Name == "avx512.mask.max.ps.256" || // Added in 5.0 | |
206 Name == "avx512.mask.min.pd.128" || // Added in 5.0 | |
207 Name == "avx512.mask.min.pd.256" || // Added in 5.0 | |
208 Name == "avx512.mask.min.ps.128" || // Added in 5.0 | |
209 Name == "avx512.mask.min.ps.256" || // Added in 5.0 | |
210 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0 | 245 Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0 |
211 Name.startswith("avx512.mask.psll.d") || // Added in 4.0 | 246 Name.startswith("avx512.mask.psll.d") || // Added in 4.0 |
212 Name.startswith("avx512.mask.psll.q") || // Added in 4.0 | 247 Name.startswith("avx512.mask.psll.q") || // Added in 4.0 |
213 Name.startswith("avx512.mask.psll.w") || // Added in 4.0 | 248 Name.startswith("avx512.mask.psll.w") || // Added in 4.0 |
214 Name.startswith("avx512.mask.psra.d") || // Added in 4.0 | 249 Name.startswith("avx512.mask.psra.d") || // Added in 4.0 |
228 Name.startswith("avx2.pmovsx") || // Added in 3.9 | 263 Name.startswith("avx2.pmovsx") || // Added in 3.9 |
229 Name.startswith("avx2.pmovzx") || // Added in 3.9 | 264 Name.startswith("avx2.pmovzx") || // Added in 3.9 |
230 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0 | 265 Name.startswith("avx512.mask.pmovsx") || // Added in 4.0 |
231 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0 | 266 Name.startswith("avx512.mask.pmovzx") || // Added in 4.0 |
232 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0 | 267 Name.startswith("avx512.mask.lzcnt.") || // Added in 5.0 |
268 Name.startswith("avx512.mask.pternlog.") || // Added in 7.0 | |
269 Name.startswith("avx512.maskz.pternlog.") || // Added in 7.0 | |
270 Name.startswith("avx512.mask.vpmadd52") || // Added in 7.0 | |
271 Name.startswith("avx512.maskz.vpmadd52") || // Added in 7.0 | |
272 Name.startswith("avx512.mask.vpermi2var.") || // Added in 7.0 | |
273 Name.startswith("avx512.mask.vpermt2var.") || // Added in 7.0 | |
274 Name.startswith("avx512.maskz.vpermt2var.") || // Added in 7.0 | |
275 Name.startswith("avx512.mask.vpdpbusd.") || // Added in 7.0 | |
276 Name.startswith("avx512.maskz.vpdpbusd.") || // Added in 7.0 | |
277 Name.startswith("avx512.mask.vpdpbusds.") || // Added in 7.0 | |
278 Name.startswith("avx512.maskz.vpdpbusds.") || // Added in 7.0 | |
279 Name.startswith("avx512.mask.vpdpwssd.") || // Added in 7.0 | |
280 Name.startswith("avx512.maskz.vpdpwssd.") || // Added in 7.0 | |
281 Name.startswith("avx512.mask.vpdpwssds.") || // Added in 7.0 | |
282 Name.startswith("avx512.maskz.vpdpwssds.") || // Added in 7.0 | |
283 Name.startswith("avx512.mask.dbpsadbw.") || // Added in 7.0 | |
284 Name.startswith("avx512.mask.vpshld.") || // Added in 7.0 | |
285 Name.startswith("avx512.mask.vpshrd.") || // Added in 7.0 | |
286 Name.startswith("avx512.mask.vpshldv.") || // Added in 8.0 | |
287 Name.startswith("avx512.mask.vpshrdv.") || // Added in 8.0 | |
288 Name.startswith("avx512.maskz.vpshldv.") || // Added in 8.0 | |
289 Name.startswith("avx512.maskz.vpshrdv.") || // Added in 8.0 | |
290 Name.startswith("avx512.vpshld.") || // Added in 8.0 | |
291 Name.startswith("avx512.vpshrd.") || // Added in 8.0 | |
292 Name.startswith("avx512.mask.add.p") || // Added in 7.0. 128/256 in 4.0 | |
293 Name.startswith("avx512.mask.sub.p") || // Added in 7.0. 128/256 in 4.0 | |
294 Name.startswith("avx512.mask.mul.p") || // Added in 7.0. 128/256 in 4.0 | |
295 Name.startswith("avx512.mask.div.p") || // Added in 7.0. 128/256 in 4.0 | |
296 Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0 | |
297 Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0 | |
298 Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0 | |
299 Name.startswith("avx512.mask.vpshufbitqmb.") || // Added in 8.0 | |
300 Name.startswith("avx512.mask.pmultishift.qb.") || // Added in 8.0 | |
301 Name.startswith("avx512.mask.conflict.") || // Added in 9.0 | |
302 Name == "avx512.mask.pmov.qd.256" || // Added in 9.0 | |
303 Name == "avx512.mask.pmov.qd.512" || // Added in 9.0 | |
304 Name == "avx512.mask.pmov.wb.256" || // Added in 9.0 | |
305 Name == "avx512.mask.pmov.wb.512" || // Added in 9.0 | |
306 Name == "sse.cvtsi2ss" || // Added in 7.0 | |
307 Name == "sse.cvtsi642ss" || // Added in 7.0 | |
308 Name == "sse2.cvtsi2sd" || // Added in 7.0 | |
309 Name == "sse2.cvtsi642sd" || // Added in 7.0 | |
310 Name == "sse2.cvtss2sd" || // Added in 7.0 | |
233 Name == "sse2.cvtdq2pd" || // Added in 3.9 | 311 Name == "sse2.cvtdq2pd" || // Added in 3.9 |
312 Name == "sse2.cvtdq2ps" || // Added in 7.0 | |
234 Name == "sse2.cvtps2pd" || // Added in 3.9 | 313 Name == "sse2.cvtps2pd" || // Added in 3.9 |
235 Name == "avx.cvtdq2.pd.256" || // Added in 3.9 | 314 Name == "avx.cvtdq2.pd.256" || // Added in 3.9 |
315 Name == "avx.cvtdq2.ps.256" || // Added in 7.0 | |
236 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9 | 316 Name == "avx.cvt.ps2.pd.256" || // Added in 3.9 |
237 Name.startswith("avx.vinsertf128.") || // Added in 3.7 | 317 Name.startswith("avx.vinsertf128.") || // Added in 3.7 |
238 Name == "avx2.vinserti128" || // Added in 3.7 | 318 Name == "avx2.vinserti128" || // Added in 3.7 |
239 Name.startswith("avx512.mask.insert") || // Added in 4.0 | 319 Name.startswith("avx512.mask.insert") || // Added in 4.0 |
240 Name.startswith("avx.vextractf128.") || // Added in 3.7 | 320 Name.startswith("avx.vextractf128.") || // Added in 3.7 |
254 Name.startswith("avx512.mask.store.p") || // Added in 3.9 | 334 Name.startswith("avx512.mask.store.p") || // Added in 3.9 |
255 Name.startswith("avx512.mask.store.b.") || // Added in 3.9 | 335 Name.startswith("avx512.mask.store.b.") || // Added in 3.9 |
256 Name.startswith("avx512.mask.store.w.") || // Added in 3.9 | 336 Name.startswith("avx512.mask.store.w.") || // Added in 3.9 |
257 Name.startswith("avx512.mask.store.d.") || // Added in 3.9 | 337 Name.startswith("avx512.mask.store.d.") || // Added in 3.9 |
258 Name.startswith("avx512.mask.store.q.") || // Added in 3.9 | 338 Name.startswith("avx512.mask.store.q.") || // Added in 3.9 |
339 Name == "avx512.mask.store.ss" || // Added in 7.0 | |
259 Name.startswith("avx512.mask.loadu.") || // Added in 3.9 | 340 Name.startswith("avx512.mask.loadu.") || // Added in 3.9 |
260 Name.startswith("avx512.mask.load.") || // Added in 3.9 | 341 Name.startswith("avx512.mask.load.") || // Added in 3.9 |
342 Name.startswith("avx512.mask.expand.load.") || // Added in 7.0 | |
343 Name.startswith("avx512.mask.compress.store.") || // Added in 7.0 | |
344 Name.startswith("avx512.mask.expand.b") || // Added in 9.0 | |
345 Name.startswith("avx512.mask.expand.w") || // Added in 9.0 | |
346 Name.startswith("avx512.mask.expand.d") || // Added in 9.0 | |
347 Name.startswith("avx512.mask.expand.q") || // Added in 9.0 | |
348 Name.startswith("avx512.mask.expand.p") || // Added in 9.0 | |
349 Name.startswith("avx512.mask.compress.b") || // Added in 9.0 | |
350 Name.startswith("avx512.mask.compress.w") || // Added in 9.0 | |
351 Name.startswith("avx512.mask.compress.d") || // Added in 9.0 | |
352 Name.startswith("avx512.mask.compress.q") || // Added in 9.0 | |
353 Name.startswith("avx512.mask.compress.p") || // Added in 9.0 | |
261 Name == "sse42.crc32.64.8" || // Added in 3.4 | 354 Name == "sse42.crc32.64.8" || // Added in 3.4 |
262 Name.startswith("avx.vbroadcast.s") || // Added in 3.5 | 355 Name.startswith("avx.vbroadcast.s") || // Added in 3.5 |
356 Name.startswith("avx512.vbroadcast.s") || // Added in 7.0 | |
263 Name.startswith("avx512.mask.palignr.") || // Added in 3.9 | 357 Name.startswith("avx512.mask.palignr.") || // Added in 3.9 |
264 Name.startswith("avx512.mask.valign.") || // Added in 4.0 | 358 Name.startswith("avx512.mask.valign.") || // Added in 4.0 |
265 Name.startswith("sse2.psll.dq") || // Added in 3.7 | 359 Name.startswith("sse2.psll.dq") || // Added in 3.7 |
266 Name.startswith("sse2.psrl.dq") || // Added in 3.7 | 360 Name.startswith("sse2.psrl.dq") || // Added in 3.7 |
267 Name.startswith("avx2.psll.dq") || // Added in 3.7 | 361 Name.startswith("avx2.psll.dq") || // Added in 3.7 |
279 Name.startswith("avx512.mask.broadcasti") || // Added in 6.0 | 373 Name.startswith("avx512.mask.broadcasti") || // Added in 6.0 |
280 Name == "xop.vpcmov" || // Added in 3.8 | 374 Name == "xop.vpcmov" || // Added in 3.8 |
281 Name == "xop.vpcmov.256" || // Added in 5.0 | 375 Name == "xop.vpcmov.256" || // Added in 5.0 |
282 Name.startswith("avx512.mask.move.s") || // Added in 4.0 | 376 Name.startswith("avx512.mask.move.s") || // Added in 4.0 |
283 Name.startswith("avx512.cvtmask2") || // Added in 5.0 | 377 Name.startswith("avx512.cvtmask2") || // Added in 5.0 |
284 (Name.startswith("xop.vpcom") && // Added in 3.2 | 378 Name.startswith("xop.vpcom") || // Added in 3.2, Updated in 9.0 |
285 F->arg_size() == 2) || | 379 Name.startswith("xop.vprot") || // Added in 8.0 |
380 Name.startswith("avx512.prol") || // Added in 8.0 | |
381 Name.startswith("avx512.pror") || // Added in 8.0 | |
382 Name.startswith("avx512.mask.prorv.") || // Added in 8.0 | |
383 Name.startswith("avx512.mask.pror.") || // Added in 8.0 | |
384 Name.startswith("avx512.mask.prolv.") || // Added in 8.0 | |
385 Name.startswith("avx512.mask.prol.") || // Added in 8.0 | |
286 Name.startswith("avx512.ptestm") || //Added in 6.0 | 386 Name.startswith("avx512.ptestm") || //Added in 6.0 |
287 Name.startswith("avx512.ptestnm") || //Added in 6.0 | 387 Name.startswith("avx512.ptestnm") || //Added in 6.0 |
288 Name.startswith("sse2.pavg") || // Added in 6.0 | |
289 Name.startswith("avx2.pavg") || // Added in 6.0 | |
290 Name.startswith("avx512.mask.pavg")) // Added in 6.0 | 388 Name.startswith("avx512.mask.pavg")) // Added in 6.0 |
291 return true; | 389 return true; |
292 | 390 |
293 return false; | 391 return false; |
294 } | 392 } |
301 // Remove "x86." prefix. | 399 // Remove "x86." prefix. |
302 Name = Name.substr(4); | 400 Name = Name.substr(4); |
303 | 401 |
304 if (ShouldUpgradeX86Intrinsic(F, Name)) { | 402 if (ShouldUpgradeX86Intrinsic(F, Name)) { |
305 NewFn = nullptr; | 403 NewFn = nullptr; |
404 return true; | |
405 } | |
406 | |
407 if (Name == "rdtscp") { // Added in 8.0 | |
408 // If this intrinsic has 0 operands, it's the new version. | |
409 if (F->getFunctionType()->getNumParams() == 0) | |
410 return false; | |
411 | |
412 rename(F); | |
413 NewFn = Intrinsic::getDeclaration(F->getParent(), | |
414 Intrinsic::x86_rdtscp); | |
306 return true; | 415 return true; |
307 } | 416 } |
308 | 417 |
309 // SSE4.1 ptest functions may have an old signature. | 418 // SSE4.1 ptest functions may have an old signature. |
310 if (Name.startswith("sse41.ptest")) { // Added in 3.2 | 419 if (Name.startswith("sse41.ptest")) { // Added in 3.2 |
333 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256, | 442 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256, |
334 NewFn); | 443 NewFn); |
335 if (Name == "avx2.mpsadbw") // Added in 3.6 | 444 if (Name == "avx2.mpsadbw") // Added in 3.6 |
336 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, | 445 return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw, |
337 NewFn); | 446 NewFn); |
338 if (Name == "avx512.mask.cmp.pd.128") // Added in 7.0 | |
339 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_128, | |
340 NewFn); | |
341 if (Name == "avx512.mask.cmp.pd.256") // Added in 7.0 | |
342 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_256, | |
343 NewFn); | |
344 if (Name == "avx512.mask.cmp.pd.512") // Added in 7.0 | |
345 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_pd_512, | |
346 NewFn); | |
347 if (Name == "avx512.mask.cmp.ps.128") // Added in 7.0 | |
348 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_128, | |
349 NewFn); | |
350 if (Name == "avx512.mask.cmp.ps.256") // Added in 7.0 | |
351 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_256, | |
352 NewFn); | |
353 if (Name == "avx512.mask.cmp.ps.512") // Added in 7.0 | |
354 return UpgradeX86MaskedFPCompare(F, Intrinsic::x86_avx512_mask_cmp_ps_512, | |
355 NewFn); | |
356 | 447 |
357 // frcz.ss/sd may need to have an argument dropped. Added in 3.2 | 448 // frcz.ss/sd may need to have an argument dropped. Added in 3.2 |
358 if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) { | 449 if (Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) { |
359 rename(F); | 450 rename(F); |
360 NewFn = Intrinsic::getDeclaration(F->getParent(), | 451 NewFn = Intrinsic::getDeclaration(F->getParent(), |
386 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID); | 477 NewFn = Intrinsic::getDeclaration(F->getParent(), Permil2ID); |
387 return true; | 478 return true; |
388 } | 479 } |
389 } | 480 } |
390 | 481 |
482 if (Name == "seh.recoverfp") { | |
483 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_recoverfp); | |
484 return true; | |
485 } | |
486 | |
391 return false; | 487 return false; |
392 } | 488 } |
393 | 489 |
394 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { | 490 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { |
395 assert(F && "Illegal to upgrade a non-existent Function."); | 491 assert(F && "Illegal to upgrade a non-existent Function."); |
415 }; | 511 }; |
416 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to | 512 // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to |
417 // the end of the name. Change name from llvm.arm.neon.vclz.* to | 513 // the end of the name. Change name from llvm.arm.neon.vclz.* to |
418 // llvm.ctlz.* | 514 // llvm.ctlz.* |
419 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false); | 515 FunctionType* fType = FunctionType::get(F->getReturnType(), args, false); |
420 NewFn = Function::Create(fType, F->getLinkage(), | 516 NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(), |
421 "llvm.ctlz." + Name.substr(14), F->getParent()); | 517 "llvm.ctlz." + Name.substr(14), F->getParent()); |
422 return true; | 518 return true; |
423 } | 519 } |
424 if (Name.startswith("arm.neon.vcnt")) { | 520 if (Name.startswith("arm.neon.vcnt")) { |
425 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, | 521 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctpop, |
431 auto fArgs = F->getFunctionType()->params(); | 527 auto fArgs = F->getFunctionType()->params(); |
432 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end()); | 528 SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end()); |
433 // Can't use Intrinsic::getDeclaration here as the return types might | 529 // Can't use Intrinsic::getDeclaration here as the return types might |
434 // then only be structurally equal. | 530 // then only be structurally equal. |
435 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false); | 531 FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false); |
436 NewFn = Function::Create(fType, F->getLinkage(), | 532 NewFn = Function::Create(fType, F->getLinkage(), F->getAddressSpace(), |
437 "llvm." + Name + ".p0i8", F->getParent()); | 533 "llvm." + Name + ".p0i8", F->getParent()); |
438 return true; | 534 return true; |
439 } | 535 } |
440 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); | 536 Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$"); |
441 if (vstRegex.match(Name)) { | 537 if (vstRegex.match(Name)) { |
461 } | 557 } |
462 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") { | 558 if (Name == "aarch64.thread.pointer" || Name == "arm.thread.pointer") { |
463 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer); | 559 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::thread_pointer); |
464 return true; | 560 return true; |
465 } | 561 } |
562 if (Name.startswith("aarch64.neon.addp")) { | |
563 if (F->arg_size() != 2) | |
564 break; // Invalid IR. | |
565 auto fArgs = F->getFunctionType()->params(); | |
566 VectorType *ArgTy = dyn_cast<VectorType>(fArgs[0]); | |
567 if (ArgTy && ArgTy->getElementType()->isFloatingPointTy()) { | |
568 NewFn = Intrinsic::getDeclaration(F->getParent(), | |
569 Intrinsic::aarch64_neon_faddp, fArgs); | |
570 return true; | |
571 } | |
572 } | |
466 break; | 573 break; |
467 } | 574 } |
468 | 575 |
469 case 'c': { | 576 case 'c': { |
470 if (Name.startswith("ctlz.") && F->arg_size() == 1) { | 577 if (Name.startswith("ctlz.") && F->arg_size() == 1) { |
487 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value); | 594 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value); |
488 return true; | 595 return true; |
489 } | 596 } |
490 break; | 597 break; |
491 } | 598 } |
599 case 'e': { | |
600 SmallVector<StringRef, 2> Groups; | |
601 Regex R("^experimental.vector.reduce.([a-z]+)\\.[fi][0-9]+"); | |
602 if (R.match(Name, &Groups)) { | |
603 Intrinsic::ID ID = Intrinsic::not_intrinsic; | |
604 if (Groups[1] == "fadd") | |
605 ID = Intrinsic::experimental_vector_reduce_v2_fadd; | |
606 if (Groups[1] == "fmul") | |
607 ID = Intrinsic::experimental_vector_reduce_v2_fmul; | |
608 | |
609 if (ID != Intrinsic::not_intrinsic) { | |
610 rename(F); | |
611 auto Args = F->getFunctionType()->params(); | |
612 Type *Tys[] = {F->getFunctionType()->getReturnType(), Args[1]}; | |
613 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Tys); | |
614 return true; | |
615 } | |
616 } | |
617 break; | |
618 } | |
492 case 'i': | 619 case 'i': |
493 case 'l': { | 620 case 'l': { |
494 bool IsLifetimeStart = Name.startswith("lifetime.start"); | 621 bool IsLifetimeStart = Name.startswith("lifetime.start"); |
495 if (IsLifetimeStart || Name.startswith("invariant.start")) { | 622 if (IsLifetimeStart || Name.startswith("invariant.start")) { |
496 Intrinsic::ID ID = IsLifetimeStart ? | 623 Intrinsic::ID ID = IsLifetimeStart ? |
515 rename(F); | 642 rename(F); |
516 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr); | 643 NewFn = Intrinsic::getDeclaration(F->getParent(), ID, ObjectPtr); |
517 return true; | 644 return true; |
518 } | 645 } |
519 } | 646 } |
647 if (Name.startswith("invariant.group.barrier")) { | |
648 // Rename invariant.group.barrier to launder.invariant.group | |
649 auto Args = F->getFunctionType()->params(); | |
650 Type* ObjectPtr[1] = {Args[0]}; | |
651 rename(F); | |
652 NewFn = Intrinsic::getDeclaration(F->getParent(), | |
653 Intrinsic::launder_invariant_group, ObjectPtr); | |
654 return true; | |
655 | |
656 } | |
657 | |
520 break; | 658 break; |
521 } | 659 } |
522 case 'm': { | 660 case 'm': { |
523 if (Name.startswith("masked.load.")) { | 661 if (Name.startswith("masked.load.")) { |
524 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() }; | 662 Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() }; |
618 bool Expand = StringSwitch<bool>(Name) | 756 bool Expand = StringSwitch<bool>(Name) |
619 .Cases("abs.i", "abs.ll", true) | 757 .Cases("abs.i", "abs.ll", true) |
620 .Cases("clz.ll", "popc.ll", "h2f", true) | 758 .Cases("clz.ll", "popc.ll", "h2f", true) |
621 .Cases("max.i", "max.ll", "max.ui", "max.ull", true) | 759 .Cases("max.i", "max.ll", "max.ui", "max.ull", true) |
622 .Cases("min.i", "min.ll", "min.ui", "min.ull", true) | 760 .Cases("min.i", "min.ll", "min.ui", "min.ull", true) |
761 .StartsWith("atomic.load.add.f32.p", true) | |
762 .StartsWith("atomic.load.add.f64.p", true) | |
623 .Default(false); | 763 .Default(false); |
624 if (Expand) { | 764 if (Expand) { |
625 NewFn = nullptr; | 765 NewFn = nullptr; |
626 return true; | 766 return true; |
627 } | 767 } |
631 case 'o': | 771 case 'o': |
632 // We only need to change the name to match the mangling including the | 772 // We only need to change the name to match the mangling including the |
633 // address space. | 773 // address space. |
634 if (Name.startswith("objectsize.")) { | 774 if (Name.startswith("objectsize.")) { |
635 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() }; | 775 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() }; |
636 if (F->arg_size() == 2 || | 776 if (F->arg_size() == 2 || F->arg_size() == 3 || |
637 F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) { | 777 F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) { |
638 rename(F); | 778 rename(F); |
639 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize, | 779 NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::objectsize, |
640 Tys); | 780 Tys); |
641 return true; | 781 return true; |
642 } | 782 } |
643 } | 783 } |
644 break; | 784 break; |
645 | 785 |
786 case 'p': | |
787 if (Name == "prefetch") { | |
788 // Handle address space overloading. | |
789 Type *Tys[] = {F->arg_begin()->getType()}; | |
790 if (F->getName() != Intrinsic::getName(Intrinsic::prefetch, Tys)) { | |
791 rename(F); | |
792 NewFn = | |
793 Intrinsic::getDeclaration(F->getParent(), Intrinsic::prefetch, Tys); | |
794 return true; | |
795 } | |
796 } | |
797 break; | |
798 | |
646 case 's': | 799 case 's': |
647 if (Name == "stackprotectorcheck") { | 800 if (Name == "stackprotectorcheck") { |
648 NewFn = nullptr; | 801 NewFn = nullptr; |
649 return true; | 802 return true; |
650 } | 803 } |
679 if (Intrinsic::ID id = F->getIntrinsicID()) | 832 if (Intrinsic::ID id = F->getIntrinsicID()) |
680 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id)); | 833 F->setAttributes(Intrinsic::getAttributes(F->getContext(), id)); |
681 return Upgraded; | 834 return Upgraded; |
682 } | 835 } |
683 | 836 |
684 bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) { | 837 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) { |
685 // Nothing to do yet. | 838 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" || |
686 return false; | 839 GV->getName() == "llvm.global_dtors")) || |
840 !GV->hasInitializer()) | |
841 return nullptr; | |
842 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType()); | |
843 if (!ATy) | |
844 return nullptr; | |
845 StructType *STy = dyn_cast<StructType>(ATy->getElementType()); | |
846 if (!STy || STy->getNumElements() != 2) | |
847 return nullptr; | |
848 | |
849 LLVMContext &C = GV->getContext(); | |
850 IRBuilder<> IRB(C); | |
851 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1), | |
852 IRB.getInt8PtrTy()); | |
853 Constant *Init = GV->getInitializer(); | |
854 unsigned N = Init->getNumOperands(); | |
855 std::vector<Constant *> NewCtors(N); | |
856 for (unsigned i = 0; i != N; ++i) { | |
857 auto Ctor = cast<Constant>(Init->getOperand(i)); | |
858 NewCtors[i] = ConstantStruct::get( | |
859 EltTy, Ctor->getAggregateElement(0u), Ctor->getAggregateElement(1), | |
860 Constant::getNullValue(IRB.getInt8PtrTy())); | |
861 } | |
862 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors); | |
863 | |
864 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(), | |
865 NewInit, GV->getName()); | |
687 } | 866 } |
688 | 867 |
689 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them | 868 // Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them |
690 // to byte shuffles. | 869 // to byte shuffles. |
691 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, | 870 static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, |
774 return Mask; | 953 return Mask; |
775 } | 954 } |
776 | 955 |
777 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask, | 956 static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask, |
778 Value *Op0, Value *Op1) { | 957 Value *Op0, Value *Op1) { |
779 // If the mask is all ones just emit the align operation. | 958 // If the mask is all ones just emit the first operation. |
780 if (const auto *C = dyn_cast<Constant>(Mask)) | 959 if (const auto *C = dyn_cast<Constant>(Mask)) |
781 if (C->isAllOnesValue()) | 960 if (C->isAllOnesValue()) |
782 return Op0; | 961 return Op0; |
783 | 962 |
784 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements()); | 963 Mask = getX86MaskVec(Builder, Mask, Op0->getType()->getVectorNumElements()); |
964 return Builder.CreateSelect(Mask, Op0, Op1); | |
965 } | |
966 | |
967 static Value *EmitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, | |
968 Value *Op0, Value *Op1) { | |
969 // If the mask is all ones just emit the first operation. | |
970 if (const auto *C = dyn_cast<Constant>(Mask)) | |
971 if (C->isAllOnesValue()) | |
972 return Op0; | |
973 | |
974 llvm::VectorType *MaskTy = | |
975 llvm::VectorType::get(Builder.getInt1Ty(), | |
976 Mask->getType()->getIntegerBitWidth()); | |
977 Mask = Builder.CreateBitCast(Mask, MaskTy); | |
978 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0); | |
785 return Builder.CreateSelect(Mask, Op0, Op1); | 979 return Builder.CreateSelect(Mask, Op0, Op1); |
786 } | 980 } |
787 | 981 |
788 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics. | 982 // Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics. |
789 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate | 983 // PALIGNR handles large immediates by shifting while VALIGN masks the immediate |
832 "palignr"); | 1026 "palignr"); |
833 | 1027 |
834 return EmitX86Select(Builder, Mask, Align, Passthru); | 1028 return EmitX86Select(Builder, Mask, Align, Passthru); |
835 } | 1029 } |
836 | 1030 |
1031 static Value *UpgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallInst &CI, | |
1032 bool ZeroMask, bool IndexForm) { | |
1033 Type *Ty = CI.getType(); | |
1034 unsigned VecWidth = Ty->getPrimitiveSizeInBits(); | |
1035 unsigned EltWidth = Ty->getScalarSizeInBits(); | |
1036 bool IsFloat = Ty->isFPOrFPVectorTy(); | |
1037 Intrinsic::ID IID; | |
1038 if (VecWidth == 128 && EltWidth == 32 && IsFloat) | |
1039 IID = Intrinsic::x86_avx512_vpermi2var_ps_128; | |
1040 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat) | |
1041 IID = Intrinsic::x86_avx512_vpermi2var_d_128; | |
1042 else if (VecWidth == 128 && EltWidth == 64 && IsFloat) | |
1043 IID = Intrinsic::x86_avx512_vpermi2var_pd_128; | |
1044 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat) | |
1045 IID = Intrinsic::x86_avx512_vpermi2var_q_128; | |
1046 else if (VecWidth == 256 && EltWidth == 32 && IsFloat) | |
1047 IID = Intrinsic::x86_avx512_vpermi2var_ps_256; | |
1048 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat) | |
1049 IID = Intrinsic::x86_avx512_vpermi2var_d_256; | |
1050 else if (VecWidth == 256 && EltWidth == 64 && IsFloat) | |
1051 IID = Intrinsic::x86_avx512_vpermi2var_pd_256; | |
1052 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat) | |
1053 IID = Intrinsic::x86_avx512_vpermi2var_q_256; | |
1054 else if (VecWidth == 512 && EltWidth == 32 && IsFloat) | |
1055 IID = Intrinsic::x86_avx512_vpermi2var_ps_512; | |
1056 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat) | |
1057 IID = Intrinsic::x86_avx512_vpermi2var_d_512; | |
1058 else if (VecWidth == 512 && EltWidth == 64 && IsFloat) | |
1059 IID = Intrinsic::x86_avx512_vpermi2var_pd_512; | |
1060 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat) | |
1061 IID = Intrinsic::x86_avx512_vpermi2var_q_512; | |
1062 else if (VecWidth == 128 && EltWidth == 16) | |
1063 IID = Intrinsic::x86_avx512_vpermi2var_hi_128; | |
1064 else if (VecWidth == 256 && EltWidth == 16) | |
1065 IID = Intrinsic::x86_avx512_vpermi2var_hi_256; | |
1066 else if (VecWidth == 512 && EltWidth == 16) | |
1067 IID = Intrinsic::x86_avx512_vpermi2var_hi_512; | |
1068 else if (VecWidth == 128 && EltWidth == 8) | |
1069 IID = Intrinsic::x86_avx512_vpermi2var_qi_128; | |
1070 else if (VecWidth == 256 && EltWidth == 8) | |
1071 IID = Intrinsic::x86_avx512_vpermi2var_qi_256; | |
1072 else if (VecWidth == 512 && EltWidth == 8) | |
1073 IID = Intrinsic::x86_avx512_vpermi2var_qi_512; | |
1074 else | |
1075 llvm_unreachable("Unexpected intrinsic"); | |
1076 | |
1077 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1), | |
1078 CI.getArgOperand(2) }; | |
1079 | |
1080 // If this isn't index form we need to swap operand 0 and 1. | |
1081 if (!IndexForm) | |
1082 std::swap(Args[0], Args[1]); | |
1083 | |
1084 Value *V = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID), | |
1085 Args); | |
1086 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) | |
1087 : Builder.CreateBitCast(CI.getArgOperand(1), | |
1088 Ty); | |
1089 return EmitX86Select(Builder, CI.getArgOperand(3), V, PassThru); | |
1090 } | |
1091 | |
1092 static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI, | |
1093 bool IsSigned, bool IsAddition) { | |
1094 Type *Ty = CI.getType(); | |
1095 Value *Op0 = CI.getOperand(0); | |
1096 Value *Op1 = CI.getOperand(1); | |
1097 | |
1098 Intrinsic::ID IID = | |
1099 IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat) | |
1100 : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat); | |
1101 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty); | |
1102 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1}); | |
1103 | |
1104 if (CI.getNumArgOperands() == 4) { // For masked intrinsics. | |
1105 Value *VecSrc = CI.getOperand(2); | |
1106 Value *Mask = CI.getOperand(3); | |
1107 Res = EmitX86Select(Builder, Mask, Res, VecSrc); | |
1108 } | |
1109 return Res; | |
1110 } | |
1111 | |
1112 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallInst &CI, | |
1113 bool IsRotateRight) { | |
1114 Type *Ty = CI.getType(); | |
1115 Value *Src = CI.getArgOperand(0); | |
1116 Value *Amt = CI.getArgOperand(1); | |
1117 | |
1118 // Amount may be scalar immediate, in which case create a splat vector. | |
1119 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so | |
1120 // we only care about the lowest log2 bits anyway. | |
1121 if (Amt->getType() != Ty) { | |
1122 unsigned NumElts = Ty->getVectorNumElements(); | |
1123 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false); | |
1124 Amt = Builder.CreateVectorSplat(NumElts, Amt); | |
1125 } | |
1126 | |
1127 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl; | |
1128 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty); | |
1129 Value *Res = Builder.CreateCall(Intrin, {Src, Src, Amt}); | |
1130 | |
1131 if (CI.getNumArgOperands() == 4) { // For masked intrinsics. | |
1132 Value *VecSrc = CI.getOperand(2); | |
1133 Value *Mask = CI.getOperand(3); | |
1134 Res = EmitX86Select(Builder, Mask, Res, VecSrc); | |
1135 } | |
1136 return Res; | |
1137 } | |
1138 | |
1139 static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallInst &CI, unsigned Imm, | |
1140 bool IsSigned) { | |
1141 Type *Ty = CI.getType(); | |
1142 Value *LHS = CI.getArgOperand(0); | |
1143 Value *RHS = CI.getArgOperand(1); | |
1144 | |
1145 CmpInst::Predicate Pred; | |
1146 switch (Imm) { | |
1147 case 0x0: | |
1148 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; | |
1149 break; | |
1150 case 0x1: | |
1151 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; | |
1152 break; | |
1153 case 0x2: | |
1154 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; | |
1155 break; | |
1156 case 0x3: | |
1157 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; | |
1158 break; | |
1159 case 0x4: | |
1160 Pred = ICmpInst::ICMP_EQ; | |
1161 break; | |
1162 case 0x5: | |
1163 Pred = ICmpInst::ICMP_NE; | |
1164 break; | |
1165 case 0x6: | |
1166 return Constant::getNullValue(Ty); // FALSE | |
1167 case 0x7: | |
1168 return Constant::getAllOnesValue(Ty); // TRUE | |
1169 default: | |
1170 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate"); | |
1171 } | |
1172 | |
1173 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS); | |
1174 Value *Ext = Builder.CreateSExt(Cmp, Ty); | |
1175 return Ext; | |
1176 } | |
1177 | |
1178 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallInst &CI, | |
1179 bool IsShiftRight, bool ZeroMask) { | |
1180 Type *Ty = CI.getType(); | |
1181 Value *Op0 = CI.getArgOperand(0); | |
1182 Value *Op1 = CI.getArgOperand(1); | |
1183 Value *Amt = CI.getArgOperand(2); | |
1184 | |
1185 if (IsShiftRight) | |
1186 std::swap(Op0, Op1); | |
1187 | |
1188 // Amount may be scalar immediate, in which case create a splat vector. | |
1189 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so | |
1190 // we only care about the lowest log2 bits anyway. | |
1191 if (Amt->getType() != Ty) { | |
1192 unsigned NumElts = Ty->getVectorNumElements(); | |
1193 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false); | |
1194 Amt = Builder.CreateVectorSplat(NumElts, Amt); | |
1195 } | |
1196 | |
1197 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl; | |
1198 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty); | |
1199 Value *Res = Builder.CreateCall(Intrin, {Op0, Op1, Amt}); | |
1200 | |
1201 unsigned NumArgs = CI.getNumArgOperands(); | |
1202 if (NumArgs >= 4) { // For masked intrinsics. | |
1203 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) : | |
1204 ZeroMask ? ConstantAggregateZero::get(CI.getType()) : | |
1205 CI.getArgOperand(0); | |
1206 Value *Mask = CI.getOperand(NumArgs - 1); | |
1207 Res = EmitX86Select(Builder, Mask, Res, VecSrc); | |
1208 } | |
1209 return Res; | |
1210 } | |
1211 | |
837 static Value *UpgradeMaskedStore(IRBuilder<> &Builder, | 1212 static Value *UpgradeMaskedStore(IRBuilder<> &Builder, |
838 Value *Ptr, Value *Data, Value *Mask, | 1213 Value *Ptr, Value *Data, Value *Mask, |
839 bool Aligned) { | 1214 bool Aligned) { |
840 // Cast the pointer to the right type. | 1215 // Cast the pointer to the right type. |
841 Ptr = Builder.CreateBitCast(Ptr, | 1216 Ptr = Builder.CreateBitCast(Ptr, |
855 } | 1230 } |
856 | 1231 |
857 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder, | 1232 static Value *UpgradeMaskedLoad(IRBuilder<> &Builder, |
858 Value *Ptr, Value *Passthru, Value *Mask, | 1233 Value *Ptr, Value *Passthru, Value *Mask, |
859 bool Aligned) { | 1234 bool Aligned) { |
1235 Type *ValTy = Passthru->getType(); | |
860 // Cast the pointer to the right type. | 1236 // Cast the pointer to the right type. |
861 Ptr = Builder.CreateBitCast(Ptr, | 1237 Ptr = Builder.CreateBitCast(Ptr, llvm::PointerType::getUnqual(ValTy)); |
862 llvm::PointerType::getUnqual(Passthru->getType())); | |
863 unsigned Align = | 1238 unsigned Align = |
864 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1; | 1239 Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1; |
865 | 1240 |
866 // If the mask is all ones just emit a regular load. | 1241 // If the mask is all ones just emit a regular load. |
867 if (const auto *C = dyn_cast<Constant>(Mask)) | 1242 if (const auto *C = dyn_cast<Constant>(Mask)) |
868 if (C->isAllOnesValue()) | 1243 if (C->isAllOnesValue()) |
869 return Builder.CreateAlignedLoad(Ptr, Align); | 1244 return Builder.CreateAlignedLoad(ValTy, Ptr, Align); |
870 | 1245 |
871 // Convert the mask from an integer type to a vector of i1. | 1246 // Convert the mask from an integer type to a vector of i1. |
872 unsigned NumElts = Passthru->getType()->getVectorNumElements(); | 1247 unsigned NumElts = Passthru->getType()->getVectorNumElements(); |
873 Mask = getX86MaskVec(Builder, Mask, NumElts); | 1248 Mask = getX86MaskVec(Builder, Mask, NumElts); |
874 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru); | 1249 return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru); |
899 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2)); | 1274 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2)); |
900 | 1275 |
901 return Res; | 1276 return Res; |
902 } | 1277 } |
903 | 1278 |
1279 static Value *upgradePMULDQ(IRBuilder<> &Builder, CallInst &CI, bool IsSigned) { | |
1280 Type *Ty = CI.getType(); | |
1281 | |
1282 // Arguments have a vXi32 type so cast to vXi64. | |
1283 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty); | |
1284 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty); | |
1285 | |
1286 if (IsSigned) { | |
1287 // Shift left then arithmetic shift right. | |
1288 Constant *ShiftAmt = ConstantInt::get(Ty, 32); | |
1289 LHS = Builder.CreateShl(LHS, ShiftAmt); | |
1290 LHS = Builder.CreateAShr(LHS, ShiftAmt); | |
1291 RHS = Builder.CreateShl(RHS, ShiftAmt); | |
1292 RHS = Builder.CreateAShr(RHS, ShiftAmt); | |
1293 } else { | |
1294 // Clear the upper bits. | |
1295 Constant *Mask = ConstantInt::get(Ty, 0xffffffff); | |
1296 LHS = Builder.CreateAnd(LHS, Mask); | |
1297 RHS = Builder.CreateAnd(RHS, Mask); | |
1298 } | |
1299 | |
1300 Value *Res = Builder.CreateMul(LHS, RHS); | |
1301 | |
1302 if (CI.getNumArgOperands() == 4) | |
1303 Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2)); | |
1304 | |
1305 return Res; | |
1306 } | |
1307 | |
904 // Applying mask on vector of i1's and make sure result is at least 8 bits wide. | 1308 // Applying mask on vector of i1's and make sure result is at least 8 bits wide. |
905 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder,Value *Vec, Value *Mask, | 1309 static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, |
906 unsigned NumElts) { | 1310 Value *Mask) { |
1311 unsigned NumElts = Vec->getType()->getVectorNumElements(); | |
907 if (Mask) { | 1312 if (Mask) { |
908 const auto *C = dyn_cast<Constant>(Mask); | 1313 const auto *C = dyn_cast<Constant>(Mask); |
909 if (!C || !C->isAllOnesValue()) | 1314 if (!C || !C->isAllOnesValue()) |
910 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts)); | 1315 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts)); |
911 } | 1316 } |
947 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1)); | 1352 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1)); |
948 } | 1353 } |
949 | 1354 |
950 Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1); | 1355 Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1); |
951 | 1356 |
952 return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask, NumElts); | 1357 return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask); |
953 } | 1358 } |
954 | 1359 |
955 // Replace a masked intrinsic with an older unmasked intrinsic. | 1360 // Replace a masked intrinsic with an older unmasked intrinsic. |
956 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI, | 1361 static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI, |
957 Intrinsic::ID IID) { | 1362 Intrinsic::ID IID) { |
958 Function *F = CI.getCalledFunction(); | 1363 Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID); |
959 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID); | |
960 Value *Rep = Builder.CreateCall(Intrin, | 1364 Value *Rep = Builder.CreateCall(Intrin, |
961 { CI.getArgOperand(0), CI.getArgOperand(1) }); | 1365 { CI.getArgOperand(0), CI.getArgOperand(1) }); |
962 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2)); | 1366 return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2)); |
963 } | 1367 } |
964 | 1368 |
981 Value* Op = CI.getArgOperand(0); | 1385 Value* Op = CI.getArgOperand(0); |
982 Type* ReturnOp = CI.getType(); | 1386 Type* ReturnOp = CI.getType(); |
983 unsigned NumElts = CI.getType()->getVectorNumElements(); | 1387 unsigned NumElts = CI.getType()->getVectorNumElements(); |
984 Value *Mask = getX86MaskVec(Builder, Op, NumElts); | 1388 Value *Mask = getX86MaskVec(Builder, Op, NumElts); |
985 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2"); | 1389 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2"); |
1390 } | |
1391 | |
1392 // Replace intrinsic with unmasked version and a select. | |
1393 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, | |
1394 CallInst &CI, Value *&Rep) { | |
1395 Name = Name.substr(12); // Remove avx512.mask. | |
1396 | |
1397 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits(); | |
1398 unsigned EltWidth = CI.getType()->getScalarSizeInBits(); | |
1399 Intrinsic::ID IID; | |
1400 if (Name.startswith("max.p")) { | |
1401 if (VecWidth == 128 && EltWidth == 32) | |
1402 IID = Intrinsic::x86_sse_max_ps; | |
1403 else if (VecWidth == 128 && EltWidth == 64) | |
1404 IID = Intrinsic::x86_sse2_max_pd; | |
1405 else if (VecWidth == 256 && EltWidth == 32) | |
1406 IID = Intrinsic::x86_avx_max_ps_256; | |
1407 else if (VecWidth == 256 && EltWidth == 64) | |
1408 IID = Intrinsic::x86_avx_max_pd_256; | |
1409 else | |
1410 llvm_unreachable("Unexpected intrinsic"); | |
1411 } else if (Name.startswith("min.p")) { | |
1412 if (VecWidth == 128 && EltWidth == 32) | |
1413 IID = Intrinsic::x86_sse_min_ps; | |
1414 else if (VecWidth == 128 && EltWidth == 64) | |
1415 IID = Intrinsic::x86_sse2_min_pd; | |
1416 else if (VecWidth == 256 && EltWidth == 32) | |
1417 IID = Intrinsic::x86_avx_min_ps_256; | |
1418 else if (VecWidth == 256 && EltWidth == 64) | |
1419 IID = Intrinsic::x86_avx_min_pd_256; | |
1420 else | |
1421 llvm_unreachable("Unexpected intrinsic"); | |
1422 } else if (Name.startswith("pshuf.b.")) { | |
1423 if (VecWidth == 128) | |
1424 IID = Intrinsic::x86_ssse3_pshuf_b_128; | |
1425 else if (VecWidth == 256) | |
1426 IID = Intrinsic::x86_avx2_pshuf_b; | |
1427 else if (VecWidth == 512) | |
1428 IID = Intrinsic::x86_avx512_pshuf_b_512; | |
1429 else | |
1430 llvm_unreachable("Unexpected intrinsic"); | |
1431 } else if (Name.startswith("pmul.hr.sw.")) { | |
1432 if (VecWidth == 128) | |
1433 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128; | |
1434 else if (VecWidth == 256) | |
1435 IID = Intrinsic::x86_avx2_pmul_hr_sw; | |
1436 else if (VecWidth == 512) | |
1437 IID = Intrinsic::x86_avx512_pmul_hr_sw_512; | |
1438 else | |
1439 llvm_unreachable("Unexpected intrinsic"); | |
1440 } else if (Name.startswith("pmulh.w.")) { | |
1441 if (VecWidth == 128) | |
1442 IID = Intrinsic::x86_sse2_pmulh_w; | |
1443 else if (VecWidth == 256) | |
1444 IID = Intrinsic::x86_avx2_pmulh_w; | |
1445 else if (VecWidth == 512) | |
1446 IID = Intrinsic::x86_avx512_pmulh_w_512; | |
1447 else | |
1448 llvm_unreachable("Unexpected intrinsic"); | |
1449 } else if (Name.startswith("pmulhu.w.")) { | |
1450 if (VecWidth == 128) | |
1451 IID = Intrinsic::x86_sse2_pmulhu_w; | |
1452 else if (VecWidth == 256) | |
1453 IID = Intrinsic::x86_avx2_pmulhu_w; | |
1454 else if (VecWidth == 512) | |
1455 IID = Intrinsic::x86_avx512_pmulhu_w_512; | |
1456 else | |
1457 llvm_unreachable("Unexpected intrinsic"); | |
1458 } else if (Name.startswith("pmaddw.d.")) { | |
1459 if (VecWidth == 128) | |
1460 IID = Intrinsic::x86_sse2_pmadd_wd; | |
1461 else if (VecWidth == 256) | |
1462 IID = Intrinsic::x86_avx2_pmadd_wd; | |
1463 else if (VecWidth == 512) | |
1464 IID = Intrinsic::x86_avx512_pmaddw_d_512; | |
1465 else | |
1466 llvm_unreachable("Unexpected intrinsic"); | |
1467 } else if (Name.startswith("pmaddubs.w.")) { | |
1468 if (VecWidth == 128) | |
1469 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128; | |
1470 else if (VecWidth == 256) | |
1471 IID = Intrinsic::x86_avx2_pmadd_ub_sw; | |
1472 else if (VecWidth == 512) | |
1473 IID = Intrinsic::x86_avx512_pmaddubs_w_512; | |
1474 else | |
1475 llvm_unreachable("Unexpected intrinsic"); | |
1476 } else if (Name.startswith("packsswb.")) { | |
1477 if (VecWidth == 128) | |
1478 IID = Intrinsic::x86_sse2_packsswb_128; | |
1479 else if (VecWidth == 256) | |
1480 IID = Intrinsic::x86_avx2_packsswb; | |
1481 else if (VecWidth == 512) | |
1482 IID = Intrinsic::x86_avx512_packsswb_512; | |
1483 else | |
1484 llvm_unreachable("Unexpected intrinsic"); | |
1485 } else if (Name.startswith("packssdw.")) { | |
1486 if (VecWidth == 128) | |
1487 IID = Intrinsic::x86_sse2_packssdw_128; | |
1488 else if (VecWidth == 256) | |
1489 IID = Intrinsic::x86_avx2_packssdw; | |
1490 else if (VecWidth == 512) | |
1491 IID = Intrinsic::x86_avx512_packssdw_512; | |
1492 else | |
1493 llvm_unreachable("Unexpected intrinsic"); | |
1494 } else if (Name.startswith("packuswb.")) { | |
1495 if (VecWidth == 128) | |
1496 IID = Intrinsic::x86_sse2_packuswb_128; | |
1497 else if (VecWidth == 256) | |
1498 IID = Intrinsic::x86_avx2_packuswb; | |
1499 else if (VecWidth == 512) | |
1500 IID = Intrinsic::x86_avx512_packuswb_512; | |
1501 else | |
1502 llvm_unreachable("Unexpected intrinsic"); | |
1503 } else if (Name.startswith("packusdw.")) { | |
1504 if (VecWidth == 128) | |
1505 IID = Intrinsic::x86_sse41_packusdw; | |
1506 else if (VecWidth == 256) | |
1507 IID = Intrinsic::x86_avx2_packusdw; | |
1508 else if (VecWidth == 512) | |
1509 IID = Intrinsic::x86_avx512_packusdw_512; | |
1510 else | |
1511 llvm_unreachable("Unexpected intrinsic"); | |
1512 } else if (Name.startswith("vpermilvar.")) { | |
1513 if (VecWidth == 128 && EltWidth == 32) | |
1514 IID = Intrinsic::x86_avx_vpermilvar_ps; | |
1515 else if (VecWidth == 128 && EltWidth == 64) | |
1516 IID = Intrinsic::x86_avx_vpermilvar_pd; | |
1517 else if (VecWidth == 256 && EltWidth == 32) | |
1518 IID = Intrinsic::x86_avx_vpermilvar_ps_256; | |
1519 else if (VecWidth == 256 && EltWidth == 64) | |
1520 IID = Intrinsic::x86_avx_vpermilvar_pd_256; | |
1521 else if (VecWidth == 512 && EltWidth == 32) | |
1522 IID = Intrinsic::x86_avx512_vpermilvar_ps_512; | |
1523 else if (VecWidth == 512 && EltWidth == 64) | |
1524 IID = Intrinsic::x86_avx512_vpermilvar_pd_512; | |
1525 else | |
1526 llvm_unreachable("Unexpected intrinsic"); | |
1527 } else if (Name == "cvtpd2dq.256") { | |
1528 IID = Intrinsic::x86_avx_cvt_pd2dq_256; | |
1529 } else if (Name == "cvtpd2ps.256") { | |
1530 IID = Intrinsic::x86_avx_cvt_pd2_ps_256; | |
1531 } else if (Name == "cvttpd2dq.256") { | |
1532 IID = Intrinsic::x86_avx_cvtt_pd2dq_256; | |
1533 } else if (Name == "cvttps2dq.128") { | |
1534 IID = Intrinsic::x86_sse2_cvttps2dq; | |
1535 } else if (Name == "cvttps2dq.256") { | |
1536 IID = Intrinsic::x86_avx_cvtt_ps2dq_256; | |
1537 } else if (Name.startswith("permvar.")) { | |
1538 bool IsFloat = CI.getType()->isFPOrFPVectorTy(); | |
1539 if (VecWidth == 256 && EltWidth == 32 && IsFloat) | |
1540 IID = Intrinsic::x86_avx2_permps; | |
1541 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat) | |
1542 IID = Intrinsic::x86_avx2_permd; | |
1543 else if (VecWidth == 256 && EltWidth == 64 && IsFloat) | |
1544 IID = Intrinsic::x86_avx512_permvar_df_256; | |
1545 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat) | |
1546 IID = Intrinsic::x86_avx512_permvar_di_256; | |
1547 else if (VecWidth == 512 && EltWidth == 32 && IsFloat) | |
1548 IID = Intrinsic::x86_avx512_permvar_sf_512; | |
1549 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat) | |
1550 IID = Intrinsic::x86_avx512_permvar_si_512; | |
1551 else if (VecWidth == 512 && EltWidth == 64 && IsFloat) | |
1552 IID = Intrinsic::x86_avx512_permvar_df_512; | |
1553 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat) | |
1554 IID = Intrinsic::x86_avx512_permvar_di_512; | |
1555 else if (VecWidth == 128 && EltWidth == 16) | |
1556 IID = Intrinsic::x86_avx512_permvar_hi_128; | |
1557 else if (VecWidth == 256 && EltWidth == 16) | |
1558 IID = Intrinsic::x86_avx512_permvar_hi_256; | |
1559 else if (VecWidth == 512 && EltWidth == 16) | |
1560 IID = Intrinsic::x86_avx512_permvar_hi_512; | |
1561 else if (VecWidth == 128 && EltWidth == 8) | |
1562 IID = Intrinsic::x86_avx512_permvar_qi_128; | |
1563 else if (VecWidth == 256 && EltWidth == 8) | |
1564 IID = Intrinsic::x86_avx512_permvar_qi_256; | |
1565 else if (VecWidth == 512 && EltWidth == 8) | |
1566 IID = Intrinsic::x86_avx512_permvar_qi_512; | |
1567 else | |
1568 llvm_unreachable("Unexpected intrinsic"); | |
1569 } else if (Name.startswith("dbpsadbw.")) { | |
1570 if (VecWidth == 128) | |
1571 IID = Intrinsic::x86_avx512_dbpsadbw_128; | |
1572 else if (VecWidth == 256) | |
1573 IID = Intrinsic::x86_avx512_dbpsadbw_256; | |
1574 else if (VecWidth == 512) | |
1575 IID = Intrinsic::x86_avx512_dbpsadbw_512; | |
1576 else | |
1577 llvm_unreachable("Unexpected intrinsic"); | |
1578 } else if (Name.startswith("pmultishift.qb.")) { | |
1579 if (VecWidth == 128) | |
1580 IID = Intrinsic::x86_avx512_pmultishift_qb_128; | |
1581 else if (VecWidth == 256) | |
1582 IID = Intrinsic::x86_avx512_pmultishift_qb_256; | |
1583 else if (VecWidth == 512) | |
1584 IID = Intrinsic::x86_avx512_pmultishift_qb_512; | |
1585 else | |
1586 llvm_unreachable("Unexpected intrinsic"); | |
1587 } else if (Name.startswith("conflict.")) { | |
1588 if (Name[9] == 'd' && VecWidth == 128) | |
1589 IID = Intrinsic::x86_avx512_conflict_d_128; | |
1590 else if (Name[9] == 'd' && VecWidth == 256) | |
1591 IID = Intrinsic::x86_avx512_conflict_d_256; | |
1592 else if (Name[9] == 'd' && VecWidth == 512) | |
1593 IID = Intrinsic::x86_avx512_conflict_d_512; | |
1594 else if (Name[9] == 'q' && VecWidth == 128) | |
1595 IID = Intrinsic::x86_avx512_conflict_q_128; | |
1596 else if (Name[9] == 'q' && VecWidth == 256) | |
1597 IID = Intrinsic::x86_avx512_conflict_q_256; | |
1598 else if (Name[9] == 'q' && VecWidth == 512) | |
1599 IID = Intrinsic::x86_avx512_conflict_q_512; | |
1600 else | |
1601 llvm_unreachable("Unexpected intrinsic"); | |
1602 } else if (Name.startswith("pavg.")) { | |
1603 if (Name[5] == 'b' && VecWidth == 128) | |
1604 IID = Intrinsic::x86_sse2_pavg_b; | |
1605 else if (Name[5] == 'b' && VecWidth == 256) | |
1606 IID = Intrinsic::x86_avx2_pavg_b; | |
1607 else if (Name[5] == 'b' && VecWidth == 512) | |
1608 IID = Intrinsic::x86_avx512_pavg_b_512; | |
1609 else if (Name[5] == 'w' && VecWidth == 128) | |
1610 IID = Intrinsic::x86_sse2_pavg_w; | |
1611 else if (Name[5] == 'w' && VecWidth == 256) | |
1612 IID = Intrinsic::x86_avx2_pavg_w; | |
1613 else if (Name[5] == 'w' && VecWidth == 512) | |
1614 IID = Intrinsic::x86_avx512_pavg_w_512; | |
1615 else | |
1616 llvm_unreachable("Unexpected intrinsic"); | |
1617 } else | |
1618 return false; | |
1619 | |
1620 SmallVector<Value *, 4> Args(CI.arg_operands().begin(), | |
1621 CI.arg_operands().end()); | |
1622 Args.pop_back(); | |
1623 Args.pop_back(); | |
1624 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI.getModule(), IID), | |
1625 Args); | |
1626 unsigned NumArgs = CI.getNumArgOperands(); | |
1627 Rep = EmitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep, | |
1628 CI.getArgOperand(NumArgs - 2)); | |
1629 return true; | |
1630 } | |
1631 | |
/// Upgrade the comment in an inline asm string that represents an objc
/// retain/release marker.
///
/// The string is rewritten in place only when it begins with "mov\tfp",
/// mentions "objc_retainAutoreleaseReturnValue" and contains "# marker"; the
/// '#' of the first "# marker" occurrence is then replaced by ';'. Any other
/// string is left untouched.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // rfind anchored at position 0 can only match a leading "mov\tfp".
  if (AsmStr->rfind("mov\tfp", 0) != 0)
    return;
  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const size_t MarkerPos = AsmStr->find("# marker");
  if (MarkerPos == std::string::npos)
    return;

  AsmStr->replace(MarkerPos, 1, ";");
}
987 | 1643 |
988 /// Upgrade a call to an old intrinsic. All argument and return casting must be | 1644 /// Upgrade a call to an old intrinsic. All argument and return casting must be |
989 /// provided to seamlessly integrate with existing context. | 1645 /// provided to seamlessly integrate with existing context. |
990 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { | 1646 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { |
1085 | 1741 |
1086 Arg0 = Builder.CreateBitCast(Arg0, | 1742 Arg0 = Builder.CreateBitCast(Arg0, |
1087 PointerType::getUnqual(Arg1->getType()), | 1743 PointerType::getUnqual(Arg1->getType()), |
1088 "cast"); | 1744 "cast"); |
1089 Builder.CreateAlignedStore(Arg1, Arg0, 1); | 1745 Builder.CreateAlignedStore(Arg1, Arg0, 1); |
1746 | |
1747 // Remove intrinsic. | |
1748 CI->eraseFromParent(); | |
1749 return; | |
1750 } | |
1751 | |
1752 if (IsX86 && Name == "avx512.mask.store.ss") { | |
1753 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1)); | |
1754 UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1), | |
1755 Mask, false); | |
1090 | 1756 |
1091 // Remove intrinsic. | 1757 // Remove intrinsic. |
1092 CI->eraseFromParent(); | 1758 CI->eraseFromParent(); |
1093 return; | 1759 return; |
1094 } | 1760 } |
1119 ExtTy = Type::getInt64Ty(C); | 1785 ExtTy = Type::getInt64Ty(C); |
1120 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / | 1786 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / |
1121 ExtTy->getPrimitiveSizeInBits(); | 1787 ExtTy->getPrimitiveSizeInBits(); |
1122 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy); | 1788 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy); |
1123 Rep = Builder.CreateVectorSplat(NumElts, Rep); | 1789 Rep = Builder.CreateVectorSplat(NumElts, Rep); |
1790 } else if (IsX86 && (Name == "sse.sqrt.ss" || | |
1791 Name == "sse2.sqrt.sd")) { | |
1792 Value *Vec = CI->getArgOperand(0); | |
1793 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0); | |
1794 Function *Intr = Intrinsic::getDeclaration(F->getParent(), | |
1795 Intrinsic::sqrt, Elt0->getType()); | |
1796 Elt0 = Builder.CreateCall(Intr, Elt0); | |
1797 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0); | |
1798 } else if (IsX86 && (Name.startswith("avx.sqrt.p") || | |
1799 Name.startswith("sse2.sqrt.p") || | |
1800 Name.startswith("sse.sqrt.p"))) { | |
1801 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), | |
1802 Intrinsic::sqrt, | |
1803 CI->getType()), | |
1804 {CI->getArgOperand(0)}); | |
1805 } else if (IsX86 && (Name.startswith("avx512.mask.sqrt.p"))) { | |
1806 if (CI->getNumArgOperands() == 4 && | |
1807 (!isa<ConstantInt>(CI->getArgOperand(3)) || | |
1808 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) { | |
1809 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512 | |
1810 : Intrinsic::x86_avx512_sqrt_pd_512; | |
1811 | |
1812 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(3) }; | |
1813 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), | |
1814 IID), Args); | |
1815 } else { | |
1816 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), | |
1817 Intrinsic::sqrt, | |
1818 CI->getType()), | |
1819 {CI->getArgOperand(0)}); | |
1820 } | |
1821 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, | |
1822 CI->getArgOperand(1)); | |
1124 } else if (IsX86 && (Name.startswith("avx512.ptestm") || | 1823 } else if (IsX86 && (Name.startswith("avx512.ptestm") || |
1125 Name.startswith("avx512.ptestnm"))) { | 1824 Name.startswith("avx512.ptestnm"))) { |
1126 Value *Op0 = CI->getArgOperand(0); | 1825 Value *Op0 = CI->getArgOperand(0); |
1127 Value *Op1 = CI->getArgOperand(1); | 1826 Value *Op1 = CI->getArgOperand(1); |
1128 Value *Mask = CI->getArgOperand(2); | 1827 Value *Mask = CI->getArgOperand(2); |
1130 llvm::Type *Ty = Op0->getType(); | 1829 llvm::Type *Ty = Op0->getType(); |
1131 Value *Zero = llvm::Constant::getNullValue(Ty); | 1830 Value *Zero = llvm::Constant::getNullValue(Ty); |
1132 ICmpInst::Predicate Pred = | 1831 ICmpInst::Predicate Pred = |
1133 Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; | 1832 Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ; |
1134 Rep = Builder.CreateICmp(Pred, Rep, Zero); | 1833 Rep = Builder.CreateICmp(Pred, Rep, Zero); |
1135 unsigned NumElts = Op0->getType()->getVectorNumElements(); | 1834 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask); |
1136 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask, NumElts); | |
1137 } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){ | 1835 } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))){ |
1138 unsigned NumElts = | 1836 unsigned NumElts = |
1139 CI->getArgOperand(1)->getType()->getVectorNumElements(); | 1837 CI->getArgOperand(1)->getType()->getVectorNumElements(); |
1140 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0)); | 1838 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0)); |
1141 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, | 1839 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, |
1201 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty()); | 1899 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty()); |
1202 else | 1900 else |
1203 C = ConstantInt::getNullValue(Builder.getInt16Ty()); | 1901 C = ConstantInt::getNullValue(Builder.getInt16Ty()); |
1204 Rep = Builder.CreateICmpEQ(Rep, C); | 1902 Rep = Builder.CreateICmpEQ(Rep, C); |
1205 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty()); | 1903 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty()); |
1206 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) { | 1904 } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd" || |
1905 Name == "sse.sub.ss" || Name == "sse2.sub.sd" || | |
1906 Name == "sse.mul.ss" || Name == "sse2.mul.sd" || | |
1907 Name == "sse.div.ss" || Name == "sse2.div.sd")) { | |
1207 Type *I32Ty = Type::getInt32Ty(C); | 1908 Type *I32Ty = Type::getInt32Ty(C); |
1208 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), | 1909 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), |
1209 ConstantInt::get(I32Ty, 0)); | 1910 ConstantInt::get(I32Ty, 0)); |
1210 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), | 1911 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), |
1211 ConstantInt::get(I32Ty, 0)); | 1912 ConstantInt::get(I32Ty, 0)); |
1212 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), | 1913 Value *EltOp; |
1213 Builder.CreateFAdd(Elt0, Elt1), | 1914 if (Name.contains(".add.")) |
1214 ConstantInt::get(I32Ty, 0)); | 1915 EltOp = Builder.CreateFAdd(Elt0, Elt1); |
1215 } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) { | 1916 else if (Name.contains(".sub.")) |
1216 Type *I32Ty = Type::getInt32Ty(C); | 1917 EltOp = Builder.CreateFSub(Elt0, Elt1); |
1217 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), | 1918 else if (Name.contains(".mul.")) |
1218 ConstantInt::get(I32Ty, 0)); | 1919 EltOp = Builder.CreateFMul(Elt0, Elt1); |
1219 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), | 1920 else |
1220 ConstantInt::get(I32Ty, 0)); | 1921 EltOp = Builder.CreateFDiv(Elt0, Elt1); |
1221 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), | 1922 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp, |
1222 Builder.CreateFSub(Elt0, Elt1), | |
1223 ConstantInt::get(I32Ty, 0)); | |
1224 } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) { | |
1225 Type *I32Ty = Type::getInt32Ty(C); | |
1226 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), | |
1227 ConstantInt::get(I32Ty, 0)); | |
1228 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), | |
1229 ConstantInt::get(I32Ty, 0)); | |
1230 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), | |
1231 Builder.CreateFMul(Elt0, Elt1), | |
1232 ConstantInt::get(I32Ty, 0)); | |
1233 } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) { | |
1234 Type *I32Ty = Type::getInt32Ty(C); | |
1235 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0), | |
1236 ConstantInt::get(I32Ty, 0)); | |
1237 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1), | |
1238 ConstantInt::get(I32Ty, 0)); | |
1239 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), | |
1240 Builder.CreateFDiv(Elt0, Elt1), | |
1241 ConstantInt::get(I32Ty, 0)); | 1923 ConstantInt::get(I32Ty, 0)); |
1242 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) { | 1924 } else if (IsX86 && Name.startswith("avx512.mask.pcmp")) { |
1243 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt." | 1925 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt." |
1244 bool CmpEq = Name[16] == 'e'; | 1926 bool CmpEq = Name[16] == 'e'; |
1245 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true); | 1927 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true); |
1246 } else if (IsX86 && Name.startswith("avx512.mask.cmp")) { | 1928 } else if (IsX86 && Name.startswith("avx512.mask.vpshufbitqmb.")) { |
1929 Type *OpTy = CI->getArgOperand(0)->getType(); | |
1930 unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); | |
1931 Intrinsic::ID IID; | |
1932 switch (VecWidth) { | |
1933 default: llvm_unreachable("Unexpected intrinsic"); | |
1934 case 128: IID = Intrinsic::x86_avx512_vpshufbitqmb_128; break; | |
1935 case 256: IID = Intrinsic::x86_avx512_vpshufbitqmb_256; break; | |
1936 case 512: IID = Intrinsic::x86_avx512_vpshufbitqmb_512; break; | |
1937 } | |
1938 | |
1939 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), | |
1940 { CI->getOperand(0), CI->getArgOperand(1) }); | |
1941 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2)); | |
1942 } else if (IsX86 && Name.startswith("avx512.mask.fpclass.p")) { | |
1943 Type *OpTy = CI->getArgOperand(0)->getType(); | |
1944 unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); | |
1945 unsigned EltWidth = OpTy->getScalarSizeInBits(); | |
1946 Intrinsic::ID IID; | |
1947 if (VecWidth == 128 && EltWidth == 32) | |
1948 IID = Intrinsic::x86_avx512_fpclass_ps_128; | |
1949 else if (VecWidth == 256 && EltWidth == 32) | |
1950 IID = Intrinsic::x86_avx512_fpclass_ps_256; | |
1951 else if (VecWidth == 512 && EltWidth == 32) | |
1952 IID = Intrinsic::x86_avx512_fpclass_ps_512; | |
1953 else if (VecWidth == 128 && EltWidth == 64) | |
1954 IID = Intrinsic::x86_avx512_fpclass_pd_128; | |
1955 else if (VecWidth == 256 && EltWidth == 64) | |
1956 IID = Intrinsic::x86_avx512_fpclass_pd_256; | |
1957 else if (VecWidth == 512 && EltWidth == 64) | |
1958 IID = Intrinsic::x86_avx512_fpclass_pd_512; | |
1959 else | |
1960 llvm_unreachable("Unexpected intrinsic"); | |
1961 | |
1962 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), | |
1963 { CI->getOperand(0), CI->getArgOperand(1) }); | |
1964 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2)); | |
1965 } else if (IsX86 && Name.startswith("avx512.mask.cmp.p")) { | |
1966 Type *OpTy = CI->getArgOperand(0)->getType(); | |
1967 unsigned VecWidth = OpTy->getPrimitiveSizeInBits(); | |
1968 unsigned EltWidth = OpTy->getScalarSizeInBits(); | |
1969 Intrinsic::ID IID; | |
1970 if (VecWidth == 128 && EltWidth == 32) | |
1971 IID = Intrinsic::x86_avx512_cmp_ps_128; | |
1972 else if (VecWidth == 256 && EltWidth == 32) | |
1973 IID = Intrinsic::x86_avx512_cmp_ps_256; | |
1974 else if (VecWidth == 512 && EltWidth == 32) | |
1975 IID = Intrinsic::x86_avx512_cmp_ps_512; | |
1976 else if (VecWidth == 128 && EltWidth == 64) | |
1977 IID = Intrinsic::x86_avx512_cmp_pd_128; | |
1978 else if (VecWidth == 256 && EltWidth == 64) | |
1979 IID = Intrinsic::x86_avx512_cmp_pd_256; | |
1980 else if (VecWidth == 512 && EltWidth == 64) | |
1981 IID = Intrinsic::x86_avx512_cmp_pd_512; | |
1982 else | |
1983 llvm_unreachable("Unexpected intrinsic"); | |
1984 | |
1985 SmallVector<Value *, 4> Args; | |
1986 Args.push_back(CI->getArgOperand(0)); | |
1987 Args.push_back(CI->getArgOperand(1)); | |
1988 Args.push_back(CI->getArgOperand(2)); | |
1989 if (CI->getNumArgOperands() == 5) | |
1990 Args.push_back(CI->getArgOperand(4)); | |
1991 | |
1992 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), | |
1993 Args); | |
1994 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(3)); | |
1995 } else if (IsX86 && Name.startswith("avx512.mask.cmp.") && | |
1996 Name[16] != 'p') { | |
1997 // Integer compare intrinsics. | |
1247 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); | 1998 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); |
1248 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true); | 1999 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true); |
1249 } else if (IsX86 && Name.startswith("avx512.mask.ucmp")) { | 2000 } else if (IsX86 && Name.startswith("avx512.mask.ucmp.")) { |
1250 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); | 2001 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); |
1251 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false); | 2002 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false); |
1252 } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") || | 2003 } else if (IsX86 && (Name.startswith("avx512.cvtb2mask.") || |
1253 Name.startswith("avx512.cvtw2mask.") || | 2004 Name.startswith("avx512.cvtw2mask.") || |
1254 Name.startswith("avx512.cvtd2mask.") || | 2005 Name.startswith("avx512.cvtd2mask.") || |
1255 Name.startswith("avx512.cvtq2mask."))) { | 2006 Name.startswith("avx512.cvtq2mask."))) { |
1256 Value *Op = CI->getArgOperand(0); | 2007 Value *Op = CI->getArgOperand(0); |
1257 Value *Zero = llvm::Constant::getNullValue(Op->getType()); | 2008 Value *Zero = llvm::Constant::getNullValue(Op->getType()); |
1258 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero); | 2009 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero); |
1259 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr, | 2010 Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, nullptr); |
1260 Op->getType()->getVectorNumElements()); | |
1261 } else if(IsX86 && (Name == "ssse3.pabs.b.128" || | 2011 } else if(IsX86 && (Name == "ssse3.pabs.b.128" || |
1262 Name == "ssse3.pabs.w.128" || | 2012 Name == "ssse3.pabs.w.128" || |
1263 Name == "ssse3.pabs.d.128" || | 2013 Name == "ssse3.pabs.d.128" || |
1264 Name.startswith("avx2.pabs") || | 2014 Name.startswith("avx2.pabs") || |
1265 Name.startswith("avx512.mask.pabs"))) { | 2015 Name.startswith("avx512.mask.pabs"))) { |
1286 Name == "sse41.pminuw" || | 2036 Name == "sse41.pminuw" || |
1287 Name == "sse41.pminud" || | 2037 Name == "sse41.pminud" || |
1288 Name.startswith("avx2.pminu") || | 2038 Name.startswith("avx2.pminu") || |
1289 Name.startswith("avx512.mask.pminu"))) { | 2039 Name.startswith("avx512.mask.pminu"))) { |
1290 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT); | 2040 Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT); |
2041 } else if (IsX86 && (Name == "sse2.pmulu.dq" || | |
2042 Name == "avx2.pmulu.dq" || | |
2043 Name == "avx512.pmulu.dq.512" || | |
2044 Name.startswith("avx512.mask.pmulu.dq."))) { | |
2045 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/false); | |
2046 } else if (IsX86 && (Name == "sse41.pmuldq" || | |
2047 Name == "avx2.pmul.dq" || | |
2048 Name == "avx512.pmul.dq.512" || | |
2049 Name.startswith("avx512.mask.pmul.dq."))) { | |
2050 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/true); | |
2051 } else if (IsX86 && (Name == "sse.cvtsi2ss" || | |
2052 Name == "sse2.cvtsi2sd" || | |
2053 Name == "sse.cvtsi642ss" || | |
2054 Name == "sse2.cvtsi642sd")) { | |
2055 Rep = Builder.CreateSIToFP(CI->getArgOperand(1), | |
2056 CI->getType()->getVectorElementType()); | |
2057 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); | |
2058 } else if (IsX86 && Name == "avx512.cvtusi2sd") { | |
2059 Rep = Builder.CreateUIToFP(CI->getArgOperand(1), | |
2060 CI->getType()->getVectorElementType()); | |
2061 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); | |
2062 } else if (IsX86 && Name == "sse2.cvtss2sd") { | |
2063 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0); | |
2064 Rep = Builder.CreateFPExt(Rep, CI->getType()->getVectorElementType()); | |
2065 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); | |
1291 } else if (IsX86 && (Name == "sse2.cvtdq2pd" || | 2066 } else if (IsX86 && (Name == "sse2.cvtdq2pd" || |
2067 Name == "sse2.cvtdq2ps" || | |
2068 Name == "avx.cvtdq2.pd.256" || | |
2069 Name == "avx.cvtdq2.ps.256" || | |
2070 Name.startswith("avx512.mask.cvtdq2pd.") || | |
2071 Name.startswith("avx512.mask.cvtudq2pd.") || | |
2072 Name.startswith("avx512.mask.cvtdq2ps.") || | |
2073 Name.startswith("avx512.mask.cvtudq2ps.") || | |
2074 Name.startswith("avx512.mask.cvtqq2pd.") || | |
2075 Name.startswith("avx512.mask.cvtuqq2pd.") || | |
2076 Name == "avx512.mask.cvtqq2ps.256" || | |
2077 Name == "avx512.mask.cvtqq2ps.512" || | |
2078 Name == "avx512.mask.cvtuqq2ps.256" || | |
2079 Name == "avx512.mask.cvtuqq2ps.512" || | |
1292 Name == "sse2.cvtps2pd" || | 2080 Name == "sse2.cvtps2pd" || |
1293 Name == "avx.cvtdq2.pd.256" || | |
1294 Name == "avx.cvt.ps2.pd.256" || | 2081 Name == "avx.cvt.ps2.pd.256" || |
1295 Name.startswith("avx512.mask.cvtdq2pd.") || | 2082 Name == "avx512.mask.cvtps2pd.128" || |
1296 Name.startswith("avx512.mask.cvtudq2pd."))) { | 2083 Name == "avx512.mask.cvtps2pd.256")) { |
1297 // Lossless i32/float to double conversion. | 2084 Type *DstTy = CI->getType(); |
1298 // Extract the bottom elements if necessary and convert to double vector. | |
1299 Value *Src = CI->getArgOperand(0); | |
1300 VectorType *SrcTy = cast<VectorType>(Src->getType()); | |
1301 VectorType *DstTy = cast<VectorType>(CI->getType()); | |
1302 Rep = CI->getArgOperand(0); | 2085 Rep = CI->getArgOperand(0); |
1303 | 2086 Type *SrcTy = Rep->getType(); |
1304 unsigned NumDstElts = DstTy->getNumElements(); | 2087 |
1305 if (NumDstElts < SrcTy->getNumElements()) { | 2088 unsigned NumDstElts = DstTy->getVectorNumElements(); |
2089 if (NumDstElts < SrcTy->getVectorNumElements()) { | |
1306 assert(NumDstElts == 2 && "Unexpected vector size"); | 2090 assert(NumDstElts == 2 && "Unexpected vector size"); |
1307 uint32_t ShuffleMask[2] = { 0, 1 }; | 2091 uint32_t ShuffleMask[2] = { 0, 1 }; |
1308 Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy), | 2092 Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask); |
1309 ShuffleMask); | |
1310 } | 2093 } |
1311 | 2094 |
1312 bool SInt2Double = (StringRef::npos != Name.find("cvtdq2")); | 2095 bool IsPS2PD = SrcTy->getVectorElementType()->isFloatTy(); |
1313 bool UInt2Double = (StringRef::npos != Name.find("cvtudq2")); | 2096 bool IsUnsigned = (StringRef::npos != Name.find("cvtu")); |
1314 if (SInt2Double) | 2097 if (IsPS2PD) |
1315 Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd"); | |
1316 else if (UInt2Double) | |
1317 Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd"); | |
1318 else | |
1319 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd"); | 2098 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd"); |
1320 | 2099 else if (CI->getNumArgOperands() == 4 && |
1321 if (CI->getNumArgOperands() == 3) | 2100 (!isa<ConstantInt>(CI->getArgOperand(3)) || |
2101 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) { | |
2102 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round | |
2103 : Intrinsic::x86_avx512_sitofp_round; | |
2104 Function *F = Intrinsic::getDeclaration(CI->getModule(), IID, | |
2105 { DstTy, SrcTy }); | |
2106 Rep = Builder.CreateCall(F, { Rep, CI->getArgOperand(3) }); | |
2107 } else { | |
2108 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt") | |
2109 : Builder.CreateSIToFP(Rep, DstTy, "cvt"); | |
2110 } | |
2111 | |
2112 if (CI->getNumArgOperands() >= 3) | |
1322 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, | 2113 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, |
1323 CI->getArgOperand(1)); | 2114 CI->getArgOperand(1)); |
1324 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) { | 2115 } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) { |
1325 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0), | 2116 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0), |
1326 CI->getArgOperand(1), CI->getArgOperand(2), | 2117 CI->getArgOperand(1), CI->getArgOperand(2), |
1327 /*Aligned*/false); | 2118 /*Aligned*/false); |
1328 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) { | 2119 } else if (IsX86 && (Name.startswith("avx512.mask.load."))) { |
1329 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0), | 2120 Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0), |
1330 CI->getArgOperand(1),CI->getArgOperand(2), | 2121 CI->getArgOperand(1),CI->getArgOperand(2), |
1331 /*Aligned*/true); | 2122 /*Aligned*/true); |
2123 } else if (IsX86 && Name.startswith("avx512.mask.expand.load.")) { | |
2124 Type *ResultTy = CI->getType(); | |
2125 Type *PtrTy = ResultTy->getVectorElementType(); | |
2126 | |
2127 // Cast the pointer to element type. | |
2128 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0), | |
2129 llvm::PointerType::getUnqual(PtrTy)); | |
2130 | |
2131 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2), | |
2132 ResultTy->getVectorNumElements()); | |
2133 | |
2134 Function *ELd = Intrinsic::getDeclaration(F->getParent(), | |
2135 Intrinsic::masked_expandload, | |
2136 ResultTy); | |
2137 Rep = Builder.CreateCall(ELd, { Ptr, MaskVec, CI->getOperand(1) }); | |
2138 } else if (IsX86 && Name.startswith("avx512.mask.compress.store.")) { | |
2139 Type *ResultTy = CI->getArgOperand(1)->getType(); | |
2140 Type *PtrTy = ResultTy->getVectorElementType(); | |
2141 | |
2142 // Cast the pointer to element type. | |
2143 Value *Ptr = Builder.CreateBitCast(CI->getOperand(0), | |
2144 llvm::PointerType::getUnqual(PtrTy)); | |
2145 | |
2146 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2), | |
2147 ResultTy->getVectorNumElements()); | |
2148 | |
2149 Function *CSt = Intrinsic::getDeclaration(F->getParent(), | |
2150 Intrinsic::masked_compressstore, | |
2151 ResultTy); | |
2152 Rep = Builder.CreateCall(CSt, { CI->getArgOperand(1), Ptr, MaskVec }); | |
2153 } else if (IsX86 && (Name.startswith("avx512.mask.compress.") || | |
2154 Name.startswith("avx512.mask.expand."))) { | |
2155 Type *ResultTy = CI->getType(); | |
2156 | |
2157 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2), | |
2158 ResultTy->getVectorNumElements()); | |
2159 | |
2160 bool IsCompress = Name[12] == 'c'; | |
2161 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress | |
2162 : Intrinsic::x86_avx512_mask_expand; | |
2163 Function *Intr = Intrinsic::getDeclaration(F->getParent(), IID, ResultTy); | |
2164 Rep = Builder.CreateCall(Intr, { CI->getOperand(0), CI->getOperand(1), | |
2165 MaskVec }); | |
1332 } else if (IsX86 && Name.startswith("xop.vpcom")) { | 2166 } else if (IsX86 && Name.startswith("xop.vpcom")) { |
1333 Intrinsic::ID intID; | 2167 bool IsSigned; |
1334 if (Name.endswith("ub")) | 2168 if (Name.endswith("ub") || Name.endswith("uw") || Name.endswith("ud") || |
1335 intID = Intrinsic::x86_xop_vpcomub; | 2169 Name.endswith("uq")) |
1336 else if (Name.endswith("uw")) | 2170 IsSigned = false; |
1337 intID = Intrinsic::x86_xop_vpcomuw; | 2171 else if (Name.endswith("b") || Name.endswith("w") || Name.endswith("d") || |
1338 else if (Name.endswith("ud")) | 2172 Name.endswith("q")) |
1339 intID = Intrinsic::x86_xop_vpcomud; | 2173 IsSigned = true; |
1340 else if (Name.endswith("uq")) | |
1341 intID = Intrinsic::x86_xop_vpcomuq; | |
1342 else if (Name.endswith("b")) | |
1343 intID = Intrinsic::x86_xop_vpcomb; | |
1344 else if (Name.endswith("w")) | |
1345 intID = Intrinsic::x86_xop_vpcomw; | |
1346 else if (Name.endswith("d")) | |
1347 intID = Intrinsic::x86_xop_vpcomd; | |
1348 else if (Name.endswith("q")) | |
1349 intID = Intrinsic::x86_xop_vpcomq; | |
1350 else | 2174 else |
1351 llvm_unreachable("Unknown suffix"); | 2175 llvm_unreachable("Unknown suffix"); |
1352 | 2176 |
1353 Name = Name.substr(9); // strip off "xop.vpcom" | |
1354 unsigned Imm; | 2177 unsigned Imm; |
1355 if (Name.startswith("lt")) | 2178 if (CI->getNumArgOperands() == 3) { |
1356 Imm = 0; | 2179 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); |
1357 else if (Name.startswith("le")) | 2180 } else { |
1358 Imm = 1; | 2181 Name = Name.substr(9); // strip off "xop.vpcom" |
1359 else if (Name.startswith("gt")) | 2182 if (Name.startswith("lt")) |
1360 Imm = 2; | 2183 Imm = 0; |
1361 else if (Name.startswith("ge")) | 2184 else if (Name.startswith("le")) |
1362 Imm = 3; | 2185 Imm = 1; |
1363 else if (Name.startswith("eq")) | 2186 else if (Name.startswith("gt")) |
1364 Imm = 4; | 2187 Imm = 2; |
1365 else if (Name.startswith("ne")) | 2188 else if (Name.startswith("ge")) |
1366 Imm = 5; | 2189 Imm = 3; |
1367 else if (Name.startswith("false")) | 2190 else if (Name.startswith("eq")) |
1368 Imm = 6; | 2191 Imm = 4; |
1369 else if (Name.startswith("true")) | 2192 else if (Name.startswith("ne")) |
1370 Imm = 7; | 2193 Imm = 5; |
1371 else | 2194 else if (Name.startswith("false")) |
1372 llvm_unreachable("Unknown condition"); | 2195 Imm = 6; |
1373 | 2196 else if (Name.startswith("true")) |
1374 Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID); | 2197 Imm = 7; |
1375 Rep = | 2198 else |
1376 Builder.CreateCall(VPCOM, {CI->getArgOperand(0), CI->getArgOperand(1), | 2199 llvm_unreachable("Unknown condition"); |
1377 Builder.getInt8(Imm)}); | 2200 } |
2201 | |
2202 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned); | |
1378 } else if (IsX86 && Name.startswith("xop.vpcmov")) { | 2203 } else if (IsX86 && Name.startswith("xop.vpcmov")) { |
1379 Value *Sel = CI->getArgOperand(2); | 2204 Value *Sel = CI->getArgOperand(2); |
1380 Value *NotSel = Builder.CreateNot(Sel); | 2205 Value *NotSel = Builder.CreateNot(Sel); |
1381 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel); | 2206 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel); |
1382 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel); | 2207 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel); |
1383 Rep = Builder.CreateOr(Sel0, Sel1); | 2208 Rep = Builder.CreateOr(Sel0, Sel1); |
2209 } else if (IsX86 && (Name.startswith("xop.vprot") || | |
2210 Name.startswith("avx512.prol") || | |
2211 Name.startswith("avx512.mask.prol"))) { | |
2212 Rep = upgradeX86Rotate(Builder, *CI, false); | |
2213 } else if (IsX86 && (Name.startswith("avx512.pror") || | |
2214 Name.startswith("avx512.mask.pror"))) { | |
2215 Rep = upgradeX86Rotate(Builder, *CI, true); | |
2216 } else if (IsX86 && (Name.startswith("avx512.vpshld.") || | |
2217 Name.startswith("avx512.mask.vpshld") || | |
2218 Name.startswith("avx512.maskz.vpshld"))) { | |
2219 bool ZeroMask = Name[11] == 'z'; | |
2220 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask); | |
2221 } else if (IsX86 && (Name.startswith("avx512.vpshrd.") || | |
2222 Name.startswith("avx512.mask.vpshrd") || | |
2223 Name.startswith("avx512.maskz.vpshrd"))) { | |
2224 bool ZeroMask = Name[11] == 'z'; | |
2225 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask); | |
1384 } else if (IsX86 && Name == "sse42.crc32.64.8") { | 2226 } else if (IsX86 && Name == "sse42.crc32.64.8") { |
1385 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(), | 2227 Function *CRC32 = Intrinsic::getDeclaration(F->getParent(), |
1386 Intrinsic::x86_sse42_crc32_32_8); | 2228 Intrinsic::x86_sse42_crc32_32_8); |
1387 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); | 2229 Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); |
1388 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)}); | 2230 Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)}); |
1389 Rep = Builder.CreateZExt(Rep, CI->getType(), ""); | 2231 Rep = Builder.CreateZExt(Rep, CI->getType(), ""); |
1390 } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) { | 2232 } else if (IsX86 && (Name.startswith("avx.vbroadcast.s") || |
2233 Name.startswith("avx512.vbroadcast.s"))) { | |
1391 // Replace broadcasts with a series of insertelements. | 2234 // Replace broadcasts with a series of insertelements. |
1392 Type *VecTy = CI->getType(); | 2235 Type *VecTy = CI->getType(); |
1393 Type *EltTy = VecTy->getVectorElementType(); | 2236 Type *EltTy = VecTy->getVectorElementType(); |
1394 unsigned EltNum = VecTy->getVectorNumElements(); | 2237 unsigned EltNum = VecTy->getVectorNumElements(); |
1395 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0), | 2238 Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0), |
1423 : Builder.CreateZExt(SV, DstTy); | 2266 : Builder.CreateZExt(SV, DstTy); |
1424 // If there are 3 arguments, it's a masked intrinsic so we need a select. | 2267 // If there are 3 arguments, it's a masked intrinsic so we need a select. |
1425 if (CI->getNumArgOperands() == 3) | 2268 if (CI->getNumArgOperands() == 3) |
1426 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, | 2269 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, |
1427 CI->getArgOperand(1)); | 2270 CI->getArgOperand(1)); |
2271 } else if (Name == "avx512.mask.pmov.qd.256" || | |
2272 Name == "avx512.mask.pmov.qd.512" || | |
2273 Name == "avx512.mask.pmov.wb.256" || | |
2274 Name == "avx512.mask.pmov.wb.512") { | |
2275 Type *Ty = CI->getArgOperand(1)->getType(); | |
2276 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty); | |
2277 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, | |
2278 CI->getArgOperand(1)); | |
1428 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") || | 2279 } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") || |
1429 Name == "avx2.vbroadcasti128")) { | 2280 Name == "avx2.vbroadcasti128")) { |
1430 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle. | 2281 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle. |
1431 Type *EltTy = CI->getType()->getVectorElementType(); | 2282 Type *EltTy = CI->getType()->getVectorElementType(); |
1432 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits(); | 2283 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits(); |
1433 Type *VT = VectorType::get(EltTy, NumSrcElts); | 2284 Type *VT = VectorType::get(EltTy, NumSrcElts); |
1434 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0), | 2285 Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0), |
1435 PointerType::getUnqual(VT)); | 2286 PointerType::getUnqual(VT)); |
1436 Value *Load = Builder.CreateAlignedLoad(Op, 1); | 2287 Value *Load = Builder.CreateAlignedLoad(VT, Op, 1); |
1437 if (NumSrcElts == 2) | 2288 if (NumSrcElts == 2) |
1438 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), | 2289 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), |
1439 { 0, 1, 0, 1 }); | 2290 { 0, 1, 0, 1 }); |
1440 else | 2291 else |
1441 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), | 2292 Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()), |
1489 Constant::getNullValue(MaskTy)); | 2340 Constant::getNullValue(MaskTy)); |
1490 | 2341 |
1491 if (CI->getNumArgOperands() == 3) | 2342 if (CI->getNumArgOperands() == 3) |
1492 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, | 2343 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, |
1493 CI->getArgOperand(1)); | 2344 CI->getArgOperand(1)); |
2345 } else if (IsX86 && (Name.startswith("sse2.padds.") || | |
2346 Name.startswith("sse2.psubs.") || | |
2347 Name.startswith("avx2.padds.") || | |
2348 Name.startswith("avx2.psubs.") || | |
2349 Name.startswith("avx512.padds.") || | |
2350 Name.startswith("avx512.psubs.") || | |
2351 Name.startswith("avx512.mask.padds.") || | |
2352 Name.startswith("avx512.mask.psubs."))) { | |
2353 bool IsAdd = Name.contains(".padds"); | |
2354 Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd); | |
2355 } else if (IsX86 && (Name.startswith("sse2.paddus.") || | |
2356 Name.startswith("sse2.psubus.") || | |
2357 Name.startswith("avx2.paddus.") || | |
2358 Name.startswith("avx2.psubus.") || | |
2359 Name.startswith("avx512.mask.paddus.") || | |
2360 Name.startswith("avx512.mask.psubus."))) { | |
2361 bool IsAdd = Name.contains(".paddus"); | |
2362 Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd); | |
1494 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) { | 2363 } else if (IsX86 && Name.startswith("avx512.mask.palignr.")) { |
1495 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0), | 2364 Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0), |
1496 CI->getArgOperand(1), | 2365 CI->getArgOperand(1), |
1497 CI->getArgOperand(2), | 2366 CI->getArgOperand(2), |
1498 CI->getArgOperand(3), | 2367 CI->getArgOperand(3), |
1806 | 2675 |
1807 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); | 2676 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); |
1808 | 2677 |
1809 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | 2678 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
1810 CI->getArgOperand(2)); | 2679 CI->getArgOperand(2)); |
1811 } else if (IsX86 && Name.startswith("avx512.mask.pand.")) { | 2680 } else if (IsX86 && (Name.startswith("avx512.mask.and.") || |
1812 Rep = Builder.CreateAnd(CI->getArgOperand(0), CI->getArgOperand(1)); | 2681 Name.startswith("avx512.mask.pand."))) { |
1813 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | |
1814 CI->getArgOperand(2)); | |
1815 } else if (IsX86 && Name.startswith("avx512.mask.pandn.")) { | |
1816 Rep = Builder.CreateAnd(Builder.CreateNot(CI->getArgOperand(0)), | |
1817 CI->getArgOperand(1)); | |
1818 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | |
1819 CI->getArgOperand(2)); | |
1820 } else if (IsX86 && Name.startswith("avx512.mask.por.")) { | |
1821 Rep = Builder.CreateOr(CI->getArgOperand(0), CI->getArgOperand(1)); | |
1822 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | |
1823 CI->getArgOperand(2)); | |
1824 } else if (IsX86 && Name.startswith("avx512.mask.pxor.")) { | |
1825 Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1)); | |
1826 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | |
1827 CI->getArgOperand(2)); | |
1828 } else if (IsX86 && Name.startswith("avx512.mask.and.")) { | |
1829 VectorType *FTy = cast<VectorType>(CI->getType()); | 2682 VectorType *FTy = cast<VectorType>(CI->getType()); |
1830 VectorType *ITy = VectorType::getInteger(FTy); | 2683 VectorType *ITy = VectorType::getInteger(FTy); |
1831 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy), | 2684 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy), |
1832 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); | 2685 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); |
1833 Rep = Builder.CreateBitCast(Rep, FTy); | 2686 Rep = Builder.CreateBitCast(Rep, FTy); |
1834 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | 2687 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
1835 CI->getArgOperand(2)); | 2688 CI->getArgOperand(2)); |
1836 } else if (IsX86 && Name.startswith("avx512.mask.andn.")) { | 2689 } else if (IsX86 && (Name.startswith("avx512.mask.andn.") || |
2690 Name.startswith("avx512.mask.pandn."))) { | |
1837 VectorType *FTy = cast<VectorType>(CI->getType()); | 2691 VectorType *FTy = cast<VectorType>(CI->getType()); |
1838 VectorType *ITy = VectorType::getInteger(FTy); | 2692 VectorType *ITy = VectorType::getInteger(FTy); |
1839 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy)); | 2693 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy)); |
1840 Rep = Builder.CreateAnd(Rep, | 2694 Rep = Builder.CreateAnd(Rep, |
1841 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); | 2695 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); |
1842 Rep = Builder.CreateBitCast(Rep, FTy); | 2696 Rep = Builder.CreateBitCast(Rep, FTy); |
1843 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | 2697 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
1844 CI->getArgOperand(2)); | 2698 CI->getArgOperand(2)); |
1845 } else if (IsX86 && Name.startswith("avx512.mask.or.")) { | 2699 } else if (IsX86 && (Name.startswith("avx512.mask.or.") || |
2700 Name.startswith("avx512.mask.por."))) { | |
1846 VectorType *FTy = cast<VectorType>(CI->getType()); | 2701 VectorType *FTy = cast<VectorType>(CI->getType()); |
1847 VectorType *ITy = VectorType::getInteger(FTy); | 2702 VectorType *ITy = VectorType::getInteger(FTy); |
1848 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy), | 2703 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy), |
1849 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); | 2704 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); |
1850 Rep = Builder.CreateBitCast(Rep, FTy); | 2705 Rep = Builder.CreateBitCast(Rep, FTy); |
1851 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | 2706 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
1852 CI->getArgOperand(2)); | 2707 CI->getArgOperand(2)); |
1853 } else if (IsX86 && Name.startswith("avx512.mask.xor.")) { | 2708 } else if (IsX86 && (Name.startswith("avx512.mask.xor.") || |
2709 Name.startswith("avx512.mask.pxor."))) { | |
1854 VectorType *FTy = cast<VectorType>(CI->getType()); | 2710 VectorType *FTy = cast<VectorType>(CI->getType()); |
1855 VectorType *ITy = VectorType::getInteger(FTy); | 2711 VectorType *ITy = VectorType::getInteger(FTy); |
1856 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy), | 2712 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy), |
1857 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); | 2713 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); |
1858 Rep = Builder.CreateBitCast(Rep, FTy); | 2714 Rep = Builder.CreateBitCast(Rep, FTy); |
1868 CI->getArgOperand(2)); | 2724 CI->getArgOperand(2)); |
1869 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) { | 2725 } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) { |
1870 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1)); | 2726 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1)); |
1871 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | 2727 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
1872 CI->getArgOperand(2)); | 2728 CI->getArgOperand(2)); |
1873 } else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) { | 2729 } else if (IsX86 && Name.startswith("avx512.mask.add.p")) { |
1874 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1)); | 2730 if (Name.endswith(".512")) { |
2731 Intrinsic::ID IID; | |
2732 if (Name[17] == 's') | |
2733 IID = Intrinsic::x86_avx512_add_ps_512; | |
2734 else | |
2735 IID = Intrinsic::x86_avx512_add_pd_512; | |
2736 | |
2737 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), | |
2738 { CI->getArgOperand(0), CI->getArgOperand(1), | |
2739 CI->getArgOperand(4) }); | |
2740 } else { | |
2741 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1)); | |
2742 } | |
1875 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | 2743 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
1876 CI->getArgOperand(2)); | 2744 CI->getArgOperand(2)); |
1877 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) { | 2745 } else if (IsX86 && Name.startswith("avx512.mask.div.p")) { |
1878 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1)); | 2746 if (Name.endswith(".512")) { |
2747 Intrinsic::ID IID; | |
2748 if (Name[17] == 's') | |
2749 IID = Intrinsic::x86_avx512_div_ps_512; | |
2750 else | |
2751 IID = Intrinsic::x86_avx512_div_pd_512; | |
2752 | |
2753 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), | |
2754 { CI->getArgOperand(0), CI->getArgOperand(1), | |
2755 CI->getArgOperand(4) }); | |
2756 } else { | |
2757 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1)); | |
2758 } | |
1879 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | 2759 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
1880 CI->getArgOperand(2)); | 2760 CI->getArgOperand(2)); |
1881 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) { | 2761 } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) { |
1882 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1)); | 2762 if (Name.endswith(".512")) { |
2763 Intrinsic::ID IID; | |
2764 if (Name[17] == 's') | |
2765 IID = Intrinsic::x86_avx512_mul_ps_512; | |
2766 else | |
2767 IID = Intrinsic::x86_avx512_mul_pd_512; | |
2768 | |
2769 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), | |
2770 { CI->getArgOperand(0), CI->getArgOperand(1), | |
2771 CI->getArgOperand(4) }); | |
2772 } else { | |
2773 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1)); | |
2774 } | |
1883 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | 2775 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
1884 CI->getArgOperand(2)); | 2776 CI->getArgOperand(2)); |
1885 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) { | 2777 } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) { |
1886 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1)); | 2778 if (Name.endswith(".512")) { |
2779 Intrinsic::ID IID; | |
2780 if (Name[17] == 's') | |
2781 IID = Intrinsic::x86_avx512_sub_ps_512; | |
2782 else | |
2783 IID = Intrinsic::x86_avx512_sub_pd_512; | |
2784 | |
2785 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), | |
2786 { CI->getArgOperand(0), CI->getArgOperand(1), | |
2787 CI->getArgOperand(4) }); | |
2788 } else { | |
2789 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1)); | |
2790 } | |
2791 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | |
2792 CI->getArgOperand(2)); | |
2793 } else if (IsX86 && (Name.startswith("avx512.mask.max.p") || | |
2794 Name.startswith("avx512.mask.min.p")) && | |
2795 Name.drop_front(18) == ".512") { | |
2796 bool IsDouble = Name[17] == 'd'; | |
2797 bool IsMin = Name[13] == 'i'; | |
2798 static const Intrinsic::ID MinMaxTbl[2][2] = { | |
2799 { Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512 }, | |
2800 { Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512 } | |
2801 }; | |
2802 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble]; | |
2803 | |
2804 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), | |
2805 { CI->getArgOperand(0), CI->getArgOperand(1), | |
2806 CI->getArgOperand(4) }); | |
1887 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | 2807 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, |
1888 CI->getArgOperand(2)); | 2808 CI->getArgOperand(2)); |
1889 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) { | 2809 } else if (IsX86 && Name.startswith("avx512.mask.lzcnt.")) { |
1890 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), | 2810 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), |
1891 Intrinsic::ctlz, | 2811 Intrinsic::ctlz, |
1892 CI->getType()), | 2812 CI->getType()), |
1893 { CI->getArgOperand(0), Builder.getInt1(false) }); | 2813 { CI->getArgOperand(0), Builder.getInt1(false) }); |
1894 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, | 2814 Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep, |
1895 CI->getArgOperand(1)); | 2815 CI->getArgOperand(1)); |
1896 } else if (IsX86 && (Name.startswith("avx512.mask.max.p") || | |
1897 Name.startswith("avx512.mask.min.p"))) { | |
1898 bool IsMin = Name[13] == 'i'; | |
1899 VectorType *VecTy = cast<VectorType>(CI->getType()); | |
1900 unsigned VecWidth = VecTy->getPrimitiveSizeInBits(); | |
1901 unsigned EltWidth = VecTy->getScalarSizeInBits(); | |
1902 Intrinsic::ID IID; | |
1903 if (!IsMin && VecWidth == 128 && EltWidth == 32) | |
1904 IID = Intrinsic::x86_sse_max_ps; | |
1905 else if (!IsMin && VecWidth == 128 && EltWidth == 64) | |
1906 IID = Intrinsic::x86_sse2_max_pd; | |
1907 else if (!IsMin && VecWidth == 256 && EltWidth == 32) | |
1908 IID = Intrinsic::x86_avx_max_ps_256; | |
1909 else if (!IsMin && VecWidth == 256 && EltWidth == 64) | |
1910 IID = Intrinsic::x86_avx_max_pd_256; | |
1911 else if (IsMin && VecWidth == 128 && EltWidth == 32) | |
1912 IID = Intrinsic::x86_sse_min_ps; | |
1913 else if (IsMin && VecWidth == 128 && EltWidth == 64) | |
1914 IID = Intrinsic::x86_sse2_min_pd; | |
1915 else if (IsMin && VecWidth == 256 && EltWidth == 32) | |
1916 IID = Intrinsic::x86_avx_min_ps_256; | |
1917 else if (IsMin && VecWidth == 256 && EltWidth == 64) | |
1918 IID = Intrinsic::x86_avx_min_pd_256; | |
1919 else | |
1920 llvm_unreachable("Unexpected intrinsic"); | |
1921 | |
1922 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), | |
1923 { CI->getArgOperand(0), CI->getArgOperand(1) }); | |
1924 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | |
1925 CI->getArgOperand(2)); | |
1926 } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) { | |
1927 VectorType *VecTy = cast<VectorType>(CI->getType()); | |
1928 Intrinsic::ID IID; | |
1929 if (VecTy->getPrimitiveSizeInBits() == 128) | |
1930 IID = Intrinsic::x86_ssse3_pshuf_b_128; | |
1931 else if (VecTy->getPrimitiveSizeInBits() == 256) | |
1932 IID = Intrinsic::x86_avx2_pshuf_b; | |
1933 else if (VecTy->getPrimitiveSizeInBits() == 512) | |
1934 IID = Intrinsic::x86_avx512_pshuf_b_512; | |
1935 else | |
1936 llvm_unreachable("Unexpected intrinsic"); | |
1937 | |
1938 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), | |
1939 { CI->getArgOperand(0), CI->getArgOperand(1) }); | |
1940 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | |
1941 CI->getArgOperand(2)); | |
1942 } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") || | |
1943 Name.startswith("avx512.mask.pmulu.dq."))) { | |
1944 bool IsUnsigned = Name[16] == 'u'; | |
1945 VectorType *VecTy = cast<VectorType>(CI->getType()); | |
1946 Intrinsic::ID IID; | |
1947 if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128) | |
1948 IID = Intrinsic::x86_sse41_pmuldq; | |
1949 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256) | |
1950 IID = Intrinsic::x86_avx2_pmul_dq; | |
1951 else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512) | |
1952 IID = Intrinsic::x86_avx512_pmul_dq_512; | |
1953 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128) | |
1954 IID = Intrinsic::x86_sse2_pmulu_dq; | |
1955 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256) | |
1956 IID = Intrinsic::x86_avx2_pmulu_dq; | |
1957 else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512) | |
1958 IID = Intrinsic::x86_avx512_pmulu_dq_512; | |
1959 else | |
1960 llvm_unreachable("Unexpected intrinsic"); | |
1961 | |
1962 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), | |
1963 { CI->getArgOperand(0), CI->getArgOperand(1) }); | |
1964 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | |
1965 CI->getArgOperand(2)); | |
1966 } else if (IsX86 && Name.startswith("avx512.mask.pack")) { | |
1967 bool IsUnsigned = Name[16] == 'u'; | |
1968 bool IsDW = Name[18] == 'd'; | |
1969 VectorType *VecTy = cast<VectorType>(CI->getType()); | |
1970 Intrinsic::ID IID; | |
1971 if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128) | |
1972 IID = Intrinsic::x86_sse2_packsswb_128; | |
1973 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256) | |
1974 IID = Intrinsic::x86_avx2_packsswb; | |
1975 else if (!IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512) | |
1976 IID = Intrinsic::x86_avx512_packsswb_512; | |
1977 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128) | |
1978 IID = Intrinsic::x86_sse2_packssdw_128; | |
1979 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256) | |
1980 IID = Intrinsic::x86_avx2_packssdw; | |
1981 else if (!IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512) | |
1982 IID = Intrinsic::x86_avx512_packssdw_512; | |
1983 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 128) | |
1984 IID = Intrinsic::x86_sse2_packuswb_128; | |
1985 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 256) | |
1986 IID = Intrinsic::x86_avx2_packuswb; | |
1987 else if (IsUnsigned && !IsDW && VecTy->getPrimitiveSizeInBits() == 512) | |
1988 IID = Intrinsic::x86_avx512_packuswb_512; | |
1989 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 128) | |
1990 IID = Intrinsic::x86_sse41_packusdw; | |
1991 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 256) | |
1992 IID = Intrinsic::x86_avx2_packusdw; | |
1993 else if (IsUnsigned && IsDW && VecTy->getPrimitiveSizeInBits() == 512) | |
1994 IID = Intrinsic::x86_avx512_packusdw_512; | |
1995 else | |
1996 llvm_unreachable("Unexpected intrinsic"); | |
1997 | |
1998 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), | |
1999 { CI->getArgOperand(0), CI->getArgOperand(1) }); | |
2000 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | |
2001 CI->getArgOperand(2)); | |
2002 } else if (IsX86 && Name.startswith("avx512.mask.psll")) { | 2816 } else if (IsX86 && Name.startswith("avx512.mask.psll")) { |
2003 bool IsImmediate = Name[16] == 'i' || | 2817 bool IsImmediate = Name[16] == 'i' || |
2004 (Name.size() > 18 && Name[18] == 'i'); | 2818 (Name.size() > 18 && Name[18] == 'i'); |
2005 bool IsVariable = Name[16] == 'v'; | 2819 bool IsVariable = Name[16] == 'v'; |
2006 char Size = Name[16] == '.' ? Name[17] : | 2820 char Size = Name[16] == '.' ? Name[17] : |
2203 Rep = UpgradeX86MaskedShift(Builder, *CI, IID); | 3017 Rep = UpgradeX86MaskedShift(Builder, *CI, IID); |
2204 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) { | 3018 } else if (IsX86 && Name.startswith("avx512.mask.move.s")) { |
2205 Rep = upgradeMaskedMove(Builder, *CI); | 3019 Rep = upgradeMaskedMove(Builder, *CI); |
2206 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) { | 3020 } else if (IsX86 && Name.startswith("avx512.cvtmask2")) { |
2207 Rep = UpgradeMaskToInt(Builder, *CI); | 3021 Rep = UpgradeMaskToInt(Builder, *CI); |
2208 } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) { | |
2209 Intrinsic::ID IID; | |
2210 if (Name.endswith("ps.128")) | |
2211 IID = Intrinsic::x86_avx_vpermilvar_ps; | |
2212 else if (Name.endswith("pd.128")) | |
2213 IID = Intrinsic::x86_avx_vpermilvar_pd; | |
2214 else if (Name.endswith("ps.256")) | |
2215 IID = Intrinsic::x86_avx_vpermilvar_ps_256; | |
2216 else if (Name.endswith("pd.256")) | |
2217 IID = Intrinsic::x86_avx_vpermilvar_pd_256; | |
2218 else if (Name.endswith("ps.512")) | |
2219 IID = Intrinsic::x86_avx512_vpermilvar_ps_512; | |
2220 else if (Name.endswith("pd.512")) | |
2221 IID = Intrinsic::x86_avx512_vpermilvar_pd_512; | |
2222 else | |
2223 llvm_unreachable("Unexpected vpermilvar intrinsic"); | |
2224 | |
2225 Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID); | |
2226 Rep = Builder.CreateCall(Intrin, | |
2227 { CI->getArgOperand(0), CI->getArgOperand(1) }); | |
2228 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | |
2229 CI->getArgOperand(2)); | |
2230 } else if (IsX86 && Name.endswith(".movntdqa")) { | 3022 } else if (IsX86 && Name.endswith(".movntdqa")) { |
2231 Module *M = F->getParent(); | 3023 Module *M = F->getParent(); |
2232 MDNode *Node = MDNode::get( | 3024 MDNode *Node = MDNode::get( |
2233 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); | 3025 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); |
2234 | 3026 |
2236 VectorType *VTy = cast<VectorType>(CI->getType()); | 3028 VectorType *VTy = cast<VectorType>(CI->getType()); |
2237 | 3029 |
2238 // Convert the type of the pointer to a pointer to the stored type. | 3030 // Convert the type of the pointer to a pointer to the stored type. |
2239 Value *BC = | 3031 Value *BC = |
2240 Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast"); | 3032 Builder.CreateBitCast(Ptr, PointerType::getUnqual(VTy), "cast"); |
2241 LoadInst *LI = Builder.CreateAlignedLoad(BC, VTy->getBitWidth() / 8); | 3033 LoadInst *LI = Builder.CreateAlignedLoad(VTy, BC, VTy->getBitWidth() / 8); |
2242 LI->setMetadata(M->getMDKindID("nontemporal"), Node); | 3034 LI->setMetadata(M->getMDKindID("nontemporal"), Node); |
2243 Rep = LI; | 3035 Rep = LI; |
2244 } else if (IsX86 && | 3036 } else if (IsX86 && (Name.startswith("fma.vfmadd.") || |
2245 (Name.startswith("sse2.pavg") || Name.startswith("avx2.pavg") || | 3037 Name.startswith("fma.vfmsub.") || |
2246 Name.startswith("avx512.mask.pavg"))) { | 3038 Name.startswith("fma.vfnmadd.") || |
2247 // llvm.x86.sse2.pavg.b/w, llvm.x86.avx2.pavg.b/w, | 3039 Name.startswith("fma.vfnmsub."))) { |
2248 // llvm.x86.avx512.mask.pavg.b/w | 3040 bool NegMul = Name[6] == 'n'; |
3041 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's'; | |
3042 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's'; | |
3043 | |
3044 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), | |
3045 CI->getArgOperand(2) }; | |
3046 | |
3047 if (IsScalar) { | |
3048 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0); | |
3049 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0); | |
3050 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0); | |
3051 } | |
3052 | |
3053 if (NegMul && !IsScalar) | |
3054 Ops[0] = Builder.CreateFNeg(Ops[0]); | |
3055 if (NegMul && IsScalar) | |
3056 Ops[1] = Builder.CreateFNeg(Ops[1]); | |
3057 if (NegAcc) | |
3058 Ops[2] = Builder.CreateFNeg(Ops[2]); | |
3059 | |
3060 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), | |
3061 Intrinsic::fma, | |
3062 Ops[0]->getType()), | |
3063 Ops); | |
3064 | |
3065 if (IsScalar) | |
3066 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, | |
3067 (uint64_t)0); | |
3068 } else if (IsX86 && Name.startswith("fma4.vfmadd.s")) { | |
3069 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), | |
3070 CI->getArgOperand(2) }; | |
3071 | |
3072 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0); | |
3073 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0); | |
3074 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0); | |
3075 | |
3076 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), | |
3077 Intrinsic::fma, | |
3078 Ops[0]->getType()), | |
3079 Ops); | |
3080 | |
3081 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()), | |
3082 Rep, (uint64_t)0); | |
3083 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.s") || | |
3084 Name.startswith("avx512.maskz.vfmadd.s") || | |
3085 Name.startswith("avx512.mask3.vfmadd.s") || | |
3086 Name.startswith("avx512.mask3.vfmsub.s") || | |
3087 Name.startswith("avx512.mask3.vfnmsub.s"))) { | |
3088 bool IsMask3 = Name[11] == '3'; | |
3089 bool IsMaskZ = Name[11] == 'z'; | |
3090 // Drop the "avx512.mask." to make it easier. | |
3091 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12); | |
3092 bool NegMul = Name[2] == 'n'; | |
3093 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's'; | |
3094 | |
2249 Value *A = CI->getArgOperand(0); | 3095 Value *A = CI->getArgOperand(0); |
2250 Value *B = CI->getArgOperand(1); | 3096 Value *B = CI->getArgOperand(1); |
2251 VectorType *ZextType = VectorType::getExtendedElementVectorType( | 3097 Value *C = CI->getArgOperand(2); |
2252 cast<VectorType>(A->getType())); | 3098 |
2253 Value *ExtendedA = Builder.CreateZExt(A, ZextType); | 3099 if (NegMul && (IsMask3 || IsMaskZ)) |
2254 Value *ExtendedB = Builder.CreateZExt(B, ZextType); | 3100 A = Builder.CreateFNeg(A); |
2255 Value *Sum = Builder.CreateAdd(ExtendedA, ExtendedB); | 3101 if (NegMul && !(IsMask3 || IsMaskZ)) |
2256 Value *AddOne = Builder.CreateAdd(Sum, ConstantInt::get(ZextType, 1)); | 3102 B = Builder.CreateFNeg(B); |
2257 Value *ShiftR = Builder.CreateLShr(AddOne, ConstantInt::get(ZextType, 1)); | 3103 if (NegAcc) |
2258 Rep = Builder.CreateTrunc(ShiftR, A->getType()); | 3104 C = Builder.CreateFNeg(C); |
2259 if (CI->getNumArgOperands() > 2) { | 3105 |
2260 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, | 3106 A = Builder.CreateExtractElement(A, (uint64_t)0); |
2261 CI->getArgOperand(2)); | 3107 B = Builder.CreateExtractElement(B, (uint64_t)0); |
3108 C = Builder.CreateExtractElement(C, (uint64_t)0); | |
3109 | |
3110 if (!isa<ConstantInt>(CI->getArgOperand(4)) || | |
3111 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) { | |
3112 Value *Ops[] = { A, B, C, CI->getArgOperand(4) }; | |
3113 | |
3114 Intrinsic::ID IID; | |
3115 if (Name.back() == 'd') | |
3116 IID = Intrinsic::x86_avx512_vfmadd_f64; | |
3117 else | |
3118 IID = Intrinsic::x86_avx512_vfmadd_f32; | |
3119 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), IID); | |
3120 Rep = Builder.CreateCall(FMA, Ops); | |
3121 } else { | |
3122 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), | |
3123 Intrinsic::fma, | |
3124 A->getType()); | |
3125 Rep = Builder.CreateCall(FMA, { A, B, C }); | |
2262 } | 3126 } |
3127 | |
3128 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) : | |
3129 IsMask3 ? C : A; | |
3130 | |
3131 // For Mask3 with NegAcc, we need to create a new extractelement that | |
3132 // avoids the negation above. | |
3133 if (NegAcc && IsMask3) | |
3134 PassThru = Builder.CreateExtractElement(CI->getArgOperand(2), | |
3135 (uint64_t)0); | |
3136 | |
3137 Rep = EmitX86ScalarSelect(Builder, CI->getArgOperand(3), | |
3138 Rep, PassThru); | |
3139 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), | |
3140 Rep, (uint64_t)0); | |
3141 } else if (IsX86 && (Name.startswith("avx512.mask.vfmadd.p") || | |
3142 Name.startswith("avx512.mask.vfnmadd.p") || | |
3143 Name.startswith("avx512.mask.vfnmsub.p") || | |
3144 Name.startswith("avx512.mask3.vfmadd.p") || | |
3145 Name.startswith("avx512.mask3.vfmsub.p") || | |
3146 Name.startswith("avx512.mask3.vfnmsub.p") || | |
3147 Name.startswith("avx512.maskz.vfmadd.p"))) { | |
3148 bool IsMask3 = Name[11] == '3'; | |
3149 bool IsMaskZ = Name[11] == 'z'; | |
3150 // Drop the "avx512.mask." to make it easier. | |
3151 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12); | |
3152 bool NegMul = Name[2] == 'n'; | |
3153 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's'; | |
3154 | |
3155 Value *A = CI->getArgOperand(0); | |
3156 Value *B = CI->getArgOperand(1); | |
3157 Value *C = CI->getArgOperand(2); | |
3158 | |
3159 if (NegMul && (IsMask3 || IsMaskZ)) | |
3160 A = Builder.CreateFNeg(A); | |
3161 if (NegMul && !(IsMask3 || IsMaskZ)) | |
3162 B = Builder.CreateFNeg(B); | |
3163 if (NegAcc) | |
3164 C = Builder.CreateFNeg(C); | |
3165 | |
3166 if (CI->getNumArgOperands() == 5 && | |
3167 (!isa<ConstantInt>(CI->getArgOperand(4)) || | |
3168 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) { | |
3169 Intrinsic::ID IID; | |
3170 // Check the character before ".512" in string. | |
3171 if (Name[Name.size()-5] == 's') | |
3172 IID = Intrinsic::x86_avx512_vfmadd_ps_512; | |
3173 else | |
3174 IID = Intrinsic::x86_avx512_vfmadd_pd_512; | |
3175 | |
3176 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), | |
3177 { A, B, C, CI->getArgOperand(4) }); | |
3178 } else { | |
3179 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), | |
3180 Intrinsic::fma, | |
3181 A->getType()); | |
3182 Rep = Builder.CreateCall(FMA, { A, B, C }); | |
3183 } | |
3184 | |
3185 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) : | |
3186 IsMask3 ? CI->getArgOperand(2) : | |
3187 CI->getArgOperand(0); | |
3188 | |
3189 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); | |
3190 } else if (IsX86 && (Name.startswith("fma.vfmaddsub.p") || | |
3191 Name.startswith("fma.vfmsubadd.p"))) { | |
3192 bool IsSubAdd = Name[7] == 's'; | |
3193 int NumElts = CI->getType()->getVectorNumElements(); | |
3194 | |
3195 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), | |
3196 CI->getArgOperand(2) }; | |
3197 | |
3198 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma, | |
3199 Ops[0]->getType()); | |
3200 Value *Odd = Builder.CreateCall(FMA, Ops); | |
3201 Ops[2] = Builder.CreateFNeg(Ops[2]); | |
3202 Value *Even = Builder.CreateCall(FMA, Ops); | |
3203 | |
3204 if (IsSubAdd) | |
3205 std::swap(Even, Odd); | |
3206 | |
3207 SmallVector<uint32_t, 32> Idxs(NumElts); | |
3208 for (int i = 0; i != NumElts; ++i) | |
3209 Idxs[i] = i + (i % 2) * NumElts; | |
3210 | |
3211 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs); | |
3212 } else if (IsX86 && (Name.startswith("avx512.mask.vfmaddsub.p") || | |
3213 Name.startswith("avx512.mask3.vfmaddsub.p") || | |
3214 Name.startswith("avx512.maskz.vfmaddsub.p") || | |
3215 Name.startswith("avx512.mask3.vfmsubadd.p"))) { | |
3216 bool IsMask3 = Name[11] == '3'; | |
3217 bool IsMaskZ = Name[11] == 'z'; | |
3218 // Drop the "avx512.mask." to make it easier. | |
3219 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12); | |
3220 bool IsSubAdd = Name[3] == 's'; | |
3221 if (CI->getNumArgOperands() == 5 && | |
3222 (!isa<ConstantInt>(CI->getArgOperand(4)) || | |
3223 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) { | |
3224 Intrinsic::ID IID; | |
3225 // Check the character before ".512" in string. | |
3226 if (Name[Name.size()-5] == 's') | |
3227 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512; | |
3228 else | |
3229 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512; | |
3230 | |
3231 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), | |
3232 CI->getArgOperand(2), CI->getArgOperand(4) }; | |
3233 if (IsSubAdd) | |
3234 Ops[2] = Builder.CreateFNeg(Ops[2]); | |
3235 | |
3236 Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID), | |
3237 {CI->getArgOperand(0), CI->getArgOperand(1), | |
3238 CI->getArgOperand(2), CI->getArgOperand(4)}); | |
3239 } else { | |
3240 int NumElts = CI->getType()->getVectorNumElements(); | |
3241 | |
3242 Value *Ops[] = { CI->getArgOperand(0), CI->getArgOperand(1), | |
3243 CI->getArgOperand(2) }; | |
3244 | |
3245 Function *FMA = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::fma, | |
3246 Ops[0]->getType()); | |
3247 Value *Odd = Builder.CreateCall(FMA, Ops); | |
3248 Ops[2] = Builder.CreateFNeg(Ops[2]); | |
3249 Value *Even = Builder.CreateCall(FMA, Ops); | |
3250 | |
3251 if (IsSubAdd) | |
3252 std::swap(Even, Odd); | |
3253 | |
3254 SmallVector<uint32_t, 32> Idxs(NumElts); | |
3255 for (int i = 0; i != NumElts; ++i) | |
3256 Idxs[i] = i + (i % 2) * NumElts; | |
3257 | |
3258 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs); | |
3259 } | |
3260 | |
3261 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) : | |
3262 IsMask3 ? CI->getArgOperand(2) : | |
3263 CI->getArgOperand(0); | |
3264 | |
3265 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); | |
3266 } else if (IsX86 && (Name.startswith("avx512.mask.pternlog.") || | |
3267 Name.startswith("avx512.maskz.pternlog."))) { | |
3268 bool ZeroMask = Name[11] == 'z'; | |
3269 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); | |
3270 unsigned EltWidth = CI->getType()->getScalarSizeInBits(); | |
3271 Intrinsic::ID IID; | |
3272 if (VecWidth == 128 && EltWidth == 32) | |
3273 IID = Intrinsic::x86_avx512_pternlog_d_128; | |
3274 else if (VecWidth == 256 && EltWidth == 32) | |
3275 IID = Intrinsic::x86_avx512_pternlog_d_256; | |
3276 else if (VecWidth == 512 && EltWidth == 32) | |
3277 IID = Intrinsic::x86_avx512_pternlog_d_512; | |
3278 else if (VecWidth == 128 && EltWidth == 64) | |
3279 IID = Intrinsic::x86_avx512_pternlog_q_128; | |
3280 else if (VecWidth == 256 && EltWidth == 64) | |
3281 IID = Intrinsic::x86_avx512_pternlog_q_256; | |
3282 else if (VecWidth == 512 && EltWidth == 64) | |
3283 IID = Intrinsic::x86_avx512_pternlog_q_512; | |
3284 else | |
3285 llvm_unreachable("Unexpected intrinsic"); | |
3286 | |
3287 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1), | |
3288 CI->getArgOperand(2), CI->getArgOperand(3) }; | |
3289 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), | |
3290 Args); | |
3291 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) | |
3292 : CI->getArgOperand(0); | |
3293 Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru); | |
3294 } else if (IsX86 && (Name.startswith("avx512.mask.vpmadd52") || | |
3295 Name.startswith("avx512.maskz.vpmadd52"))) { | |
3296 bool ZeroMask = Name[11] == 'z'; | |
3297 bool High = Name[20] == 'h' || Name[21] == 'h'; | |
3298 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); | |
3299 Intrinsic::ID IID; | |
3300 if (VecWidth == 128 && !High) | |
3301 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128; | |
3302 else if (VecWidth == 256 && !High) | |
3303 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256; | |
3304 else if (VecWidth == 512 && !High) | |
3305 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512; | |
3306 else if (VecWidth == 128 && High) | |
3307 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128; | |
3308 else if (VecWidth == 256 && High) | |
3309 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256; | |
3310 else if (VecWidth == 512 && High) | |
3311 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512; | |
3312 else | |
3313 llvm_unreachable("Unexpected intrinsic"); | |
3314 | |
3315 Value *Args[] = { CI->getArgOperand(0) , CI->getArgOperand(1), | |
3316 CI->getArgOperand(2) }; | |
3317 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), | |
3318 Args); | |
3319 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) | |
3320 : CI->getArgOperand(0); | |
3321 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); | |
3322 } else if (IsX86 && (Name.startswith("avx512.mask.vpermi2var.") || | |
3323 Name.startswith("avx512.mask.vpermt2var.") || | |
3324 Name.startswith("avx512.maskz.vpermt2var."))) { | |
3325 bool ZeroMask = Name[11] == 'z'; | |
3326 bool IndexForm = Name[17] == 'i'; | |
3327 Rep = UpgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm); | |
3328 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpbusd.") || | |
3329 Name.startswith("avx512.maskz.vpdpbusd.") || | |
3330 Name.startswith("avx512.mask.vpdpbusds.") || | |
3331 Name.startswith("avx512.maskz.vpdpbusds."))) { | |
3332 bool ZeroMask = Name[11] == 'z'; | |
3333 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's'; | |
3334 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); | |
3335 Intrinsic::ID IID; | |
3336 if (VecWidth == 128 && !IsSaturating) | |
3337 IID = Intrinsic::x86_avx512_vpdpbusd_128; | |
3338 else if (VecWidth == 256 && !IsSaturating) | |
3339 IID = Intrinsic::x86_avx512_vpdpbusd_256; | |
3340 else if (VecWidth == 512 && !IsSaturating) | |
3341 IID = Intrinsic::x86_avx512_vpdpbusd_512; | |
3342 else if (VecWidth == 128 && IsSaturating) | |
3343 IID = Intrinsic::x86_avx512_vpdpbusds_128; | |
3344 else if (VecWidth == 256 && IsSaturating) | |
3345 IID = Intrinsic::x86_avx512_vpdpbusds_256; | |
3346 else if (VecWidth == 512 && IsSaturating) | |
3347 IID = Intrinsic::x86_avx512_vpdpbusds_512; | |
3348 else | |
3349 llvm_unreachable("Unexpected intrinsic"); | |
3350 | |
3351 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1), | |
3352 CI->getArgOperand(2) }; | |
3353 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), | |
3354 Args); | |
3355 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) | |
3356 : CI->getArgOperand(0); | |
3357 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); | |
3358 } else if (IsX86 && (Name.startswith("avx512.mask.vpdpwssd.") || | |
3359 Name.startswith("avx512.maskz.vpdpwssd.") || | |
3360 Name.startswith("avx512.mask.vpdpwssds.") || | |
3361 Name.startswith("avx512.maskz.vpdpwssds."))) { | |
3362 bool ZeroMask = Name[11] == 'z'; | |
3363 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's'; | |
3364 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); | |
3365 Intrinsic::ID IID; | |
3366 if (VecWidth == 128 && !IsSaturating) | |
3367 IID = Intrinsic::x86_avx512_vpdpwssd_128; | |
3368 else if (VecWidth == 256 && !IsSaturating) | |
3369 IID = Intrinsic::x86_avx512_vpdpwssd_256; | |
3370 else if (VecWidth == 512 && !IsSaturating) | |
3371 IID = Intrinsic::x86_avx512_vpdpwssd_512; | |
3372 else if (VecWidth == 128 && IsSaturating) | |
3373 IID = Intrinsic::x86_avx512_vpdpwssds_128; | |
3374 else if (VecWidth == 256 && IsSaturating) | |
3375 IID = Intrinsic::x86_avx512_vpdpwssds_256; | |
3376 else if (VecWidth == 512 && IsSaturating) | |
3377 IID = Intrinsic::x86_avx512_vpdpwssds_512; | |
3378 else | |
3379 llvm_unreachable("Unexpected intrinsic"); | |
3380 | |
3381 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1), | |
3382 CI->getArgOperand(2) }; | |
3383 Rep = Builder.CreateCall(Intrinsic::getDeclaration(CI->getModule(), IID), | |
3384 Args); | |
3385 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) | |
3386 : CI->getArgOperand(0); | |
3387 Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); | |
3388 } else if (IsX86 && (Name == "addcarryx.u32" || Name == "addcarryx.u64" || | |
3389 Name == "addcarry.u32" || Name == "addcarry.u64" || | |
3390 Name == "subborrow.u32" || Name == "subborrow.u64")) { | |
3391 Intrinsic::ID IID; | |
3392 if (Name[0] == 'a' && Name.back() == '2') | |
3393 IID = Intrinsic::x86_addcarry_32; | |
3394 else if (Name[0] == 'a' && Name.back() == '4') | |
3395 IID = Intrinsic::x86_addcarry_64; | |
3396 else if (Name[0] == 's' && Name.back() == '2') | |
3397 IID = Intrinsic::x86_subborrow_32; | |
3398 else if (Name[0] == 's' && Name.back() == '4') | |
3399 IID = Intrinsic::x86_subborrow_64; | |
3400 else | |
3401 llvm_unreachable("Unexpected intrinsic"); | |
3402 | |
3403 // Make a call with 3 operands. | |
3404 Value *Args[] = { CI->getArgOperand(0), CI->getArgOperand(1), | |
3405 CI->getArgOperand(2)}; | |
3406 Value *NewCall = Builder.CreateCall( | |
3407 Intrinsic::getDeclaration(CI->getModule(), IID), | |
3408 Args); | |
3409 | |
3410 // Extract the second result and store it. | |
3411 Value *Data = Builder.CreateExtractValue(NewCall, 1); | |
3412 // Cast the pointer to the right type. | |
3413 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(3), | |
3414 llvm::PointerType::getUnqual(Data->getType())); | |
3415 Builder.CreateAlignedStore(Data, Ptr, 1); | |
3416 // Replace the original call result with the first result of the new call. | |
3417 Value *CF = Builder.CreateExtractValue(NewCall, 0); | |
3418 | |
3419 CI->replaceAllUsesWith(CF); | |
3420 Rep = nullptr; | |
3421 } else if (IsX86 && Name.startswith("avx512.mask.") && | |
3422 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) { | |
3423 // Rep will be updated by the call in the condition. | |
2263 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) { | 3424 } else if (IsNVVM && (Name == "abs.i" || Name == "abs.ll")) { |
2264 Value *Arg = CI->getArgOperand(0); | 3425 Value *Arg = CI->getArgOperand(0); |
2265 Value *Neg = Builder.CreateNeg(Arg, "neg"); | 3426 Value *Neg = Builder.CreateNeg(Arg, "neg"); |
2266 Value *Cmp = Builder.CreateICmpSGE( | 3427 Value *Cmp = Builder.CreateICmpSGE( |
2267 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond"); | 3428 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond"); |
2268 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs"); | 3429 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs"); |
3430 } else if (IsNVVM && (Name.startswith("atomic.load.add.f32.p") || | |
3431 Name.startswith("atomic.load.add.f64.p"))) { | |
3432 Value *Ptr = CI->getArgOperand(0); | |
3433 Value *Val = CI->getArgOperand(1); | |
3434 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, | |
3435 AtomicOrdering::SequentiallyConsistent); | |
2269 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" || | 3436 } else if (IsNVVM && (Name == "max.i" || Name == "max.ll" || |
2270 Name == "max.ui" || Name == "max.ull")) { | 3437 Name == "max.ui" || Name == "max.ull")) { |
2271 Value *Arg0 = CI->getArgOperand(0); | 3438 Value *Arg0 = CI->getArgOperand(0); |
2272 Value *Arg1 = CI->getArgOperand(1); | 3439 Value *Arg1 = CI->getArgOperand(1); |
2273 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull") | 3440 Value *Cmp = Name.endswith(".ui") || Name.endswith(".ull") |
2325 switch (NewFn->getIntrinsicID()) { | 3492 switch (NewFn->getIntrinsicID()) { |
2326 default: { | 3493 default: { |
2327 DefaultCase(); | 3494 DefaultCase(); |
2328 return; | 3495 return; |
2329 } | 3496 } |
2330 | 3497 case Intrinsic::experimental_vector_reduce_v2_fmul: { |
3498 SmallVector<Value *, 2> Args; | |
3499 if (CI->isFast()) | |
3500 Args.push_back(ConstantFP::get(CI->getOperand(0)->getType(), 1.0)); | |
3501 else | |
3502 Args.push_back(CI->getOperand(0)); | |
3503 Args.push_back(CI->getOperand(1)); | |
3504 NewCall = Builder.CreateCall(NewFn, Args); | |
3505 cast<Instruction>(NewCall)->copyFastMathFlags(CI); | |
3506 break; | |
3507 } | |
3508 case Intrinsic::experimental_vector_reduce_v2_fadd: { | |
3509 SmallVector<Value *, 2> Args; | |
3510 if (CI->isFast()) | |
3511 Args.push_back(Constant::getNullValue(CI->getOperand(0)->getType())); | |
3512 else | |
3513 Args.push_back(CI->getOperand(0)); | |
3514 Args.push_back(CI->getOperand(1)); | |
3515 NewCall = Builder.CreateCall(NewFn, Args); | |
3516 cast<Instruction>(NewCall)->copyFastMathFlags(CI); | |
3517 break; | |
3518 } | |
2331 case Intrinsic::arm_neon_vld1: | 3519 case Intrinsic::arm_neon_vld1: |
2332 case Intrinsic::arm_neon_vld2: | 3520 case Intrinsic::arm_neon_vld2: |
2333 case Intrinsic::arm_neon_vld3: | 3521 case Intrinsic::arm_neon_vld3: |
2334 case Intrinsic::arm_neon_vld4: | 3522 case Intrinsic::arm_neon_vld4: |
2335 case Intrinsic::arm_neon_vld2lane: | 3523 case Intrinsic::arm_neon_vld2lane: |
2362 | 3550 |
2363 case Intrinsic::objectsize: { | 3551 case Intrinsic::objectsize: { |
2364 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2 | 3552 Value *NullIsUnknownSize = CI->getNumArgOperands() == 2 |
2365 ? Builder.getFalse() | 3553 ? Builder.getFalse() |
2366 : CI->getArgOperand(2); | 3554 : CI->getArgOperand(2); |
3555 Value *Dynamic = | |
3556 CI->getNumArgOperands() < 4 ? Builder.getFalse() : CI->getArgOperand(3); | |
2367 NewCall = Builder.CreateCall( | 3557 NewCall = Builder.CreateCall( |
2368 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize}); | 3558 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic}); |
2369 break; | 3559 break; |
2370 } | 3560 } |
2371 | 3561 |
2372 case Intrinsic::ctpop: | 3562 case Intrinsic::ctpop: |
2373 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); | 3563 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); |
2430 | 3620 |
2431 NewCall = Builder.CreateCall(NewFn, {BC0, BC1}); | 3621 NewCall = Builder.CreateCall(NewFn, {BC0, BC1}); |
2432 break; | 3622 break; |
2433 } | 3623 } |
2434 | 3624 |
3625 case Intrinsic::x86_rdtscp: { | |
3626 // This used to take 1 arguments. If we have no arguments, it is already | |
3627 // upgraded. | |
3628 if (CI->getNumOperands() == 0) | |
3629 return; | |
3630 | |
3631 NewCall = Builder.CreateCall(NewFn); | |
3632 // Extract the second result and store it. | |
3633 Value *Data = Builder.CreateExtractValue(NewCall, 1); | |
3634 // Cast the pointer to the right type. | |
3635 Value *Ptr = Builder.CreateBitCast(CI->getArgOperand(0), | |
3636 llvm::PointerType::getUnqual(Data->getType())); | |
3637 Builder.CreateAlignedStore(Data, Ptr, 1); | |
3638 // Replace the original call result with the first result of the new call. | |
3639 Value *TSC = Builder.CreateExtractValue(NewCall, 0); | |
3640 | |
3641 std::string Name = CI->getName(); | |
3642 if (!Name.empty()) { | |
3643 CI->setName(Name + ".old"); | |
3644 NewCall->setName(Name); | |
3645 } | |
3646 CI->replaceAllUsesWith(TSC); | |
3647 CI->eraseFromParent(); | |
3648 return; | |
3649 } | |
3650 | |
2435 case Intrinsic::x86_sse41_insertps: | 3651 case Intrinsic::x86_sse41_insertps: |
2436 case Intrinsic::x86_sse41_dppd: | 3652 case Intrinsic::x86_sse41_dppd: |
2437 case Intrinsic::x86_sse41_dpps: | 3653 case Intrinsic::x86_sse41_dpps: |
2438 case Intrinsic::x86_sse41_mpsadbw: | 3654 case Intrinsic::x86_sse41_mpsadbw: |
2439 case Intrinsic::x86_avx_dp_ps_256: | 3655 case Intrinsic::x86_avx_dp_ps_256: |
2445 | 3661 |
2446 // Replace the last argument with a trunc. | 3662 // Replace the last argument with a trunc. |
2447 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc"); | 3663 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc"); |
2448 NewCall = Builder.CreateCall(NewFn, Args); | 3664 NewCall = Builder.CreateCall(NewFn, Args); |
2449 break; | 3665 break; |
2450 } | |
2451 | |
2452 case Intrinsic::x86_avx512_mask_cmp_pd_128: | |
2453 case Intrinsic::x86_avx512_mask_cmp_pd_256: | |
2454 case Intrinsic::x86_avx512_mask_cmp_pd_512: | |
2455 case Intrinsic::x86_avx512_mask_cmp_ps_128: | |
2456 case Intrinsic::x86_avx512_mask_cmp_ps_256: | |
2457 case Intrinsic::x86_avx512_mask_cmp_ps_512: { | |
2458 SmallVector<Value *, 4> Args; | |
2459 Args.push_back(CI->getArgOperand(0)); | |
2460 Args.push_back(CI->getArgOperand(1)); | |
2461 Args.push_back(CI->getArgOperand(2)); | |
2462 if (CI->getNumArgOperands() == 5) | |
2463 Args.push_back(CI->getArgOperand(4)); | |
2464 | |
2465 NewCall = Builder.CreateCall(NewFn, Args); | |
2466 unsigned NumElts = Args[0]->getType()->getVectorNumElements(); | |
2467 Value *Res = ApplyX86MaskOn1BitsVec(Builder, NewCall, CI->getArgOperand(3), | |
2468 NumElts); | |
2469 | |
2470 std::string Name = CI->getName(); | |
2471 if (!Name.empty()) { | |
2472 CI->setName(Name + ".old"); | |
2473 NewCall->setName(Name); | |
2474 } | |
2475 CI->replaceAllUsesWith(Res); | |
2476 CI->eraseFromParent(); | |
2477 return; | |
2478 } | 3666 } |
2479 | 3667 |
2480 case Intrinsic::thread_pointer: { | 3668 case Intrinsic::thread_pointer: { |
2481 NewCall = Builder.CreateCall(NewFn, {}); | 3669 NewCall = Builder.CreateCall(NewFn, {}); |
2482 break; | 3670 break; |
2638 // Diagnose a version mismatch. | 3826 // Diagnose a version mismatch. |
2639 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); | 3827 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); |
2640 M.getContext().diagnose(DiagVersion); | 3828 M.getContext().diagnose(DiagVersion); |
2641 } | 3829 } |
2642 return Modified; | 3830 return Modified; |
3831 } | |
3832 | |
3833 /// This checks for objc retain release marker which should be upgraded. It | |
3834 /// returns true if module is modified. | |
3835 static bool UpgradeRetainReleaseMarker(Module &M) { | |
3836 bool Changed = false; | |
3837 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker"; | |
3838 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey); | |
3839 if (ModRetainReleaseMarker) { | |
3840 MDNode *Op = ModRetainReleaseMarker->getOperand(0); | |
3841 if (Op) { | |
3842 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0)); | |
3843 if (ID) { | |
3844 SmallVector<StringRef, 4> ValueComp; | |
3845 ID->getString().split(ValueComp, "#"); | |
3846 if (ValueComp.size() == 2) { | |
3847 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str(); | |
3848 ID = MDString::get(M.getContext(), NewValue); | |
3849 } | |
3850 M.addModuleFlag(Module::Error, MarkerKey, ID); | |
3851 M.eraseNamedMetadata(ModRetainReleaseMarker); | |
3852 Changed = true; | |
3853 } | |
3854 } | |
3855 } | |
3856 return Changed; | |
3857 } | |
3858 | |
3859 void llvm::UpgradeARCRuntime(Module &M) { | |
3860 // This lambda converts normal function calls to ARC runtime functions to | |
3861 // intrinsic calls. | |
3862 auto UpgradeToIntrinsic = [&](const char *OldFunc, | |
3863 llvm::Intrinsic::ID IntrinsicFunc) { | |
3864 Function *Fn = M.getFunction(OldFunc); | |
3865 | |
3866 if (!Fn) | |
3867 return; | |
3868 | |
3869 Function *NewFn = llvm::Intrinsic::getDeclaration(&M, IntrinsicFunc); | |
3870 | |
3871 for (auto I = Fn->user_begin(), E = Fn->user_end(); I != E;) { | |
3872 CallInst *CI = dyn_cast<CallInst>(*I++); | |
3873 if (!CI || CI->getCalledFunction() != Fn) | |
3874 continue; | |
3875 | |
3876 IRBuilder<> Builder(CI->getParent(), CI->getIterator()); | |
3877 FunctionType *NewFuncTy = NewFn->getFunctionType(); | |
3878 SmallVector<Value *, 2> Args; | |
3879 | |
3880 for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) { | |
3881 Value *Arg = CI->getArgOperand(I); | |
3882 // Bitcast argument to the parameter type of the new function if it's | |
3883 // not a variadic argument. | |
3884 if (I < NewFuncTy->getNumParams()) | |
3885 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I)); | |
3886 Args.push_back(Arg); | |
3887 } | |
3888 | |
3889 // Create a call instruction that calls the new function. | |
3890 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args); | |
3891 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind()); | |
3892 NewCall->setName(CI->getName()); | |
3893 | |
3894 // Bitcast the return value back to the type of the old call. | |
3895 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType()); | |
3896 | |
3897 if (!CI->use_empty()) | |
3898 CI->replaceAllUsesWith(NewRetVal); | |
3899 CI->eraseFromParent(); | |
3900 } | |
3901 | |
3902 if (Fn->use_empty()) | |
3903 Fn->eraseFromParent(); | |
3904 }; | |
3905 | |
3906 // Unconditionally convert a call to "clang.arc.use" to a call to | |
3907 // "llvm.objc.clang.arc.use". | |
3908 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use); | |
3909 | |
3910 // Upgrade the retain release marker. If there is no need to upgrade | |
3911 // the marker, that means either the module is already new enough to contain | |
3912 // new intrinsics or it is not ARC. There is no need to upgrade runtime call. | |
3913 if (!UpgradeRetainReleaseMarker(M)) | |
3914 return; | |
3915 | |
3916 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = { | |
3917 {"objc_autorelease", llvm::Intrinsic::objc_autorelease}, | |
3918 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop}, | |
3919 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush}, | |
3920 {"objc_autoreleaseReturnValue", | |
3921 llvm::Intrinsic::objc_autoreleaseReturnValue}, | |
3922 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak}, | |
3923 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak}, | |
3924 {"objc_initWeak", llvm::Intrinsic::objc_initWeak}, | |
3925 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak}, | |
3926 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained}, | |
3927 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak}, | |
3928 {"objc_release", llvm::Intrinsic::objc_release}, | |
3929 {"objc_retain", llvm::Intrinsic::objc_retain}, | |
3930 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease}, | |
3931 {"objc_retainAutoreleaseReturnValue", | |
3932 llvm::Intrinsic::objc_retainAutoreleaseReturnValue}, | |
3933 {"objc_retainAutoreleasedReturnValue", | |
3934 llvm::Intrinsic::objc_retainAutoreleasedReturnValue}, | |
3935 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock}, | |
3936 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong}, | |
3937 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak}, | |
3938 {"objc_unsafeClaimAutoreleasedReturnValue", | |
3939 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue}, | |
3940 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject}, | |
3941 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject}, | |
3942 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer}, | |
3943 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease}, | |
3944 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter}, | |
3945 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit}, | |
3946 {"objc_arc_annotation_topdown_bbstart", | |
3947 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart}, | |
3948 {"objc_arc_annotation_topdown_bbend", | |
3949 llvm::Intrinsic::objc_arc_annotation_topdown_bbend}, | |
3950 {"objc_arc_annotation_bottomup_bbstart", | |
3951 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart}, | |
3952 {"objc_arc_annotation_bottomup_bbend", | |
3953 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}}; | |
3954 | |
3955 for (auto &I : RuntimeFuncs) | |
3956 UpgradeToIntrinsic(I.first, I.second); | |
2643 } | 3957 } |
2644 | 3958 |
2645 bool llvm::UpgradeModuleFlags(Module &M) { | 3959 bool llvm::UpgradeModuleFlags(Module &M) { |
2646 NamedMDNode *ModFlags = M.getModuleFlagsMetadata(); | 3960 NamedMDNode *ModFlags = M.getModuleFlagsMetadata(); |
2647 if (!ModFlags) | 3961 if (!ModFlags) |