Mercurial > hg > CbC > CbC_llvm
comparison lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @ 0:95c75e76d11b LLVM3.4
LLVM 3.4
author | Kaito Tokumori <e105711@ie.u-ryukyu.ac.jp> |
---|---|
date | Thu, 12 Dec 2013 13:56:28 +0900 |
parents | |
children | 54457678186b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:95c75e76d11b |
---|---|
1 //===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===// | |
2 // | |
3 // The LLVM Compiler Infrastructure | |
4 // | |
5 // This file is distributed under the University of Illinois Open Source | |
6 // License. See LICENSE.TXT for details. | |
7 // | |
8 //===----------------------------------------------------------------------===// | |
9 // | |
10 // This file implements the SelectionDAG::LegalizeVectors method. | |
11 // | |
12 // The vector legalizer looks for vector operations which might need to be | |
13 // scalarized and legalizes them. This is a separate step from Legalize because | |
14 // scalarizing can introduce illegal types. For example, suppose we have an | |
15 // ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition | |
16 // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the | |
17 // operation, which introduces nodes with the illegal type i64 which must be | |
18 // expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; | |
19 // the operation must be unrolled, which introduces nodes with the illegal | |
20 // type i8 which must be promoted. | |
21 // | |
22 // This does not legalize vector manipulations like ISD::BUILD_VECTOR, | |
23 // or operations that happen to take a vector which are custom-lowered; | |
24 // the legalization for such operations never produces nodes | |
25 // with illegal types, so it's okay to put off legalizing them until | |
26 // SelectionDAG::Legalize runs. | |
27 // | |
28 //===----------------------------------------------------------------------===// | |
29 | |
30 #include "llvm/CodeGen/SelectionDAG.h" | |
31 #include "llvm/Target/TargetLowering.h" | |
32 using namespace llvm; | |
33 | |
34 namespace { | |
35 class VectorLegalizer { | |
36 SelectionDAG& DAG; | |
37 const TargetLowering &TLI; | |
38 bool Changed; // Keep track of whether anything changed | |
39 | |
40 /// LegalizedNodes - For nodes that are of legal width, and that have more | |
41 /// than one use, this map indicates what regularized operand to use. This | |
42 /// allows us to avoid legalizing the same thing more than once. | |
43 SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; | |
44 | |
45 // Adds a node to the translation cache | |
46 void AddLegalizedOperand(SDValue From, SDValue To) { | |
47 LegalizedNodes.insert(std::make_pair(From, To)); | |
48 // If someone requests legalization of the new node, return itself. | |
49 if (From != To) | |
50 LegalizedNodes.insert(std::make_pair(To, To)); | |
51 } | |
52 | |
53 // Legalizes the given node | |
54 SDValue LegalizeOp(SDValue Op); | |
55 // Assuming the node is legal, "legalize" the results | |
56 SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); | |
57 // Implements unrolling a VSETCC. | |
58 SDValue UnrollVSETCC(SDValue Op); | |
59 // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB | |
60 // isn't legal. | |
61 // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if | |
62 // SINT_TO_FLOAT and SHR on vectors isn't legal. | |
63 SDValue ExpandUINT_TO_FLOAT(SDValue Op); | |
64 // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. | |
65 SDValue ExpandSEXTINREG(SDValue Op); | |
66 // Implement vselect in terms of XOR, AND, OR when blend is not supported | |
67 // by the target. | |
68 SDValue ExpandVSELECT(SDValue Op); | |
69 SDValue ExpandSELECT(SDValue Op); | |
70 SDValue ExpandLoad(SDValue Op); | |
71 SDValue ExpandStore(SDValue Op); | |
72 SDValue ExpandFNEG(SDValue Op); | |
73 // Implements vector promotion; this is essentially just bitcasting the | |
74 // operands to a different type and bitcasting the result back to the | |
75 // original type. | |
76 SDValue PromoteVectorOp(SDValue Op); | |
77 // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input | |
78 // operand to the next size up. | |
79 SDValue PromoteVectorOpINT_TO_FP(SDValue Op); | |
80 | |
81 public: | |
82 bool Run(); | |
83 VectorLegalizer(SelectionDAG& dag) : | |
84 DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {} | |
85 }; | |
86 | |
87 bool VectorLegalizer::Run() { | |
88 // Before we start legalizing vector nodes, check if there are any vectors. | |
89 bool HasVectors = false; | |
90 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), | |
91 E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) { | |
92 // Check if the values of the nodes contain vectors. We don't need to check | |
93 // the operands because we are going to check their values at some point. | |
94 for (SDNode::value_iterator J = I->value_begin(), E = I->value_end(); | |
95 J != E; ++J) | |
96 HasVectors |= J->isVector(); | |
97 | |
98 // If we found a vector node we can start the legalization. | |
99 if (HasVectors) | |
100 break; | |
101 } | |
102 | |
103 // If this basic block has no vectors then no need to legalize vectors. | |
104 if (!HasVectors) | |
105 return false; | |
106 | |
107 // The legalize process is inherently a bottom-up recursive process (users | |
108 // legalize their uses before themselves). Given infinite stack space, we | |
109 // could just start legalizing on the root and traverse the whole graph. In | |
110 // practice however, this causes us to run out of stack space on large basic | |
111 // blocks. To avoid this problem, compute an ordering of the nodes where each | |
112 // node is only legalized after all of its operands are legalized. | |
113 DAG.AssignTopologicalOrder(); | |
114 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), | |
115 E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) | |
116 LegalizeOp(SDValue(I, 0)); | |
117 | |
118 // Finally, it's possible the root changed. Get the new root. | |
119 SDValue OldRoot = DAG.getRoot(); | |
120 assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); | |
121 DAG.setRoot(LegalizedNodes[OldRoot]); | |
122 | |
123 LegalizedNodes.clear(); | |
124 | |
125 // Remove dead nodes now. | |
126 DAG.RemoveDeadNodes(); | |
127 | |
128 return Changed; | |
129 } | |
130 | |
131 SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) { | |
132 // Generic legalization: just pass the operand through. | |
133 for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i) | |
134 AddLegalizedOperand(Op.getValue(i), Result.getValue(i)); | |
135 return Result.getValue(Op.getResNo()); | |
136 } | |
137 | |
138 SDValue VectorLegalizer::LegalizeOp(SDValue Op) { | |
139 // Note that LegalizeOp may be reentered even from single-use nodes, which | |
140 // means that we always must cache transformed nodes. | |
141 DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); | |
142 if (I != LegalizedNodes.end()) return I->second; | |
143 | |
144 SDNode* Node = Op.getNode(); | |
145 | |
146 // Legalize the operands | |
147 SmallVector<SDValue, 8> Ops; | |
148 for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) | |
149 Ops.push_back(LegalizeOp(Node->getOperand(i))); | |
150 | |
151 SDValue Result = | |
152 SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0); | |
153 | |
154 if (Op.getOpcode() == ISD::LOAD) { | |
155 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); | |
156 ISD::LoadExtType ExtType = LD->getExtensionType(); | |
157 if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { | |
158 if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT())) | |
159 return TranslateLegalizeResults(Op, Result); | |
160 Changed = true; | |
161 return LegalizeOp(ExpandLoad(Op)); | |
162 } | |
163 } else if (Op.getOpcode() == ISD::STORE) { | |
164 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); | |
165 EVT StVT = ST->getMemoryVT(); | |
166 MVT ValVT = ST->getValue().getSimpleValueType(); | |
167 if (StVT.isVector() && ST->isTruncatingStore()) | |
168 switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) { | |
169 default: llvm_unreachable("This action is not supported yet!"); | |
170 case TargetLowering::Legal: | |
171 return TranslateLegalizeResults(Op, Result); | |
172 case TargetLowering::Custom: | |
173 Changed = true; | |
174 return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG)); | |
175 case TargetLowering::Expand: | |
176 Changed = true; | |
177 return LegalizeOp(ExpandStore(Op)); | |
178 } | |
179 } | |
180 | |
181 bool HasVectorValue = false; | |
182 for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); | |
183 J != E; | |
184 ++J) | |
185 HasVectorValue |= J->isVector(); | |
186 if (!HasVectorValue) | |
187 return TranslateLegalizeResults(Op, Result); | |
188 | |
189 EVT QueryType; | |
190 switch (Op.getOpcode()) { | |
191 default: | |
192 return TranslateLegalizeResults(Op, Result); | |
193 case ISD::ADD: | |
194 case ISD::SUB: | |
195 case ISD::MUL: | |
196 case ISD::SDIV: | |
197 case ISD::UDIV: | |
198 case ISD::SREM: | |
199 case ISD::UREM: | |
200 case ISD::FADD: | |
201 case ISD::FSUB: | |
202 case ISD::FMUL: | |
203 case ISD::FDIV: | |
204 case ISD::FREM: | |
205 case ISD::AND: | |
206 case ISD::OR: | |
207 case ISD::XOR: | |
208 case ISD::SHL: | |
209 case ISD::SRA: | |
210 case ISD::SRL: | |
211 case ISD::ROTL: | |
212 case ISD::ROTR: | |
213 case ISD::CTLZ: | |
214 case ISD::CTTZ: | |
215 case ISD::CTLZ_ZERO_UNDEF: | |
216 case ISD::CTTZ_ZERO_UNDEF: | |
217 case ISD::CTPOP: | |
218 case ISD::SELECT: | |
219 case ISD::VSELECT: | |
220 case ISD::SELECT_CC: | |
221 case ISD::SETCC: | |
222 case ISD::ZERO_EXTEND: | |
223 case ISD::ANY_EXTEND: | |
224 case ISD::TRUNCATE: | |
225 case ISD::SIGN_EXTEND: | |
226 case ISD::FP_TO_SINT: | |
227 case ISD::FP_TO_UINT: | |
228 case ISD::FNEG: | |
229 case ISD::FABS: | |
230 case ISD::FCOPYSIGN: | |
231 case ISD::FSQRT: | |
232 case ISD::FSIN: | |
233 case ISD::FCOS: | |
234 case ISD::FPOWI: | |
235 case ISD::FPOW: | |
236 case ISD::FLOG: | |
237 case ISD::FLOG2: | |
238 case ISD::FLOG10: | |
239 case ISD::FEXP: | |
240 case ISD::FEXP2: | |
241 case ISD::FCEIL: | |
242 case ISD::FTRUNC: | |
243 case ISD::FRINT: | |
244 case ISD::FNEARBYINT: | |
245 case ISD::FROUND: | |
246 case ISD::FFLOOR: | |
247 case ISD::FP_ROUND: | |
248 case ISD::FP_EXTEND: | |
249 case ISD::FMA: | |
250 case ISD::SIGN_EXTEND_INREG: | |
251 QueryType = Node->getValueType(0); | |
252 break; | |
253 case ISD::FP_ROUND_INREG: | |
254 QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT(); | |
255 break; | |
256 case ISD::SINT_TO_FP: | |
257 case ISD::UINT_TO_FP: | |
258 QueryType = Node->getOperand(0).getValueType(); | |
259 break; | |
260 } | |
261 | |
262 switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { | |
263 case TargetLowering::Promote: | |
264 switch (Op.getOpcode()) { | |
265 default: | |
266 // "Promote" the operation by bitcasting | |
267 Result = PromoteVectorOp(Op); | |
268 Changed = true; | |
269 break; | |
270 case ISD::SINT_TO_FP: | |
271 case ISD::UINT_TO_FP: | |
272 // "Promote" the operation by extending the operand. | |
273 Result = PromoteVectorOpINT_TO_FP(Op); | |
274 Changed = true; | |
275 break; | |
276 } | |
277 break; | |
278 case TargetLowering::Legal: break; | |
279 case TargetLowering::Custom: { | |
280 SDValue Tmp1 = TLI.LowerOperation(Op, DAG); | |
281 if (Tmp1.getNode()) { | |
282 Result = Tmp1; | |
283 break; | |
284 } | |
285 // FALL THROUGH | |
286 } | |
287 case TargetLowering::Expand: | |
288 if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) | |
289 Result = ExpandSEXTINREG(Op); | |
290 else if (Node->getOpcode() == ISD::VSELECT) | |
291 Result = ExpandVSELECT(Op); | |
292 else if (Node->getOpcode() == ISD::SELECT) | |
293 Result = ExpandSELECT(Op); | |
294 else if (Node->getOpcode() == ISD::UINT_TO_FP) | |
295 Result = ExpandUINT_TO_FLOAT(Op); | |
296 else if (Node->getOpcode() == ISD::FNEG) | |
297 Result = ExpandFNEG(Op); | |
298 else if (Node->getOpcode() == ISD::SETCC) | |
299 Result = UnrollVSETCC(Op); | |
300 else | |
301 Result = DAG.UnrollVectorOp(Op.getNode()); | |
302 break; | |
303 } | |
304 | |
305 // Make sure that the generated code is itself legal. | |
306 if (Result != Op) { | |
307 Result = LegalizeOp(Result); | |
308 Changed = true; | |
309 } | |
310 | |
311 // Note that LegalizeOp may be reentered even from single-use nodes, which | |
312 // means that we always must cache transformed nodes. | |
313 AddLegalizedOperand(Op, Result); | |
314 return Result; | |
315 } | |
316 | |
317 SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { | |
318 // Vector "promotion" is basically just bitcasting and doing the operation | |
319 // in a different type. For example, x86 promotes ISD::AND on v2i32 to | |
320 // v1i64. | |
321 MVT VT = Op.getSimpleValueType(); | |
322 assert(Op.getNode()->getNumValues() == 1 && | |
323 "Can't promote a vector with multiple results!"); | |
324 MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); | |
325 SDLoc dl(Op); | |
326 SmallVector<SDValue, 4> Operands(Op.getNumOperands()); | |
327 | |
328 for (unsigned j = 0; j != Op.getNumOperands(); ++j) { | |
329 if (Op.getOperand(j).getValueType().isVector()) | |
330 Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); | |
331 else | |
332 Operands[j] = Op.getOperand(j); | |
333 } | |
334 | |
335 Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size()); | |
336 | |
337 return DAG.getNode(ISD::BITCAST, dl, VT, Op); | |
338 } | |
339 | |
340 SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { | |
341 // INT_TO_FP operations may require the input operand be promoted even | |
342 // when the type is otherwise legal. | |
343 EVT VT = Op.getOperand(0).getValueType(); | |
344 assert(Op.getNode()->getNumValues() == 1 && | |
345 "Can't promote a vector with multiple results!"); | |
346 | |
347 // Normal getTypeToPromoteTo() doesn't work here, as that will promote | |
348 // by widening the vector w/ the same element width and twice the number | |
349 // of elements. We want the other way around, the same number of elements, | |
350 // each twice the width. | |
351 // | |
352 // Increase the bitwidth of the element to the next pow-of-two | |
353 // (which is greater than 8 bits). | |
354 unsigned NumElts = VT.getVectorNumElements(); | |
355 EVT EltVT = VT.getVectorElementType(); | |
356 EltVT = EVT::getIntegerVT(*DAG.getContext(), 2 * EltVT.getSizeInBits()); | |
357 assert(EltVT.isSimple() && "Promoting to a non-simple vector type!"); | |
358 | |
359 // Build a new vector type and check if it is legal. | |
360 MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); | |
361 | |
362 SDLoc dl(Op); | |
363 SmallVector<SDValue, 4> Operands(Op.getNumOperands()); | |
364 | |
365 unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : | |
366 ISD::SIGN_EXTEND; | |
367 for (unsigned j = 0; j != Op.getNumOperands(); ++j) { | |
368 if (Op.getOperand(j).getValueType().isVector()) | |
369 Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j)); | |
370 else | |
371 Operands[j] = Op.getOperand(j); | |
372 } | |
373 | |
374 return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0], | |
375 Operands.size()); | |
376 } | |
377 | |
378 | |
379 SDValue VectorLegalizer::ExpandLoad(SDValue Op) { | |
380 SDLoc dl(Op); | |
381 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); | |
382 SDValue Chain = LD->getChain(); | |
383 SDValue BasePTR = LD->getBasePtr(); | |
384 EVT SrcVT = LD->getMemoryVT(); | |
385 ISD::LoadExtType ExtType = LD->getExtensionType(); | |
386 | |
387 SmallVector<SDValue, 8> Vals; | |
388 SmallVector<SDValue, 8> LoadChains; | |
389 unsigned NumElem = SrcVT.getVectorNumElements(); | |
390 | |
391 EVT SrcEltVT = SrcVT.getScalarType(); | |
392 EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType(); | |
393 | |
394 if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { | |
395 // When elements in a vector is not byte-addressable, we cannot directly | |
396 // load each element by advancing pointer, which could only address bytes. | |
397 // Instead, we load all significant words, mask bits off, and concatenate | |
398 // them to form each element. Finally, they are extended to destination | |
399 // scalar type to build the destination vector. | |
400 EVT WideVT = TLI.getPointerTy(); | |
401 | |
402 assert(WideVT.isRound() && | |
403 "Could not handle the sophisticated case when the widest integer is" | |
404 " not power of 2."); | |
405 assert(WideVT.bitsGE(SrcEltVT) && | |
406 "Type is not legalized?"); | |
407 | |
408 unsigned WideBytes = WideVT.getStoreSize(); | |
409 unsigned Offset = 0; | |
410 unsigned RemainingBytes = SrcVT.getStoreSize(); | |
411 SmallVector<SDValue, 8> LoadVals; | |
412 | |
413 while (RemainingBytes > 0) { | |
414 SDValue ScalarLoad; | |
415 unsigned LoadBytes = WideBytes; | |
416 | |
417 if (RemainingBytes >= LoadBytes) { | |
418 ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, | |
419 LD->getPointerInfo().getWithOffset(Offset), | |
420 LD->isVolatile(), LD->isNonTemporal(), | |
421 LD->isInvariant(), LD->getAlignment(), | |
422 LD->getTBAAInfo()); | |
423 } else { | |
424 EVT LoadVT = WideVT; | |
425 while (RemainingBytes < LoadBytes) { | |
426 LoadBytes >>= 1; // Reduce the load size by half. | |
427 LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); | |
428 } | |
429 ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, | |
430 LD->getPointerInfo().getWithOffset(Offset), | |
431 LoadVT, LD->isVolatile(), | |
432 LD->isNonTemporal(), LD->getAlignment(), | |
433 LD->getTBAAInfo()); | |
434 } | |
435 | |
436 RemainingBytes -= LoadBytes; | |
437 Offset += LoadBytes; | |
438 BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, | |
439 DAG.getConstant(LoadBytes, BasePTR.getValueType())); | |
440 | |
441 LoadVals.push_back(ScalarLoad.getValue(0)); | |
442 LoadChains.push_back(ScalarLoad.getValue(1)); | |
443 } | |
444 | |
445 // Extract bits, pack and extend/trunc them into destination type. | |
446 unsigned SrcEltBits = SrcEltVT.getSizeInBits(); | |
447 SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT); | |
448 | |
449 unsigned BitOffset = 0; | |
450 unsigned WideIdx = 0; | |
451 unsigned WideBits = WideVT.getSizeInBits(); | |
452 | |
453 for (unsigned Idx = 0; Idx != NumElem; ++Idx) { | |
454 SDValue Lo, Hi, ShAmt; | |
455 | |
456 if (BitOffset < WideBits) { | |
457 ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT)); | |
458 Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); | |
459 Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); | |
460 } | |
461 | |
462 BitOffset += SrcEltBits; | |
463 if (BitOffset >= WideBits) { | |
464 WideIdx++; | |
465 Offset -= WideBits; | |
466 if (Offset > 0) { | |
467 ShAmt = DAG.getConstant(SrcEltBits - Offset, | |
468 TLI.getShiftAmountTy(WideVT)); | |
469 Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); | |
470 Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask); | |
471 } | |
472 } | |
473 | |
474 if (Hi.getNode()) | |
475 Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi); | |
476 | |
477 switch (ExtType) { | |
478 default: llvm_unreachable("Unknown extended-load op!"); | |
479 case ISD::EXTLOAD: | |
480 Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT); | |
481 break; | |
482 case ISD::ZEXTLOAD: | |
483 Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); | |
484 break; | |
485 case ISD::SEXTLOAD: | |
486 ShAmt = DAG.getConstant(WideBits - SrcEltBits, | |
487 TLI.getShiftAmountTy(WideVT)); | |
488 Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); | |
489 Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); | |
490 Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); | |
491 break; | |
492 } | |
493 Vals.push_back(Lo); | |
494 } | |
495 } else { | |
496 unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; | |
497 | |
498 for (unsigned Idx=0; Idx<NumElem; Idx++) { | |
499 SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl, | |
500 Op.getNode()->getValueType(0).getScalarType(), | |
501 Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), | |
502 SrcVT.getScalarType(), | |
503 LD->isVolatile(), LD->isNonTemporal(), | |
504 LD->getAlignment(), LD->getTBAAInfo()); | |
505 | |
506 BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, | |
507 DAG.getConstant(Stride, BasePTR.getValueType())); | |
508 | |
509 Vals.push_back(ScalarLoad.getValue(0)); | |
510 LoadChains.push_back(ScalarLoad.getValue(1)); | |
511 } | |
512 } | |
513 | |
514 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, | |
515 &LoadChains[0], LoadChains.size()); | |
516 SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, | |
517 Op.getNode()->getValueType(0), &Vals[0], Vals.size()); | |
518 | |
519 AddLegalizedOperand(Op.getValue(0), Value); | |
520 AddLegalizedOperand(Op.getValue(1), NewChain); | |
521 | |
522 return (Op.getResNo() ? NewChain : Value); | |
523 } | |
524 | |
525 SDValue VectorLegalizer::ExpandStore(SDValue Op) { | |
526 SDLoc dl(Op); | |
527 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); | |
528 SDValue Chain = ST->getChain(); | |
529 SDValue BasePTR = ST->getBasePtr(); | |
530 SDValue Value = ST->getValue(); | |
531 EVT StVT = ST->getMemoryVT(); | |
532 | |
533 unsigned Alignment = ST->getAlignment(); | |
534 bool isVolatile = ST->isVolatile(); | |
535 bool isNonTemporal = ST->isNonTemporal(); | |
536 const MDNode *TBAAInfo = ST->getTBAAInfo(); | |
537 | |
538 unsigned NumElem = StVT.getVectorNumElements(); | |
539 // The type of the data we want to save | |
540 EVT RegVT = Value.getValueType(); | |
541 EVT RegSclVT = RegVT.getScalarType(); | |
542 // The type of data as saved in memory. | |
543 EVT MemSclVT = StVT.getScalarType(); | |
544 | |
545 // Cast floats into integers | |
546 unsigned ScalarSize = MemSclVT.getSizeInBits(); | |
547 | |
548 // Round odd types to the next pow of two. | |
549 if (!isPowerOf2_32(ScalarSize)) | |
550 ScalarSize = NextPowerOf2(ScalarSize); | |
551 | |
552 // Store Stride in bytes | |
553 unsigned Stride = ScalarSize/8; | |
554 // Extract each of the elements from the original vector | |
555 // and save them into memory individually. | |
556 SmallVector<SDValue, 8> Stores; | |
557 for (unsigned Idx = 0; Idx < NumElem; Idx++) { | |
558 SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, | |
559 RegSclVT, Value, DAG.getConstant(Idx, TLI.getVectorIdxTy())); | |
560 | |
561 // This scalar TruncStore may be illegal, but we legalize it later. | |
562 SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, | |
563 ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, | |
564 isVolatile, isNonTemporal, Alignment, TBAAInfo); | |
565 | |
566 BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, | |
567 DAG.getConstant(Stride, BasePTR.getValueType())); | |
568 | |
569 Stores.push_back(Store); | |
570 } | |
571 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, | |
572 &Stores[0], Stores.size()); | |
573 AddLegalizedOperand(Op, TF); | |
574 return TF; | |
575 } | |
576 | |
577 SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { | |
578 // Lower a select instruction where the condition is a scalar and the | |
579 // operands are vectors. Lower this select to VSELECT and implement it | |
580 // using XOR AND OR. The selector bit is broadcasted. | |
581 EVT VT = Op.getValueType(); | |
582 SDLoc DL(Op); | |
583 | |
584 SDValue Mask = Op.getOperand(0); | |
585 SDValue Op1 = Op.getOperand(1); | |
586 SDValue Op2 = Op.getOperand(2); | |
587 | |
588 assert(VT.isVector() && !Mask.getValueType().isVector() | |
589 && Op1.getValueType() == Op2.getValueType() && "Invalid type"); | |
590 | |
591 unsigned NumElem = VT.getVectorNumElements(); | |
592 | |
593 // If we can't even use the basic vector operations of | |
594 // AND,OR,XOR, we will have to scalarize the op. | |
595 // Notice that the operation may be 'promoted' which means that it is | |
596 // 'bitcasted' to another type which is handled. | |
597 // Also, we need to be able to construct a splat vector using BUILD_VECTOR. | |
598 if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || | |
599 TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || | |
600 TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || | |
601 TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand) | |
602 return DAG.UnrollVectorOp(Op.getNode()); | |
603 | |
604 // Generate a mask operand. | |
605 EVT MaskTy = VT.changeVectorElementTypeToInteger(); | |
606 | |
607 // What is the size of each element in the vector mask. | |
608 EVT BitTy = MaskTy.getScalarType(); | |
609 | |
610 Mask = DAG.getSelect(DL, BitTy, Mask, | |
611 DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy), | |
612 DAG.getConstant(0, BitTy)); | |
613 | |
614 // Broadcast the mask so that the entire vector is all-one or all zero. | |
615 SmallVector<SDValue, 8> Ops(NumElem, Mask); | |
616 Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size()); | |
617 | |
618 // Bitcast the operands to be the same type as the mask. | |
619 // This is needed when we select between FP types because | |
620 // the mask is a vector of integers. | |
621 Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1); | |
622 Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); | |
623 | |
624 SDValue AllOnes = DAG.getConstant( | |
625 APInt::getAllOnesValue(BitTy.getSizeInBits()), MaskTy); | |
626 SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); | |
627 | |
628 Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); | |
629 Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); | |
630 SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2); | |
631 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); | |
632 } | |
633 | |
634 SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { | |
635 EVT VT = Op.getValueType(); | |
636 | |
637 // Make sure that the SRA and SHL instructions are available. | |
638 if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand || | |
639 TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand) | |
640 return DAG.UnrollVectorOp(Op.getNode()); | |
641 | |
642 SDLoc DL(Op); | |
643 EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); | |
644 | |
645 unsigned BW = VT.getScalarType().getSizeInBits(); | |
646 unsigned OrigBW = OrigTy.getScalarType().getSizeInBits(); | |
647 SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT); | |
648 | |
649 Op = Op.getOperand(0); | |
650 Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); | |
651 return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); | |
652 } | |
653 | |
654 SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { | |
655 // Implement VSELECT in terms of XOR, AND, OR | |
656 // on platforms which do not support blend natively. | |
657 SDLoc DL(Op); | |
658 | |
659 SDValue Mask = Op.getOperand(0); | |
660 SDValue Op1 = Op.getOperand(1); | |
661 SDValue Op2 = Op.getOperand(2); | |
662 | |
663 EVT VT = Mask.getValueType(); | |
664 | |
665 // If we can't even use the basic vector operations of | |
666 // AND,OR,XOR, we will have to scalarize the op. | |
667 // Notice that the operation may be 'promoted' which means that it is | |
668 // 'bitcasted' to another type which is handled. | |
669 // This operation also isn't safe with AND, OR, XOR when the boolean | |
670 // type is 0/1 as we need an all ones vector constant to mask with. | |
671 // FIXME: Sign extend 1 to all ones if thats legal on the target. | |
672 if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || | |
673 TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || | |
674 TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || | |
675 TLI.getBooleanContents(true) != | |
676 TargetLowering::ZeroOrNegativeOneBooleanContent) | |
677 return DAG.UnrollVectorOp(Op.getNode()); | |
678 | |
679 // If the mask and the type are different sizes, unroll the vector op. This | |
680 // can occur when getSetCCResultType returns something that is different in | |
681 // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. | |
682 if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits()) | |
683 return DAG.UnrollVectorOp(Op.getNode()); | |
684 | |
685 // Bitcast the operands to be the same type as the mask. | |
686 // This is needed when we select between FP types because | |
687 // the mask is a vector of integers. | |
688 Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1); | |
689 Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); | |
690 | |
691 SDValue AllOnes = DAG.getConstant( | |
692 APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), VT); | |
693 SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); | |
694 | |
695 Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); | |
696 Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); | |
697 SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2); | |
698 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); | |
699 } | |
700 | |
701 SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { | |
702 EVT VT = Op.getOperand(0).getValueType(); | |
703 SDLoc DL(Op); | |
704 | |
705 // Make sure that the SINT_TO_FP and SRL instructions are available. | |
706 if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || | |
707 TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) | |
708 return DAG.UnrollVectorOp(Op.getNode()); | |
709 | |
710 EVT SVT = VT.getScalarType(); | |
711 assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && | |
712 "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); | |
713 | |
714 unsigned BW = SVT.getSizeInBits(); | |
715 SDValue HalfWord = DAG.getConstant(BW/2, VT); | |
716 | |
717 // Constants to clear the upper part of the word. | |
718 // Notice that we can also use SHL+SHR, but using a constant is slightly | |
719 // faster on x86. | |
720 uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF; | |
721 SDValue HalfWordMask = DAG.getConstant(HWMask, VT); | |
722 | |
723 // Two to the power of half-word-size. | |
724 SDValue TWOHW = DAG.getConstantFP((1<<(BW/2)), Op.getValueType()); | |
725 | |
726 // Clear upper part of LO, lower HI | |
727 SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); | |
728 SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask); | |
729 | |
730 // Convert hi and lo to floats | |
731 // Convert the hi part back to the upper values | |
732 SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI); | |
733 fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW); | |
734 SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); | |
735 | |
736 // Add the two halves | |
737 return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); | |
738 } | |
739 | |
740 | |
741 SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { | |
742 if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { | |
743 SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType()); | |
744 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), | |
745 Zero, Op.getOperand(0)); | |
746 } | |
747 return DAG.UnrollVectorOp(Op.getNode()); | |
748 } | |
749 | |
750 SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { | |
751 EVT VT = Op.getValueType(); | |
752 unsigned NumElems = VT.getVectorNumElements(); | |
753 EVT EltVT = VT.getVectorElementType(); | |
754 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); | |
755 EVT TmpEltVT = LHS.getValueType().getVectorElementType(); | |
756 SDLoc dl(Op); | |
757 SmallVector<SDValue, 8> Ops(NumElems); | |
758 for (unsigned i = 0; i < NumElems; ++i) { | |
759 SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, | |
760 DAG.getConstant(i, TLI.getVectorIdxTy())); | |
761 SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, | |
762 DAG.getConstant(i, TLI.getVectorIdxTy())); | |
763 Ops[i] = DAG.getNode(ISD::SETCC, dl, | |
764 TLI.getSetCCResultType(*DAG.getContext(), TmpEltVT), | |
765 LHSElem, RHSElem, CC); | |
766 Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], | |
767 DAG.getConstant(APInt::getAllOnesValue | |
768 (EltVT.getSizeInBits()), EltVT), | |
769 DAG.getConstant(0, EltVT)); | |
770 } | |
771 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems); | |
772 } | |
773 | |
774 } | |
775 | |
776 bool SelectionDAG::LegalizeVectors() { | |
777 return VectorLegalizer(*this).Run(); | |
778 } |