Mercurial > hg > CbC > CbC_llvm
comparison clang/lib/CodeGen/CGStmtOpenMP.cpp @ 223:5f17cb93ff66 llvm-original
LLVM13 (2021/7/18)
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 18 Jul 2021 22:43:00 +0900 |
parents | 79ff65ed7e25 |
children | 5f20bc1ed4ff |
comparison
equal
deleted
inserted
replaced
222:81f6424ef0e3 | 223:5f17cb93ff66 |
---|---|
174 return false; | 174 return false; |
175 }); | 175 }); |
176 PreInits = cast_or_null<DeclStmt>(LD->getPreInits()); | 176 PreInits = cast_or_null<DeclStmt>(LD->getPreInits()); |
177 } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) { | 177 } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) { |
178 PreInits = cast_or_null<DeclStmt>(Tile->getPreInits()); | 178 PreInits = cast_or_null<DeclStmt>(Tile->getPreInits()); |
179 } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) { | |
180 PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits()); | |
179 } else { | 181 } else { |
180 llvm_unreachable("Unknown loop-based directive kind."); | 182 llvm_unreachable("Unknown loop-based directive kind."); |
181 } | 183 } |
182 if (PreInits) { | 184 if (PreInits) { |
183 for (const auto *I : PreInits->decls()) | 185 for (const auto *I : PreInits->decls()) |
316 return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); | 318 return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); |
317 } | 319 } |
318 return CGM.getSize(SizeInChars); | 320 return CGM.getSize(SizeInChars); |
319 } | 321 } |
320 | 322 |
323 void CodeGenFunction::GenerateOpenMPCapturedVarsAggregate( | |
324 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { | |
325 const RecordDecl *RD = S.getCapturedRecordDecl(); | |
326 QualType RecordTy = getContext().getRecordType(RD); | |
327 // Create the aggregate argument struct for the outlined function. | |
328 LValue AggLV = MakeAddrLValue( | |
329 CreateMemTemp(RecordTy, "omp.outlined.arg.agg."), RecordTy); | |
330 | |
331 // Initialize the aggregate with captured values. | |
332 auto CurField = RD->field_begin(); | |
333 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), | |
334 E = S.capture_init_end(); | |
335 I != E; ++I, ++CurField) { | |
336 LValue LV = EmitLValueForFieldInitialization(AggLV, *CurField); | |
337 // Initialize for VLA. | |
338 if (CurField->hasCapturedVLAType()) { | |
339 EmitLambdaVLACapture(CurField->getCapturedVLAType(), LV); | |
340 } else | |
341 // Initialize for capturesThis, capturesVariableByCopy, | |
342 // capturesVariable | |
343 EmitInitializerForField(*CurField, LV, *I); | |
344 } | |
345 | |
346 CapturedVars.push_back(AggLV.getPointer(*this)); | |
347 } | |
348 | |
321 void CodeGenFunction::GenerateOpenMPCapturedVars( | 349 void CodeGenFunction::GenerateOpenMPCapturedVars( |
322 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { | 350 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { |
323 const RecordDecl *RD = S.getCapturedRecordDecl(); | 351 const RecordDecl *RD = S.getCapturedRecordDecl(); |
324 auto CurField = RD->field_begin(); | 352 auto CurField = RD->field_begin(); |
325 auto CurCap = S.captures().begin(); | 353 auto CurCap = S.captures().begin(); |
415 : S(S), UIntPtrCastRequired(UIntPtrCastRequired), | 443 : S(S), UIntPtrCastRequired(UIntPtrCastRequired), |
416 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), | 444 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), |
417 FunctionName(FunctionName), Loc(Loc) {} | 445 FunctionName(FunctionName), Loc(Loc) {} |
418 }; | 446 }; |
419 } // namespace | 447 } // namespace |
448 | |
449 static llvm::Function *emitOutlinedFunctionPrologueAggregate( | |
450 CodeGenFunction &CGF, FunctionArgList &Args, | |
451 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> | |
452 &LocalAddrs, | |
453 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> | |
454 &VLASizes, | |
455 llvm::Value *&CXXThisValue, const CapturedStmt &CS, SourceLocation Loc, | |
456 StringRef FunctionName) { | |
457 const CapturedDecl *CD = CS.getCapturedDecl(); | |
458 const RecordDecl *RD = CS.getCapturedRecordDecl(); | |
459 assert(CD->hasBody() && "missing CapturedDecl body"); | |
460 | |
461 CXXThisValue = nullptr; | |
462 // Build the argument list. | |
463 CodeGenModule &CGM = CGF.CGM; | |
464 ASTContext &Ctx = CGM.getContext(); | |
465 Args.append(CD->param_begin(), CD->param_end()); | |
466 | |
467 // Create the function declaration. | |
468 const CGFunctionInfo &FuncInfo = | |
469 CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); | |
470 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); | |
471 | |
472 auto *F = | |
473 llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage, | |
474 FunctionName, &CGM.getModule()); | |
475 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); | |
476 if (CD->isNothrow()) | |
477 F->setDoesNotThrow(); | |
478 F->setDoesNotRecurse(); | |
479 | |
480 // Generate the function. | |
481 CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, Loc, Loc); | |
482 Address ContextAddr = CGF.GetAddrOfLocalVar(CD->getContextParam()); | |
483 llvm::Value *ContextV = CGF.Builder.CreateLoad(ContextAddr); | |
484 LValue ContextLV = CGF.MakeNaturalAlignAddrLValue( | |
485 ContextV, CGM.getContext().getTagDeclType(RD)); | |
486 auto I = CS.captures().begin(); | |
487 for (const FieldDecl *FD : RD->fields()) { | |
488 LValue FieldLV = CGF.EmitLValueForFieldInitialization(ContextLV, FD); | |
489 // Do not map arguments if we emit function with non-original types. | |
490 Address LocalAddr = FieldLV.getAddress(CGF); | |
491 // If we are capturing a pointer by copy we don't need to do anything, just | |
492 // use the value that we get from the arguments. | |
493 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { | |
494 const VarDecl *CurVD = I->getCapturedVar(); | |
495 LocalAddrs.insert({FD, {CurVD, LocalAddr}}); | |
496 ++I; | |
497 continue; | |
498 } | |
499 | |
500 LValue ArgLVal = | |
501 CGF.MakeAddrLValue(LocalAddr, FD->getType(), AlignmentSource::Decl); | |
502 if (FD->hasCapturedVLAType()) { | |
503 llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); | |
504 const VariableArrayType *VAT = FD->getCapturedVLAType(); | |
505 VLASizes.try_emplace(FD, VAT->getSizeExpr(), ExprArg); | |
506 } else if (I->capturesVariable()) { | |
507 const VarDecl *Var = I->getCapturedVar(); | |
508 QualType VarTy = Var->getType(); | |
509 Address ArgAddr = ArgLVal.getAddress(CGF); | |
510 if (ArgLVal.getType()->isLValueReferenceType()) { | |
511 ArgAddr = CGF.EmitLoadOfReference(ArgLVal); | |
512 } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { | |
513 assert(ArgLVal.getType()->isPointerType()); | |
514 ArgAddr = CGF.EmitLoadOfPointer( | |
515 ArgAddr, ArgLVal.getType()->castAs<PointerType>()); | |
516 } | |
517 LocalAddrs.insert( | |
518 {FD, {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}}); | |
519 } else if (I->capturesVariableByCopy()) { | |
520 assert(!FD->getType()->isAnyPointerType() && | |
521 "Not expecting a captured pointer."); | |
522 const VarDecl *Var = I->getCapturedVar(); | |
523 Address CopyAddr = CGF.CreateMemTemp(FD->getType(), Ctx.getDeclAlign(FD), | |
524 Var->getName()); | |
525 LValue CopyLVal = | |
526 CGF.MakeAddrLValue(CopyAddr, FD->getType(), AlignmentSource::Decl); | |
527 | |
528 RValue ArgRVal = CGF.EmitLoadOfLValue(ArgLVal, I->getLocation()); | |
529 CGF.EmitStoreThroughLValue(ArgRVal, CopyLVal); | |
530 | |
531 LocalAddrs.insert({FD, {Var, CopyAddr}}); | |
532 } else { | |
533 // If 'this' is captured, load it into CXXThisValue. | |
534 assert(I->capturesThis()); | |
535 CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation()); | |
536 LocalAddrs.insert({FD, {nullptr, ArgLVal.getAddress(CGF)}}); | |
537 } | |
538 ++I; | |
539 } | |
540 | |
541 return F; | |
542 } | |
420 | 543 |
421 static llvm::Function *emitOutlinedFunctionPrologue( | 544 static llvm::Function *emitOutlinedFunctionPrologue( |
422 CodeGenFunction &CGF, FunctionArgList &Args, | 545 CodeGenFunction &CGF, FunctionArgList &Args, |
423 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> | 546 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> |
424 &LocalAddrs, | 547 &LocalAddrs, |
591 } | 714 } |
592 | 715 |
593 return F; | 716 return F; |
594 } | 717 } |
595 | 718 |
719 llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunctionAggregate( | |
720 const CapturedStmt &S, SourceLocation Loc) { | |
721 assert( | |
722 CapturedStmtInfo && | |
723 "CapturedStmtInfo should be set when generating the captured function"); | |
724 const CapturedDecl *CD = S.getCapturedDecl(); | |
725 // Build the argument list. | |
726 FunctionArgList Args; | |
727 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; | |
728 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; | |
729 StringRef FunctionName = CapturedStmtInfo->getHelperName(); | |
730 llvm::Function *F = emitOutlinedFunctionPrologueAggregate( | |
731 *this, Args, LocalAddrs, VLASizes, CXXThisValue, S, Loc, FunctionName); | |
732 CodeGenFunction::OMPPrivateScope LocalScope(*this); | |
733 for (const auto &LocalAddrPair : LocalAddrs) { | |
734 if (LocalAddrPair.second.first) { | |
735 LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() { | |
736 return LocalAddrPair.second.second; | |
737 }); | |
738 } | |
739 } | |
740 (void)LocalScope.Privatize(); | |
741 for (const auto &VLASizePair : VLASizes) | |
742 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; | |
743 PGO.assignRegionCounters(GlobalDecl(CD), F); | |
744 CapturedStmtInfo->EmitBody(*this, CD->getBody()); | |
745 (void)LocalScope.ForceCleanup(); | |
746 FinishFunction(CD->getBodyRBrace()); | |
747 return F; | |
748 } | |
749 | |
596 llvm::Function * | 750 llvm::Function * |
597 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, | 751 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, |
598 SourceLocation Loc) { | 752 SourceLocation Loc) { |
599 assert( | 753 assert( |
600 CapturedStmtInfo && | 754 CapturedStmtInfo && |
694 SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); | 848 SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); |
695 | 849 |
696 llvm::Value *SrcBegin = SrcAddr.getPointer(); | 850 llvm::Value *SrcBegin = SrcAddr.getPointer(); |
697 llvm::Value *DestBegin = DestAddr.getPointer(); | 851 llvm::Value *DestBegin = DestAddr.getPointer(); |
698 // Cast from pointer to array type to pointer to single element. | 852 // Cast from pointer to array type to pointer to single element. |
699 llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements); | 853 llvm::Value *DestEnd = |
854 Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); | |
700 // The basic structure here is a while-do loop. | 855 // The basic structure here is a while-do loop. |
701 llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body"); | 856 llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body"); |
702 llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done"); | 857 llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done"); |
703 llvm::Value *IsEmpty = | 858 llvm::Value *IsEmpty = |
704 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); | 859 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); |
727 // Emit copy. | 882 // Emit copy. |
728 CopyGen(DestElementCurrent, SrcElementCurrent); | 883 CopyGen(DestElementCurrent, SrcElementCurrent); |
729 | 884 |
730 // Shift the address forward by one element. | 885 // Shift the address forward by one element. |
731 llvm::Value *DestElementNext = Builder.CreateConstGEP1_32( | 886 llvm::Value *DestElementNext = Builder.CreateConstGEP1_32( |
732 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); | 887 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, |
888 "omp.arraycpy.dest.element"); | |
733 llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32( | 889 llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32( |
734 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); | 890 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, |
891 "omp.arraycpy.src.element"); | |
735 // Check whether we've reached the end. | 892 // Check whether we've reached the end. |
736 llvm::Value *Done = | 893 llvm::Value *Done = |
737 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); | 894 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); |
738 Builder.CreateCondBr(Done, DoneBB, BodyBB); | 895 Builder.CreateCondBr(Done, DoneBB, BodyBB); |
739 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); | 896 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); |
822 // Do not emit copy for firstprivate constant variables in target regions, | 979 // Do not emit copy for firstprivate constant variables in target regions, |
823 // captured by reference. | 980 // captured by reference. |
824 if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) && | 981 if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) && |
825 FD && FD->getType()->isReferenceType() && | 982 FD && FD->getType()->isReferenceType() && |
826 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { | 983 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { |
827 (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this, | 984 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); |
828 OrigVD); | |
829 ++IRef; | 985 ++IRef; |
830 ++InitsRef; | 986 ++InitsRef; |
831 continue; | 987 continue; |
832 } | 988 } |
833 FirstprivateIsLastprivate = | 989 FirstprivateIsLastprivate = |
1579 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk | 1735 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk |
1580 // lower and upper bounds with the pragma 'for' chunking mechanism. | 1736 // lower and upper bounds with the pragma 'for' chunking mechanism. |
1581 // The following lambda takes care of appending the lower and upper bound | 1737 // The following lambda takes care of appending the lower and upper bound |
1582 // parameters when necessary | 1738 // parameters when necessary |
1583 CodeGenBoundParameters(CGF, S, CapturedVars); | 1739 CodeGenBoundParameters(CGF, S, CapturedVars); |
1584 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); | 1740 CGF.GenerateOpenMPCapturedVarsAggregate(*CS, CapturedVars); |
1585 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, | 1741 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, |
1586 CapturedVars, IfCond); | 1742 CapturedVars, IfCond); |
1587 } | 1743 } |
1588 | 1744 |
1589 static bool isAllocatableDecl(const VarDecl *VD) { | 1745 static bool isAllocatableDecl(const VarDecl *VD) { |
1818 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level); | 1974 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level); |
1819 return; | 1975 return; |
1820 } | 1976 } |
1821 if (SimplifiedS == NextLoop) { | 1977 if (SimplifiedS == NextLoop) { |
1822 if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS)) | 1978 if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS)) |
1979 SimplifiedS = Dir->getTransformedStmt(); | |
1980 if (auto *Dir = dyn_cast<OMPUnrollDirective>(SimplifiedS)) | |
1823 SimplifiedS = Dir->getTransformedStmt(); | 1981 SimplifiedS = Dir->getTransformedStmt(); |
1824 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS)) | 1982 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS)) |
1825 SimplifiedS = CanonLoop->getLoopStmt(); | 1983 SimplifiedS = CanonLoop->getLoopStmt(); |
1826 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) { | 1984 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) { |
1827 S = For->getBody(); | 1985 S = For->getBody(); |
2317 } | 2475 } |
2318 } | 2476 } |
2319 } | 2477 } |
2320 | 2478 |
2321 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, | 2479 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, |
2322 const OMPExecutableDirective &D, | 2480 const OMPExecutableDirective &D) { |
2323 bool IsMonotonic) { | |
2324 if (!CGF.HaveInsertPoint()) | 2481 if (!CGF.HaveInsertPoint()) |
2325 return; | 2482 return; |
2326 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { | 2483 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { |
2327 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), | 2484 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), |
2328 /*ignoreResult=*/true); | 2485 /*ignoreResult=*/true); |
2329 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); | 2486 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); |
2330 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); | 2487 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); |
2331 // In presence of finite 'safelen', it may be unsafe to mark all | 2488 // In presence of finite 'safelen', it may be unsafe to mark all |
2332 // the memory instructions parallel, because loop-carried | 2489 // the memory instructions parallel, because loop-carried |
2333 // dependences of 'safelen' iterations are possible. | 2490 // dependences of 'safelen' iterations are possible. |
2334 if (!IsMonotonic) | 2491 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); |
2335 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); | |
2336 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { | 2492 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { |
2337 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), | 2493 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), |
2338 /*ignoreResult=*/true); | 2494 /*ignoreResult=*/true); |
2339 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); | 2495 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); |
2340 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); | 2496 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); |
2343 // dependences of 'safelen' iterations are possible. | 2499 // dependences of 'safelen' iterations are possible. |
2344 CGF.LoopStack.setParallel(/*Enable=*/false); | 2500 CGF.LoopStack.setParallel(/*Enable=*/false); |
2345 } | 2501 } |
2346 } | 2502 } |
2347 | 2503 |
2348 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, | 2504 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) { |
2349 bool IsMonotonic) { | |
2350 // Walk clauses and process safelen/lastprivate. | 2505 // Walk clauses and process safelen/lastprivate. |
2351 LoopStack.setParallel(!IsMonotonic); | 2506 LoopStack.setParallel(/*Enable=*/true); |
2352 LoopStack.setVectorizeEnable(); | 2507 LoopStack.setVectorizeEnable(); |
2353 emitSimdlenSafelenClause(*this, D, IsMonotonic); | 2508 emitSimdlenSafelenClause(*this, D); |
2354 if (const auto *C = D.getSingleClause<OMPOrderClause>()) | 2509 if (const auto *C = D.getSingleClause<OMPOrderClause>()) |
2355 if (C->getKind() == OMPC_ORDER_concurrent) | 2510 if (C->getKind() == OMPC_ORDER_concurrent) |
2356 LoopStack.setParallel(/*Enable=*/true); | 2511 LoopStack.setParallel(/*Enable=*/true); |
2357 if ((D.getDirectiveKind() == OMPD_simd || | 2512 if ((D.getDirectiveKind() == OMPD_simd || |
2358 (getLangOpts().OpenMPSimd && | 2513 (getLangOpts().OpenMPSimd && |
2577 // Emit the de-sugared statement. | 2732 // Emit the de-sugared statement. |
2578 OMPTransformDirectiveScopeRAII TileScope(*this, &S); | 2733 OMPTransformDirectiveScopeRAII TileScope(*this, &S); |
2579 EmitStmt(S.getTransformedStmt()); | 2734 EmitStmt(S.getTransformedStmt()); |
2580 } | 2735 } |
2581 | 2736 |
2737 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { | |
2738 // This function is only called if the unrolled loop is not consumed by any | |
2739 // other loop-associated construct. Such a loop-associated construct will have | |
2740 // used the transformed AST. | |
2741 | |
2742 // Set the unroll metadata for the next emitted loop. | |
2743 LoopStack.setUnrollState(LoopAttributes::Enable); | |
2744 | |
2745 if (S.hasClausesOfKind<OMPFullClause>()) { | |
2746 LoopStack.setUnrollState(LoopAttributes::Full); | |
2747 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { | |
2748 if (Expr *FactorExpr = PartialClause->getFactor()) { | |
2749 uint64_t Factor = | |
2750 FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue(); | |
2751 assert(Factor >= 1 && "Only positive factors are valid"); | |
2752 LoopStack.setUnrollCount(Factor); | |
2753 } | |
2754 } | |
2755 | |
2756 EmitStmt(S.getAssociatedStmt()); | |
2757 } | |
2758 | |
2582 void CodeGenFunction::EmitOMPOuterLoop( | 2759 void CodeGenFunction::EmitOMPOuterLoop( |
2583 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, | 2760 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, |
2584 CodeGenFunction::OMPPrivateScope &LoopScope, | 2761 CodeGenFunction::OMPPrivateScope &LoopScope, |
2585 const CodeGenFunction::OMPLoopArguments &LoopArgs, | 2762 const CodeGenFunction::OMPLoopArguments &LoopArgs, |
2586 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, | 2763 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, |
2649 CGF.LoopStack.setParallel(!IsMonotonic); | 2826 CGF.LoopStack.setParallel(!IsMonotonic); |
2650 if (const auto *C = S.getSingleClause<OMPOrderClause>()) | 2827 if (const auto *C = S.getSingleClause<OMPOrderClause>()) |
2651 if (C->getKind() == OMPC_ORDER_concurrent) | 2828 if (C->getKind() == OMPC_ORDER_concurrent) |
2652 CGF.LoopStack.setParallel(/*Enable=*/true); | 2829 CGF.LoopStack.setParallel(/*Enable=*/true); |
2653 } else { | 2830 } else { |
2654 CGF.EmitOMPSimdInit(S, IsMonotonic); | 2831 CGF.EmitOMPSimdInit(S); |
2655 } | 2832 } |
2656 }, | 2833 }, |
2657 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, | 2834 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, |
2658 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { | 2835 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
2659 SourceLocation Loc = S.getBeginLoc(); | 2836 SourceLocation Loc = S.getBeginLoc(); |
3159 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, | 3336 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, |
3160 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && | 3337 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && |
3161 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); | 3338 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); |
3162 bool IsMonotonic = | 3339 bool IsMonotonic = |
3163 Ordered || | 3340 Ordered || |
3164 ((ScheduleKind.Schedule == OMPC_SCHEDULE_static || | 3341 (ScheduleKind.Schedule == OMPC_SCHEDULE_static && |
3165 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) && | |
3166 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || | 3342 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || |
3167 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || | 3343 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || |
3168 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || | 3344 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || |
3169 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; | 3345 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; |
3170 if ((RT.isStaticNonchunked(ScheduleKind.Schedule, | 3346 if ((RT.isStaticNonchunked(ScheduleKind.Schedule, |
3173 !Ordered) { | 3349 !Ordered) { |
3174 JumpDest LoopExit = | 3350 JumpDest LoopExit = |
3175 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); | 3351 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); |
3176 emitCommonSimdLoop( | 3352 emitCommonSimdLoop( |
3177 *this, S, | 3353 *this, S, |
3178 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { | 3354 [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3179 if (isOpenMPSimdDirective(S.getDirectiveKind())) { | 3355 if (isOpenMPSimdDirective(S.getDirectiveKind())) { |
3180 CGF.EmitOMPSimdInit(S, IsMonotonic); | 3356 CGF.EmitOMPSimdInit(S); |
3181 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { | 3357 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { |
3182 if (C->getKind() == OMPC_ORDER_concurrent) | 3358 if (C->getKind() == OMPC_ORDER_concurrent) |
3183 CGF.LoopStack.setParallel(/*Enable=*/true); | 3359 CGF.LoopStack.setParallel(/*Enable=*/true); |
3184 } | 3360 } |
3185 }, | 3361 }, |
3671 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); | 3847 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); |
3672 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); | 3848 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); |
3673 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); | 3849 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); |
3674 // Generate condition for loop. | 3850 // Generate condition for loop. |
3675 BinaryOperator *Cond = BinaryOperator::Create( | 3851 BinaryOperator *Cond = BinaryOperator::Create( |
3676 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary, | 3852 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary, |
3677 S.getBeginLoc(), FPOptionsOverride()); | 3853 S.getBeginLoc(), FPOptionsOverride()); |
3678 // Increment for loop counter. | 3854 // Increment for loop counter. |
3679 UnaryOperator *Inc = UnaryOperator::Create( | 3855 UnaryOperator *Inc = UnaryOperator::Create( |
3680 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, | 3856 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary, |
3681 S.getBeginLoc(), true, FPOptionsOverride()); | 3857 S.getBeginLoc(), true, FPOptionsOverride()); |
3682 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { | 3858 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { |
3683 // Iterate through all sections and emit a switch construct: | 3859 // Iterate through all sections and emit a switch construct: |
3684 // switch (IV) { | 3860 // switch (IV) { |
3685 // case 0: | 3861 // case 0: |
4569 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, | 4745 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, |
4570 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); | 4746 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); |
4571 PrivateVD->setInitStyle(VarDecl::CInit); | 4747 PrivateVD->setInitStyle(VarDecl::CInit); |
4572 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, | 4748 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, |
4573 InitRef, /*BasePath=*/nullptr, | 4749 InitRef, /*BasePath=*/nullptr, |
4574 VK_RValue, FPOptionsOverride())); | 4750 VK_PRValue, FPOptionsOverride())); |
4575 Data.FirstprivateVars.emplace_back(OrigRef); | 4751 Data.FirstprivateVars.emplace_back(OrigRef); |
4576 Data.FirstprivateCopies.emplace_back(PrivateRef); | 4752 Data.FirstprivateCopies.emplace_back(PrivateRef); |
4577 Data.FirstprivateInits.emplace_back(InitRef); | 4753 Data.FirstprivateInits.emplace_back(InitRef); |
4578 return OrigVD; | 4754 return OrigVD; |
4579 } | 4755 } |
5197 // | 5373 // |
5198 emitCommonSimdLoop( | 5374 emitCommonSimdLoop( |
5199 *this, S, | 5375 *this, S, |
5200 [&S](CodeGenFunction &CGF, PrePostActionTy &) { | 5376 [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
5201 if (isOpenMPSimdDirective(S.getDirectiveKind())) | 5377 if (isOpenMPSimdDirective(S.getDirectiveKind())) |
5202 CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); | 5378 CGF.EmitOMPSimdInit(S); |
5203 }, | 5379 }, |
5204 [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop, | 5380 [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop, |
5205 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) { | 5381 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) { |
5206 CGF.EmitOMPInnerLoop( | 5382 CGF.EmitOMPInnerLoop( |
5207 S, LoopScope.requiresCleanups(), Cond, IncExpr, | 5383 S, LoopScope.requiresCleanups(), Cond, IncExpr, |
5760 case OMPC_task_reduction: | 5936 case OMPC_task_reduction: |
5761 case OMPC_in_reduction: | 5937 case OMPC_in_reduction: |
5762 case OMPC_safelen: | 5938 case OMPC_safelen: |
5763 case OMPC_simdlen: | 5939 case OMPC_simdlen: |
5764 case OMPC_sizes: | 5940 case OMPC_sizes: |
5941 case OMPC_full: | |
5942 case OMPC_partial: | |
5765 case OMPC_allocator: | 5943 case OMPC_allocator: |
5766 case OMPC_allocate: | 5944 case OMPC_allocate: |
5767 case OMPC_collapse: | 5945 case OMPC_collapse: |
5768 case OMPC_default: | 5946 case OMPC_default: |
5769 case OMPC_seq_cst: | 5947 case OMPC_seq_cst: |
6025 S.getBeginLoc()); | 6203 S.getBeginLoc()); |
6026 } | 6204 } |
6027 | 6205 |
6028 OMPTeamsScope Scope(CGF, S); | 6206 OMPTeamsScope Scope(CGF, S); |
6029 llvm::SmallVector<llvm::Value *, 16> CapturedVars; | 6207 llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
6030 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); | 6208 CGF.GenerateOpenMPCapturedVarsAggregate(*CS, CapturedVars); |
6031 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn, | 6209 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn, |
6032 CapturedVars); | 6210 CapturedVars); |
6033 } | 6211 } |
6034 | 6212 |
6035 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { | 6213 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { |