comparison clang/lib/CodeGen/CGStmtOpenMP.cpp @ 223:5f17cb93ff66 llvm-original

LLVM13 (2021/7/18)
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Sun, 18 Jul 2021 22:43:00 +0900
parents 79ff65ed7e25
children 5f20bc1ed4ff
comparison
equal deleted inserted replaced
222:81f6424ef0e3 223:5f17cb93ff66
174 return false; 174 return false;
175 }); 175 });
176 PreInits = cast_or_null<DeclStmt>(LD->getPreInits()); 176 PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
177 } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) { 177 } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
178 PreInits = cast_or_null<DeclStmt>(Tile->getPreInits()); 178 PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
179 } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
180 PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
179 } else { 181 } else {
180 llvm_unreachable("Unknown loop-based directive kind."); 182 llvm_unreachable("Unknown loop-based directive kind.");
181 } 183 }
182 if (PreInits) { 184 if (PreInits) {
183 for (const auto *I : PreInits->decls()) 185 for (const auto *I : PreInits->decls())
316 return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars)); 318 return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
317 } 319 }
318 return CGM.getSize(SizeInChars); 320 return CGM.getSize(SizeInChars);
319 } 321 }
320 322
// Packs the captures of the captured statement S into a single memory
// temporary of the captured record type and appends the address of that
// temporary to CapturedVars. Exactly one value is appended by this function,
// independent of how many captures S has.
323 void CodeGenFunction::GenerateOpenMPCapturedVarsAggregate(
324 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
325 const RecordDecl *RD = S.getCapturedRecordDecl();
326 QualType RecordTy = getContext().getRecordType(RD);
327 // Create the aggregate argument struct for the outlined function.
328 LValue AggLV = MakeAddrLValue(
329 CreateMemTemp(RecordTy, "omp.outlined.arg.agg."), RecordTy);
330
331 // Initialize the aggregate with captured values.
// Capture initializers and record fields are advanced in lockstep. CurField is
// never compared against RD->field_end(), so this assumes RD has one field per
// capture initializer -- TODO confirm against how the CapturedStmt is built.
332 auto CurField = RD->field_begin();
333 for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
334 E = S.capture_init_end();
335 I != E; ++I, ++CurField) {
// Address of the current field inside the aggregate temporary.
336 LValue LV = EmitLValueForFieldInitialization(AggLV, *CurField);
337 // Initialize for VLA.
338 if (CurField->hasCapturedVLAType()) {
339 EmitLambdaVLACapture(CurField->getCapturedVLAType(), LV);
340 } else
341 // Initialize for capturesThis, capturesVariableByCopy,
342 // capturesVariable
343 EmitInitializerForField(*CurField, LV, *I);
344 }
345
// Hand the aggregate to the caller as a single pointer value.
346 CapturedVars.push_back(AggLV.getPointer(*this));
347 }
348
321 void CodeGenFunction::GenerateOpenMPCapturedVars( 349 void CodeGenFunction::GenerateOpenMPCapturedVars(
322 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { 350 const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
323 const RecordDecl *RD = S.getCapturedRecordDecl(); 351 const RecordDecl *RD = S.getCapturedRecordDecl();
324 auto CurField = RD->field_begin(); 352 auto CurField = RD->field_begin();
325 auto CurCap = S.captures().begin(); 353 auto CurCap = S.captures().begin();
415 : S(S), UIntPtrCastRequired(UIntPtrCastRequired), 443 : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
416 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), 444 RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
417 FunctionName(FunctionName), Loc(Loc) {} 445 FunctionName(FunctionName), Loc(Loc) {}
418 }; 446 };
419 } // namespace 447 } // namespace
448
// Creates the internal-linkage, void-returning llvm::Function named
// FunctionName for the captured statement CS, starts emitting it in CGF, and
// unpacks the captured record reached through the CapturedDecl's context
// parameter.
//
// Outputs:
//   Args         - receives the parameters of the CapturedDecl.
//   LocalAddrs   - one entry per non-VLA field: field -> (captured variable,
//                  address to use inside the outlined function). The variable
//                  is nullptr for the entry that represents a captured 'this'.
//   VLASizes     - one entry per VLA-size field: field -> (size expression,
//                  loaded size value).
//   CXXThisValue - reset to nullptr, then set to the loaded 'this' value if CS
//                  captures 'this'.
//
// Returns the created function. FinishFunction is not called here, so the
// function body is still open in CGF on return.
449 static llvm::Function *emitOutlinedFunctionPrologueAggregate(
450 CodeGenFunction &CGF, FunctionArgList &Args,
451 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
452 &LocalAddrs,
453 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
454 &VLASizes,
455 llvm::Value *&CXXThisValue, const CapturedStmt &CS, SourceLocation Loc,
456 StringRef FunctionName) {
457 const CapturedDecl *CD = CS.getCapturedDecl();
458 const RecordDecl *RD = CS.getCapturedRecordDecl();
459 assert(CD->hasBody() && "missing CapturedDecl body");
460
// Only the 'this' capture branch at the end of the field loop sets this again.
461 CXXThisValue = nullptr;
462 // Build the argument list.
463 CodeGenModule &CGM = CGF.CGM;
464 ASTContext &Ctx = CGM.getContext();
// The parameter list is taken unchanged from the CapturedDecl.
465 Args.append(CD->param_begin(), CD->param_end());
466
467 // Create the function declaration.
468 const CGFunctionInfo &FuncInfo =
469 CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args);
470 llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);
471
472 auto *F =
473 llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
474 FunctionName, &CGM.getModule());
475 CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
// nothrow is copied from the CapturedDecl; norecurse is set unconditionally.
476 if (CD->isNothrow())
477 F->setDoesNotThrow();
478 F->setDoesNotRecurse();
479
480 // Generate the function.
481 CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, Loc, Loc);
// Load the pointer stored in the context parameter and view it as an lvalue of
// the captured record type RD.
482 Address ContextAddr = CGF.GetAddrOfLocalVar(CD->getContextParam());
483 llvm::Value *ContextV = CGF.Builder.CreateLoad(ContextAddr);
484 LValue ContextLV = CGF.MakeNaturalAlignAddrLValue(
485 ContextV, CGM.getContext().getTagDeclType(RD));
// I walks the captures in step with the record fields; it is advanced once per
// loop iteration (before the early 'continue' and at the end of the body) and
// is never compared against captures().end() -- assumes one capture per field,
// TODO confirm.
486 auto I = CS.captures().begin();
487 for (const FieldDecl *FD : RD->fields()) {
488 LValue FieldLV = CGF.EmitLValueForFieldInitialization(ContextLV, FD);
489 // Do not map arguments if we emit function with non-original types.
// NOTE(review): nothing in this function checks for non-original types; the
// comment above may be stale -- confirm.
490 Address LocalAddr = FieldLV.getAddress(CGF);
491 // If we are capturing a pointer by copy we don't need to do anything, just
492 // use the value that we get from the arguments.
// Here that means the field's own address inside the aggregate is recorded.
493 if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
494 const VarDecl *CurVD = I->getCapturedVar();
495 LocalAddrs.insert({FD, {CurVD, LocalAddr}});
496 ++I;
497 continue;
498 }
499
500 LValue ArgLVal =
501 CGF.MakeAddrLValue(LocalAddr, FD->getType(), AlignmentSource::Decl);
502 if (FD->hasCapturedVLAType()) {
// VLA size field: load the size value and remember it together with the size
// expression of the captured VLA type.
503 llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
504 const VariableArrayType *VAT = FD->getCapturedVLAType();
505 VLASizes.try_emplace(FD, VAT->getSizeExpr(), ExprArg);
506 } else if (I->capturesVariable()) {
// Any other capturesVariable() capture: start from the field's address and
// dereference it as dictated by the field type.
507 const VarDecl *Var = I->getCapturedVar();
508 QualType VarTy = Var->getType();
509 Address ArgAddr = ArgLVal.getAddress(CGF);
510 if (ArgLVal.getType()->isLValueReferenceType()) {
511 ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
// The condition below is the De Morgan form of
// !(isVariablyModifiedType() && isPointerType()): the pointer load is skipped
// only when VarTy is both variably modified and a pointer type.
512 } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
513 assert(ArgLVal.getType()->isPointerType());
514 ArgAddr = CGF.EmitLoadOfPointer(
515 ArgAddr, ArgLVal.getType()->castAs<PointerType>());
516 }
// Record the resulting pointer, re-wrapped with the alignment that
// Ctx.getDeclAlign(Var) reports for the original variable.
517 LocalAddrs.insert(
518 {FD, {Var, Address(ArgAddr.getPointer(), Ctx.getDeclAlign(Var))}});
519 } else if (I->capturesVariableByCopy()) {
// Non-pointer by-copy capture (pointers were handled by the early 'continue'):
// copy the value out of the aggregate into a fresh temporary named after the
// variable and use that temporary as the variable's address.
520 assert(!FD->getType()->isAnyPointerType() &&
521 "Not expecting a captured pointer.");
522 const VarDecl *Var = I->getCapturedVar();
523 Address CopyAddr = CGF.CreateMemTemp(FD->getType(), Ctx.getDeclAlign(FD),
524 Var->getName());
525 LValue CopyLVal =
526 CGF.MakeAddrLValue(CopyAddr, FD->getType(), AlignmentSource::Decl);
527
528 RValue ArgRVal = CGF.EmitLoadOfLValue(ArgLVal, I->getLocation());
529 CGF.EmitStoreThroughLValue(ArgRVal, CopyLVal);
530
531 LocalAddrs.insert({FD, {Var, CopyAddr}});
532 } else {
533 // If 'this' is captured, load it into CXXThisValue.
534 assert(I->capturesThis());
535 CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
// The entry for 'this' carries a null VarDecl.
536 LocalAddrs.insert({FD, {nullptr, ArgLVal.getAddress(CGF)}});
537 }
538 ++I;
539 }
540
541 return F;
542 }
420 543
421 static llvm::Function *emitOutlinedFunctionPrologue( 544 static llvm::Function *emitOutlinedFunctionPrologue(
422 CodeGenFunction &CGF, FunctionArgList &Args, 545 CodeGenFunction &CGF, FunctionArgList &Args,
423 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> 546 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
424 &LocalAddrs, 547 &LocalAddrs,
591 } 714 }
592 715
593 return F; 716 return F;
594 } 717 }
595 718
// Emits the outlined function for the captured statement S using
// emitOutlinedFunctionPrologueAggregate: creates the function, remaps each
// captured variable to the address produced by the prologue, registers the
// captured VLA sizes, emits the captured body and finishes the function.
// The function name comes from CapturedStmtInfo->getHelperName(), so
// CapturedStmtInfo must be set (asserted below). Returns the emitted function.
719 llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunctionAggregate(
720 const CapturedStmt &S, SourceLocation Loc) {
721 assert(
722 CapturedStmtInfo &&
723 "CapturedStmtInfo should be set when generating the captured function");
724 const CapturedDecl *CD = S.getCapturedDecl();
725 // Build the argument list.
726 FunctionArgList Args;
727 llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
728 llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
729 StringRef FunctionName = CapturedStmtInfo->getHelperName();
// The prologue fills Args, LocalAddrs and VLASizes and writes this
// CodeGenFunction's CXXThisValue member through the reference parameter.
730 llvm::Function *F = emitOutlinedFunctionPrologueAggregate(
731 *this, Args, LocalAddrs, VLASizes, CXXThisValue, S, Loc, FunctionName);
732 CodeGenFunction::OMPPrivateScope LocalScope(*this);
// Register every captured variable with its new address. Entries whose VarDecl
// is null (the prologue records one for a captured 'this') are skipped.
733 for (const auto &LocalAddrPair : LocalAddrs) {
734 if (LocalAddrPair.second.first) {
735 LocalScope.addPrivate(LocalAddrPair.second.first, [&LocalAddrPair]() {
736 return LocalAddrPair.second.second;
737 });
738 }
739 }
740 (void)LocalScope.Privatize();
// VLASizeMap is keyed by the size expression, not by the field declaration.
741 for (const auto &VLASizePair : VLASizes)
742 VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
743 PGO.assignRegionCounters(GlobalDecl(CD), F);
744 CapturedStmtInfo->EmitBody(*this, CD->getBody());
// The private scope is cleaned up explicitly before the function is finished.
745 (void)LocalScope.ForceCleanup();
746 FinishFunction(CD->getBodyRBrace());
747 return F;
748 }
749
596 llvm::Function * 750 llvm::Function *
597 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, 751 CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
598 SourceLocation Loc) { 752 SourceLocation Loc) {
599 assert( 753 assert(
600 CapturedStmtInfo && 754 CapturedStmtInfo &&
694 SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); 848 SrcAddr = Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
695 849
696 llvm::Value *SrcBegin = SrcAddr.getPointer(); 850 llvm::Value *SrcBegin = SrcAddr.getPointer();
697 llvm::Value *DestBegin = DestAddr.getPointer(); 851 llvm::Value *DestBegin = DestAddr.getPointer();
698 // Cast from pointer to array type to pointer to single element. 852 // Cast from pointer to array type to pointer to single element.
699 llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements); 853 llvm::Value *DestEnd =
854 Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
700 // The basic structure here is a while-do loop. 855 // The basic structure here is a while-do loop.
701 llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body"); 856 llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
702 llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done"); 857 llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
703 llvm::Value *IsEmpty = 858 llvm::Value *IsEmpty =
704 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty"); 859 Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
727 // Emit copy. 882 // Emit copy.
728 CopyGen(DestElementCurrent, SrcElementCurrent); 883 CopyGen(DestElementCurrent, SrcElementCurrent);
729 884
730 // Shift the address forward by one element. 885 // Shift the address forward by one element.
731 llvm::Value *DestElementNext = Builder.CreateConstGEP1_32( 886 llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
732 DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); 887 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
888 "omp.arraycpy.dest.element");
733 llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32( 889 llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
734 SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); 890 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
891 "omp.arraycpy.src.element");
735 // Check whether we've reached the end. 892 // Check whether we've reached the end.
736 llvm::Value *Done = 893 llvm::Value *Done =
737 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); 894 Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
738 Builder.CreateCondBr(Done, DoneBB, BodyBB); 895 Builder.CreateCondBr(Done, DoneBB, BodyBB);
739 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock()); 896 DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
822 // Do not emit copy for firstprivate constant variables in target regions, 979 // Do not emit copy for firstprivate constant variables in target regions,
823 // captured by reference. 980 // captured by reference.
824 if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) && 981 if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
825 FD && FD->getType()->isReferenceType() && 982 FD && FD->getType()->isReferenceType() &&
826 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { 983 (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
827 (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this, 984 EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
828 OrigVD);
829 ++IRef; 985 ++IRef;
830 ++InitsRef; 986 ++InitsRef;
831 continue; 987 continue;
832 } 988 }
833 FirstprivateIsLastprivate = 989 FirstprivateIsLastprivate =
1579 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk 1735 // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
1580 // lower and upper bounds with the pragma 'for' chunking mechanism. 1736 // lower and upper bounds with the pragma 'for' chunking mechanism.
1581 // The following lambda takes care of appending the lower and upper bound 1737 // The following lambda takes care of appending the lower and upper bound
1582 // parameters when necessary 1738 // parameters when necessary
1583 CodeGenBoundParameters(CGF, S, CapturedVars); 1739 CodeGenBoundParameters(CGF, S, CapturedVars);
1584 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 1740 CGF.GenerateOpenMPCapturedVarsAggregate(*CS, CapturedVars);
1585 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, 1741 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1586 CapturedVars, IfCond); 1742 CapturedVars, IfCond);
1587 } 1743 }
1588 1744
1589 static bool isAllocatableDecl(const VarDecl *VD) { 1745 static bool isAllocatableDecl(const VarDecl *VD) {
1818 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level); 1974 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1819 return; 1975 return;
1820 } 1976 }
1821 if (SimplifiedS == NextLoop) { 1977 if (SimplifiedS == NextLoop) {
1822 if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS)) 1978 if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS))
1979 SimplifiedS = Dir->getTransformedStmt();
1980 if (auto *Dir = dyn_cast<OMPUnrollDirective>(SimplifiedS))
1823 SimplifiedS = Dir->getTransformedStmt(); 1981 SimplifiedS = Dir->getTransformedStmt();
1824 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS)) 1982 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
1825 SimplifiedS = CanonLoop->getLoopStmt(); 1983 SimplifiedS = CanonLoop->getLoopStmt();
1826 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) { 1984 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1827 S = For->getBody(); 1985 S = For->getBody();
2317 } 2475 }
2318 } 2476 }
2319 } 2477 }
2320 2478
2321 static void emitSimdlenSafelenClause(CodeGenFunction &CGF, 2479 static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2322 const OMPExecutableDirective &D, 2480 const OMPExecutableDirective &D) {
2323 bool IsMonotonic) {
2324 if (!CGF.HaveInsertPoint()) 2481 if (!CGF.HaveInsertPoint())
2325 return; 2482 return;
2326 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { 2483 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2327 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(), 2484 RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
2328 /*ignoreResult=*/true); 2485 /*ignoreResult=*/true);
2329 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 2486 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2330 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 2487 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2331 // In presence of finite 'safelen', it may be unsafe to mark all 2488 // In presence of finite 'safelen', it may be unsafe to mark all
2332 // the memory instructions parallel, because loop-carried 2489 // the memory instructions parallel, because loop-carried
2333 // dependences of 'safelen' iterations are possible. 2490 // dependences of 'safelen' iterations are possible.
2334 if (!IsMonotonic) 2491 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2335 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2336 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { 2492 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2337 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), 2493 RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
2338 /*ignoreResult=*/true); 2494 /*ignoreResult=*/true);
2339 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal()); 2495 auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
2340 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); 2496 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2343 // dependences of 'safelen' iterations are possible. 2499 // dependences of 'safelen' iterations are possible.
2344 CGF.LoopStack.setParallel(/*Enable=*/false); 2500 CGF.LoopStack.setParallel(/*Enable=*/false);
2345 } 2501 }
2346 } 2502 }
2347 2503
2348 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, 2504 void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2349 bool IsMonotonic) {
2350 // Walk clauses and process safelen/lastprivate. 2505 // Walk clauses and process safelen/lastprivate.
2351 LoopStack.setParallel(!IsMonotonic); 2506 LoopStack.setParallel(/*Enable=*/true);
2352 LoopStack.setVectorizeEnable(); 2507 LoopStack.setVectorizeEnable();
2353 emitSimdlenSafelenClause(*this, D, IsMonotonic); 2508 emitSimdlenSafelenClause(*this, D);
2354 if (const auto *C = D.getSingleClause<OMPOrderClause>()) 2509 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2355 if (C->getKind() == OMPC_ORDER_concurrent) 2510 if (C->getKind() == OMPC_ORDER_concurrent)
2356 LoopStack.setParallel(/*Enable=*/true); 2511 LoopStack.setParallel(/*Enable=*/true);
2357 if ((D.getDirectiveKind() == OMPD_simd || 2512 if ((D.getDirectiveKind() == OMPD_simd ||
2358 (getLangOpts().OpenMPSimd && 2513 (getLangOpts().OpenMPSimd &&
2577 // Emit the de-sugared statement. 2732 // Emit the de-sugared statement.
2578 OMPTransformDirectiveScopeRAII TileScope(*this, &S); 2733 OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2579 EmitStmt(S.getTransformedStmt()); 2734 EmitStmt(S.getTransformedStmt());
2580 } 2735 }
2581 2736
// Emits the statement associated with the unroll directive S after configuring
// LoopStack's unroll state from S's clauses:
//   - 'full' clause present        -> unroll state Full
//   - 'partial' clause with factor -> unroll state Enable plus unroll count
//   - otherwise                    -> unroll state Enable
2737 void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
2738 // This function is only called if the unrolled loop is not consumed by any
2739 // other loop-associated construct. Such a loop-associated construct will have
2740 // used the transformed AST.
2741
2742 // Set the unroll metadata for the next emitted loop.
2743 LoopStack.setUnrollState(LoopAttributes::Enable);
2744
// 'full' overrides the default state set above. A 'partial' clause without a
// factor expression leaves the state at Enable with no count.
2745 if (S.hasClausesOfKind<OMPFullClause>()) {
2746 LoopStack.setUnrollState(LoopAttributes::Full);
2747 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2748 if (Expr *FactorExpr = PartialClause->getFactor()) {
// The factor is evaluated as a known integer constant; non-positive values are
// only rejected by the assert below.
2749 uint64_t Factor =
2750 FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
2751 assert(Factor >= 1 && "Only positive factors are valid");
2752 LoopStack.setUnrollCount(Factor);
2753 }
2754 }
2755
// Emit the associated statement with the unroll attributes configured above.
2756 EmitStmt(S.getAssociatedStmt());
2757 }
2758
2582 void CodeGenFunction::EmitOMPOuterLoop( 2759 void CodeGenFunction::EmitOMPOuterLoop(
2583 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 2760 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2584 CodeGenFunction::OMPPrivateScope &LoopScope, 2761 CodeGenFunction::OMPPrivateScope &LoopScope,
2585 const CodeGenFunction::OMPLoopArguments &LoopArgs, 2762 const CodeGenFunction::OMPLoopArguments &LoopArgs,
2586 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, 2763 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2649 CGF.LoopStack.setParallel(!IsMonotonic); 2826 CGF.LoopStack.setParallel(!IsMonotonic);
2650 if (const auto *C = S.getSingleClause<OMPOrderClause>()) 2827 if (const auto *C = S.getSingleClause<OMPOrderClause>())
2651 if (C->getKind() == OMPC_ORDER_concurrent) 2828 if (C->getKind() == OMPC_ORDER_concurrent)
2652 CGF.LoopStack.setParallel(/*Enable=*/true); 2829 CGF.LoopStack.setParallel(/*Enable=*/true);
2653 } else { 2830 } else {
2654 CGF.EmitOMPSimdInit(S, IsMonotonic); 2831 CGF.EmitOMPSimdInit(S);
2655 } 2832 }
2656 }, 2833 },
2657 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, 2834 [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
2658 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2835 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2659 SourceLocation Loc = S.getBeginLoc(); 2836 SourceLocation Loc = S.getBeginLoc();
3159 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, 3336 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
3160 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && 3337 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
3161 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); 3338 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
3162 bool IsMonotonic = 3339 bool IsMonotonic =
3163 Ordered || 3340 Ordered ||
3164 ((ScheduleKind.Schedule == OMPC_SCHEDULE_static || 3341 (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
3165 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) &&
3166 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || 3342 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
3167 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || 3343 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
3168 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || 3344 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
3169 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; 3345 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
3170 if ((RT.isStaticNonchunked(ScheduleKind.Schedule, 3346 if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
3173 !Ordered) { 3349 !Ordered) {
3174 JumpDest LoopExit = 3350 JumpDest LoopExit =
3175 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 3351 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
3176 emitCommonSimdLoop( 3352 emitCommonSimdLoop(
3177 *this, S, 3353 *this, S,
3178 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { 3354 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3179 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 3355 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3180 CGF.EmitOMPSimdInit(S, IsMonotonic); 3356 CGF.EmitOMPSimdInit(S);
3181 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { 3357 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3182 if (C->getKind() == OMPC_ORDER_concurrent) 3358 if (C->getKind() == OMPC_ORDER_concurrent)
3183 CGF.LoopStack.setParallel(/*Enable=*/true); 3359 CGF.LoopStack.setParallel(/*Enable=*/true);
3184 } 3360 }
3185 }, 3361 },
3671 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); 3847 CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
3672 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 3848 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3673 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 3849 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
3674 // Generate condition for loop. 3850 // Generate condition for loop.
3675 BinaryOperator *Cond = BinaryOperator::Create( 3851 BinaryOperator *Cond = BinaryOperator::Create(
3676 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary, 3852 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
3677 S.getBeginLoc(), FPOptionsOverride()); 3853 S.getBeginLoc(), FPOptionsOverride());
3678 // Increment for loop counter. 3854 // Increment for loop counter.
3679 UnaryOperator *Inc = UnaryOperator::Create( 3855 UnaryOperator *Inc = UnaryOperator::Create(
3680 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 3856 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
3681 S.getBeginLoc(), true, FPOptionsOverride()); 3857 S.getBeginLoc(), true, FPOptionsOverride());
3682 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { 3858 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
3683 // Iterate through all sections and emit a switch construct: 3859 // Iterate through all sections and emit a switch construct:
3684 // switch (IV) { 3860 // switch (IV) {
3685 // case 0: 3861 // case 0:
4569 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, 4745 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
4570 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); 4746 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
4571 PrivateVD->setInitStyle(VarDecl::CInit); 4747 PrivateVD->setInitStyle(VarDecl::CInit);
4572 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, 4748 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
4573 InitRef, /*BasePath=*/nullptr, 4749 InitRef, /*BasePath=*/nullptr,
4574 VK_RValue, FPOptionsOverride())); 4750 VK_PRValue, FPOptionsOverride()));
4575 Data.FirstprivateVars.emplace_back(OrigRef); 4751 Data.FirstprivateVars.emplace_back(OrigRef);
4576 Data.FirstprivateCopies.emplace_back(PrivateRef); 4752 Data.FirstprivateCopies.emplace_back(PrivateRef);
4577 Data.FirstprivateInits.emplace_back(InitRef); 4753 Data.FirstprivateInits.emplace_back(InitRef);
4578 return OrigVD; 4754 return OrigVD;
4579 } 4755 }
5197 // 5373 //
5198 emitCommonSimdLoop( 5374 emitCommonSimdLoop(
5199 *this, S, 5375 *this, S,
5200 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 5376 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5201 if (isOpenMPSimdDirective(S.getDirectiveKind())) 5377 if (isOpenMPSimdDirective(S.getDirectiveKind()))
5202 CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); 5378 CGF.EmitOMPSimdInit(S);
5203 }, 5379 },
5204 [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop, 5380 [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
5205 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) { 5381 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
5206 CGF.EmitOMPInnerLoop( 5382 CGF.EmitOMPInnerLoop(
5207 S, LoopScope.requiresCleanups(), Cond, IncExpr, 5383 S, LoopScope.requiresCleanups(), Cond, IncExpr,
5760 case OMPC_task_reduction: 5936 case OMPC_task_reduction:
5761 case OMPC_in_reduction: 5937 case OMPC_in_reduction:
5762 case OMPC_safelen: 5938 case OMPC_safelen:
5763 case OMPC_simdlen: 5939 case OMPC_simdlen:
5764 case OMPC_sizes: 5940 case OMPC_sizes:
5941 case OMPC_full:
5942 case OMPC_partial:
5765 case OMPC_allocator: 5943 case OMPC_allocator:
5766 case OMPC_allocate: 5944 case OMPC_allocate:
5767 case OMPC_collapse: 5945 case OMPC_collapse:
5768 case OMPC_default: 5946 case OMPC_default:
5769 case OMPC_seq_cst: 5947 case OMPC_seq_cst:
6025 S.getBeginLoc()); 6203 S.getBeginLoc());
6026 } 6204 }
6027 6205
6028 OMPTeamsScope Scope(CGF, S); 6206 OMPTeamsScope Scope(CGF, S);
6029 llvm::SmallVector<llvm::Value *, 16> CapturedVars; 6207 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
6030 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 6208 CGF.GenerateOpenMPCapturedVarsAggregate(*CS, CapturedVars);
6031 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn, 6209 CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
6032 CapturedVars); 6210 CapturedVars);
6033 } 6211 }
6034 6212
6035 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { 6213 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {