comparison clang/lib/CodeGen/CGStmtOpenMP.cpp @ 221:79ff65ed7e25

LLVM12 Original
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Tue, 15 Jun 2021 19:15:29 +0900
parents 0572611fdcc8
children 5f17cb93ff66
comparison
equal deleted inserted replaced
220:42394fc6a535 221:79ff65ed7e25
19 #include "clang/AST/Attr.h" 19 #include "clang/AST/Attr.h"
20 #include "clang/AST/DeclOpenMP.h" 20 #include "clang/AST/DeclOpenMP.h"
21 #include "clang/AST/OpenMPClause.h" 21 #include "clang/AST/OpenMPClause.h"
22 #include "clang/AST/Stmt.h" 22 #include "clang/AST/Stmt.h"
23 #include "clang/AST/StmtOpenMP.h" 23 #include "clang/AST/StmtOpenMP.h"
24 #include "clang/AST/StmtVisitor.h"
24 #include "clang/Basic/OpenMPKinds.h" 25 #include "clang/Basic/OpenMPKinds.h"
25 #include "clang/Basic/PrettyStackTrace.h" 26 #include "clang/Basic/PrettyStackTrace.h"
27 #include "llvm/Frontend/OpenMP/OMPConstants.h"
26 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" 28 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
27 #include "llvm/IR/Constants.h" 29 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/Instructions.h"
29 #include "llvm/Support/AtomicOrdering.h" 31 #include "llvm/Support/AtomicOrdering.h"
30 using namespace clang; 32 using namespace clang;
31 using namespace CodeGen; 33 using namespace CodeGen;
32 using namespace llvm::omp; 34 using namespace llvm::omp;
35
36 static const VarDecl *getBaseDecl(const Expr *Ref);
33 37
34 namespace { 38 namespace {
35 /// Lexical scope for OpenMP executable constructs, that handles correct codegen 39 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
36 /// for captured expressions. 40 /// for captured expressions.
37 class OMPLexicalScope : public CodeGenFunction::LexicalScope { 41 class OMPLexicalScope : public CodeGenFunction::LexicalScope {
127 }; 131 };
128 132
129 /// Private scope for OpenMP loop-based directives, that supports capturing 133 /// Private scope for OpenMP loop-based directives, that supports capturing
130 /// of used expression from loop statement. 134 /// of used expression from loop statement.
131 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { 135 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
132 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { 136 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
137 const DeclStmt *PreInits;
133 CodeGenFunction::OMPMapVars PreCondVars; 138 CodeGenFunction::OMPMapVars PreCondVars;
134 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; 139 if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
135 for (const auto *E : S.counters()) { 140 llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
136 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 141 for (const auto *E : LD->counters()) {
137 EmittedAsPrivate.insert(VD->getCanonicalDecl()); 142 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
138 (void)PreCondVars.setVarAddr( 143 EmittedAsPrivate.insert(VD->getCanonicalDecl());
139 CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType())); 144 (void)PreCondVars.setVarAddr(
140 } 145 CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
141 // Mark private vars as undefs. 146 }
142 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { 147 // Mark private vars as undefs.
143 for (const Expr *IRef : C->varlists()) { 148 for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
144 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); 149 for (const Expr *IRef : C->varlists()) {
145 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 150 const auto *OrigVD =
146 (void)PreCondVars.setVarAddr( 151 cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
147 CGF, OrigVD, 152 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
148 Address(llvm::UndefValue::get( 153 (void)PreCondVars.setVarAddr(
149 CGF.ConvertTypeForMem(CGF.getContext().getPointerType( 154 CGF, OrigVD,
150 OrigVD->getType().getNonReferenceType()))), 155 Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
151 CGF.getContext().getDeclAlign(OrigVD))); 156 CGF.getContext().getPointerType(
157 OrigVD->getType().getNonReferenceType()))),
158 CGF.getContext().getDeclAlign(OrigVD)));
159 }
152 } 160 }
153 } 161 }
154 } 162 (void)PreCondVars.apply(CGF);
155 (void)PreCondVars.apply(CGF); 163 // Emit init, __range and __end variables for C++ range loops.
156 // Emit init, __range and __end variables for C++ range loops. 164 (void)OMPLoopBasedDirective::doForAllLoops(
157 const Stmt *Body = 165 LD->getInnermostCapturedStmt()->getCapturedStmt(),
158 S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); 166 /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
159 for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) { 167 [&CGF](unsigned Cnt, const Stmt *CurStmt) {
160 Body = OMPLoopDirective::tryToFindNextInnerLoop( 168 if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
161 Body, /*TryImperfectlyNestedLoops=*/true); 169 if (const Stmt *Init = CXXFor->getInit())
162 if (auto *For = dyn_cast<ForStmt>(Body)) { 170 CGF.EmitStmt(Init);
163 Body = For->getBody(); 171 CGF.EmitStmt(CXXFor->getRangeStmt());
164 } else { 172 CGF.EmitStmt(CXXFor->getEndStmt());
165 assert(isa<CXXForRangeStmt>(Body) && 173 }
166 "Expected canonical for loop or range-based for loop."); 174 return false;
167 auto *CXXFor = cast<CXXForRangeStmt>(Body); 175 });
168 if (const Stmt *Init = CXXFor->getInit()) 176 PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
169 CGF.EmitStmt(Init); 177 } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
170 CGF.EmitStmt(CXXFor->getRangeStmt()); 178 PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
171 CGF.EmitStmt(CXXFor->getEndStmt()); 179 } else {
172 Body = CXXFor->getBody(); 180 llvm_unreachable("Unknown loop-based directive kind.");
173 } 181 }
174 } 182 if (PreInits) {
175 if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
176 for (const auto *I : PreInits->decls()) 183 for (const auto *I : PreInits->decls())
177 CGF.EmitVarDecl(cast<VarDecl>(*I)); 184 CGF.EmitVarDecl(cast<VarDecl>(*I));
178 } 185 }
179 PreCondVars.restore(CGF); 186 PreCondVars.restore(CGF);
180 } 187 }
181 188
182 public: 189 public:
183 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S) 190 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
184 : CodeGenFunction::RunCleanupsScope(CGF) { 191 : CodeGenFunction::RunCleanupsScope(CGF) {
185 emitPreInitStmt(CGF, S); 192 emitPreInitStmt(CGF, S);
186 } 193 }
187 }; 194 };
188 195
218 for (const Expr *E : UDP->varlists()) { 225 for (const Expr *E : UDP->varlists()) {
219 const Decl *D = cast<DeclRefExpr>(E)->getDecl(); 226 const Decl *D = cast<DeclRefExpr>(E)->getDecl();
220 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) 227 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
221 CGF.EmitVarDecl(*OED); 228 CGF.EmitVarDecl(*OED);
222 } 229 }
230 } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
231 for (const Expr *E : UDP->varlists()) {
232 const Decl *D = getBaseDecl(E);
233 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
234 CGF.EmitVarDecl(*OED);
235 }
223 } 236 }
224 } 237 }
225 if (!isOpenMPSimdDirective(S.getDirectiveKind())) 238 if (!isOpenMPSimdDirective(S.getDirectiveKind()))
226 CGF.EmitOMPPrivateClause(S, InlinedShareds); 239 CGF.EmitOMPPrivateClause(S, InlinedShareds);
227 if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) { 240 if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
228 if (const Expr *E = TG->getReductionRef()) 241 if (const Expr *E = TG->getReductionRef())
229 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl())); 242 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
230 } 243 }
244 // Temp copy arrays for inscan reductions should not be emitted as they are
245 // not used in simd only mode.
246 llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
247 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
248 if (C->getModifier() != OMPC_REDUCTION_inscan)
249 continue;
250 for (const Expr *E : C->copy_array_temps())
251 CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
252 }
231 const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt()); 253 const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
232 while (CS) { 254 while (CS) {
233 for (auto &C : CS->captures()) { 255 for (auto &C : CS->captures()) {
234 if (C.capturesVariable() || C.capturesVariableByCopy()) { 256 if (C.capturesVariable() || C.capturesVariableByCopy()) {
235 auto *VD = C.getCapturedVar(); 257 auto *VD = C.getCapturedVar();
258 if (CopyArrayTemps.contains(VD))
259 continue;
236 assert(VD == VD->getCanonicalDecl() && 260 assert(VD == VD->getCanonicalDecl() &&
237 "Canonical decl must be captured."); 261 "Canonical decl must be captured.");
238 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), 262 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
239 isCapturedVar(CGF, VD) || 263 isCapturedVar(CGF, VD) ||
240 (CGF.CapturedStmtInfo && 264 (CGF.CapturedStmtInfo &&
619 VLASizes.clear(); 643 VLASizes.clear();
620 llvm::Function *WrapperF = 644 llvm::Function *WrapperF =
621 emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, 645 emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
622 WrapperCGF.CXXThisValue, WrapperFO); 646 WrapperCGF.CXXThisValue, WrapperFO);
623 llvm::SmallVector<llvm::Value *, 4> CallArgs; 647 llvm::SmallVector<llvm::Value *, 4> CallArgs;
648 auto *PI = F->arg_begin();
624 for (const auto *Arg : Args) { 649 for (const auto *Arg : Args) {
625 llvm::Value *CallArg; 650 llvm::Value *CallArg;
626 auto I = LocalAddrs.find(Arg); 651 auto I = LocalAddrs.find(Arg);
627 if (I != LocalAddrs.end()) { 652 if (I != LocalAddrs.end()) {
628 LValue LV = WrapperCGF.MakeAddrLValue( 653 LValue LV = WrapperCGF.MakeAddrLValue(
629 I->second.second, 654 I->second.second,
630 I->second.first ? I->second.first->getType() : Arg->getType(), 655 I->second.first ? I->second.first->getType() : Arg->getType(),
631 AlignmentSource::Decl); 656 AlignmentSource::Decl);
657 if (LV.getType()->isAnyComplexType())
658 LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
659 LV.getAddress(WrapperCGF),
660 PI->getType()->getPointerTo(
661 LV.getAddress(WrapperCGF).getAddressSpace())));
632 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); 662 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
633 } else { 663 } else {
634 auto EI = VLASizes.find(Arg); 664 auto EI = VLASizes.find(Arg);
635 if (EI != VLASizes.end()) { 665 if (EI != VLASizes.end()) {
636 CallArg = EI->second.second; 666 CallArg = EI->second.second;
640 AlignmentSource::Decl); 670 AlignmentSource::Decl);
641 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); 671 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
642 } 672 }
643 } 673 }
644 CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); 674 CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
675 ++PI;
645 } 676 }
646 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs); 677 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
647 WrapperCGF.FinishFunction(); 678 WrapperCGF.FinishFunction();
648 return WrapperF; 679 return WrapperF;
649 } 680 }
973 if (CopiedVars.size() == 1) { 1004 if (CopiedVars.size() == 1) {
974 // At first check if current thread is a master thread. If it is, no 1005 // At first check if current thread is a master thread. If it is, no
975 // need to copy data. 1006 // need to copy data.
976 CopyBegin = createBasicBlock("copyin.not.master"); 1007 CopyBegin = createBasicBlock("copyin.not.master");
977 CopyEnd = createBasicBlock("copyin.not.master.end"); 1008 CopyEnd = createBasicBlock("copyin.not.master.end");
1009 // TODO: Avoid ptrtoint conversion.
1010 auto *MasterAddrInt =
1011 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
1012 auto *PrivateAddrInt =
1013 Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
978 Builder.CreateCondBr( 1014 Builder.CreateCondBr(
979 Builder.CreateICmpNE( 1015 Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
980 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), 1016 CopyEnd);
981 Builder.CreatePtrToInt(PrivateAddr.getPointer(),
982 CGM.IntPtrTy)),
983 CopyBegin, CopyEnd);
984 EmitBlock(CopyBegin); 1017 EmitBlock(CopyBegin);
985 } 1018 }
986 const auto *SrcVD = 1019 const auto *SrcVD =
987 cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); 1020 cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
988 const auto *DestVD = 1021 const auto *DestVD =
1159 EmitBlock(DoneBB, /*IsFinished=*/true); 1192 EmitBlock(DoneBB, /*IsFinished=*/true);
1160 } 1193 }
1161 1194
1162 void CodeGenFunction::EmitOMPReductionClauseInit( 1195 void CodeGenFunction::EmitOMPReductionClauseInit(
1163 const OMPExecutableDirective &D, 1196 const OMPExecutableDirective &D,
1164 CodeGenFunction::OMPPrivateScope &PrivateScope) { 1197 CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
1165 if (!HaveInsertPoint()) 1198 if (!HaveInsertPoint())
1166 return; 1199 return;
1167 SmallVector<const Expr *, 4> Shareds; 1200 SmallVector<const Expr *, 4> Shareds;
1168 SmallVector<const Expr *, 4> Privates; 1201 SmallVector<const Expr *, 4> Privates;
1169 SmallVector<const Expr *, 4> ReductionOps; 1202 SmallVector<const Expr *, 4> ReductionOps;
1171 SmallVector<const Expr *, 4> RHSs; 1204 SmallVector<const Expr *, 4> RHSs;
1172 OMPTaskDataTy Data; 1205 OMPTaskDataTy Data;
1173 SmallVector<const Expr *, 4> TaskLHSs; 1206 SmallVector<const Expr *, 4> TaskLHSs;
1174 SmallVector<const Expr *, 4> TaskRHSs; 1207 SmallVector<const Expr *, 4> TaskRHSs;
1175 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1208 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1209 if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
1210 continue;
1176 Shareds.append(C->varlist_begin(), C->varlist_end()); 1211 Shareds.append(C->varlist_begin(), C->varlist_end());
1177 Privates.append(C->privates().begin(), C->privates().end()); 1212 Privates.append(C->privates().begin(), C->privates().end());
1178 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 1213 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1179 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 1214 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1180 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 1215 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1364 case OMPD_requires: 1399 case OMPD_requires:
1365 case OMPD_declare_variant: 1400 case OMPD_declare_variant:
1366 case OMPD_begin_declare_variant: 1401 case OMPD_begin_declare_variant:
1367 case OMPD_end_declare_variant: 1402 case OMPD_end_declare_variant:
1368 case OMPD_unknown: 1403 case OMPD_unknown:
1404 default:
1369 llvm_unreachable("Enexpected directive with task reductions."); 1405 llvm_unreachable("Enexpected directive with task reductions.");
1370 } 1406 }
1371 1407
1372 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl()); 1408 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
1373 EmitVarDecl(*VD); 1409 EmitVarDecl(*VD);
1385 llvm::SmallVector<const Expr *, 8> RHSExprs; 1421 llvm::SmallVector<const Expr *, 8> RHSExprs;
1386 llvm::SmallVector<const Expr *, 8> ReductionOps; 1422 llvm::SmallVector<const Expr *, 8> ReductionOps;
1387 bool HasAtLeastOneReduction = false; 1423 bool HasAtLeastOneReduction = false;
1388 bool IsReductionWithTaskMod = false; 1424 bool IsReductionWithTaskMod = false;
1389 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { 1425 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1426 // Do not emit for inscan reductions.
1427 if (C->getModifier() == OMPC_REDUCTION_inscan)
1428 continue;
1390 HasAtLeastOneReduction = true; 1429 HasAtLeastOneReduction = true;
1391 Privates.append(C->privates().begin(), C->privates().end()); 1430 Privates.append(C->privates().begin(), C->privates().end());
1392 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); 1431 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
1393 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); 1432 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
1394 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); 1433 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
1545 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); 1584 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
1546 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, 1585 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn,
1547 CapturedVars, IfCond); 1586 CapturedVars, IfCond);
1548 } 1587 }
1549 1588
1589 static bool isAllocatableDecl(const VarDecl *VD) {
1590 const VarDecl *CVD = VD->getCanonicalDecl();
1591 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1592 return false;
1593 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1594 // Use the default allocation.
1595 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1596 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1597 !AA->getAllocator());
1598 }
1599
1550 static void emitEmptyBoundParameters(CodeGenFunction &, 1600 static void emitEmptyBoundParameters(CodeGenFunction &,
1551 const OMPExecutableDirective &, 1601 const OMPExecutableDirective &,
1552 llvm::SmallVectorImpl<llvm::Value *> &) {} 1602 llvm::SmallVectorImpl<llvm::Value *> &) {}
1553 1603
1604 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1605 CodeGenFunction &CGF, const VarDecl *VD) {
1606 CodeGenModule &CGM = CGF.CGM;
1607 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1608
1609 if (!VD)
1610 return Address::invalid();
1611 const VarDecl *CVD = VD->getCanonicalDecl();
1612 if (!isAllocatableDecl(CVD))
1613 return Address::invalid();
1614 llvm::Value *Size;
1615 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1616 if (CVD->getType()->isVariablyModifiedType()) {
1617 Size = CGF.getTypeSize(CVD->getType());
1618 // Align the size: ((size + align - 1) / align) * align
1619 Size = CGF.Builder.CreateNUWAdd(
1620 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
1621 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
1622 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
1623 } else {
1624 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1625 Size = CGM.getSize(Sz.alignTo(Align));
1626 }
1627
1628 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1629 assert(AA->getAllocator() &&
1630 "Expected allocator expression for non-default allocator.");
1631 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
1632 // According to the standard, the original allocator type is a enum (integer).
1633 // Convert to pointer type, if required.
1634 if (Allocator->getType()->isIntegerTy())
1635 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
1636 else if (Allocator->getType()->isPointerTy())
1637 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
1638 CGM.VoidPtrTy);
1639
1640 llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1641 CGF.Builder, Size, Allocator,
1642 getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
1643 llvm::CallInst *FreeCI =
1644 OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator);
1645
1646 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
1647 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1648 Addr,
1649 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
1650 getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
1651 return Address(Addr, Align);
1652 }
1653
1654 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1655 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1656 SourceLocation Loc) {
1657 CodeGenModule &CGM = CGF.CGM;
1658 if (CGM.getLangOpts().OpenMPUseTLS &&
1659 CGM.getContext().getTargetInfo().isTLSSupported())
1660 return VDAddr;
1661
1662 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1663
1664 llvm::Type *VarTy = VDAddr.getElementType();
1665 llvm::Value *Data =
1666 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
1667 llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
1668 std::string Suffix = getNameWithSeparators({"cache", ""});
1669 llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
1670
1671 llvm::CallInst *ThreadPrivateCacheCall =
1672 OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
1673
1674 return Address(ThreadPrivateCacheCall, VDAddr.getAlignment());
1675 }
1676
1677 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1678 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1679 SmallString<128> Buffer;
1680 llvm::raw_svector_ostream OS(Buffer);
1681 StringRef Sep = FirstSeparator;
1682 for (StringRef Part : Parts) {
1683 OS << Sep << Part;
1684 Sep = Separator;
1685 }
1686 return OS.str().str();
1687 }
1554 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { 1688 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1555 if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { 1689 if (CGM.getLangOpts().OpenMPIRBuilder) {
1690 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1556 // Check if we have any if clause associated with the directive. 1691 // Check if we have any if clause associated with the directive.
1557 llvm::Value *IfCond = nullptr; 1692 llvm::Value *IfCond = nullptr;
1558 if (const auto *C = S.getSingleClause<OMPIfClause>()) 1693 if (const auto *C = S.getSingleClause<OMPIfClause>())
1559 IfCond = EmitScalarExpr(C->getCondition(), 1694 IfCond = EmitScalarExpr(C->getCondition(),
1560 /*IgnoreResultAssign=*/true); 1695 /*IgnoreResultAssign=*/true);
1579 // Privatization callback that performs appropriate action for 1714 // Privatization callback that performs appropriate action for
1580 // shared/private/firstprivate/lastprivate/copyin/... variables. 1715 // shared/private/firstprivate/lastprivate/copyin/... variables.
1581 // 1716 //
1582 // TODO: This defaults to shared right now. 1717 // TODO: This defaults to shared right now.
1583 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, 1718 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1584 llvm::Value &Val, llvm::Value *&ReplVal) { 1719 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1585 // The next line is appropriate only for variables (Val) with the 1720 // The next line is appropriate only for variables (Val) with the
1586 // data-sharing attribute "shared". 1721 // data-sharing attribute "shared".
1587 ReplVal = &Val; 1722 ReplVal = &Val;
1588 1723
1589 return CodeGenIP; 1724 return CodeGenIP;
1601 CodeGenIP, ContinuationBB); 1736 CodeGenIP, ContinuationBB);
1602 }; 1737 };
1603 1738
1604 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); 1739 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1605 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); 1740 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1606 Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB, 1741 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1607 FiniCB, IfCond, NumThreads, 1742 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
1608 ProcBind, S.hasCancel())); 1743 Builder.restoreIP(
1744 OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1745 IfCond, NumThreads, ProcBind, S.hasCancel()));
1609 return; 1746 return;
1610 } 1747 }
1611 1748
1612 // Emit parallel region as a standalone region. 1749 // Emit parallel region as a standalone region.
1613 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 1750 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1639 } 1776 }
1640 // Check for outer lastprivate conditional update. 1777 // Check for outer lastprivate conditional update.
1641 checkForLastprivateConditionalUpdate(*this, S); 1778 checkForLastprivateConditionalUpdate(*this, S);
1642 } 1779 }
1643 1780
1781 namespace {
1782 /// RAII to handle scopes for loop transformation directives.
1783 class OMPTransformDirectiveScopeRAII {
1784 OMPLoopScope *Scope = nullptr;
1785 CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1786 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1787
1788 public:
1789 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1790 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
1791 Scope = new OMPLoopScope(CGF, *Dir);
1792 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1793 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1794 }
1795 }
1796 ~OMPTransformDirectiveScopeRAII() {
1797 if (!Scope)
1798 return;
1799 delete CapInfoRAII;
1800 delete CGSI;
1801 delete Scope;
1802 }
1803 };
1804 } // namespace
1805
1644 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, 1806 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1645 int MaxLevel, int Level = 0) { 1807 int MaxLevel, int Level = 0) {
1646 assert(Level < MaxLevel && "Too deep lookup during loop body codegen."); 1808 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1647 const Stmt *SimplifiedS = S->IgnoreContainers(); 1809 const Stmt *SimplifiedS = S->IgnoreContainers();
1648 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) { 1810 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) {
1655 for (const Stmt *CurStmt : CS->body()) 1817 for (const Stmt *CurStmt : CS->body())
1656 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level); 1818 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level);
1657 return; 1819 return;
1658 } 1820 }
1659 if (SimplifiedS == NextLoop) { 1821 if (SimplifiedS == NextLoop) {
1822 if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS))
1823 SimplifiedS = Dir->getTransformedStmt();
1824 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
1825 SimplifiedS = CanonLoop->getLoopStmt();
1660 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) { 1826 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
1661 S = For->getBody(); 1827 S = For->getBody();
1662 } else { 1828 } else {
1663 assert(isa<CXXForRangeStmt>(SimplifiedS) && 1829 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1664 "Expected canonical for loop or range-based for loop."); 1830 "Expected canonical for loop or range-based for loop.");
1703 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next"); 1869 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next");
1704 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(), 1870 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(),
1705 getProfileCount(D.getBody())); 1871 getProfileCount(D.getBody()));
1706 EmitBlock(NextBB); 1872 EmitBlock(NextBB);
1707 } 1873 }
1874
1875 OMPPrivateScope InscanScope(*this);
1876 EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
1877 bool IsInscanRegion = InscanScope.Privatize();
1878 if (IsInscanRegion) {
1879 // Need to remember the block before and after scan directive
1880 // to dispatch them correctly depending on the clause used in
1881 // this directive, inclusive or exclusive. For inclusive scan the natural
1882 // order of the blocks is used, for exclusive clause the blocks must be
1883 // executed in reverse order.
1884 OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
1885 OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
1886 // No need to allocate inscan exit block, in simd mode it is selected in the
1887 // codegen for the scan directive.
1888 if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
1889 OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
1890 OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
1891 EmitBranch(OMPScanDispatch);
1892 EmitBlock(OMPBeforeScanBlock);
1893 }
1894
1708 // Emit loop variables for C++ range loops. 1895 // Emit loop variables for C++ range loops.
1709 const Stmt *Body = 1896 const Stmt *Body =
1710 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); 1897 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1711 // Emit loop body. 1898 // Emit loop body.
1712 emitBody(*this, Body, 1899 emitBody(*this, Body,
1713 OMPLoopDirective::tryToFindNextInnerLoop( 1900 OMPLoopBasedDirective::tryToFindNextInnerLoop(
1714 Body, /*TryImperfectlyNestedLoops=*/true), 1901 Body, /*TryImperfectlyNestedLoops=*/true),
1715 D.getCollapsedNumber()); 1902 D.getLoopsNumber());
1903
1904 // Jump to the dispatcher at the end of the loop body.
1905 if (IsInscanRegion)
1906 EmitBranch(OMPScanExitBlock);
1716 1907
1717 // The end (updates/cleanups). 1908 // The end (updates/cleanups).
1718 EmitBlock(Continue.getBlock()); 1909 EmitBlock(Continue.getBlock());
1719 BreakContinueStack.pop_back(); 1910 BreakContinueStack.pop_back();
1720 } 1911 }
1721 1912
1913 using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
1914
1915 /// Emit a captured statement and return the function as well as its captured
1916 /// closure context.
1917 static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
1918 const CapturedStmt *S) {
1919 LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
1920 CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
1921 std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
1922 std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
1923 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
1924 llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);
1925
1926 return {F, CapStruct.getPointer(ParentCGF)};
1927 }
1928
1929 /// Emit a call to a previously captured closure.
1930 static llvm::CallInst *
1931 emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
1932 llvm::ArrayRef<llvm::Value *> Args) {
1933 // Append the closure context to the argument.
1934 SmallVector<llvm::Value *> EffectiveArgs;
1935 EffectiveArgs.reserve(Args.size() + 1);
1936 llvm::append_range(EffectiveArgs, Args);
1937 EffectiveArgs.push_back(Cap.second);
1938
1939 return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
1940 }
1941
1942 llvm::CanonicalLoopInfo *
1943 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
1944 assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
1945
1946 EmitStmt(S);
1947 assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
1948
1949 // The last added loop is the outermost one.
1950 return OMPLoopNestStack.back();
1951 }
1952
1953 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
1954 const Stmt *SyntacticalLoop = S->getLoopStmt();
1955 if (!getLangOpts().OpenMPIRBuilder) {
1956 // Ignore if OpenMPIRBuilder is not enabled.
1957 EmitStmt(SyntacticalLoop);
1958 return;
1959 }
1960
1961 LexicalScope ForScope(*this, S->getSourceRange());
1962
1963 // Emit init statements. The Distance/LoopVar funcs may reference variable
1964 // declarations they contain.
1965 const Stmt *BodyStmt;
1966 if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
1967 if (const Stmt *InitStmt = For->getInit())
1968 EmitStmt(InitStmt);
1969 BodyStmt = For->getBody();
1970 } else if (const auto *RangeFor =
1971 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
1972 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
1973 EmitStmt(RangeStmt);
1974 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
1975 EmitStmt(BeginStmt);
1976 if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
1977 EmitStmt(EndStmt);
1978 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
1979 EmitStmt(LoopVarStmt);
1980 BodyStmt = RangeFor->getBody();
1981 } else
1982 llvm_unreachable("Expected for-stmt or range-based for-stmt");
1983
1984 // Emit closure for later use. By-value captures will be captured here.
1985 const CapturedStmt *DistanceFunc = S->getDistanceFunc();
1986 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
1987 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
1988 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);
1989
1990 // Call the distance function to get the number of iterations of the loop to
1991 // come.
1992 QualType LogicalTy = DistanceFunc->getCapturedDecl()
1993 ->getParam(0)
1994 ->getType()
1995 .getNonReferenceType();
1996 Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
1997 emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
1998 llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");
1999
2000 // Emit the loop structure.
2001 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2002 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
2003 llvm::Value *IndVar) {
2004 Builder.restoreIP(CodeGenIP);
2005
2006 // Emit the loop body: Convert the logical iteration number to the loop
2007 // variable and emit the body.
2008 const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
2009 LValue LCVal = EmitLValue(LoopVarRef);
2010 Address LoopVarAddress = LCVal.getAddress(*this);
2011 emitCapturedStmtCall(*this, LoopVarClosure,
2012 {LoopVarAddress.getPointer(), IndVar});
2013
2014 RunCleanupsScope BodyScope(*this);
2015 EmitStmt(BodyStmt);
2016 };
2017 llvm::CanonicalLoopInfo *CL =
2018 OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);
2019
2020 // Finish up the loop.
2021 Builder.restoreIP(CL->getAfterIP());
2022 ForScope.ForceCleanup();
2023
2024 // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2025 OMPLoopNestStack.push_back(CL);
2026 }
2027
1722 void CodeGenFunction::EmitOMPInnerLoop( 2028 void CodeGenFunction::EmitOMPInnerLoop(
1723 const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, 2029 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
1724 const Expr *IncExpr, 2030 const Expr *IncExpr,
1725 const llvm::function_ref<void(CodeGenFunction &)> BodyGen, 2031 const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
1726 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { 2032 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
1727 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); 2033 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end");
1728 2034
1734 // If attributes are attached, push to the basic block with them. 2040 // If attributes are attached, push to the basic block with them.
1735 const auto &OMPED = cast<OMPExecutableDirective>(S); 2041 const auto &OMPED = cast<OMPExecutableDirective>(S);
1736 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); 2042 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
1737 const Stmt *SS = ICS->getCapturedStmt(); 2043 const Stmt *SS = ICS->getCapturedStmt();
1738 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS); 2044 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
2045 OMPLoopNestStack.clear();
1739 if (AS) 2046 if (AS)
1740 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), 2047 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
1741 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), 2048 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
1742 SourceLocToDebugLoc(R.getEnd())); 2049 SourceLocToDebugLoc(R.getEnd()));
1743 else 2050 else
1921 } 2228 }
1922 // Privatize extra loop counters used in loops for ordered(n) clauses. 2229 // Privatize extra loop counters used in loops for ordered(n) clauses.
1923 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { 2230 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
1924 if (!C->getNumForLoops()) 2231 if (!C->getNumForLoops())
1925 continue; 2232 continue;
1926 for (unsigned I = S.getCollapsedNumber(), 2233 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
1927 E = C->getLoopNumIterations().size();
1928 I < E; ++I) { 2234 I < E; ++I) {
1929 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); 2235 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
1930 const auto *VD = cast<VarDecl>(DRE->getDecl()); 2236 const auto *VD = cast<VarDecl>(DRE->getDecl());
1931 // Override only those variables that can be captured to avoid re-emission 2237 // Override only those variables that can be captured to avoid re-emission
1932 // of the variables declared within the loops. 2238 // of the variables declared within the loops.
2046 LoopStack.setVectorizeEnable(); 2352 LoopStack.setVectorizeEnable();
2047 emitSimdlenSafelenClause(*this, D, IsMonotonic); 2353 emitSimdlenSafelenClause(*this, D, IsMonotonic);
2048 if (const auto *C = D.getSingleClause<OMPOrderClause>()) 2354 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2049 if (C->getKind() == OMPC_ORDER_concurrent) 2355 if (C->getKind() == OMPC_ORDER_concurrent)
2050 LoopStack.setParallel(/*Enable=*/true); 2356 LoopStack.setParallel(/*Enable=*/true);
2357 if ((D.getDirectiveKind() == OMPD_simd ||
2358 (getLangOpts().OpenMPSimd &&
2359 isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2360 llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2361 [](const OMPReductionClause *C) {
2362 return C->getModifier() == OMPC_REDUCTION_inscan;
2363 }))
2364 // Disable parallel access in case of prefix sum.
2365 LoopStack.setParallel(/*Enable=*/false);
2051 } 2366 }
2052 2367
2053 void CodeGenFunction::EmitOMPSimdFinal( 2368 void CodeGenFunction::EmitOMPSimdFinal(
2054 const OMPLoopDirective &D, 2369 const OMPLoopDirective &D,
2055 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { 2370 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2219 }, 2534 },
2220 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 2535 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2221 CGF.EmitOMPInnerLoop( 2536 CGF.EmitOMPInnerLoop(
2222 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 2537 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
2223 [&S](CodeGenFunction &CGF) { 2538 [&S](CodeGenFunction &CGF) {
2224 CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); 2539 emitOMPLoopBodyWithStopPoint(CGF, S,
2225 CGF.EmitStopPoint(&S); 2540 CodeGenFunction::JumpDest());
2226 }, 2541 },
2227 [](CodeGenFunction &) {}); 2542 [](CodeGenFunction &) {});
2228 }); 2543 });
2229 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); 2544 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; });
2230 // Emit final copy of the lastprivate variables at the end of loops. 2545 // Emit final copy of the lastprivate variables at the end of loops.
2241 CGF.EmitBlock(ContBlock, true); 2556 CGF.EmitBlock(ContBlock, true);
2242 } 2557 }
2243 } 2558 }
2244 2559
2245 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { 2560 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2561 ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2562 OMPFirstScanLoop = true;
2246 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 2563 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2247 emitOMPSimdRegion(CGF, S, Action); 2564 emitOMPSimdRegion(CGF, S, Action);
2248 }; 2565 };
2249 { 2566 {
2250 auto LPCRegion = 2567 auto LPCRegion =
2252 OMPLexicalScope Scope(*this, S, OMPD_unknown); 2569 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2253 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); 2570 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2254 } 2571 }
2255 // Check for outer lastprivate conditional update. 2572 // Check for outer lastprivate conditional update.
2256 checkForLastprivateConditionalUpdate(*this, S); 2573 checkForLastprivateConditionalUpdate(*this, S);
2574 }
2575
2576 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
2577 // Emit the de-sugared statement.
2578 OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2579 EmitStmt(S.getTransformedStmt());
2257 } 2580 }
2258 2581
2259 void CodeGenFunction::EmitOMPOuterLoop( 2582 void CodeGenFunction::EmitOMPOuterLoop(
2260 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, 2583 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2261 CodeGenFunction::OMPPrivateScope &LoopScope, 2584 CodeGenFunction::OMPPrivateScope &LoopScope,
2272 2595
2273 // Start the loop with a block that tests the condition. 2596 // Start the loop with a block that tests the condition.
2274 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); 2597 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
2275 EmitBlock(CondBlock); 2598 EmitBlock(CondBlock);
2276 const SourceRange R = S.getSourceRange(); 2599 const SourceRange R = S.getSourceRange();
2600 OMPLoopNestStack.clear();
2277 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), 2601 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
2278 SourceLocToDebugLoc(R.getEnd())); 2602 SourceLocToDebugLoc(R.getEnd()));
2279 2603
2280 llvm::Value *BoolCondVal = nullptr; 2604 llvm::Value *BoolCondVal = nullptr;
2281 if (!DynamicOrOrdered) { 2605 if (!DynamicOrOrdered) {
2355 EmitIgnoredExpr(LoopArgs.NextLB); 2679 EmitIgnoredExpr(LoopArgs.NextLB);
2356 EmitIgnoredExpr(LoopArgs.NextUB); 2680 EmitIgnoredExpr(LoopArgs.NextUB);
2357 } 2681 }
2358 2682
2359 EmitBranch(CondBlock); 2683 EmitBranch(CondBlock);
2684 OMPLoopNestStack.clear();
2360 LoopStack.pop(); 2685 LoopStack.pop();
2361 // Emit the fall-through block. 2686 // Emit the fall-through block.
2362 EmitBlock(LoopExit.getBlock()); 2687 EmitBlock(LoopExit.getBlock());
2363 2688
2364 // Tell the runtime we are done. 2689 // Tell the runtime we are done.
2832 // specified, and if no monotonic modifier is specified, the effect will 3157 // specified, and if no monotonic modifier is specified, the effect will
2833 // be as if the monotonic modifier was specified. 3158 // be as if the monotonic modifier was specified.
2834 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, 3159 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
2835 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && 3160 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
2836 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); 3161 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
3162 bool IsMonotonic =
3163 Ordered ||
3164 ((ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
3165 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) &&
3166 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
3167 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
3168 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
3169 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2837 if ((RT.isStaticNonchunked(ScheduleKind.Schedule, 3170 if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
2838 /* Chunked */ Chunk != nullptr) || 3171 /* Chunked */ Chunk != nullptr) ||
2839 StaticChunkedOne) && 3172 StaticChunkedOne) &&
2840 !Ordered) { 3173 !Ordered) {
2841 JumpDest LoopExit = 3174 JumpDest LoopExit =
2842 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); 3175 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
2843 emitCommonSimdLoop( 3176 emitCommonSimdLoop(
2844 *this, S, 3177 *this, S,
2845 [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3178 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2846 if (isOpenMPSimdDirective(S.getDirectiveKind())) { 3179 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2847 CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); 3180 CGF.EmitOMPSimdInit(S, IsMonotonic);
2848 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { 3181 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
2849 if (C->getKind() == OMPC_ORDER_concurrent) 3182 if (C->getKind() == OMPC_ORDER_concurrent)
2850 CGF.LoopStack.setParallel(/*Enable=*/true); 3183 CGF.LoopStack.setParallel(/*Enable=*/true);
2851 } 3184 }
2852 }, 3185 },
2887 S, LoopScope.requiresCleanups(), 3220 S, LoopScope.requiresCleanups(),
2888 StaticChunkedOne ? S.getCombinedParForInDistCond() 3221 StaticChunkedOne ? S.getCombinedParForInDistCond()
2889 : S.getCond(), 3222 : S.getCond(),
2890 StaticChunkedOne ? S.getDistInc() : S.getInc(), 3223 StaticChunkedOne ? S.getDistInc() : S.getInc(),
2891 [&S, LoopExit](CodeGenFunction &CGF) { 3224 [&S, LoopExit](CodeGenFunction &CGF) {
2892 CGF.EmitOMPLoopBody(S, LoopExit); 3225 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
2893 CGF.EmitStopPoint(&S);
2894 }, 3226 },
2895 [](CodeGenFunction &) {}); 3227 [](CodeGenFunction &) {});
2896 }); 3228 });
2897 EmitBlock(LoopExit.getBlock()); 3229 EmitBlock(LoopExit.getBlock());
2898 // Tell the runtime we are done. 3230 // Tell the runtime we are done.
2900 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), 3232 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2901 S.getDirectiveKind()); 3233 S.getDirectiveKind());
2902 }; 3234 };
2903 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); 3235 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2904 } else { 3236 } else {
2905 const bool IsMonotonic =
2906 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
2907 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
2908 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
2909 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
2910 // Emit the outer loop, which requests its work chunk [LB..UB] from 3237 // Emit the outer loop, which requests its work chunk [LB..UB] from
2911 // runtime and runs the inner loop to process it. 3238 // runtime and runs the inner loop to process it.
2912 const OMPLoopArguments LoopArguments( 3239 const OMPLoopArguments LoopArguments(
2913 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), 3240 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this),
2914 IL.getAddress(*this), Chunk, EUB); 3241 IL.getAddress(*this), Chunk, EUB);
2977 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); 3304 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
2978 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); 3305 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
2979 return {LBVal, UBVal}; 3306 return {LBVal, UBVal};
2980 } 3307 }
2981 3308
2982 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { 3309 /// Emits internal temp array declarations for the directive with inscan
2983 bool HasLastprivates = false; 3310 /// reductions.
2984 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 3311 /// The code is the following:
2985 PrePostActionTy &) { 3312 /// \code
2986 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); 3313 /// size num_iters = <num_iters>;
3314 /// <type> buffer[num_iters];
3315 /// \endcode
3316 static void emitScanBasedDirectiveDecls(
3317 CodeGenFunction &CGF, const OMPLoopDirective &S,
3318 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3319 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3320 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
3321 SmallVector<const Expr *, 4> Shareds;
3322 SmallVector<const Expr *, 4> Privates;
3323 SmallVector<const Expr *, 4> ReductionOps;
3324 SmallVector<const Expr *, 4> CopyArrayTemps;
3325 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3326 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3327 "Only inscan reductions are expected.");
3328 Shareds.append(C->varlist_begin(), C->varlist_end());
3329 Privates.append(C->privates().begin(), C->privates().end());
3330 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3331 CopyArrayTemps.append(C->copy_array_temps().begin(),
3332 C->copy_array_temps().end());
3333 }
3334 {
3335 // Emit buffers for each reduction variables.
3336 // ReductionCodeGen is required to emit correctly the code for array
3337 // reductions.
3338 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3339 unsigned Count = 0;
3340 auto *ITA = CopyArrayTemps.begin();
3341 for (const Expr *IRef : Privates) {
3342 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
3343 // Emit variably modified arrays, used for arrays/array sections
3344 // reductions.
3345 if (PrivateVD->getType()->isVariablyModifiedType()) {
3346 RedCG.emitSharedOrigLValue(CGF, Count);
3347 RedCG.emitAggregateType(CGF, Count);
3348 }
3349 CodeGenFunction::OpaqueValueMapping DimMapping(
3350 CGF,
3351 cast<OpaqueValueExpr>(
3352 cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3353 ->getSizeExpr()),
3354 RValue::get(OMPScanNumIterations));
3355 // Emit temp buffer.
3356 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
3357 ++ITA;
3358 ++Count;
3359 }
3360 }
3361 }
3362
3363 /// Emits the code for the directive with inscan reductions.
3364 /// The code is the following:
3365 /// \code
3366 /// #pragma omp ...
3367 /// for (i: 0..<num_iters>) {
3368 /// <input phase>;
3369 /// buffer[i] = red;
3370 /// }
3371 /// #pragma omp master // in parallel region
3372 /// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3373 /// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
3374 /// buffer[cnt] op= buffer[cnt-pow(2,k)];
3375 /// #pragma omp barrier // in parallel region
3376 /// #pragma omp ...
3377 /// for (0..<num_iters>) {
3378 /// red = InclusiveScan ? buffer[i] : buffer[i-1];
3379 /// <scan phase>;
3380 /// }
3381 /// \endcode
///
/// \param CGF             Function being emitted into.
/// \param S               Loop directive carrying the inscan reduction clauses.
/// \param NumIteratorsGen Called once; its result is cast (unsigned) to SizeTy
///                        and used as the iteration count.
/// \param FirstGen        Emits the first loop; invoked with
///                        CGF.OMPFirstScanLoop set to true, inside an
///                        OMPLocalDeclMapRAII scope.
/// \param SecondGen       Emits the second loop; invoked after
///                        CGF.OMPFirstScanLoop has been set to false.
3382 static void emitScanBasedDirective(
3383 CodeGenFunction &CGF, const OMPLoopDirective &S,
3384 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3385 llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3386 llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3387 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3388 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
// Gather the per-variable expressions from every reduction clause; all of
// them are asserted to be inscan reductions.
3389 SmallVector<const Expr *, 4> Privates;
3390 SmallVector<const Expr *, 4> ReductionOps;
3391 SmallVector<const Expr *, 4> LHSs;
3392 SmallVector<const Expr *, 4> RHSs;
3393 SmallVector<const Expr *, 4> CopyArrayElems;
3394 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3395 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3396 "Only inscan reductions are expected.");
3397 Privates.append(C->privates().begin(), C->privates().end());
3398 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3399 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3400 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3401 CopyArrayElems.append(C->copy_array_elems().begin(),
3402 C->copy_array_elems().end());
3403 }
3404 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3405 {
3406 // Emit loop with input phase:
3407 // #pragma omp ...
3408 // for (i: 0..<num_iters>) {
3409 // <input phase>;
3410 // buffer[i] = red;
3411 // }
3412 CGF.OMPFirstScanLoop = true;
3413 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3414 FirstGen(CGF);
3415 }
3416 // #pragma omp barrier // in parallel region
// CodeGen emits the prefix-reduction loops over the temporary buffers. It is
// run inside a master region for parallel directives and inline otherwise
// (see the dispatch after the lambda).
3417 auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
3418 &ReductionOps,
3419 &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
3420 Action.Enter(CGF);
3421 // Emit prefix reduction:
3422 // #pragma omp master // in parallel region
3423 // for (int k = 0; k != ceil(log2(n)); ++k)
3424 llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3425 llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
3426 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
// LogVal = (unsigned)ceil(log2((double)num_iters)), computed through the
// llvm.log2 and llvm.ceil intrinsics on double.
3427 llvm::Function *F =
3428 CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
3429 llvm::Value *Arg =
3430 CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
3431 llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
3432 F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
3433 LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
3434 LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
// NOTE(review): this is a no-unsigned-wrap subtraction, so it presumably
// relies on num_iters >= 1 -- confirm against the callers.
3435 llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3436 OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
3437 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
3438 CGF.EmitBlock(LoopBB);
// Outer-loop PHIs: Counter is k (starts at 0), Pow2K is 2^k (starts at 1).
3439 auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
3440 // size pow2k = 1;
3441 auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3442 Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
3443 Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
3444 // for (size i = n - 1; i >= 2 ^ k; --i)
3445 // tmp[i] op= tmp[i-pow2k];
3446 llvm::BasicBlock *InnerLoopBB =
3447 CGF.createBasicBlock("omp.inner.log.scan.body");
3448 llvm::BasicBlock *InnerExitBB =
3449 CGF.createBasicBlock("omp.inner.log.scan.exit");
// Skip the inner loop entirely when n - 1 < 2^k.
3450 llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
3451 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3452 CGF.EmitBlock(InnerLoopBB);
3453 auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
3454 IVal->addIncoming(NMin1, LoopBB);
3455 {
// Map each LHS variable to buffer[i] and each RHS variable to
// buffer[i - 2^k] by evaluating the copy-array element expression with its
// opaque index bound to the respective value, then emit the reduction.
3456 CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3457 auto *ILHS = LHSs.begin();
3458 auto *IRHS = RHSs.begin();
3459 for (const Expr *CopyArrayElem : CopyArrayElems) {
3460 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
3461 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
3462 Address LHSAddr = Address::invalid();
3463 {
3464 CodeGenFunction::OpaqueValueMapping IdxMapping(
3465 CGF,
3466 cast<OpaqueValueExpr>(
3467 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3468 RValue::get(IVal));
3469 LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3470 }
3471 PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
3472 Address RHSAddr = Address::invalid();
3473 {
3474 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
3475 CodeGenFunction::OpaqueValueMapping IdxMapping(
3476 CGF,
3477 cast<OpaqueValueExpr>(
3478 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
3479 RValue::get(OffsetIVal));
3480 RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
3481 }
3482 PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
3483 ++ILHS;
3484 ++IRHS;
3485 }
3486 PrivScope.Privatize();
3487 CGF.CGM.getOpenMPRuntime().emitReduction(
3488 CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
3489 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
3490 }
// Inner-loop latch: --i, continue while i >= 2^k.
3491 llvm::Value *NextIVal =
3492 CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
3493 IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
3494 CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
3495 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
3496 CGF.EmitBlock(InnerExitBB);
// Outer-loop latch: ++k.
3497 llvm::Value *Next =
3498 CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
3499 Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
3500 // pow2k <<= 1;
3501 llvm::Value *NextPow2K =
3502 CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
3503 Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
// NOTE(review): the outer loop body is entered unconditionally and is left
// only when Next == LogVal, with Next starting at 1. For num_iters == 1,
// LogVal is ceil(log2(1)) == 0, so the first exit test compares 1 != 0 --
// confirm a single-iteration scan cannot reach this code, or add a guard.
3504 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
3505 CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
3506 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
3507 CGF.EmitBlock(ExitBB);
3508 };
// Parallel directives run the prefix reduction in a master region followed by
// a barrier; other directives run it inline.
3509 if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3510 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3511 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3512 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3513 /*ForceSimpleCall=*/true);
3514 } else {
3515 RegionCodeGenTy RCG(CodeGen);
3516 RCG(CGF);
3517 }
3518
// Second pass: emit the loop again with the first-scan-loop flag cleared.
3519 CGF.OMPFirstScanLoop = false;
3520 SecondGen(CGF);
3521 }
3522
3523 static bool emitWorksharingDirective(CodeGenFunction &CGF,
3524 const OMPLoopDirective &S,
3525 bool HasCancel) {
3526 bool HasLastprivates;
3527 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
3528 [](const OMPReductionClause *C) {
3529 return C->getModifier() == OMPC_REDUCTION_inscan;
3530 })) {
3531 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3532 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3533 OMPLoopScope LoopScope(CGF, S);
3534 return CGF.EmitScalarExpr(S.getNumIterations());
3535 };
3536 const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3537 CodeGenFunction::OMPCancelStackRAII CancelRegion(
3538 CGF, S.getDirectiveKind(), HasCancel);
3539 (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3540 emitForLoopBounds,
3541 emitDispatchForLoopBounds);
3542 // Emit an implicit barrier at the end.
3543 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
3544 OMPD_for);
3545 };
3546 const auto &&SecondGen = [&S, HasCancel,
3547 &HasLastprivates](CodeGenFunction &CGF) {
3548 CodeGenFunction::OMPCancelStackRAII CancelRegion(
3549 CGF, S.getDirectiveKind(), HasCancel);
3550 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
3551 emitForLoopBounds,
3552 emitDispatchForLoopBounds);
3553 };
3554 if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3555 emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
3556 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3557 } else {
3558 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3559 HasCancel);
2987 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3560 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
2988 emitForLoopBounds, 3561 emitForLoopBounds,
2989 emitDispatchForLoopBounds); 3562 emitDispatchForLoopBounds);
3563 }
3564 return HasLastprivates;
3565 }
3566
3567 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
3568 if (S.hasCancel())
3569 return false;
3570 for (OMPClause *C : S.clauses())
3571 if (!isa<OMPNowaitClause>(C))
3572 return false;
3573
3574 return true;
3575 }
3576
3577 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
3578 bool HasLastprivates = false;
3579 bool UseOMPIRBuilder =
3580 CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
3581 auto &&CodeGen = [this, &S, &HasLastprivates,
3582 UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
3583 // Use the OpenMPIRBuilder if enabled.
3584 if (UseOMPIRBuilder) {
3585 // Emit the associated statement and get its loop representation.
3586 const Stmt *Inner = S.getRawStmt();
3587 llvm::CanonicalLoopInfo *CLI =
3588 EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
3589
3590 bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
3591 llvm::OpenMPIRBuilder &OMPBuilder =
3592 CGM.getOpenMPRuntime().getOMPBuilder();
3593 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
3594 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
3595 OMPBuilder.createWorkshareLoop(Builder, CLI, AllocaIP, NeedsBarrier);
3596 return;
3597 }
3598
3599 HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
2990 }; 3600 };
2991 { 3601 {
2992 auto LPCRegion = 3602 auto LPCRegion =
2993 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3603 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2994 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3604 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2995 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, 3605 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
2996 S.hasCancel()); 3606 S.hasCancel());
2997 } 3607 }
2998 3608
2999 // Emit an implicit barrier at the end. 3609 if (!UseOMPIRBuilder) {
3000 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) 3610 // Emit an implicit barrier at the end.
3001 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); 3611 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3612 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3613 }
3002 // Check for outer lastprivate conditional update. 3614 // Check for outer lastprivate conditional update.
3003 checkForLastprivateConditionalUpdate(*this, S); 3615 checkForLastprivateConditionalUpdate(*this, S);
3004 } 3616 }
3005 3617
3006 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { 3618 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3007 bool HasLastprivates = false; 3619 bool HasLastprivates = false;
3008 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, 3620 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3009 PrePostActionTy &) { 3621 PrePostActionTy &) {
3010 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), 3622 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3011 emitForLoopBounds,
3012 emitDispatchForLoopBounds);
3013 }; 3623 };
3014 { 3624 {
3015 auto LPCRegion = 3625 auto LPCRegion =
3016 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3626 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3017 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3627 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3062 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); 3672 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
3063 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); 3673 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
3064 // Generate condition for loop. 3674 // Generate condition for loop.
3065 BinaryOperator *Cond = BinaryOperator::Create( 3675 BinaryOperator *Cond = BinaryOperator::Create(
3066 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary, 3676 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary,
3067 S.getBeginLoc(), FPOptions(C.getLangOpts())); 3677 S.getBeginLoc(), FPOptionsOverride());
3068 // Increment for loop counter. 3678 // Increment for loop counter.
3069 UnaryOperator *Inc = UnaryOperator::Create( 3679 UnaryOperator *Inc = UnaryOperator::Create(
3070 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, 3680 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
3071 S.getBeginLoc(), true, FPOptions(C.getLangOpts())); 3681 S.getBeginLoc(), true, FPOptionsOverride());
3072 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { 3682 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
3073 // Iterate through all sections and emit a switch construct: 3683 // Iterate through all sections and emit a switch construct:
3074 // switch (IV) { 3684 // switch (IV) {
3075 // case 0: 3685 // case 0:
3076 // <SectionStmt[0]>; 3686 // <SectionStmt[0]>;
3179 OMPD_unknown); 3789 OMPD_unknown);
3180 } 3790 }
3181 } 3791 }
3182 3792
3183 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { 3793 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
3794 if (CGM.getLangOpts().OpenMPIRBuilder) {
3795 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3796 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3797 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
3798
3799 auto FiniCB = [this](InsertPointTy IP) {
3800 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3801 };
3802
3803 const CapturedStmt *ICS = S.getInnermostCapturedStmt();
3804 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
3805 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
3806 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
3807 if (CS) {
3808 for (const Stmt *SubStmt : CS->children()) {
3809 auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
3810 InsertPointTy CodeGenIP,
3811 llvm::BasicBlock &FiniBB) {
3812 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
3813 FiniBB);
3814 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SubStmt, CodeGenIP,
3815 FiniBB);
3816 };
3817 SectionCBVector.push_back(SectionCB);
3818 }
3819 } else {
3820 auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
3821 InsertPointTy CodeGenIP,
3822 llvm::BasicBlock &FiniBB) {
3823 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3824 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CapturedStmt, CodeGenIP,
3825 FiniBB);
3826 };
3827 SectionCBVector.push_back(SectionCB);
3828 }
3829
3830 // Privatization callback that performs appropriate action for
3831 // shared/private/firstprivate/lastprivate/copyin/... variables.
3832 //
3833 // TODO: This defaults to shared right now.
3834 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
3835 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
3836 // The next line is appropriate only for variables (Val) with the
3837 // data-sharing attribute "shared".
3838 ReplVal = &Val;
3839
3840 return CodeGenIP;
3841 };
3842
3843 CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
3844 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
3845 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
3846 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
3847 Builder.restoreIP(OMPBuilder.createSections(
3848 Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
3849 S.getSingleClause<OMPNowaitClause>()));
3850 return;
3851 }
3184 { 3852 {
3185 auto LPCRegion = 3853 auto LPCRegion =
3186 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 3854 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3187 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3855 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3188 EmitSections(S); 3856 EmitSections(S);
3195 // Check for outer lastprivate conditional update. 3863 // Check for outer lastprivate conditional update.
3196 checkForLastprivateConditionalUpdate(*this, S); 3864 checkForLastprivateConditionalUpdate(*this, S);
3197 } 3865 }
3198 3866
3199 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { 3867 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
3200 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { 3868 if (CGM.getLangOpts().OpenMPIRBuilder) {
3201 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 3869 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3202 }; 3870 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3203 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3871
3204 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, 3872 const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
3205 S.hasCancel()); 3873 auto FiniCB = [this](InsertPointTy IP) {
3874 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3875 };
3876
3877 auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
3878 InsertPointTy CodeGenIP,
3879 llvm::BasicBlock &FiniBB) {
3880 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3881 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SectionRegionBodyStmt,
3882 CodeGenIP, FiniBB);
3883 };
3884
3885 LexicalScope Scope(*this, S.getSourceRange());
3886 EmitStopPoint(&S);
3887 Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
3888
3889 return;
3890 }
3891 LexicalScope Scope(*this, S.getSourceRange());
3892 EmitStopPoint(&S);
3893 EmitStmt(S.getAssociatedStmt());
3206 } 3894 }
3207 3895
3208 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { 3896 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
3209 llvm::SmallVector<const Expr *, 8> CopyprivateVars; 3897 llvm::SmallVector<const Expr *, 8> CopyprivateVars;
3210 llvm::SmallVector<const Expr *, 8> DestExprs; 3898 llvm::SmallVector<const Expr *, 8> DestExprs;
3251 } 3939 }
3252 3940
3253 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { 3941 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3254 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 3942 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3255 Action.Enter(CGF); 3943 Action.Enter(CGF);
3256 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 3944 CGF.EmitStmt(S.getRawStmt());
3257 }; 3945 };
3258 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); 3946 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
3259 } 3947 }
3260 3948
3261 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { 3949 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
3262 if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { 3950 if (CGM.getLangOpts().OpenMPIRBuilder) {
3951 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3263 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 3952 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3264 3953
3265 const CapturedStmt *CS = S.getInnermostCapturedStmt(); 3954 const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();
3266 const Stmt *MasterRegionBodyStmt = CS->getCapturedStmt();
3267 3955
3268 auto FiniCB = [this](InsertPointTy IP) { 3956 auto FiniCB = [this](InsertPointTy IP) {
3269 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); 3957 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
3270 }; 3958 };
3271 3959
3275 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); 3963 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3276 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt, 3964 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
3277 CodeGenIP, FiniBB); 3965 CodeGenIP, FiniBB);
3278 }; 3966 };
3279 3967
3280 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); 3968 LexicalScope Scope(*this, S.getSourceRange());
3281 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); 3969 EmitStopPoint(&S);
3282 Builder.restoreIP(OMPBuilder->CreateMaster(Builder, BodyGenCB, FiniCB)); 3970 Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));
3283 3971
3284 return; 3972 return;
3285 } 3973 }
3286 OMPLexicalScope Scope(*this, S, OMPD_unknown); 3974 LexicalScope Scope(*this, S.getSourceRange());
3975 EmitStopPoint(&S);
3287 emitMaster(*this, S); 3976 emitMaster(*this, S);
3288 } 3977 }
3289 3978
3979 static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3980 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3981 Action.Enter(CGF);
3982 CGF.EmitStmt(S.getRawStmt());
3983 };
3984 Expr *Filter = nullptr;
3985 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
3986 Filter = FilterClause->getThreadID();
3987 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
3988 Filter);
3989 }
3990
3991 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
3992 if (CGM.getLangOpts().OpenMPIRBuilder) {
3993 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3994 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3995
3996 const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
3997 const Expr *Filter = nullptr;
3998 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
3999 Filter = FilterClause->getThreadID();
4000 llvm::Value *FilterVal = Filter
4001 ? EmitScalarExpr(Filter, CGM.Int32Ty)
4002 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
4003
4004 auto FiniCB = [this](InsertPointTy IP) {
4005 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
4006 };
4007
4008 auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
4009 InsertPointTy CodeGenIP,
4010 llvm::BasicBlock &FiniBB) {
4011 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
4012 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MaskedRegionBodyStmt,
4013 CodeGenIP, FiniBB);
4014 };
4015
4016 LexicalScope Scope(*this, S.getSourceRange());
4017 EmitStopPoint(&S);
4018 Builder.restoreIP(
4019 OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
4020
4021 return;
4022 }
4023 LexicalScope Scope(*this, S.getSourceRange());
4024 EmitStopPoint(&S);
4025 emitMasked(*this, S);
4026 }
4027
3290 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { 4028 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
3291 if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { 4029 if (CGM.getLangOpts().OpenMPIRBuilder) {
4030 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
3292 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; 4031 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
3293 4032
3294 const CapturedStmt *CS = S.getInnermostCapturedStmt(); 4033 const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
3295 const Stmt *CriticalRegionBodyStmt = CS->getCapturedStmt();
3296 const Expr *Hint = nullptr; 4034 const Expr *Hint = nullptr;
3297 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) 4035 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
3298 Hint = HintClause->getHint(); 4036 Hint = HintClause->getHint();
3299 4037
3300 // TODO: This is slightly different from what's currently being done in 4038 // TODO: This is slightly different from what's currently being done in
3315 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); 4053 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
3316 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt, 4054 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
3317 CodeGenIP, FiniBB); 4055 CodeGenIP, FiniBB);
3318 }; 4056 };
3319 4057
3320 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); 4058 LexicalScope Scope(*this, S.getSourceRange());
3321 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); 4059 EmitStopPoint(&S);
3322 Builder.restoreIP(OMPBuilder->CreateCritical( 4060 Builder.restoreIP(OMPBuilder.createCritical(
3323 Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(), 4061 Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
3324 HintInst)); 4062 HintInst));
3325 4063
3326 return; 4064 return;
3327 } 4065 }
3328 4066
3329 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4067 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3330 Action.Enter(CGF); 4068 Action.Enter(CGF);
3331 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); 4069 CGF.EmitStmt(S.getAssociatedStmt());
3332 }; 4070 };
3333 const Expr *Hint = nullptr; 4071 const Expr *Hint = nullptr;
3334 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) 4072 if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
3335 Hint = HintClause->getHint(); 4073 Hint = HintClause->getHint();
3336 OMPLexicalScope Scope(*this, S, OMPD_unknown); 4074 LexicalScope Scope(*this, S.getSourceRange());
4075 EmitStopPoint(&S);
3337 CGM.getOpenMPRuntime().emitCriticalRegion(*this, 4076 CGM.getOpenMPRuntime().emitCriticalRegion(*this,
3338 S.getDirectiveName().getAsString(), 4077 S.getDirectiveName().getAsString(),
3339 CodeGen, S.getBeginLoc(), Hint); 4078 CodeGen, S.getBeginLoc(), Hint);
3340 } 4079 }
3341 4080
3343 const OMPParallelForDirective &S) { 4082 const OMPParallelForDirective &S) {
3344 // Emit directive as a combined directive that consists of two implicit 4083 // Emit directive as a combined directive that consists of two implicit
3345 // directives: 'parallel' with 'for' directive. 4084 // directives: 'parallel' with 'for' directive.
3346 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4085 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3347 Action.Enter(CGF); 4086 Action.Enter(CGF);
3348 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); 4087 (void)emitWorksharingDirective(CGF, S, S.hasCancel());
3349 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
3350 emitDispatchForLoopBounds);
3351 }; 4088 };
3352 { 4089 {
4090 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4091 [](const OMPReductionClause *C) {
4092 return C->getModifier() == OMPC_REDUCTION_inscan;
4093 })) {
4094 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4095 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4096 CGCapturedStmtInfo CGSI(CR_OpenMP);
4097 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4098 OMPLoopScope LoopScope(CGF, S);
4099 return CGF.EmitScalarExpr(S.getNumIterations());
4100 };
4101 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4102 }
3353 auto LPCRegion = 4103 auto LPCRegion =
3354 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4104 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3355 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, 4105 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
3356 emitEmptyBoundParameters); 4106 emitEmptyBoundParameters);
3357 } 4107 }
3363 const OMPParallelForSimdDirective &S) { 4113 const OMPParallelForSimdDirective &S) {
3364 // Emit directive as a combined directive that consists of two implicit 4114 // Emit directive as a combined directive that consists of two implicit
3365 // directives: 'parallel' with 'for' directive. 4115 // directives: 'parallel' with 'for' directive.
3366 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { 4116 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3367 Action.Enter(CGF); 4117 Action.Enter(CGF);
3368 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, 4118 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3369 emitDispatchForLoopBounds);
3370 }; 4119 };
3371 { 4120 {
4121 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
4122 [](const OMPReductionClause *C) {
4123 return C->getModifier() == OMPC_REDUCTION_inscan;
4124 })) {
4125 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
4126 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
4127 CGCapturedStmtInfo CGSI(CR_OpenMP);
4128 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
4129 OMPLoopScope LoopScope(CGF, S);
4130 return CGF.EmitScalarExpr(S.getNumIterations());
4131 };
4132 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
4133 }
3372 auto LPCRegion = 4134 auto LPCRegion =
3373 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); 4135 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3374 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, 4136 emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
3375 emitEmptyBoundParameters); 4137 emitEmptyBoundParameters);
3376 } 4138 }
3377 // Check for outer lastprivate conditional update. 4139 // Check for outer lastprivate conditional update.
3378 checkForLastprivateConditionalUpdate(*this, S); 4140 checkForLastprivateConditionalUpdate(*this, S);
3379 } 4141 }
3428 emitEmptyBoundParameters); 4190 emitEmptyBoundParameters);
3429 } 4191 }
3430 // Check for outer lastprivate conditional update. 4192 // Check for outer lastprivate conditional update.
3431 checkForLastprivateConditionalUpdate(*this, S); 4193 checkForLastprivateConditionalUpdate(*this, S);
3432 } 4194 }
4195
4196 namespace {
4197 /// Get the list of variables declared in the context of the untied tasks.
4198 class CheckVarsEscapingUntiedTaskDeclContext final
4199 : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
4200 llvm::SmallVector<const VarDecl *, 4> PrivateDecls;
4201
4202 public:
4203 explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
4204 virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
4205 void VisitDeclStmt(const DeclStmt *S) {
4206 if (!S)
4207 return;
4208 // Need to privatize only local vars, static locals can be processed as is.
4209 for (const Decl *D : S->decls()) {
4210 if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
4211 if (VD->hasLocalStorage())
4212 PrivateDecls.push_back(VD);
4213 }
4214 }
4215 void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; }
4216 void VisitCapturedStmt(const CapturedStmt *) { return; }
4217 void VisitLambdaExpr(const LambdaExpr *) { return; }
4218 void VisitBlockExpr(const BlockExpr *) { return; }
4219 void VisitStmt(const Stmt *S) {
4220 if (!S)
4221 return;
4222 for (const Stmt *Child : S->children())
4223 if (Child)
4224 Visit(Child);
4225 }
4226
4227 /// Swaps list of vars with the provided one.
4228 ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
4229 };
4230 } // anonymous namespace
3433 4231
3434 void CodeGenFunction::EmitOMPTaskBasedDirective( 4232 void CodeGenFunction::EmitOMPTaskBasedDirective(
3435 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, 4233 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
3436 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, 4234 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
3437 OMPTaskDataTy &Data) { 4235 OMPTaskDataTy &Data) {
3493 ++IRef; 4291 ++IRef;
3494 ++IElemInitRef; 4292 ++IElemInitRef;
3495 } 4293 }
3496 } 4294 }
3497 // Get list of lastprivate variables (for taskloops). 4295 // Get list of lastprivate variables (for taskloops).
3498 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; 4296 llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
3499 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { 4297 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
3500 auto IRef = C->varlist_begin(); 4298 auto IRef = C->varlist_begin();
3501 auto ID = C->destination_exprs().begin(); 4299 auto ID = C->destination_exprs().begin();
3502 for (const Expr *IInit : C->private_copies()) { 4300 for (const Expr *IInit : C->private_copies()) {
3503 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); 4301 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
3504 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { 4302 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
3505 Data.LastprivateVars.push_back(*IRef); 4303 Data.LastprivateVars.push_back(*IRef);
3506 Data.LastprivateCopies.push_back(IInit); 4304 Data.LastprivateCopies.push_back(IInit);
3507 } 4305 }
3508 LastprivateDstsOrigs.insert( 4306 LastprivateDstsOrigs.insert(
3509 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), 4307 std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
3510 cast<DeclRefExpr>(*IRef)}); 4308 cast<DeclRefExpr>(*IRef)));
3511 ++IRef; 4309 ++IRef;
3512 ++ID; 4310 ++ID;
3513 } 4311 }
3514 } 4312 }
3515 SmallVector<const Expr *, 4> LHSs; 4313 SmallVector<const Expr *, 4> LHSs;
3529 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { 4327 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
3530 OMPTaskDataTy::DependData &DD = 4328 OMPTaskDataTy::DependData &DD =
3531 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); 4329 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
3532 DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); 4330 DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
3533 } 4331 }
4332 // Get list of local vars for untied tasks.
4333 if (!Data.Tied) {
4334 CheckVarsEscapingUntiedTaskDeclContext Checker;
4335 Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
4336 Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
4337 Checker.getPrivateDecls().end());
4338 }
3534 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, 4339 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
3535 CapturedRegion](CodeGenFunction &CGF, 4340 CapturedRegion](CodeGenFunction &CGF,
3536 PrePostActionTy &Action) { 4341 PrePostActionTy &Action) {
4342 llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
4343 std::pair<Address, Address>>
4344 UntiedLocalVars;
3537 // Set proper addresses for generated private copies. 4345 // Set proper addresses for generated private copies.
3538 OMPPrivateScope Scope(CGF); 4346 OMPPrivateScope Scope(CGF);
3539 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; 4347 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
3540 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || 4348 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
3541 !Data.LastprivateVars.empty()) { 4349 !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
3542 llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
3543 CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
3544 enum { PrivatesParam = 2, CopyFnParam = 3 }; 4350 enum { PrivatesParam = 2, CopyFnParam = 3 };
3545 llvm::Value *CopyFn = CGF.Builder.CreateLoad( 4351 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
3546 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); 4352 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
3547 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( 4353 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
3548 CS->getCapturedDecl()->getParam(PrivatesParam))); 4354 CS->getCapturedDecl()->getParam(PrivatesParam)));
3549 // Map privates. 4355 // Map privates.
3550 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 4356 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
3551 llvm::SmallVector<llvm::Value *, 16> CallArgs; 4357 llvm::SmallVector<llvm::Value *, 16> CallArgs;
4358 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
3552 CallArgs.push_back(PrivatesPtr); 4359 CallArgs.push_back(PrivatesPtr);
4360 ParamTypes.push_back(PrivatesPtr->getType());
3553 for (const Expr *E : Data.PrivateVars) { 4361 for (const Expr *E : Data.PrivateVars) {
3554 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4362 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3555 Address PrivatePtr = CGF.CreateMemTemp( 4363 Address PrivatePtr = CGF.CreateMemTemp(
3556 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); 4364 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
3557 PrivatePtrs.emplace_back(VD, PrivatePtr); 4365 PrivatePtrs.emplace_back(VD, PrivatePtr);
3558 CallArgs.push_back(PrivatePtr.getPointer()); 4366 CallArgs.push_back(PrivatePtr.getPointer());
4367 ParamTypes.push_back(PrivatePtr.getType());
3559 } 4368 }
3560 for (const Expr *E : Data.FirstprivateVars) { 4369 for (const Expr *E : Data.FirstprivateVars) {
3561 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4370 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3562 Address PrivatePtr = 4371 Address PrivatePtr =
3563 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 4372 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3564 ".firstpriv.ptr.addr"); 4373 ".firstpriv.ptr.addr");
3565 PrivatePtrs.emplace_back(VD, PrivatePtr); 4374 PrivatePtrs.emplace_back(VD, PrivatePtr);
3566 FirstprivatePtrs.emplace_back(VD, PrivatePtr); 4375 FirstprivatePtrs.emplace_back(VD, PrivatePtr);
3567 CallArgs.push_back(PrivatePtr.getPointer()); 4376 CallArgs.push_back(PrivatePtr.getPointer());
4377 ParamTypes.push_back(PrivatePtr.getType());
3568 } 4378 }
3569 for (const Expr *E : Data.LastprivateVars) { 4379 for (const Expr *E : Data.LastprivateVars) {
3570 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4380 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3571 Address PrivatePtr = 4381 Address PrivatePtr =
3572 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 4382 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3573 ".lastpriv.ptr.addr"); 4383 ".lastpriv.ptr.addr");
3574 PrivatePtrs.emplace_back(VD, PrivatePtr); 4384 PrivatePtrs.emplace_back(VD, PrivatePtr);
3575 CallArgs.push_back(PrivatePtr.getPointer()); 4385 CallArgs.push_back(PrivatePtr.getPointer());
4386 ParamTypes.push_back(PrivatePtr.getType());
3576 } 4387 }
4388 for (const VarDecl *VD : Data.PrivateLocals) {
4389 QualType Ty = VD->getType().getNonReferenceType();
4390 if (VD->getType()->isLValueReferenceType())
4391 Ty = CGF.getContext().getPointerType(Ty);
4392 if (isAllocatableDecl(VD))
4393 Ty = CGF.getContext().getPointerType(Ty);
4394 Address PrivatePtr = CGF.CreateMemTemp(
4395 CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
4396 auto Result = UntiedLocalVars.insert(
4397 std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
4398 // If key exists update in place.
4399 if (Result.second == false)
4400 *Result.first = std::make_pair(
4401 VD, std::make_pair(PrivatePtr, Address::invalid()));
4402 CallArgs.push_back(PrivatePtr.getPointer());
4403 ParamTypes.push_back(PrivatePtr.getType());
4404 }
4405 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4406 ParamTypes, /*isVarArg=*/false);
4407 CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4408 CopyFn, CopyFnTy->getPointerTo());
3577 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( 4409 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
3578 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); 4410 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
3579 for (const auto &Pair : LastprivateDstsOrigs) { 4411 for (const auto &Pair : LastprivateDstsOrigs) {
3580 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); 4412 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
3581 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD), 4413 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
3589 } 4421 }
3590 for (const auto &Pair : PrivatePtrs) { 4422 for (const auto &Pair : PrivatePtrs) {
3591 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 4423 Address Replacement(CGF.Builder.CreateLoad(Pair.second),
3592 CGF.getContext().getDeclAlign(Pair.first)); 4424 CGF.getContext().getDeclAlign(Pair.first));
3593 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); 4425 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
4426 }
4427 // Adjust mapping for internal locals by mapping actual memory instead of
4428 // a pointer to this memory.
4429 for (auto &Pair : UntiedLocalVars) {
4430 if (isAllocatableDecl(Pair.first)) {
4431 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4432 Address Replacement(Ptr, CGF.getPointerAlign());
4433 Pair.second.first = Replacement;
4434 Ptr = CGF.Builder.CreateLoad(Replacement);
4435 Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first));
4436 Pair.second.second = Replacement;
4437 } else {
4438 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
4439 Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first));
4440 Pair.second.first = Replacement;
4441 }
3594 } 4442 }
3595 } 4443 }
3596 if (Data.Reductions) { 4444 if (Data.Reductions) {
3597 OMPPrivateScope FirstprivateScope(CGF); 4445 OMPPrivateScope FirstprivateScope(CGF);
3598 for (const auto &Pair : FirstprivatePtrs) { 4446 for (const auto &Pair : FirstprivatePtrs) {
3684 [Replacement]() { return Replacement; }); 4532 [Replacement]() { return Replacement; });
3685 } 4533 }
3686 } 4534 }
3687 (void)InRedScope.Privatize(); 4535 (void)InRedScope.Privatize();
3688 4536
4537 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
4538 UntiedLocalVars);
3689 Action.Enter(CGF); 4539 Action.Enter(CGF);
3690 BodyGen(CGF); 4540 BodyGen(CGF);
3691 }; 4541 };
3692 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( 4542 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
3693 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, 4543 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
3719 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, 4569 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
3720 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); 4570 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
3721 PrivateVD->setInitStyle(VarDecl::CInit); 4571 PrivateVD->setInitStyle(VarDecl::CInit);
3722 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, 4572 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
3723 InitRef, /*BasePath=*/nullptr, 4573 InitRef, /*BasePath=*/nullptr,
3724 VK_RValue)); 4574 VK_RValue, FPOptionsOverride()));
3725 Data.FirstprivateVars.emplace_back(OrigRef); 4575 Data.FirstprivateVars.emplace_back(OrigRef);
3726 Data.FirstprivateCopies.emplace_back(PrivateRef); 4576 Data.FirstprivateCopies.emplace_back(PrivateRef);
3727 Data.FirstprivateInits.emplace_back(InitRef); 4577 Data.FirstprivateInits.emplace_back(InitRef);
3728 return OrigVD; 4578 return OrigVD;
3729 } 4579 }
3755 } 4605 }
3756 OMPPrivateScope TargetScope(*this); 4606 OMPPrivateScope TargetScope(*this);
3757 VarDecl *BPVD = nullptr; 4607 VarDecl *BPVD = nullptr;
3758 VarDecl *PVD = nullptr; 4608 VarDecl *PVD = nullptr;
3759 VarDecl *SVD = nullptr; 4609 VarDecl *SVD = nullptr;
4610 VarDecl *MVD = nullptr;
3760 if (InputInfo.NumberOfTargetItems > 0) { 4611 if (InputInfo.NumberOfTargetItems > 0) {
3761 auto *CD = CapturedDecl::Create( 4612 auto *CD = CapturedDecl::Create(
3762 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); 4613 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
3763 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); 4614 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
3764 QualType BaseAndPointersType = getContext().getConstantArrayType( 4615 QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
3765 getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal, 4616 getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal,
3766 /*IndexTypeQuals=*/0); 4617 /*IndexTypeQuals=*/0);
3767 BPVD = createImplicitFirstprivateForType( 4618 BPVD = createImplicitFirstprivateForType(
3768 getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); 4619 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
3769 PVD = createImplicitFirstprivateForType( 4620 PVD = createImplicitFirstprivateForType(
3770 getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); 4621 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
3771 QualType SizesType = getContext().getConstantArrayType( 4622 QualType SizesType = getContext().getConstantArrayType(
3772 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), 4623 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
3773 ArrSize, nullptr, ArrayType::Normal, 4624 ArrSize, nullptr, ArrayType::Normal,
3774 /*IndexTypeQuals=*/0); 4625 /*IndexTypeQuals=*/0);
3775 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, 4626 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
3778 BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); 4629 BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; });
3779 TargetScope.addPrivate(PVD, 4630 TargetScope.addPrivate(PVD,
3780 [&InputInfo]() { return InputInfo.PointersArray; }); 4631 [&InputInfo]() { return InputInfo.PointersArray; });
3781 TargetScope.addPrivate(SVD, 4632 TargetScope.addPrivate(SVD,
3782 [&InputInfo]() { return InputInfo.SizesArray; }); 4633 [&InputInfo]() { return InputInfo.SizesArray; });
4634 // If there is no user-defined mapper, the mapper array will be nullptr. In
4635 // this case, we don't need to privatize it.
4636 if (!dyn_cast_or_null<llvm::ConstantPointerNull>(
4637 InputInfo.MappersArray.getPointer())) {
4638 MVD = createImplicitFirstprivateForType(
4639 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
4640 TargetScope.addPrivate(MVD,
4641 [&InputInfo]() { return InputInfo.MappersArray; });
4642 }
3783 } 4643 }
3784 (void)TargetScope.Privatize(); 4644 (void)TargetScope.Privatize();
3785 // Build list of dependences. 4645 // Build list of dependences.
3786 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { 4646 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
3787 OMPTaskDataTy::DependData &DD = 4647 OMPTaskDataTy::DependData &DD =
3788 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); 4648 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
3789 DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); 4649 DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
3790 } 4650 }
3791 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, 4651 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
3792 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { 4652 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
3793 // Set proper addresses for generated private copies. 4653 // Set proper addresses for generated private copies.
3794 OMPPrivateScope Scope(CGF); 4654 OMPPrivateScope Scope(CGF);
3795 if (!Data.FirstprivateVars.empty()) { 4655 if (!Data.FirstprivateVars.empty()) {
3796 llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
3797 CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
3798 enum { PrivatesParam = 2, CopyFnParam = 3 }; 4656 enum { PrivatesParam = 2, CopyFnParam = 3 };
3799 llvm::Value *CopyFn = CGF.Builder.CreateLoad( 4657 llvm::Value *CopyFn = CGF.Builder.CreateLoad(
3800 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); 4658 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
3801 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( 4659 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
3802 CS->getCapturedDecl()->getParam(PrivatesParam))); 4660 CS->getCapturedDecl()->getParam(PrivatesParam)));
3803 // Map privates. 4661 // Map privates.
3804 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; 4662 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
3805 llvm::SmallVector<llvm::Value *, 16> CallArgs; 4663 llvm::SmallVector<llvm::Value *, 16> CallArgs;
4664 llvm::SmallVector<llvm::Type *, 4> ParamTypes;
3806 CallArgs.push_back(PrivatesPtr); 4665 CallArgs.push_back(PrivatesPtr);
4666 ParamTypes.push_back(PrivatesPtr->getType());
3807 for (const Expr *E : Data.FirstprivateVars) { 4667 for (const Expr *E : Data.FirstprivateVars) {
3808 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 4668 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3809 Address PrivatePtr = 4669 Address PrivatePtr =
3810 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), 4670 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
3811 ".firstpriv.ptr.addr"); 4671 ".firstpriv.ptr.addr");
3812 PrivatePtrs.emplace_back(VD, PrivatePtr); 4672 PrivatePtrs.emplace_back(VD, PrivatePtr);
3813 CallArgs.push_back(PrivatePtr.getPointer()); 4673 CallArgs.push_back(PrivatePtr.getPointer());
4674 ParamTypes.push_back(PrivatePtr.getType());
3814 } 4675 }
4676 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
4677 ParamTypes, /*isVarArg=*/false);
4678 CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4679 CopyFn, CopyFnTy->getPointerTo());
3815 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( 4680 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
3816 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); 4681 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
3817 for (const auto &Pair : PrivatePtrs) { 4682 for (const auto &Pair : PrivatePtrs) {
3818 Address Replacement(CGF.Builder.CreateLoad(Pair.second), 4683 Address Replacement(CGF.Builder.CreateLoad(Pair.second),
3819 CGF.getContext().getDeclAlign(Pair.first)); 4684 CGF.getContext().getDeclAlign(Pair.first));
3827 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0); 4692 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
3828 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( 4693 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
3829 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0); 4694 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
3830 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( 4695 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
3831 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0); 4696 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
4697 // If MVD is nullptr, the mapper array is not privatized
4698 if (MVD)
4699 InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
4700 CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
3832 } 4701 }
3833 4702
3834 Action.Enter(CGF); 4703 Action.Enter(CGF);
3835 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); 4704 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
3836 BodyGen(CGF); 4705 BodyGen(CGF);
3957 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) { 4826 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
3958 CGM.getOpenMPRuntime().emitUpdateClause( 4827 CGM.getOpenMPRuntime().emitUpdateClause(
3959 *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc()); 4828 *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
3960 return; 4829 return;
3961 } 4830 }
4831 }
4832
4833 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
4834 if (!OMPParentLoopDirectiveForScan)
4835 return;
4836 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
4837 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
4838 SmallVector<const Expr *, 4> Shareds;
4839 SmallVector<const Expr *, 4> Privates;
4840 SmallVector<const Expr *, 4> LHSs;
4841 SmallVector<const Expr *, 4> RHSs;
4842 SmallVector<const Expr *, 4> ReductionOps;
4843 SmallVector<const Expr *, 4> CopyOps;
4844 SmallVector<const Expr *, 4> CopyArrayTemps;
4845 SmallVector<const Expr *, 4> CopyArrayElems;
4846 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
4847 if (C->getModifier() != OMPC_REDUCTION_inscan)
4848 continue;
4849 Shareds.append(C->varlist_begin(), C->varlist_end());
4850 Privates.append(C->privates().begin(), C->privates().end());
4851 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
4852 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
4853 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
4854 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
4855 CopyArrayTemps.append(C->copy_array_temps().begin(),
4856 C->copy_array_temps().end());
4857 CopyArrayElems.append(C->copy_array_elems().begin(),
4858 C->copy_array_elems().end());
4859 }
4860 if (ParentDir.getDirectiveKind() == OMPD_simd ||
4861 (getLangOpts().OpenMPSimd &&
4862 isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
4863 // For simd directive and simd-based directives in simd only mode, use the
4864 // following codegen:
4865 // int x = 0;
4866 // #pragma omp simd reduction(inscan, +: x)
4867 // for (..) {
4868 // <first part>
4869 // #pragma omp scan inclusive(x)
4870 // <second part>
4871 // }
4872 // is transformed to:
4873 // int x = 0;
4874 // for (..) {
4875 // int x_priv = 0;
4876 // <first part>
4877 // x = x_priv + x;
4878 // x_priv = x;
4879 // <second part>
4880 // }
4881 // and
4882 // int x = 0;
4883 // #pragma omp simd reduction(inscan, +: x)
4884 // for (..) {
4885 // <first part>
4886 // #pragma omp scan exclusive(x)
4887 // <second part>
4888 // }
4889 // to
4890 // int x = 0;
4891 // for (..) {
4892 // int x_priv = 0;
4893 // <second part>
4894 // int temp = x;
4895 // x = x_priv + x;
4896 // x_priv = temp;
4897 // <first part>
4898 // }
4899 llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
4900 EmitBranch(IsInclusive
4901 ? OMPScanReduce
4902 : BreakContinueStack.back().ContinueBlock.getBlock());
4903 EmitBlock(OMPScanDispatch);
4904 {
4905 // New scope for correct construction/destruction of temp variables for
4906 // exclusive scan.
4907 LexicalScope Scope(*this, S.getSourceRange());
4908 EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
4909 EmitBlock(OMPScanReduce);
4910 if (!IsInclusive) {
4911 // Create temp var and copy LHS value to this temp value.
4912 // TMP = LHS;
4913 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4914 const Expr *PrivateExpr = Privates[I];
4915 const Expr *TempExpr = CopyArrayTemps[I];
4916 EmitAutoVarDecl(
4917 *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
4918 LValue DestLVal = EmitLValue(TempExpr);
4919 LValue SrcLVal = EmitLValue(LHSs[I]);
4920 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4921 SrcLVal.getAddress(*this),
4922 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4923 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4924 CopyOps[I]);
4925 }
4926 }
4927 CGM.getOpenMPRuntime().emitReduction(
4928 *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
4929 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
4930 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4931 const Expr *PrivateExpr = Privates[I];
4932 LValue DestLVal;
4933 LValue SrcLVal;
4934 if (IsInclusive) {
4935 DestLVal = EmitLValue(RHSs[I]);
4936 SrcLVal = EmitLValue(LHSs[I]);
4937 } else {
4938 const Expr *TempExpr = CopyArrayTemps[I];
4939 DestLVal = EmitLValue(RHSs[I]);
4940 SrcLVal = EmitLValue(TempExpr);
4941 }
4942 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4943 SrcLVal.getAddress(*this),
4944 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4945 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4946 CopyOps[I]);
4947 }
4948 }
4949 EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
4950 OMPScanExitBlock = IsInclusive
4951 ? BreakContinueStack.back().ContinueBlock.getBlock()
4952 : OMPScanReduce;
4953 EmitBlock(OMPAfterScanBlock);
4954 return;
4955 }
4956 if (!IsInclusive) {
4957 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4958 EmitBlock(OMPScanExitBlock);
4959 }
4960 if (OMPFirstScanLoop) {
4961 // Emit buffer[i] = red; at the end of the input phase.
4962 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
4963 .getIterationVariable()
4964 ->IgnoreParenImpCasts();
4965 LValue IdxLVal = EmitLValue(IVExpr);
4966 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
4967 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
4968 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
4969 const Expr *PrivateExpr = Privates[I];
4970 const Expr *OrigExpr = Shareds[I];
4971 const Expr *CopyArrayElem = CopyArrayElems[I];
4972 OpaqueValueMapping IdxMapping(
4973 *this,
4974 cast<OpaqueValueExpr>(
4975 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
4976 RValue::get(IdxVal));
4977 LValue DestLVal = EmitLValue(CopyArrayElem);
4978 LValue SrcLVal = EmitLValue(OrigExpr);
4979 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
4980 SrcLVal.getAddress(*this),
4981 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
4982 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
4983 CopyOps[I]);
4984 }
4985 }
4986 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4987 if (IsInclusive) {
4988 EmitBlock(OMPScanExitBlock);
4989 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
4990 }
4991 EmitBlock(OMPScanDispatch);
4992 if (!OMPFirstScanLoop) {
4993 // Emit red = buffer[i]; at the entrance to the scan phase.
4994 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
4995 .getIterationVariable()
4996 ->IgnoreParenImpCasts();
4997 LValue IdxLVal = EmitLValue(IVExpr);
4998 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
4999 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
5000 llvm::BasicBlock *ExclusiveExitBB = nullptr;
5001 if (!IsInclusive) {
5002 llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
5003 ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
5004 llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
5005 Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
5006 EmitBlock(ContBB);
5007 // Use idx - 1 iteration for exclusive scan.
5008 IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
5009 }
5010 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5011 const Expr *PrivateExpr = Privates[I];
5012 const Expr *OrigExpr = Shareds[I];
5013 const Expr *CopyArrayElem = CopyArrayElems[I];
5014 OpaqueValueMapping IdxMapping(
5015 *this,
5016 cast<OpaqueValueExpr>(
5017 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
5018 RValue::get(IdxVal));
5019 LValue SrcLVal = EmitLValue(CopyArrayElem);
5020 LValue DestLVal = EmitLValue(OrigExpr);
5021 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
5022 SrcLVal.getAddress(*this),
5023 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
5024 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
5025 CopyOps[I]);
5026 }
5027 if (!IsInclusive) {
5028 EmitBlock(ExclusiveExitBB);
5029 }
5030 }
5031 EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
5032 : OMPAfterScanBlock);
5033 EmitBlock(OMPAfterScanBlock);
3962 } 5034 }
3963 5035
3964 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, 5036 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
3965 const CodeGenLoopTy &CodeGenLoop, 5037 const CodeGenLoopTy &CodeGenLoop,
3966 Expr *IncExpr) { 5038 Expr *IncExpr) {
4210 return Fn; 5282 return Fn;
4211 } 5283 }
4212 5284
4213 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { 5285 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
4214 if (S.hasClausesOfKind<OMPDependClause>()) { 5286 if (S.hasClausesOfKind<OMPDependClause>()) {
4215 assert(!S.getAssociatedStmt() && 5287 assert(!S.hasAssociatedStmt() &&
4216 "No associated statement must be in ordered depend construct."); 5288 "No associated statement must be in ordered depend construct.");
4217 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) 5289 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
4218 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); 5290 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
4219 return; 5291 return;
4220 } 5292 }
4687 case OMPC_reduction: 5759 case OMPC_reduction:
4688 case OMPC_task_reduction: 5760 case OMPC_task_reduction:
4689 case OMPC_in_reduction: 5761 case OMPC_in_reduction:
4690 case OMPC_safelen: 5762 case OMPC_safelen:
4691 case OMPC_simdlen: 5763 case OMPC_simdlen:
5764 case OMPC_sizes:
4692 case OMPC_allocator: 5765 case OMPC_allocator:
4693 case OMPC_allocate: 5766 case OMPC_allocate:
4694 case OMPC_collapse: 5767 case OMPC_collapse:
4695 case OMPC_default: 5768 case OMPC_default:
4696 case OMPC_seq_cst: 5769 case OMPC_seq_cst:
4728 case OMPC_defaultmap: 5801 case OMPC_defaultmap:
4729 case OMPC_uniform: 5802 case OMPC_uniform:
4730 case OMPC_to: 5803 case OMPC_to:
4731 case OMPC_from: 5804 case OMPC_from:
4732 case OMPC_use_device_ptr: 5805 case OMPC_use_device_ptr:
5806 case OMPC_use_device_addr:
4733 case OMPC_is_device_ptr: 5807 case OMPC_is_device_ptr:
4734 case OMPC_unified_address: 5808 case OMPC_unified_address:
4735 case OMPC_unified_shared_memory: 5809 case OMPC_unified_shared_memory:
4736 case OMPC_reverse_offload: 5810 case OMPC_reverse_offload:
4737 case OMPC_dynamic_allocators: 5811 case OMPC_dynamic_allocators:
4744 case OMPC_detach: 5818 case OMPC_detach:
4745 case OMPC_inclusive: 5819 case OMPC_inclusive:
4746 case OMPC_exclusive: 5820 case OMPC_exclusive:
4747 case OMPC_uses_allocators: 5821 case OMPC_uses_allocators:
4748 case OMPC_affinity: 5822 case OMPC_affinity:
5823 case OMPC_init:
5824 case OMPC_inbranch:
5825 case OMPC_notinbranch:
5826 case OMPC_link:
5827 case OMPC_use:
5828 case OMPC_novariants:
5829 case OMPC_nocontext:
5830 case OMPC_filter:
4749 llvm_unreachable("Clause is not allowed in 'omp atomic'."); 5831 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
4750 } 5832 }
4751 } 5833 }
4752 5834
4753 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { 5835 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
4775 // if it is first). 5857 // if it is first).
4776 if (C->getClauseKind() != OMPC_seq_cst && 5858 if (C->getClauseKind() != OMPC_seq_cst &&
4777 C->getClauseKind() != OMPC_acq_rel && 5859 C->getClauseKind() != OMPC_acq_rel &&
4778 C->getClauseKind() != OMPC_acquire && 5860 C->getClauseKind() != OMPC_acquire &&
4779 C->getClauseKind() != OMPC_release && 5861 C->getClauseKind() != OMPC_release &&
4780 C->getClauseKind() != OMPC_relaxed) { 5862 C->getClauseKind() != OMPC_relaxed && C->getClauseKind() != OMPC_hint) {
4781 Kind = C->getClauseKind(); 5863 Kind = C->getClauseKind();
4782 break; 5864 break;
4783 } 5865 }
4784 } 5866 }
4785 if (!MemOrderingSpecified) { 5867 if (!MemOrderingSpecified) {
4798 AO = llvm::AtomicOrdering::Acquire; 5880 AO = llvm::AtomicOrdering::Acquire;
4799 } 5881 }
4800 } 5882 }
4801 } 5883 }
4802 5884
4803 const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers(); 5885 LexicalScope Scope(*this, S.getSourceRange());
4804 if (const auto *FE = dyn_cast<FullExpr>(CS)) 5886 EmitStopPoint(S.getAssociatedStmt());
4805 enterFullExpression(FE); 5887 emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
4806 // Processing for statements under 'atomic capture'. 5888 S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(),
4807 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { 5889 S.getBeginLoc());
4808 for (const Stmt *C : Compound->body()) {
4809 if (const auto *FE = dyn_cast<FullExpr>(C))
4810 enterFullExpression(FE);
4811 }
4812 }
4813
4814 auto &&CodeGen = [&S, Kind, AO, CS](CodeGenFunction &CGF,
4815 PrePostActionTy &) {
4816 CGF.EmitStopPoint(CS);
4817 emitOMPAtomicExpr(CGF, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
4818 S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(),
4819 S.getBeginLoc());
4820 };
4821 OMPLexicalScope Scope(*this, S, OMPD_unknown);
4822 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
4823 } 5890 }
4824 5891
4825 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, 5892 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
4826 const OMPExecutableDirective &S, 5893 const OMPExecutableDirective &S,
4827 const RegionCodeGenTy &CodeGen) { 5894 const RegionCodeGenTy &CodeGen) {
4913 (void)PrivateScope.Privatize(); 5980 (void)PrivateScope.Privatize();
4914 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) 5981 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
4915 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); 5982 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
4916 5983
4917 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); 5984 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
5985 CGF.EnsureInsertPoint();
4918 } 5986 }
4919 5987
4920 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, 5988 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
4921 StringRef ParentName, 5989 StringRef ParentName,
4922 const OMPTargetDirective &S) { 5990 const OMPTargetDirective &S) {
5325 C->getNameModifier() == OMPD_cancel) { 6393 C->getNameModifier() == OMPD_cancel) {
5326 IfCond = C->getCondition(); 6394 IfCond = C->getCondition();
5327 break; 6395 break;
5328 } 6396 }
5329 } 6397 }
5330 if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { 6398 if (CGM.getLangOpts().OpenMPIRBuilder) {
6399 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5331 // TODO: This check is necessary as we only generate `omp parallel` through 6400 // TODO: This check is necessary as we only generate `omp parallel` through
5332 // the OpenMPIRBuilder for now. 6401 // the OpenMPIRBuilder for now.
5333 if (S.getCancelRegion() == OMPD_parallel) { 6402 if (S.getCancelRegion() == OMPD_parallel ||
6403 S.getCancelRegion() == OMPD_sections ||
6404 S.getCancelRegion() == OMPD_section) {
5334 llvm::Value *IfCondition = nullptr; 6405 llvm::Value *IfCondition = nullptr;
5335 if (IfCond) 6406 if (IfCond)
5336 IfCondition = EmitScalarExpr(IfCond, 6407 IfCondition = EmitScalarExpr(IfCond,
5337 /*IgnoreResultAssign=*/true); 6408 /*IgnoreResultAssign=*/true);
5338 return Builder.restoreIP( 6409 return Builder.restoreIP(
5339 OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion())); 6410 OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
5340 } 6411 }
5341 } 6412 }
5342 6413
5343 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, 6414 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
5344 S.getCancelRegion()); 6415 S.getCancelRegion());
5358 Kind == OMPD_target_teams_distribute_parallel_for); 6429 Kind == OMPD_target_teams_distribute_parallel_for);
5359 return OMPCancelStack.getExitBlock(); 6430 return OMPCancelStack.getExitBlock();
5360 } 6431 }
5361 6432
5362 void CodeGenFunction::EmitOMPUseDevicePtrClause( 6433 void CodeGenFunction::EmitOMPUseDevicePtrClause(
5363 const OMPClause &NC, OMPPrivateScope &PrivateScope, 6434 const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
5364 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { 6435 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
5365 const auto &C = cast<OMPUseDevicePtrClause>(NC);
5366 auto OrigVarIt = C.varlist_begin(); 6436 auto OrigVarIt = C.varlist_begin();
5367 auto InitIt = C.inits().begin(); 6437 auto InitIt = C.inits().begin();
5368 for (const Expr *PvtVarIt : C.private_copies()) { 6438 for (const Expr *PvtVarIt : C.private_copies()) {
5369 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); 6439 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl());
5370 const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); 6440 const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl());
5421 ++OrigVarIt; 6491 ++OrigVarIt;
5422 ++InitIt; 6492 ++InitIt;
5423 } 6493 }
5424 } 6494 }
5425 6495
6496 static const VarDecl *getBaseDecl(const Expr *Ref) {
6497 const Expr *Base = Ref->IgnoreParenImpCasts();
6498 while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
6499 Base = OASE->getBase()->IgnoreParenImpCasts();
6500 while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
6501 Base = ASE->getBase()->IgnoreParenImpCasts();
6502 return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
6503 }
6504
6505 void CodeGenFunction::EmitOMPUseDeviceAddrClause(
6506 const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
6507 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
6508 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
6509 for (const Expr *Ref : C.varlists()) {
6510 const VarDecl *OrigVD = getBaseDecl(Ref);
6511 if (!Processed.insert(OrigVD).second)
6512 continue;
6513 // In order to identify the right initializer we need to match the
6514 // declaration used by the mapping logic. In some cases we may get
6515 // OMPCapturedExprDecl that refers to the original declaration.
6516 const ValueDecl *MatchingVD = OrigVD;
6517 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
6518 // OMPCapturedExprDecl are used to privative fields of the current
6519 // structure.
6520 const auto *ME = cast<MemberExpr>(OED->getInit());
6521 assert(isa<CXXThisExpr>(ME->getBase()) &&
6522 "Base should be the current struct!");
6523 MatchingVD = ME->getMemberDecl();
6524 }
6525
6526 // If we don't have information about the current list item, move on to
6527 // the next one.
6528 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
6529 if (InitAddrIt == CaptureDeviceAddrMap.end())
6530 continue;
6531
6532 Address PrivAddr = InitAddrIt->getSecond();
6533 // For declrefs and variable length array need to load the pointer for
6534 // correct mapping, since the pointer to the data was passed to the runtime.
6535 if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
6536 MatchingVD->getType()->isArrayType())
6537 PrivAddr =
6538 EmitLoadOfPointer(PrivAddr, getContext()
6539 .getPointerType(OrigVD->getType())
6540 ->castAs<PointerType>());
6541 llvm::Type *RealTy =
6542 ConvertTypeForMem(OrigVD->getType().getNonReferenceType())
6543 ->getPointerTo();
6544 PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy);
6545
6546 (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; });
6547 }
6548 }
6549
5426 // Generate the instructions for '#pragma omp target data' directive. 6550 // Generate the instructions for '#pragma omp target data' directive.
5427 void CodeGenFunction::EmitOMPTargetDataDirective( 6551 void CodeGenFunction::EmitOMPTargetDataDirective(
5428 const OMPTargetDataDirective &S) { 6552 const OMPTargetDataDirective &S) {
5429 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true); 6553 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
6554 /*SeparateBeginEndCalls=*/true);
5430 6555
5431 // Create a pre/post action to signal the privatization of the device pointer. 6556 // Create a pre/post action to signal the privatization of the device pointer.
5432 // This action can be replaced by the OpenMP runtime code generation to 6557 // This action can be replaced by the OpenMP runtime code generation to
5433 // deactivate privatization. 6558 // deactivate privatization.
5434 bool PrivatizeDevicePointers = false; 6559 bool PrivatizeDevicePointers = false;
5465 OMPPrivateScope PrivateScope(CGF); 6590 OMPPrivateScope PrivateScope(CGF);
5466 // Emit all instances of the use_device_ptr clause. 6591 // Emit all instances of the use_device_ptr clause.
5467 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) 6592 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
5468 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, 6593 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
5469 Info.CaptureDeviceAddrMap); 6594 Info.CaptureDeviceAddrMap);
6595 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
6596 CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
6597 Info.CaptureDeviceAddrMap);
5470 (void)PrivateScope.Privatize(); 6598 (void)PrivateScope.Privatize();
5471 RCG(CGF); 6599 RCG(CGF);
5472 } else { 6600 } else {
6601 OMPLexicalScope Scope(CGF, S, OMPD_unknown);
5473 RCG(CGF); 6602 RCG(CGF);
5474 } 6603 }
5475 }; 6604 };
5476 6605
5477 // Forward the provided action to the privatization codegen. 6606 // Forward the provided action to the privatization codegen.
5796 }, 6925 },
5797 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { 6926 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
5798 CGF.EmitOMPInnerLoop( 6927 CGF.EmitOMPInnerLoop(
5799 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), 6928 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
5800 [&S](CodeGenFunction &CGF) { 6929 [&S](CodeGenFunction &CGF) {
5801 CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); 6930 emitOMPLoopBodyWithStopPoint(CGF, S,
5802 CGF.EmitStopPoint(&S); 6931 CodeGenFunction::JumpDest());
5803 }, 6932 },
5804 [](CodeGenFunction &) {}); 6933 [](CodeGenFunction &) {});
5805 }); 6934 });
5806 } 6935 }
5807 // Emit: if (PreCond) - end. 6936 // Emit: if (PreCond) - end.
5947 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); 7076 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
5948 } 7077 }
5949 7078
5950 void CodeGenFunction::EmitSimpleOMPExecutableDirective( 7079 void CodeGenFunction::EmitSimpleOMPExecutableDirective(
5951 const OMPExecutableDirective &D) { 7080 const OMPExecutableDirective &D) {
7081 if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
7082 EmitOMPScanDirective(*SD);
7083 return;
7084 }
5952 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) 7085 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
5953 return; 7086 return;
5954 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { 7087 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
5955 OMPPrivateScope GlobalsScope(CGF); 7088 OMPPrivateScope GlobalsScope(CGF);
5956 if (isOpenMPTaskingDirective(D.getDirectiveKind())) { 7089 if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
5971 } 7104 }
5972 } 7105 }
5973 } 7106 }
5974 if (isOpenMPSimdDirective(D.getDirectiveKind())) { 7107 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
5975 (void)GlobalsScope.Privatize(); 7108 (void)GlobalsScope.Privatize();
7109 ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
5976 emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action); 7110 emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
5977 } else { 7111 } else {
5978 if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) { 7112 if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
5979 for (const Expr *E : LD->counters()) { 7113 for (const Expr *E : LD->counters()) {
5980 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); 7114 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5990 } 7124 }
5991 } 7125 }
5992 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) { 7126 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
5993 if (!C->getNumForLoops()) 7127 if (!C->getNumForLoops())
5994 continue; 7128 continue;
5995 for (unsigned I = LD->getCollapsedNumber(), 7129 for (unsigned I = LD->getLoopsNumber(),
5996 E = C->getLoopNumIterations().size(); 7130 E = C->getLoopNumIterations().size();
5997 I < E; ++I) { 7131 I < E; ++I) {
5998 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( 7132 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
5999 cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) { 7133 cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
6000 // Emit only those that were not explicitly referenced in clauses. 7134 // Emit only those that were not explicitly referenced in clauses.
6006 } 7140 }
6007 (void)GlobalsScope.Privatize(); 7141 (void)GlobalsScope.Privatize();
6008 CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt()); 7142 CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
6009 } 7143 }
6010 }; 7144 };
6011 { 7145 if (D.getDirectiveKind() == OMPD_atomic ||
7146 D.getDirectiveKind() == OMPD_critical ||
7147 D.getDirectiveKind() == OMPD_section ||
7148 D.getDirectiveKind() == OMPD_master ||
7149 D.getDirectiveKind() == OMPD_masked) {
7150 EmitStmt(D.getAssociatedStmt());
7151 } else {
6012 auto LPCRegion = 7152 auto LPCRegion =
6013 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D); 7153 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
6014 OMPSimdLexicalScope Scope(*this, D); 7154 OMPSimdLexicalScope Scope(*this, D);
6015 CGM.getOpenMPRuntime().emitInlinedDirective( 7155 CGM.getOpenMPRuntime().emitInlinedDirective(
6016 *this, 7156 *this,