Mercurial > hg > CbC > CbC_llvm
comparison clang/lib/CodeGen/CGStmtOpenMP.cpp @ 221:79ff65ed7e25
LLVM12 Original
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 15 Jun 2021 19:15:29 +0900 |
parents | 0572611fdcc8 |
children | 5f17cb93ff66 |
comparison
equal
deleted
inserted
replaced
220:42394fc6a535 | 221:79ff65ed7e25 |
---|---|
19 #include "clang/AST/Attr.h" | 19 #include "clang/AST/Attr.h" |
20 #include "clang/AST/DeclOpenMP.h" | 20 #include "clang/AST/DeclOpenMP.h" |
21 #include "clang/AST/OpenMPClause.h" | 21 #include "clang/AST/OpenMPClause.h" |
22 #include "clang/AST/Stmt.h" | 22 #include "clang/AST/Stmt.h" |
23 #include "clang/AST/StmtOpenMP.h" | 23 #include "clang/AST/StmtOpenMP.h" |
24 #include "clang/AST/StmtVisitor.h" | |
24 #include "clang/Basic/OpenMPKinds.h" | 25 #include "clang/Basic/OpenMPKinds.h" |
25 #include "clang/Basic/PrettyStackTrace.h" | 26 #include "clang/Basic/PrettyStackTrace.h" |
27 #include "llvm/Frontend/OpenMP/OMPConstants.h" | |
26 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" | 28 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" |
27 #include "llvm/IR/Constants.h" | 29 #include "llvm/IR/Constants.h" |
28 #include "llvm/IR/Instructions.h" | 30 #include "llvm/IR/Instructions.h" |
29 #include "llvm/Support/AtomicOrdering.h" | 31 #include "llvm/Support/AtomicOrdering.h" |
30 using namespace clang; | 32 using namespace clang; |
31 using namespace CodeGen; | 33 using namespace CodeGen; |
32 using namespace llvm::omp; | 34 using namespace llvm::omp; |
35 | |
36 static const VarDecl *getBaseDecl(const Expr *Ref); | |
33 | 37 |
34 namespace { | 38 namespace { |
35 /// Lexical scope for OpenMP executable constructs, that handles correct codegen | 39 /// Lexical scope for OpenMP executable constructs, that handles correct codegen |
36 /// for captured expressions. | 40 /// for captured expressions. |
37 class OMPLexicalScope : public CodeGenFunction::LexicalScope { | 41 class OMPLexicalScope : public CodeGenFunction::LexicalScope { |
127 }; | 131 }; |
128 | 132 |
129 /// Private scope for OpenMP loop-based directives, that supports capturing | 133 /// Private scope for OpenMP loop-based directives, that supports capturing |
130 /// of used expression from loop statement. | 134 /// of used expression from loop statement. |
131 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { | 135 class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { |
132 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { | 136 void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) { |
137 const DeclStmt *PreInits; | |
133 CodeGenFunction::OMPMapVars PreCondVars; | 138 CodeGenFunction::OMPMapVars PreCondVars; |
134 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; | 139 if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) { |
135 for (const auto *E : S.counters()) { | 140 llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
136 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); | 141 for (const auto *E : LD->counters()) { |
137 EmittedAsPrivate.insert(VD->getCanonicalDecl()); | 142 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
138 (void)PreCondVars.setVarAddr( | 143 EmittedAsPrivate.insert(VD->getCanonicalDecl()); |
139 CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType())); | 144 (void)PreCondVars.setVarAddr( |
140 } | 145 CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType())); |
141 // Mark private vars as undefs. | 146 } |
142 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { | 147 // Mark private vars as undefs. |
143 for (const Expr *IRef : C->varlists()) { | 148 for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) { |
144 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); | 149 for (const Expr *IRef : C->varlists()) { |
145 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { | 150 const auto *OrigVD = |
146 (void)PreCondVars.setVarAddr( | 151 cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); |
147 CGF, OrigVD, | 152 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
148 Address(llvm::UndefValue::get( | 153 (void)PreCondVars.setVarAddr( |
149 CGF.ConvertTypeForMem(CGF.getContext().getPointerType( | 154 CGF, OrigVD, |
150 OrigVD->getType().getNonReferenceType()))), | 155 Address(llvm::UndefValue::get(CGF.ConvertTypeForMem( |
151 CGF.getContext().getDeclAlign(OrigVD))); | 156 CGF.getContext().getPointerType( |
157 OrigVD->getType().getNonReferenceType()))), | |
158 CGF.getContext().getDeclAlign(OrigVD))); | |
159 } | |
152 } | 160 } |
153 } | 161 } |
154 } | 162 (void)PreCondVars.apply(CGF); |
155 (void)PreCondVars.apply(CGF); | 163 // Emit init, __range and __end variables for C++ range loops. |
156 // Emit init, __range and __end variables for C++ range loops. | 164 (void)OMPLoopBasedDirective::doForAllLoops( |
157 const Stmt *Body = | 165 LD->getInnermostCapturedStmt()->getCapturedStmt(), |
158 S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); | 166 /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(), |
159 for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) { | 167 [&CGF](unsigned Cnt, const Stmt *CurStmt) { |
160 Body = OMPLoopDirective::tryToFindNextInnerLoop( | 168 if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) { |
161 Body, /*TryImperfectlyNestedLoops=*/true); | 169 if (const Stmt *Init = CXXFor->getInit()) |
162 if (auto *For = dyn_cast<ForStmt>(Body)) { | 170 CGF.EmitStmt(Init); |
163 Body = For->getBody(); | 171 CGF.EmitStmt(CXXFor->getRangeStmt()); |
164 } else { | 172 CGF.EmitStmt(CXXFor->getEndStmt()); |
165 assert(isa<CXXForRangeStmt>(Body) && | 173 } |
166 "Expected canonical for loop or range-based for loop."); | 174 return false; |
167 auto *CXXFor = cast<CXXForRangeStmt>(Body); | 175 }); |
168 if (const Stmt *Init = CXXFor->getInit()) | 176 PreInits = cast_or_null<DeclStmt>(LD->getPreInits()); |
169 CGF.EmitStmt(Init); | 177 } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) { |
170 CGF.EmitStmt(CXXFor->getRangeStmt()); | 178 PreInits = cast_or_null<DeclStmt>(Tile->getPreInits()); |
171 CGF.EmitStmt(CXXFor->getEndStmt()); | 179 } else { |
172 Body = CXXFor->getBody(); | 180 llvm_unreachable("Unknown loop-based directive kind."); |
173 } | 181 } |
174 } | 182 if (PreInits) { |
175 if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) { | |
176 for (const auto *I : PreInits->decls()) | 183 for (const auto *I : PreInits->decls()) |
177 CGF.EmitVarDecl(cast<VarDecl>(*I)); | 184 CGF.EmitVarDecl(cast<VarDecl>(*I)); |
178 } | 185 } |
179 PreCondVars.restore(CGF); | 186 PreCondVars.restore(CGF); |
180 } | 187 } |
181 | 188 |
182 public: | 189 public: |
183 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S) | 190 OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) |
184 : CodeGenFunction::RunCleanupsScope(CGF) { | 191 : CodeGenFunction::RunCleanupsScope(CGF) { |
185 emitPreInitStmt(CGF, S); | 192 emitPreInitStmt(CGF, S); |
186 } | 193 } |
187 }; | 194 }; |
188 | 195 |
218 for (const Expr *E : UDP->varlists()) { | 225 for (const Expr *E : UDP->varlists()) { |
219 const Decl *D = cast<DeclRefExpr>(E)->getDecl(); | 226 const Decl *D = cast<DeclRefExpr>(E)->getDecl(); |
220 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) | 227 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) |
221 CGF.EmitVarDecl(*OED); | 228 CGF.EmitVarDecl(*OED); |
222 } | 229 } |
230 } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) { | |
231 for (const Expr *E : UDP->varlists()) { | |
232 const Decl *D = getBaseDecl(E); | |
233 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) | |
234 CGF.EmitVarDecl(*OED); | |
235 } | |
223 } | 236 } |
224 } | 237 } |
225 if (!isOpenMPSimdDirective(S.getDirectiveKind())) | 238 if (!isOpenMPSimdDirective(S.getDirectiveKind())) |
226 CGF.EmitOMPPrivateClause(S, InlinedShareds); | 239 CGF.EmitOMPPrivateClause(S, InlinedShareds); |
227 if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) { | 240 if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) { |
228 if (const Expr *E = TG->getReductionRef()) | 241 if (const Expr *E = TG->getReductionRef()) |
229 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl())); | 242 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl())); |
230 } | 243 } |
244 // Temp copy arrays for inscan reductions should not be emitted as they are | |
245 // not used in simd only mode. | |
246 llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps; | |
247 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { | |
248 if (C->getModifier() != OMPC_REDUCTION_inscan) | |
249 continue; | |
250 for (const Expr *E : C->copy_array_temps()) | |
251 CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl()); | |
252 } | |
231 const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt()); | 253 const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt()); |
232 while (CS) { | 254 while (CS) { |
233 for (auto &C : CS->captures()) { | 255 for (auto &C : CS->captures()) { |
234 if (C.capturesVariable() || C.capturesVariableByCopy()) { | 256 if (C.capturesVariable() || C.capturesVariableByCopy()) { |
235 auto *VD = C.getCapturedVar(); | 257 auto *VD = C.getCapturedVar(); |
258 if (CopyArrayTemps.contains(VD)) | |
259 continue; | |
236 assert(VD == VD->getCanonicalDecl() && | 260 assert(VD == VD->getCanonicalDecl() && |
237 "Canonical decl must be captured."); | 261 "Canonical decl must be captured."); |
238 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), | 262 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), |
239 isCapturedVar(CGF, VD) || | 263 isCapturedVar(CGF, VD) || |
240 (CGF.CapturedStmtInfo && | 264 (CGF.CapturedStmtInfo && |
619 VLASizes.clear(); | 643 VLASizes.clear(); |
620 llvm::Function *WrapperF = | 644 llvm::Function *WrapperF = |
621 emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, | 645 emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, |
622 WrapperCGF.CXXThisValue, WrapperFO); | 646 WrapperCGF.CXXThisValue, WrapperFO); |
623 llvm::SmallVector<llvm::Value *, 4> CallArgs; | 647 llvm::SmallVector<llvm::Value *, 4> CallArgs; |
648 auto *PI = F->arg_begin(); | |
624 for (const auto *Arg : Args) { | 649 for (const auto *Arg : Args) { |
625 llvm::Value *CallArg; | 650 llvm::Value *CallArg; |
626 auto I = LocalAddrs.find(Arg); | 651 auto I = LocalAddrs.find(Arg); |
627 if (I != LocalAddrs.end()) { | 652 if (I != LocalAddrs.end()) { |
628 LValue LV = WrapperCGF.MakeAddrLValue( | 653 LValue LV = WrapperCGF.MakeAddrLValue( |
629 I->second.second, | 654 I->second.second, |
630 I->second.first ? I->second.first->getType() : Arg->getType(), | 655 I->second.first ? I->second.first->getType() : Arg->getType(), |
631 AlignmentSource::Decl); | 656 AlignmentSource::Decl); |
657 if (LV.getType()->isAnyComplexType()) | |
658 LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast( | |
659 LV.getAddress(WrapperCGF), | |
660 PI->getType()->getPointerTo( | |
661 LV.getAddress(WrapperCGF).getAddressSpace()))); | |
632 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); | 662 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); |
633 } else { | 663 } else { |
634 auto EI = VLASizes.find(Arg); | 664 auto EI = VLASizes.find(Arg); |
635 if (EI != VLASizes.end()) { | 665 if (EI != VLASizes.end()) { |
636 CallArg = EI->second.second; | 666 CallArg = EI->second.second; |
640 AlignmentSource::Decl); | 670 AlignmentSource::Decl); |
641 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); | 671 CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); |
642 } | 672 } |
643 } | 673 } |
644 CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); | 674 CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); |
675 ++PI; | |
645 } | 676 } |
646 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs); | 677 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs); |
647 WrapperCGF.FinishFunction(); | 678 WrapperCGF.FinishFunction(); |
648 return WrapperF; | 679 return WrapperF; |
649 } | 680 } |
973 if (CopiedVars.size() == 1) { | 1004 if (CopiedVars.size() == 1) { |
974 // At first check if current thread is a master thread. If it is, no | 1005 // At first check if current thread is a master thread. If it is, no |
975 // need to copy data. | 1006 // need to copy data. |
976 CopyBegin = createBasicBlock("copyin.not.master"); | 1007 CopyBegin = createBasicBlock("copyin.not.master"); |
977 CopyEnd = createBasicBlock("copyin.not.master.end"); | 1008 CopyEnd = createBasicBlock("copyin.not.master.end"); |
1009 // TODO: Avoid ptrtoint conversion. | |
1010 auto *MasterAddrInt = | |
1011 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy); | |
1012 auto *PrivateAddrInt = | |
1013 Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy); | |
978 Builder.CreateCondBr( | 1014 Builder.CreateCondBr( |
979 Builder.CreateICmpNE( | 1015 Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin, |
980 Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), | 1016 CopyEnd); |
981 Builder.CreatePtrToInt(PrivateAddr.getPointer(), | |
982 CGM.IntPtrTy)), | |
983 CopyBegin, CopyEnd); | |
984 EmitBlock(CopyBegin); | 1017 EmitBlock(CopyBegin); |
985 } | 1018 } |
986 const auto *SrcVD = | 1019 const auto *SrcVD = |
987 cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); | 1020 cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl()); |
988 const auto *DestVD = | 1021 const auto *DestVD = |
1159 EmitBlock(DoneBB, /*IsFinished=*/true); | 1192 EmitBlock(DoneBB, /*IsFinished=*/true); |
1160 } | 1193 } |
1161 | 1194 |
1162 void CodeGenFunction::EmitOMPReductionClauseInit( | 1195 void CodeGenFunction::EmitOMPReductionClauseInit( |
1163 const OMPExecutableDirective &D, | 1196 const OMPExecutableDirective &D, |
1164 CodeGenFunction::OMPPrivateScope &PrivateScope) { | 1197 CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) { |
1165 if (!HaveInsertPoint()) | 1198 if (!HaveInsertPoint()) |
1166 return; | 1199 return; |
1167 SmallVector<const Expr *, 4> Shareds; | 1200 SmallVector<const Expr *, 4> Shareds; |
1168 SmallVector<const Expr *, 4> Privates; | 1201 SmallVector<const Expr *, 4> Privates; |
1169 SmallVector<const Expr *, 4> ReductionOps; | 1202 SmallVector<const Expr *, 4> ReductionOps; |
1171 SmallVector<const Expr *, 4> RHSs; | 1204 SmallVector<const Expr *, 4> RHSs; |
1172 OMPTaskDataTy Data; | 1205 OMPTaskDataTy Data; |
1173 SmallVector<const Expr *, 4> TaskLHSs; | 1206 SmallVector<const Expr *, 4> TaskLHSs; |
1174 SmallVector<const Expr *, 4> TaskRHSs; | 1207 SmallVector<const Expr *, 4> TaskRHSs; |
1175 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { | 1208 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1209 if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan)) | |
1210 continue; | |
1176 Shareds.append(C->varlist_begin(), C->varlist_end()); | 1211 Shareds.append(C->varlist_begin(), C->varlist_end()); |
1177 Privates.append(C->privates().begin(), C->privates().end()); | 1212 Privates.append(C->privates().begin(), C->privates().end()); |
1178 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); | 1213 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
1179 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); | 1214 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
1180 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); | 1215 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
1364 case OMPD_requires: | 1399 case OMPD_requires: |
1365 case OMPD_declare_variant: | 1400 case OMPD_declare_variant: |
1366 case OMPD_begin_declare_variant: | 1401 case OMPD_begin_declare_variant: |
1367 case OMPD_end_declare_variant: | 1402 case OMPD_end_declare_variant: |
1368 case OMPD_unknown: | 1403 case OMPD_unknown: |
1404 default: | |
1369 llvm_unreachable("Enexpected directive with task reductions."); | 1405 llvm_unreachable("Enexpected directive with task reductions."); |
1370 } | 1406 } |
1371 | 1407 |
1372 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl()); | 1408 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl()); |
1373 EmitVarDecl(*VD); | 1409 EmitVarDecl(*VD); |
1385 llvm::SmallVector<const Expr *, 8> RHSExprs; | 1421 llvm::SmallVector<const Expr *, 8> RHSExprs; |
1386 llvm::SmallVector<const Expr *, 8> ReductionOps; | 1422 llvm::SmallVector<const Expr *, 8> ReductionOps; |
1387 bool HasAtLeastOneReduction = false; | 1423 bool HasAtLeastOneReduction = false; |
1388 bool IsReductionWithTaskMod = false; | 1424 bool IsReductionWithTaskMod = false; |
1389 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { | 1425 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1426 // Do not emit for inscan reductions. | |
1427 if (C->getModifier() == OMPC_REDUCTION_inscan) | |
1428 continue; | |
1390 HasAtLeastOneReduction = true; | 1429 HasAtLeastOneReduction = true; |
1391 Privates.append(C->privates().begin(), C->privates().end()); | 1430 Privates.append(C->privates().begin(), C->privates().end()); |
1392 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); | 1431 LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
1393 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); | 1432 RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
1394 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); | 1433 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
1545 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); | 1584 CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); |
1546 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, | 1585 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getBeginLoc(), OutlinedFn, |
1547 CapturedVars, IfCond); | 1586 CapturedVars, IfCond); |
1548 } | 1587 } |
1549 | 1588 |
1589 static bool isAllocatableDecl(const VarDecl *VD) { | |
1590 const VarDecl *CVD = VD->getCanonicalDecl(); | |
1591 if (!CVD->hasAttr<OMPAllocateDeclAttr>()) | |
1592 return false; | |
1593 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); | |
1594 // Use the default allocation. | |
1595 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || | |
1596 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && | |
1597 !AA->getAllocator()); | |
1598 } | |
1599 | |
1550 static void emitEmptyBoundParameters(CodeGenFunction &, | 1600 static void emitEmptyBoundParameters(CodeGenFunction &, |
1551 const OMPExecutableDirective &, | 1601 const OMPExecutableDirective &, |
1552 llvm::SmallVectorImpl<llvm::Value *> &) {} | 1602 llvm::SmallVectorImpl<llvm::Value *> &) {} |
1553 | 1603 |
1604 Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable( | |
1605 CodeGenFunction &CGF, const VarDecl *VD) { | |
1606 CodeGenModule &CGM = CGF.CGM; | |
1607 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); | |
1608 | |
1609 if (!VD) | |
1610 return Address::invalid(); | |
1611 const VarDecl *CVD = VD->getCanonicalDecl(); | |
1612 if (!isAllocatableDecl(CVD)) | |
1613 return Address::invalid(); | |
1614 llvm::Value *Size; | |
1615 CharUnits Align = CGM.getContext().getDeclAlign(CVD); | |
1616 if (CVD->getType()->isVariablyModifiedType()) { | |
1617 Size = CGF.getTypeSize(CVD->getType()); | |
1618 // Align the size: ((size + align - 1) / align) * align | |
1619 Size = CGF.Builder.CreateNUWAdd( | |
1620 Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); | |
1621 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); | |
1622 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); | |
1623 } else { | |
1624 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); | |
1625 Size = CGM.getSize(Sz.alignTo(Align)); | |
1626 } | |
1627 | |
1628 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); | |
1629 assert(AA->getAllocator() && | |
1630 "Expected allocator expression for non-default allocator."); | |
1631 llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); | |
1632 // According to the standard, the original allocator type is a enum (integer). | |
1633 // Convert to pointer type, if required. | |
1634 if (Allocator->getType()->isIntegerTy()) | |
1635 Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); | |
1636 else if (Allocator->getType()->isPointerTy()) | |
1637 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, | |
1638 CGM.VoidPtrTy); | |
1639 | |
1640 llvm::Value *Addr = OMPBuilder.createOMPAlloc( | |
1641 CGF.Builder, Size, Allocator, | |
1642 getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", ".")); | |
1643 llvm::CallInst *FreeCI = | |
1644 OMPBuilder.createOMPFree(CGF.Builder, Addr, Allocator); | |
1645 | |
1646 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI); | |
1647 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( | |
1648 Addr, | |
1649 CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), | |
1650 getNameWithSeparators({CVD->getName(), ".addr"}, ".", ".")); | |
1651 return Address(Addr, Align); | |
1652 } | |
1653 | |
1654 Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate( | |
1655 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, | |
1656 SourceLocation Loc) { | |
1657 CodeGenModule &CGM = CGF.CGM; | |
1658 if (CGM.getLangOpts().OpenMPUseTLS && | |
1659 CGM.getContext().getTargetInfo().isTLSSupported()) | |
1660 return VDAddr; | |
1661 | |
1662 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); | |
1663 | |
1664 llvm::Type *VarTy = VDAddr.getElementType(); | |
1665 llvm::Value *Data = | |
1666 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy); | |
1667 llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)); | |
1668 std::string Suffix = getNameWithSeparators({"cache", ""}); | |
1669 llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix); | |
1670 | |
1671 llvm::CallInst *ThreadPrivateCacheCall = | |
1672 OMPBuilder.createCachedThreadPrivate(CGF.Builder, Data, Size, CacheName); | |
1673 | |
1674 return Address(ThreadPrivateCacheCall, VDAddr.getAlignment()); | |
1675 } | |
1676 | |
1677 std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators( | |
1678 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) { | |
1679 SmallString<128> Buffer; | |
1680 llvm::raw_svector_ostream OS(Buffer); | |
1681 StringRef Sep = FirstSeparator; | |
1682 for (StringRef Part : Parts) { | |
1683 OS << Sep << Part; | |
1684 Sep = Separator; | |
1685 } | |
1686 return OS.str().str(); | |
1687 } | |
1554 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { | 1688 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { |
1555 if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { | 1689 if (CGM.getLangOpts().OpenMPIRBuilder) { |
1690 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); | |
1556 // Check if we have any if clause associated with the directive. | 1691 // Check if we have any if clause associated with the directive. |
1557 llvm::Value *IfCond = nullptr; | 1692 llvm::Value *IfCond = nullptr; |
1558 if (const auto *C = S.getSingleClause<OMPIfClause>()) | 1693 if (const auto *C = S.getSingleClause<OMPIfClause>()) |
1559 IfCond = EmitScalarExpr(C->getCondition(), | 1694 IfCond = EmitScalarExpr(C->getCondition(), |
1560 /*IgnoreResultAssign=*/true); | 1695 /*IgnoreResultAssign=*/true); |
1579 // Privatization callback that performs appropriate action for | 1714 // Privatization callback that performs appropriate action for |
1580 // shared/private/firstprivate/lastprivate/copyin/... variables. | 1715 // shared/private/firstprivate/lastprivate/copyin/... variables. |
1581 // | 1716 // |
1582 // TODO: This defaults to shared right now. | 1717 // TODO: This defaults to shared right now. |
1583 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, | 1718 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, |
1584 llvm::Value &Val, llvm::Value *&ReplVal) { | 1719 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { |
1585 // The next line is appropriate only for variables (Val) with the | 1720 // The next line is appropriate only for variables (Val) with the |
1586 // data-sharing attribute "shared". | 1721 // data-sharing attribute "shared". |
1587 ReplVal = &Val; | 1722 ReplVal = &Val; |
1588 | 1723 |
1589 return CodeGenIP; | 1724 return CodeGenIP; |
1601 CodeGenIP, ContinuationBB); | 1736 CodeGenIP, ContinuationBB); |
1602 }; | 1737 }; |
1603 | 1738 |
1604 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); | 1739 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); |
1605 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); | 1740 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); |
1606 Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB, | 1741 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( |
1607 FiniCB, IfCond, NumThreads, | 1742 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); |
1608 ProcBind, S.hasCancel())); | 1743 Builder.restoreIP( |
1744 OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, | |
1745 IfCond, NumThreads, ProcBind, S.hasCancel())); | |
1609 return; | 1746 return; |
1610 } | 1747 } |
1611 | 1748 |
1612 // Emit parallel region as a standalone region. | 1749 // Emit parallel region as a standalone region. |
1613 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { | 1750 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
1639 } | 1776 } |
1640 // Check for outer lastprivate conditional update. | 1777 // Check for outer lastprivate conditional update. |
1641 checkForLastprivateConditionalUpdate(*this, S); | 1778 checkForLastprivateConditionalUpdate(*this, S); |
1642 } | 1779 } |
1643 | 1780 |
1781 namespace { | |
1782 /// RAII to handle scopes for loop transformation directives. | |
1783 class OMPTransformDirectiveScopeRAII { | |
1784 OMPLoopScope *Scope = nullptr; | |
1785 CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr; | |
1786 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr; | |
1787 | |
1788 public: | |
1789 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) { | |
1790 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) { | |
1791 Scope = new OMPLoopScope(CGF, *Dir); | |
1792 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP); | |
1793 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI); | |
1794 } | |
1795 } | |
1796 ~OMPTransformDirectiveScopeRAII() { | |
1797 if (!Scope) | |
1798 return; | |
1799 delete CapInfoRAII; | |
1800 delete CGSI; | |
1801 delete Scope; | |
1802 } | |
1803 }; | |
1804 } // namespace | |
1805 | |
1644 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, | 1806 static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, |
1645 int MaxLevel, int Level = 0) { | 1807 int MaxLevel, int Level = 0) { |
1646 assert(Level < MaxLevel && "Too deep lookup during loop body codegen."); | 1808 assert(Level < MaxLevel && "Too deep lookup during loop body codegen."); |
1647 const Stmt *SimplifiedS = S->IgnoreContainers(); | 1809 const Stmt *SimplifiedS = S->IgnoreContainers(); |
1648 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) { | 1810 if (const auto *CS = dyn_cast<CompoundStmt>(SimplifiedS)) { |
1655 for (const Stmt *CurStmt : CS->body()) | 1817 for (const Stmt *CurStmt : CS->body()) |
1656 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level); | 1818 emitBody(CGF, CurStmt, NextLoop, MaxLevel, Level); |
1657 return; | 1819 return; |
1658 } | 1820 } |
1659 if (SimplifiedS == NextLoop) { | 1821 if (SimplifiedS == NextLoop) { |
1822 if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS)) | |
1823 SimplifiedS = Dir->getTransformedStmt(); | |
1824 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS)) | |
1825 SimplifiedS = CanonLoop->getLoopStmt(); | |
1660 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) { | 1826 if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) { |
1661 S = For->getBody(); | 1827 S = For->getBody(); |
1662 } else { | 1828 } else { |
1663 assert(isa<CXXForRangeStmt>(SimplifiedS) && | 1829 assert(isa<CXXForRangeStmt>(SimplifiedS) && |
1664 "Expected canonical for loop or range-based for loop."); | 1830 "Expected canonical for loop or range-based for loop."); |
1703 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next"); | 1869 llvm::BasicBlock *NextBB = createBasicBlock("omp.body.next"); |
1704 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(), | 1870 EmitBranchOnBoolExpr(E, NextBB, Continue.getBlock(), |
1705 getProfileCount(D.getBody())); | 1871 getProfileCount(D.getBody())); |
1706 EmitBlock(NextBB); | 1872 EmitBlock(NextBB); |
1707 } | 1873 } |
1874 | |
1875 OMPPrivateScope InscanScope(*this); | |
1876 EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true); | |
1877 bool IsInscanRegion = InscanScope.Privatize(); | |
1878 if (IsInscanRegion) { | |
1879 // Need to remember the block before and after scan directive | |
1880 // to dispatch them correctly depending on the clause used in | |
1881 // this directive, inclusive or exclusive. For inclusive scan the natural | |
1882 // order of the blocks is used, for exclusive clause the blocks must be | |
1883 // executed in reverse order. | |
1884 OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb"); | |
1885 OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb"); | |
1886 // No need to allocate inscan exit block, in simd mode it is selected in the | |
1887 // codegen for the scan directive. | |
1888 if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd) | |
1889 OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb"); | |
1890 OMPScanDispatch = createBasicBlock("omp.inscan.dispatch"); | |
1891 EmitBranch(OMPScanDispatch); | |
1892 EmitBlock(OMPBeforeScanBlock); | |
1893 } | |
1894 | |
1708 // Emit loop variables for C++ range loops. | 1895 // Emit loop variables for C++ range loops. |
1709 const Stmt *Body = | 1896 const Stmt *Body = |
1710 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); | 1897 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); |
1711 // Emit loop body. | 1898 // Emit loop body. |
1712 emitBody(*this, Body, | 1899 emitBody(*this, Body, |
1713 OMPLoopDirective::tryToFindNextInnerLoop( | 1900 OMPLoopBasedDirective::tryToFindNextInnerLoop( |
1714 Body, /*TryImperfectlyNestedLoops=*/true), | 1901 Body, /*TryImperfectlyNestedLoops=*/true), |
1715 D.getCollapsedNumber()); | 1902 D.getLoopsNumber()); |
1903 | |
1904 // Jump to the dispatcher at the end of the loop body. | |
1905 if (IsInscanRegion) | |
1906 EmitBranch(OMPScanExitBlock); | |
1716 | 1907 |
1717 // The end (updates/cleanups). | 1908 // The end (updates/cleanups). |
1718 EmitBlock(Continue.getBlock()); | 1909 EmitBlock(Continue.getBlock()); |
1719 BreakContinueStack.pop_back(); | 1910 BreakContinueStack.pop_back(); |
1720 } | 1911 } |
1721 | 1912 |
1913 using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>; | |
1914 | |
1915 /// Emit a captured statement and return the function as well as its captured | |
1916 /// closure context. | |
1917 static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF, | |
1918 const CapturedStmt *S) { | |
1919 LValue CapStruct = ParentCGF.InitCapturedStruct(*S); | |
1920 CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true); | |
1921 std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI = | |
1922 std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S); | |
1923 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get()); | |
1924 llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S); | |
1925 | |
1926 return {F, CapStruct.getPointer(ParentCGF)}; | |
1927 } | |
1928 | |
1929 /// Emit a call to a previously captured closure. | |
1930 static llvm::CallInst * | |
1931 emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap, | |
1932 llvm::ArrayRef<llvm::Value *> Args) { | |
1933 // Append the closure context to the argument list. | |
1934 SmallVector<llvm::Value *> EffectiveArgs; | |
1935 EffectiveArgs.reserve(Args.size() + 1); | |
1936 llvm::append_range(EffectiveArgs, Args); | |
1937 EffectiveArgs.push_back(Cap.second); | |
1938 | |
1939 return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs); | |
1940 } | |
1941 | |
1942 llvm::CanonicalLoopInfo * | |
1943 CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) { | |
1944 assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented"); | |
1945 | |
1946 EmitStmt(S); | |
1947 assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops"); | |
1948 | |
1949 // The last added loop is the outermost one. | |
1950 return OMPLoopNestStack.back(); | |
1951 } | |
1952 | |
1953 void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) { | |
1954 const Stmt *SyntacticalLoop = S->getLoopStmt(); | |
1955 if (!getLangOpts().OpenMPIRBuilder) { | |
1956 // Ignore if OpenMPIRBuilder is not enabled. | |
1957 EmitStmt(SyntacticalLoop); | |
1958 return; | |
1959 } | |
1960 | |
1961 LexicalScope ForScope(*this, S->getSourceRange()); | |
1962 | |
1963 // Emit init statements. The Distance/LoopVar funcs may reference variable | |
1964 // declarations they contain. | |
1965 const Stmt *BodyStmt; | |
1966 if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) { | |
1967 if (const Stmt *InitStmt = For->getInit()) | |
1968 EmitStmt(InitStmt); | |
1969 BodyStmt = For->getBody(); | |
1970 } else if (const auto *RangeFor = | |
1971 dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) { | |
1972 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt()) | |
1973 EmitStmt(RangeStmt); | |
1974 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt()) | |
1975 EmitStmt(BeginStmt); | |
1976 if (const DeclStmt *EndStmt = RangeFor->getEndStmt()) | |
1977 EmitStmt(EndStmt); | |
1978 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt()) | |
1979 EmitStmt(LoopVarStmt); | |
1980 BodyStmt = RangeFor->getBody(); | |
1981 } else | |
1982 llvm_unreachable("Expected for-stmt or range-based for-stmt"); | |
1983 | |
1984 // Emit closure for later use. By-value captures will be captured here. | |
1985 const CapturedStmt *DistanceFunc = S->getDistanceFunc(); | |
1986 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc); | |
1987 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc(); | |
1988 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc); | |
1989 | |
1990 // Call the distance function to get the number of iterations of the loop to | |
1991 // come. | |
1992 QualType LogicalTy = DistanceFunc->getCapturedDecl() | |
1993 ->getParam(0) | |
1994 ->getType() | |
1995 .getNonReferenceType(); | |
1996 Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr"); | |
1997 emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()}); | |
1998 llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count"); | |
1999 | |
2000 // Emit the loop structure. | |
2001 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); | |
2002 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, | |
2003 llvm::Value *IndVar) { | |
2004 Builder.restoreIP(CodeGenIP); | |
2005 | |
2006 // Emit the loop body: Convert the logical iteration number to the loop | |
2007 // variable and emit the body. | |
2008 const DeclRefExpr *LoopVarRef = S->getLoopVarRef(); | |
2009 LValue LCVal = EmitLValue(LoopVarRef); | |
2010 Address LoopVarAddress = LCVal.getAddress(*this); | |
2011 emitCapturedStmtCall(*this, LoopVarClosure, | |
2012 {LoopVarAddress.getPointer(), IndVar}); | |
2013 | |
2014 RunCleanupsScope BodyScope(*this); | |
2015 EmitStmt(BodyStmt); | |
2016 }; | |
2017 llvm::CanonicalLoopInfo *CL = | |
2018 OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal); | |
2019 | |
2020 // Finish up the loop. | |
2021 Builder.restoreIP(CL->getAfterIP()); | |
2022 ForScope.ForceCleanup(); | |
2023 | |
2024 // Remember the CanonicalLoopInfo for parent AST nodes consuming it. | |
2025 OMPLoopNestStack.push_back(CL); | |
2026 } | |
2027 | |
1722 void CodeGenFunction::EmitOMPInnerLoop( | 2028 void CodeGenFunction::EmitOMPInnerLoop( |
1723 const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, | 2029 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, |
1724 const Expr *IncExpr, | 2030 const Expr *IncExpr, |
1725 const llvm::function_ref<void(CodeGenFunction &)> BodyGen, | 2031 const llvm::function_ref<void(CodeGenFunction &)> BodyGen, |
1726 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { | 2032 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { |
1727 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); | 2033 auto LoopExit = getJumpDestInCurrentScope("omp.inner.for.end"); |
1728 | 2034 |
1734 // If attributes are attached, push to the basic block with them. | 2040 // If attributes are attached, push to the basic block with them. |
1735 const auto &OMPED = cast<OMPExecutableDirective>(S); | 2041 const auto &OMPED = cast<OMPExecutableDirective>(S); |
1736 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); | 2042 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); |
1737 const Stmt *SS = ICS->getCapturedStmt(); | 2043 const Stmt *SS = ICS->getCapturedStmt(); |
1738 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS); | 2044 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS); |
2045 OMPLoopNestStack.clear(); | |
1739 if (AS) | 2046 if (AS) |
1740 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), | 2047 LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), |
1741 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), | 2048 AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), |
1742 SourceLocToDebugLoc(R.getEnd())); | 2049 SourceLocToDebugLoc(R.getEnd())); |
1743 else | 2050 else |
1921 } | 2228 } |
1922 // Privatize extra loop counters used in loops for ordered(n) clauses. | 2229 // Privatize extra loop counters used in loops for ordered(n) clauses. |
1923 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { | 2230 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { |
1924 if (!C->getNumForLoops()) | 2231 if (!C->getNumForLoops()) |
1925 continue; | 2232 continue; |
1926 for (unsigned I = S.getCollapsedNumber(), | 2233 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size(); |
1927 E = C->getLoopNumIterations().size(); | |
1928 I < E; ++I) { | 2234 I < E; ++I) { |
1929 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); | 2235 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); |
1930 const auto *VD = cast<VarDecl>(DRE->getDecl()); | 2236 const auto *VD = cast<VarDecl>(DRE->getDecl()); |
1931 // Override only those variables that can be captured to avoid re-emission | 2237 // Override only those variables that can be captured to avoid re-emission |
1932 // of the variables declared within the loops. | 2238 // of the variables declared within the loops. |
2046 LoopStack.setVectorizeEnable(); | 2352 LoopStack.setVectorizeEnable(); |
2047 emitSimdlenSafelenClause(*this, D, IsMonotonic); | 2353 emitSimdlenSafelenClause(*this, D, IsMonotonic); |
2048 if (const auto *C = D.getSingleClause<OMPOrderClause>()) | 2354 if (const auto *C = D.getSingleClause<OMPOrderClause>()) |
2049 if (C->getKind() == OMPC_ORDER_concurrent) | 2355 if (C->getKind() == OMPC_ORDER_concurrent) |
2050 LoopStack.setParallel(/*Enable=*/true); | 2356 LoopStack.setParallel(/*Enable=*/true); |
2357 if ((D.getDirectiveKind() == OMPD_simd || | |
2358 (getLangOpts().OpenMPSimd && | |
2359 isOpenMPSimdDirective(D.getDirectiveKind()))) && | |
2360 llvm::any_of(D.getClausesOfKind<OMPReductionClause>(), | |
2361 [](const OMPReductionClause *C) { | |
2362 return C->getModifier() == OMPC_REDUCTION_inscan; | |
2363 })) | |
2364 // Disable parallel access in case of prefix sum. | |
2365 LoopStack.setParallel(/*Enable=*/false); | |
2051 } | 2366 } |
2052 | 2367 |
2053 void CodeGenFunction::EmitOMPSimdFinal( | 2368 void CodeGenFunction::EmitOMPSimdFinal( |
2054 const OMPLoopDirective &D, | 2369 const OMPLoopDirective &D, |
2055 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { | 2370 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
2219 }, | 2534 }, |
2220 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { | 2535 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
2221 CGF.EmitOMPInnerLoop( | 2536 CGF.EmitOMPInnerLoop( |
2222 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), | 2537 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), |
2223 [&S](CodeGenFunction &CGF) { | 2538 [&S](CodeGenFunction &CGF) { |
2224 CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); | 2539 emitOMPLoopBodyWithStopPoint(CGF, S, |
2225 CGF.EmitStopPoint(&S); | 2540 CodeGenFunction::JumpDest()); |
2226 }, | 2541 }, |
2227 [](CodeGenFunction &) {}); | 2542 [](CodeGenFunction &) {}); |
2228 }); | 2543 }); |
2229 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); | 2544 CGF.EmitOMPSimdFinal(S, [](CodeGenFunction &) { return nullptr; }); |
2230 // Emit final copy of the lastprivate variables at the end of loops. | 2545 // Emit final copy of the lastprivate variables at the end of loops. |
2241 CGF.EmitBlock(ContBlock, true); | 2556 CGF.EmitBlock(ContBlock, true); |
2242 } | 2557 } |
2243 } | 2558 } |
2244 | 2559 |
2245 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { | 2560 void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { |
2561 ParentLoopDirectiveForScanRegion ScanRegion(*this, S); | |
2562 OMPFirstScanLoop = true; | |
2246 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { | 2563 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
2247 emitOMPSimdRegion(CGF, S, Action); | 2564 emitOMPSimdRegion(CGF, S, Action); |
2248 }; | 2565 }; |
2249 { | 2566 { |
2250 auto LPCRegion = | 2567 auto LPCRegion = |
2252 OMPLexicalScope Scope(*this, S, OMPD_unknown); | 2569 OMPLexicalScope Scope(*this, S, OMPD_unknown); |
2253 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); | 2570 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); |
2254 } | 2571 } |
2255 // Check for outer lastprivate conditional update. | 2572 // Check for outer lastprivate conditional update. |
2256 checkForLastprivateConditionalUpdate(*this, S); | 2573 checkForLastprivateConditionalUpdate(*this, S); |
2574 } | |
2575 | |
2576 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { | |
2577 // Emit the de-sugared statement. | |
2578 OMPTransformDirectiveScopeRAII TileScope(*this, &S); | |
2579 EmitStmt(S.getTransformedStmt()); | |
2257 } | 2580 } |
2258 | 2581 |
2259 void CodeGenFunction::EmitOMPOuterLoop( | 2582 void CodeGenFunction::EmitOMPOuterLoop( |
2260 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, | 2583 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, |
2261 CodeGenFunction::OMPPrivateScope &LoopScope, | 2584 CodeGenFunction::OMPPrivateScope &LoopScope, |
2272 | 2595 |
2273 // Start the loop with a block that tests the condition. | 2596 // Start the loop with a block that tests the condition. |
2274 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); | 2597 llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); |
2275 EmitBlock(CondBlock); | 2598 EmitBlock(CondBlock); |
2276 const SourceRange R = S.getSourceRange(); | 2599 const SourceRange R = S.getSourceRange(); |
2600 OMPLoopNestStack.clear(); | |
2277 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), | 2601 LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), |
2278 SourceLocToDebugLoc(R.getEnd())); | 2602 SourceLocToDebugLoc(R.getEnd())); |
2279 | 2603 |
2280 llvm::Value *BoolCondVal = nullptr; | 2604 llvm::Value *BoolCondVal = nullptr; |
2281 if (!DynamicOrOrdered) { | 2605 if (!DynamicOrOrdered) { |
2355 EmitIgnoredExpr(LoopArgs.NextLB); | 2679 EmitIgnoredExpr(LoopArgs.NextLB); |
2356 EmitIgnoredExpr(LoopArgs.NextUB); | 2680 EmitIgnoredExpr(LoopArgs.NextUB); |
2357 } | 2681 } |
2358 | 2682 |
2359 EmitBranch(CondBlock); | 2683 EmitBranch(CondBlock); |
2684 OMPLoopNestStack.clear(); | |
2360 LoopStack.pop(); | 2685 LoopStack.pop(); |
2361 // Emit the fall-through block. | 2686 // Emit the fall-through block. |
2362 EmitBlock(LoopExit.getBlock()); | 2687 EmitBlock(LoopExit.getBlock()); |
2363 | 2688 |
2364 // Tell the runtime we are done. | 2689 // Tell the runtime we are done. |
2832 // specified, and if no monotonic modifier is specified, the effect will | 3157 // specified, and if no monotonic modifier is specified, the effect will |
2833 // be as if the monotonic modifier was specified. | 3158 // be as if the monotonic modifier was specified. |
2834 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, | 3159 bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, |
2835 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && | 3160 /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && |
2836 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); | 3161 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); |
3162 bool IsMonotonic = | |
3163 Ordered || | |
3164 ((ScheduleKind.Schedule == OMPC_SCHEDULE_static || | |
3165 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) && | |
3166 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || | |
3167 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || | |
3168 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || | |
3169 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; | |
2837 if ((RT.isStaticNonchunked(ScheduleKind.Schedule, | 3170 if ((RT.isStaticNonchunked(ScheduleKind.Schedule, |
2838 /* Chunked */ Chunk != nullptr) || | 3171 /* Chunked */ Chunk != nullptr) || |
2839 StaticChunkedOne) && | 3172 StaticChunkedOne) && |
2840 !Ordered) { | 3173 !Ordered) { |
2841 JumpDest LoopExit = | 3174 JumpDest LoopExit = |
2842 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); | 3175 getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); |
2843 emitCommonSimdLoop( | 3176 emitCommonSimdLoop( |
2844 *this, S, | 3177 *this, S, |
2845 [&S](CodeGenFunction &CGF, PrePostActionTy &) { | 3178 [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { |
2846 if (isOpenMPSimdDirective(S.getDirectiveKind())) { | 3179 if (isOpenMPSimdDirective(S.getDirectiveKind())) { |
2847 CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); | 3180 CGF.EmitOMPSimdInit(S, IsMonotonic); |
2848 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { | 3181 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { |
2849 if (C->getKind() == OMPC_ORDER_concurrent) | 3182 if (C->getKind() == OMPC_ORDER_concurrent) |
2850 CGF.LoopStack.setParallel(/*Enable=*/true); | 3183 CGF.LoopStack.setParallel(/*Enable=*/true); |
2851 } | 3184 } |
2852 }, | 3185 }, |
2887 S, LoopScope.requiresCleanups(), | 3220 S, LoopScope.requiresCleanups(), |
2888 StaticChunkedOne ? S.getCombinedParForInDistCond() | 3221 StaticChunkedOne ? S.getCombinedParForInDistCond() |
2889 : S.getCond(), | 3222 : S.getCond(), |
2890 StaticChunkedOne ? S.getDistInc() : S.getInc(), | 3223 StaticChunkedOne ? S.getDistInc() : S.getInc(), |
2891 [&S, LoopExit](CodeGenFunction &CGF) { | 3224 [&S, LoopExit](CodeGenFunction &CGF) { |
2892 CGF.EmitOMPLoopBody(S, LoopExit); | 3225 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit); |
2893 CGF.EmitStopPoint(&S); | |
2894 }, | 3226 }, |
2895 [](CodeGenFunction &) {}); | 3227 [](CodeGenFunction &) {}); |
2896 }); | 3228 }); |
2897 EmitBlock(LoopExit.getBlock()); | 3229 EmitBlock(LoopExit.getBlock()); |
2898 // Tell the runtime we are done. | 3230 // Tell the runtime we are done. |
2900 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), | 3232 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), |
2901 S.getDirectiveKind()); | 3233 S.getDirectiveKind()); |
2902 }; | 3234 }; |
2903 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); | 3235 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); |
2904 } else { | 3236 } else { |
2905 const bool IsMonotonic = | |
2906 Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || | |
2907 ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || | |
2908 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || | |
2909 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; | |
2910 // Emit the outer loop, which requests its work chunk [LB..UB] from | 3237 // Emit the outer loop, which requests its work chunk [LB..UB] from |
2911 // runtime and runs the inner loop to process it. | 3238 // runtime and runs the inner loop to process it. |
2912 const OMPLoopArguments LoopArguments( | 3239 const OMPLoopArguments LoopArguments( |
2913 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), | 3240 LB.getAddress(*this), UB.getAddress(*this), ST.getAddress(*this), |
2914 IL.getAddress(*this), Chunk, EUB); | 3241 IL.getAddress(*this), Chunk, EUB); |
2977 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); | 3304 llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0); |
2978 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); | 3305 llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration()); |
2979 return {LBVal, UBVal}; | 3306 return {LBVal, UBVal}; |
2980 } | 3307 } |
2981 | 3308 |
2982 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { | 3309 /// Emits internal temp array declarations for the directive with inscan |
2983 bool HasLastprivates = false; | 3310 /// reductions. |
2984 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, | 3311 /// The code is the following: |
2985 PrePostActionTy &) { | 3312 /// \code |
2986 OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); | 3313 /// size num_iters = <num_iters>; |
3314 /// <type> buffer[num_iters]; | |
3315 /// \endcode | |
3316 static void emitScanBasedDirectiveDecls( | |
3317 CodeGenFunction &CGF, const OMPLoopDirective &S, | |
3318 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) { | |
3319 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( | |
3320 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); | |
3321 SmallVector<const Expr *, 4> Shareds; | |
3322 SmallVector<const Expr *, 4> Privates; | |
3323 SmallVector<const Expr *, 4> ReductionOps; | |
3324 SmallVector<const Expr *, 4> CopyArrayTemps; | |
3325 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { | |
3326 assert(C->getModifier() == OMPC_REDUCTION_inscan && | |
3327 "Only inscan reductions are expected."); | |
3328 Shareds.append(C->varlist_begin(), C->varlist_end()); | |
3329 Privates.append(C->privates().begin(), C->privates().end()); | |
3330 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); | |
3331 CopyArrayTemps.append(C->copy_array_temps().begin(), | |
3332 C->copy_array_temps().end()); | |
3333 } | |
3334 { | |
3335 // Emit a buffer for each reduction variable. | |
3336 // ReductionCodeGen is required to correctly emit the code for array | |
3337 // reductions. | |
3338 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); | |
3339 unsigned Count = 0; | |
3340 auto *ITA = CopyArrayTemps.begin(); | |
3341 for (const Expr *IRef : Privates) { | |
3342 const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); | |
3343 // Emit variably modified arrays, used for arrays/array sections | |
3344 // reductions. | |
3345 if (PrivateVD->getType()->isVariablyModifiedType()) { | |
3346 RedCG.emitSharedOrigLValue(CGF, Count); | |
3347 RedCG.emitAggregateType(CGF, Count); | |
3348 } | |
3349 CodeGenFunction::OpaqueValueMapping DimMapping( | |
3350 CGF, | |
3351 cast<OpaqueValueExpr>( | |
3352 cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe()) | |
3353 ->getSizeExpr()), | |
3354 RValue::get(OMPScanNumIterations)); | |
3355 // Emit temp buffer. | |
3356 CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl())); | |
3357 ++ITA; | |
3358 ++Count; | |
3359 } | |
3360 } | |
3361 } | |
3362 | |
3363 /// Emits the code for the directive with inscan reductions. | |
3364 /// The code is the following: | |
3365 /// \code | |
3366 /// #pragma omp ... | |
3367 /// for (i: 0..<num_iters>) { | |
3368 /// <input phase>; | |
3369 /// buffer[i] = red; | |
3370 /// } | |
3371 /// #pragma omp master // in parallel region | |
3372 /// for (int k = 0; k != ceil(log2(num_iters)); ++k) | |
3373 /// for (size cnt = last_iter; cnt >= pow(2, k); --cnt) | |
3374 /// buffer[cnt] op= buffer[cnt-pow(2,k)]; | |
3375 /// #pragma omp barrier // in parallel region | |
3376 /// #pragma omp ... | |
3377 /// for (0..<num_iters>) { | |
3378 /// red = InclusiveScan ? buffer[i] : buffer[i-1]; | |
3379 /// <scan phase>; | |
3380 /// } | |
3381 /// \endcode | |
3382 static void emitScanBasedDirective( | |
3383 CodeGenFunction &CGF, const OMPLoopDirective &S, | |
3384 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen, | |
3385 llvm::function_ref<void(CodeGenFunction &)> FirstGen, | |
3386 llvm::function_ref<void(CodeGenFunction &)> SecondGen) { | |
3387 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( | |
3388 NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); | |
3389 SmallVector<const Expr *, 4> Privates; | |
3390 SmallVector<const Expr *, 4> ReductionOps; | |
3391 SmallVector<const Expr *, 4> LHSs; | |
3392 SmallVector<const Expr *, 4> RHSs; | |
3393 SmallVector<const Expr *, 4> CopyArrayElems; | |
3394 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { | |
3395 assert(C->getModifier() == OMPC_REDUCTION_inscan && | |
3396 "Only inscan reductions are expected."); | |
3397 Privates.append(C->privates().begin(), C->privates().end()); | |
3398 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); | |
3399 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); | |
3400 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); | |
3401 CopyArrayElems.append(C->copy_array_elems().begin(), | |
3402 C->copy_array_elems().end()); | |
3403 } | |
3404 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S); | |
3405 { | |
3406 // Emit loop with input phase: | |
3407 // #pragma omp ... | |
3408 // for (i: 0..<num_iters>) { | |
3409 // <input phase>; | |
3410 // buffer[i] = red; | |
3411 // } | |
3412 CGF.OMPFirstScanLoop = true; | |
3413 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); | |
3414 FirstGen(CGF); | |
3415 } | |
3416 // #pragma omp barrier // in parallel region | |
3417 auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems, | |
3418 &ReductionOps, | |
3419 &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) { | |
3420 Action.Enter(CGF); | |
3421 // Emit prefix reduction: | |
3422 // #pragma omp master // in parallel region | |
3423 // for (int k = 0; k != ceil(log2(n)); ++k) | |
3424 llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock(); | |
3425 llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body"); | |
3426 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit"); | |
3427 llvm::Function *F = | |
3428 CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy); | |
3429 llvm::Value *Arg = | |
3430 CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy); | |
3431 llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg); | |
3432 F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy); | |
3433 LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal); | |
3434 LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy); | |
3435 llvm::Value *NMin1 = CGF.Builder.CreateNUWSub( | |
3436 OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1)); | |
3437 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc()); | |
3438 CGF.EmitBlock(LoopBB); | |
3439 auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2); | |
3440 // size pow2k = 1; | |
3441 auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2); | |
3442 Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB); | |
3443 Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB); | |
3444 // for (size i = n - 1; i >= pow2k; --i) | |
3445 // tmp[i] op= tmp[i-pow2k]; | |
3446 llvm::BasicBlock *InnerLoopBB = | |
3447 CGF.createBasicBlock("omp.inner.log.scan.body"); | |
3448 llvm::BasicBlock *InnerExitBB = | |
3449 CGF.createBasicBlock("omp.inner.log.scan.exit"); | |
3450 llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K); | |
3451 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); | |
3452 CGF.EmitBlock(InnerLoopBB); | |
3453 auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2); | |
3454 IVal->addIncoming(NMin1, LoopBB); | |
3455 { | |
3456 CodeGenFunction::OMPPrivateScope PrivScope(CGF); | |
3457 auto *ILHS = LHSs.begin(); | |
3458 auto *IRHS = RHSs.begin(); | |
3459 for (const Expr *CopyArrayElem : CopyArrayElems) { | |
3460 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); | |
3461 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); | |
3462 Address LHSAddr = Address::invalid(); | |
3463 { | |
3464 CodeGenFunction::OpaqueValueMapping IdxMapping( | |
3465 CGF, | |
3466 cast<OpaqueValueExpr>( | |
3467 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), | |
3468 RValue::get(IVal)); | |
3469 LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); | |
3470 } | |
3471 PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; }); | |
3472 Address RHSAddr = Address::invalid(); | |
3473 { | |
3474 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K); | |
3475 CodeGenFunction::OpaqueValueMapping IdxMapping( | |
3476 CGF, | |
3477 cast<OpaqueValueExpr>( | |
3478 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), | |
3479 RValue::get(OffsetIVal)); | |
3480 RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); | |
3481 } | |
3482 PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; }); | |
3483 ++ILHS; | |
3484 ++IRHS; | |
3485 } | |
3486 PrivScope.Privatize(); | |
3487 CGF.CGM.getOpenMPRuntime().emitReduction( | |
3488 CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, | |
3489 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown}); | |
3490 } | |
3491 llvm::Value *NextIVal = | |
3492 CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1)); | |
3493 IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock()); | |
3494 CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K); | |
3495 CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); | |
3496 CGF.EmitBlock(InnerExitBB); | |
3497 llvm::Value *Next = | |
3498 CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1)); | |
3499 Counter->addIncoming(Next, CGF.Builder.GetInsertBlock()); | |
3500 // pow2k <<= 1; | |
3501 llvm::Value *NextPow2K = | |
3502 CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true); | |
3503 Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock()); | |
3504 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal); | |
3505 CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB); | |
3506 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc()); | |
3507 CGF.EmitBlock(ExitBB); | |
3508 }; | |
3509 if (isOpenMPParallelDirective(S.getDirectiveKind())) { | |
3510 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); | |
3511 CGF.CGM.getOpenMPRuntime().emitBarrierCall( | |
3512 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, | |
3513 /*ForceSimpleCall=*/true); | |
3514 } else { | |
3515 RegionCodeGenTy RCG(CodeGen); | |
3516 RCG(CGF); | |
3517 } | |
3518 | |
3519 CGF.OMPFirstScanLoop = false; | |
3520 SecondGen(CGF); | |
3521 } | |
3522 | |
3523 static bool emitWorksharingDirective(CodeGenFunction &CGF, | |
3524 const OMPLoopDirective &S, | |
3525 bool HasCancel) { | |
3526 bool HasLastprivates; | |
3527 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), | |
3528 [](const OMPReductionClause *C) { | |
3529 return C->getModifier() == OMPC_REDUCTION_inscan; | |
3530 })) { | |
3531 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { | |
3532 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); | |
3533 OMPLoopScope LoopScope(CGF, S); | |
3534 return CGF.EmitScalarExpr(S.getNumIterations()); | |
3535 }; | |
3536 const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) { | |
3537 CodeGenFunction::OMPCancelStackRAII CancelRegion( | |
3538 CGF, S.getDirectiveKind(), HasCancel); | |
3539 (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), | |
3540 emitForLoopBounds, | |
3541 emitDispatchForLoopBounds); | |
3542 // Emit an implicit barrier at the end. | |
3543 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), | |
3544 OMPD_for); | |
3545 }; | |
3546 const auto &&SecondGen = [&S, HasCancel, | |
3547 &HasLastprivates](CodeGenFunction &CGF) { | |
3548 CodeGenFunction::OMPCancelStackRAII CancelRegion( | |
3549 CGF, S.getDirectiveKind(), HasCancel); | |
3550 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), | |
3551 emitForLoopBounds, | |
3552 emitDispatchForLoopBounds); | |
3553 }; | |
3554 if (!isOpenMPParallelDirective(S.getDirectiveKind())) | |
3555 emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen); | |
3556 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); | |
3557 } else { | |
3558 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), | |
3559 HasCancel); | |
2987 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), | 3560 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), |
2988 emitForLoopBounds, | 3561 emitForLoopBounds, |
2989 emitDispatchForLoopBounds); | 3562 emitDispatchForLoopBounds); |
3563 } | |
3564 return HasLastprivates; | |
3565 } | |
3566 | |
3567 static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) { | |
3568 if (S.hasCancel()) | |
3569 return false; | |
3570 for (OMPClause *C : S.clauses()) | |
3571 if (!isa<OMPNowaitClause>(C)) | |
3572 return false; | |
3573 | |
3574 return true; | |
3575 } | |
3576 | |
3577 void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { | |
3578 bool HasLastprivates = false; | |
3579 bool UseOMPIRBuilder = | |
3580 CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S); | |
3581 auto &&CodeGen = [this, &S, &HasLastprivates, | |
3582 UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) { | |
3583 // Use the OpenMPIRBuilder if enabled. | |
3584 if (UseOMPIRBuilder) { | |
3585 // Emit the associated statement and get its loop representation. | |
3586 const Stmt *Inner = S.getRawStmt(); | |
3587 llvm::CanonicalLoopInfo *CLI = | |
3588 EmitOMPCollapsedCanonicalLoopNest(Inner, 1); | |
3589 | |
3590 bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>(); | |
3591 llvm::OpenMPIRBuilder &OMPBuilder = | |
3592 CGM.getOpenMPRuntime().getOMPBuilder(); | |
3593 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( | |
3594 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); | |
3595 OMPBuilder.createWorkshareLoop(Builder, CLI, AllocaIP, NeedsBarrier); | |
3596 return; | |
3597 } | |
3598 | |
3599 HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel()); | |
2990 }; | 3600 }; |
2991 { | 3601 { |
2992 auto LPCRegion = | 3602 auto LPCRegion = |
2993 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); | 3603 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
2994 OMPLexicalScope Scope(*this, S, OMPD_unknown); | 3604 OMPLexicalScope Scope(*this, S, OMPD_unknown); |
2995 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, | 3605 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, |
2996 S.hasCancel()); | 3606 S.hasCancel()); |
2997 } | 3607 } |
2998 | 3608 |
2999 // Emit an implicit barrier at the end. | 3609 if (!UseOMPIRBuilder) { |
3000 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) | 3610 // Emit an implicit barrier at the end. |
3001 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); | 3611 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) |
3612 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); | |
3613 } | |
3002 // Check for outer lastprivate conditional update. | 3614 // Check for outer lastprivate conditional update. |
3003 checkForLastprivateConditionalUpdate(*this, S); | 3615 checkForLastprivateConditionalUpdate(*this, S); |
3004 } | 3616 } |
3005 | 3617 |
3006 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { | 3618 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { |
3007 bool HasLastprivates = false; | 3619 bool HasLastprivates = false; |
3008 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, | 3620 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, |
3009 PrePostActionTy &) { | 3621 PrePostActionTy &) { |
3010 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), | 3622 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
3011 emitForLoopBounds, | |
3012 emitDispatchForLoopBounds); | |
3013 }; | 3623 }; |
3014 { | 3624 { |
3015 auto LPCRegion = | 3625 auto LPCRegion = |
3016 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); | 3626 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
3017 OMPLexicalScope Scope(*this, S, OMPD_unknown); | 3627 OMPLexicalScope Scope(*this, S, OMPD_unknown); |
3062 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); | 3672 OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); |
3063 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); | 3673 CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); |
3064 // Generate condition for loop. | 3674 // Generate condition for loop. |
3065 BinaryOperator *Cond = BinaryOperator::Create( | 3675 BinaryOperator *Cond = BinaryOperator::Create( |
3066 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary, | 3676 C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary, |
3067 S.getBeginLoc(), FPOptions(C.getLangOpts())); | 3677 S.getBeginLoc(), FPOptionsOverride()); |
3068 // Increment for loop counter. | 3678 // Increment for loop counter. |
3069 UnaryOperator *Inc = UnaryOperator::Create( | 3679 UnaryOperator *Inc = UnaryOperator::Create( |
3070 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, | 3680 C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, |
3071 S.getBeginLoc(), true, FPOptions(C.getLangOpts())); | 3681 S.getBeginLoc(), true, FPOptionsOverride()); |
3072 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { | 3682 auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { |
3073 // Iterate through all sections and emit a switch construct: | 3683 // Iterate through all sections and emit a switch construct: |
3074 // switch (IV) { | 3684 // switch (IV) { |
3075 // case 0: | 3685 // case 0: |
3076 // <SectionStmt[0]>; | 3686 // <SectionStmt[0]>; |
3179 OMPD_unknown); | 3789 OMPD_unknown); |
3180 } | 3790 } |
3181 } | 3791 } |
3182 | 3792 |
3183 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { | 3793 void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { |
3794 if (CGM.getLangOpts().OpenMPIRBuilder) { | |
3795 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); | |
3796 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; | |
3797 using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; | |
3798 | |
3799 auto FiniCB = [this](InsertPointTy IP) { | |
3800 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); | |
3801 }; | |
3802 | |
3803 const CapturedStmt *ICS = S.getInnermostCapturedStmt(); | |
3804 const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); | |
3805 const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); | |
3806 llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; | |
3807 if (CS) { | |
3808 for (const Stmt *SubStmt : CS->children()) { | |
3809 auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP, | |
3810 InsertPointTy CodeGenIP, | |
3811 llvm::BasicBlock &FiniBB) { | |
3812 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, | |
3813 FiniBB); | |
3814 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SubStmt, CodeGenIP, | |
3815 FiniBB); | |
3816 }; | |
3817 SectionCBVector.push_back(SectionCB); | |
3818 } | |
3819 } else { | |
3820 auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP, | |
3821 InsertPointTy CodeGenIP, | |
3822 llvm::BasicBlock &FiniBB) { | |
3823 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); | |
3824 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CapturedStmt, CodeGenIP, | |
3825 FiniBB); | |
3826 }; | |
3827 SectionCBVector.push_back(SectionCB); | |
3828 } | |
3829 | |
3830 // Privatization callback that performs appropriate action for | |
3831 // shared/private/firstprivate/lastprivate/copyin/... variables. | |
3832 // | |
3833 // TODO: This defaults to shared right now. | |
3834 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, | |
3835 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { | |
3836 // The next line is appropriate only for variables (Val) with the | |
3837 // data-sharing attribute "shared". | |
3838 ReplVal = &Val; | |
3839 | |
3840 return CodeGenIP; | |
3841 }; | |
3842 | |
3843 CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP); | |
3844 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); | |
3845 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( | |
3846 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); | |
3847 Builder.restoreIP(OMPBuilder.createSections( | |
3848 Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(), | |
3849 S.getSingleClause<OMPNowaitClause>())); | |
3850 return; | |
3851 } | |
3184 { | 3852 { |
3185 auto LPCRegion = | 3853 auto LPCRegion = |
3186 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); | 3854 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
3187 OMPLexicalScope Scope(*this, S, OMPD_unknown); | 3855 OMPLexicalScope Scope(*this, S, OMPD_unknown); |
3188 EmitSections(S); | 3856 EmitSections(S); |
3195 // Check for outer lastprivate conditional update. | 3863 // Check for outer lastprivate conditional update. |
3196 checkForLastprivateConditionalUpdate(*this, S); | 3864 checkForLastprivateConditionalUpdate(*this, S); |
3197 } | 3865 } |
3198 | 3866 |
3199 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { | 3867 void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { |
3200 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { | 3868 if (CGM.getLangOpts().OpenMPIRBuilder) { |
3201 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); | 3869 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
3202 }; | 3870 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
3203 OMPLexicalScope Scope(*this, S, OMPD_unknown); | 3871 |
3204 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, | 3872 const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); |
3205 S.hasCancel()); | 3873 auto FiniCB = [this](InsertPointTy IP) { |
3874 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); | |
3875 }; | |
3876 | |
3877 auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, | |
3878 InsertPointTy CodeGenIP, | |
3879 llvm::BasicBlock &FiniBB) { | |
3880 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); | |
3881 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SectionRegionBodyStmt, | |
3882 CodeGenIP, FiniBB); | |
3883 }; | |
3884 | |
3885 LexicalScope Scope(*this, S.getSourceRange()); | |
3886 EmitStopPoint(&S); | |
3887 Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB)); | |
3888 | |
3889 return; | |
3890 } | |
3891 LexicalScope Scope(*this, S.getSourceRange()); | |
3892 EmitStopPoint(&S); | |
3893 EmitStmt(S.getAssociatedStmt()); | |
3206 } | 3894 } |
3207 | 3895 |
3208 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { | 3896 void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { |
3209 llvm::SmallVector<const Expr *, 8> CopyprivateVars; | 3897 llvm::SmallVector<const Expr *, 8> CopyprivateVars; |
3210 llvm::SmallVector<const Expr *, 8> DestExprs; | 3898 llvm::SmallVector<const Expr *, 8> DestExprs; |
3251 } | 3939 } |
3252 | 3940 |
3253 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { | 3941 static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
3254 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { | 3942 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3255 Action.Enter(CGF); | 3943 Action.Enter(CGF); |
3256 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); | 3944 CGF.EmitStmt(S.getRawStmt()); |
3257 }; | 3945 }; |
3258 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); | 3946 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); |
3259 } | 3947 } |
3260 | 3948 |
3261 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { | 3949 void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { |
3262 if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { | 3950 if (CGM.getLangOpts().OpenMPIRBuilder) { |
3951 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); | |
3263 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; | 3952 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
3264 | 3953 |
3265 const CapturedStmt *CS = S.getInnermostCapturedStmt(); | 3954 const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt(); |
3266 const Stmt *MasterRegionBodyStmt = CS->getCapturedStmt(); | |
3267 | 3955 |
3268 auto FiniCB = [this](InsertPointTy IP) { | 3956 auto FiniCB = [this](InsertPointTy IP) { |
3269 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); | 3957 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); |
3270 }; | 3958 }; |
3271 | 3959 |
3275 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); | 3963 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); |
3276 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt, | 3964 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt, |
3277 CodeGenIP, FiniBB); | 3965 CodeGenIP, FiniBB); |
3278 }; | 3966 }; |
3279 | 3967 |
3280 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); | 3968 LexicalScope Scope(*this, S.getSourceRange()); |
3281 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); | 3969 EmitStopPoint(&S); |
3282 Builder.restoreIP(OMPBuilder->CreateMaster(Builder, BodyGenCB, FiniCB)); | 3970 Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB)); |
3283 | 3971 |
3284 return; | 3972 return; |
3285 } | 3973 } |
3286 OMPLexicalScope Scope(*this, S, OMPD_unknown); | 3974 LexicalScope Scope(*this, S.getSourceRange()); |
3975 EmitStopPoint(&S); | |
3287 emitMaster(*this, S); | 3976 emitMaster(*this, S); |
3288 } | 3977 } |
3289 | 3978 |
3979 static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) { | |
3980 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { | |
3981 Action.Enter(CGF); | |
3982 CGF.EmitStmt(S.getRawStmt()); | |
3983 }; | |
3984 Expr *Filter = nullptr; | |
3985 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) | |
3986 Filter = FilterClause->getThreadID(); | |
3987 CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(), | |
3988 Filter); | |
3989 } | |
3990 | |
3991 void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { | |
3992 if (CGM.getLangOpts().OpenMPIRBuilder) { | |
3993 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); | |
3994 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; | |
3995 | |
3996 const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt(); | |
3997 const Expr *Filter = nullptr; | |
3998 if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) | |
3999 Filter = FilterClause->getThreadID(); | |
4000 llvm::Value *FilterVal = Filter | |
4001 ? EmitScalarExpr(Filter, CGM.Int32Ty) | |
4002 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); | |
4003 | |
4004 auto FiniCB = [this](InsertPointTy IP) { | |
4005 OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); | |
4006 }; | |
4007 | |
4008 auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP, | |
4009 InsertPointTy CodeGenIP, | |
4010 llvm::BasicBlock &FiniBB) { | |
4011 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); | |
4012 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MaskedRegionBodyStmt, | |
4013 CodeGenIP, FiniBB); | |
4014 }; | |
4015 | |
4016 LexicalScope Scope(*this, S.getSourceRange()); | |
4017 EmitStopPoint(&S); | |
4018 Builder.restoreIP( | |
4019 OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal)); | |
4020 | |
4021 return; | |
4022 } | |
4023 LexicalScope Scope(*this, S.getSourceRange()); | |
4024 EmitStopPoint(&S); | |
4025 emitMasked(*this, S); | |
4026 } | |
4027 | |
3290 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { | 4028 void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { |
3291 if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { | 4029 if (CGM.getLangOpts().OpenMPIRBuilder) { |
4030 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); | |
3292 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; | 4031 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
3293 | 4032 |
3294 const CapturedStmt *CS = S.getInnermostCapturedStmt(); | 4033 const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt(); |
3295 const Stmt *CriticalRegionBodyStmt = CS->getCapturedStmt(); | |
3296 const Expr *Hint = nullptr; | 4034 const Expr *Hint = nullptr; |
3297 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) | 4035 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) |
3298 Hint = HintClause->getHint(); | 4036 Hint = HintClause->getHint(); |
3299 | 4037 |
3300 // TODO: This is slightly different from what's currently being done in | 4038 // TODO: This is slightly different from what's currently being done in |
3315 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); | 4053 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); |
3316 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt, | 4054 OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt, |
3317 CodeGenIP, FiniBB); | 4055 CodeGenIP, FiniBB); |
3318 }; | 4056 }; |
3319 | 4057 |
3320 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); | 4058 LexicalScope Scope(*this, S.getSourceRange()); |
3321 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); | 4059 EmitStopPoint(&S); |
3322 Builder.restoreIP(OMPBuilder->CreateCritical( | 4060 Builder.restoreIP(OMPBuilder.createCritical( |
3323 Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(), | 4061 Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(), |
3324 HintInst)); | 4062 HintInst)); |
3325 | 4063 |
3326 return; | 4064 return; |
3327 } | 4065 } |
3328 | 4066 |
3329 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { | 4067 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3330 Action.Enter(CGF); | 4068 Action.Enter(CGF); |
3331 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); | 4069 CGF.EmitStmt(S.getAssociatedStmt()); |
3332 }; | 4070 }; |
3333 const Expr *Hint = nullptr; | 4071 const Expr *Hint = nullptr; |
3334 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) | 4072 if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) |
3335 Hint = HintClause->getHint(); | 4073 Hint = HintClause->getHint(); |
3336 OMPLexicalScope Scope(*this, S, OMPD_unknown); | 4074 LexicalScope Scope(*this, S.getSourceRange()); |
4075 EmitStopPoint(&S); | |
3337 CGM.getOpenMPRuntime().emitCriticalRegion(*this, | 4076 CGM.getOpenMPRuntime().emitCriticalRegion(*this, |
3338 S.getDirectiveName().getAsString(), | 4077 S.getDirectiveName().getAsString(), |
3339 CodeGen, S.getBeginLoc(), Hint); | 4078 CodeGen, S.getBeginLoc(), Hint); |
3340 } | 4079 } |
3341 | 4080 |
3343 const OMPParallelForDirective &S) { | 4082 const OMPParallelForDirective &S) { |
3344 // Emit directive as a combined directive that consists of two implicit | 4083 // Emit directive as a combined directive that consists of two implicit |
3345 // directives: 'parallel' with 'for' directive. | 4084 // directives: 'parallel' with 'for' directive. |
3346 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { | 4085 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3347 Action.Enter(CGF); | 4086 Action.Enter(CGF); |
3348 OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); | 4087 (void)emitWorksharingDirective(CGF, S, S.hasCancel()); |
3349 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, | |
3350 emitDispatchForLoopBounds); | |
3351 }; | 4088 }; |
3352 { | 4089 { |
4090 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), | |
4091 [](const OMPReductionClause *C) { | |
4092 return C->getModifier() == OMPC_REDUCTION_inscan; | |
4093 })) { | |
4094 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { | |
4095 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); | |
4096 CGCapturedStmtInfo CGSI(CR_OpenMP); | |
4097 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); | |
4098 OMPLoopScope LoopScope(CGF, S); | |
4099 return CGF.EmitScalarExpr(S.getNumIterations()); | |
4100 }; | |
4101 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); | |
4102 } | |
3353 auto LPCRegion = | 4103 auto LPCRegion = |
3354 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); | 4104 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
3355 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, | 4105 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, |
3356 emitEmptyBoundParameters); | 4106 emitEmptyBoundParameters); |
3357 } | 4107 } |
3363 const OMPParallelForSimdDirective &S) { | 4113 const OMPParallelForSimdDirective &S) { |
3364 // Emit directive as a combined directive that consists of two implicit | 4114 // Emit directive as a combined directive that consists of two implicit |
3365 // directives: 'parallel' with 'for' directive. | 4115 // directives: 'parallel' with 'for' directive. |
3366 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { | 4116 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3367 Action.Enter(CGF); | 4117 Action.Enter(CGF); |
3368 CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, | 4118 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
3369 emitDispatchForLoopBounds); | |
3370 }; | 4119 }; |
3371 { | 4120 { |
4121 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), | |
4122 [](const OMPReductionClause *C) { | |
4123 return C->getModifier() == OMPC_REDUCTION_inscan; | |
4124 })) { | |
4125 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { | |
4126 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); | |
4127 CGCapturedStmtInfo CGSI(CR_OpenMP); | |
4128 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); | |
4129 OMPLoopScope LoopScope(CGF, S); | |
4130 return CGF.EmitScalarExpr(S.getNumIterations()); | |
4131 }; | |
4132 emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); | |
4133 } | |
3372 auto LPCRegion = | 4134 auto LPCRegion = |
3373 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); | 4135 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
3374 emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, | 4136 emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen, |
3375 emitEmptyBoundParameters); | 4137 emitEmptyBoundParameters); |
3376 } | 4138 } |
3377 // Check for outer lastprivate conditional update. | 4139 // Check for outer lastprivate conditional update. |
3378 checkForLastprivateConditionalUpdate(*this, S); | 4140 checkForLastprivateConditionalUpdate(*this, S); |
3379 } | 4141 } |
3428 emitEmptyBoundParameters); | 4190 emitEmptyBoundParameters); |
3429 } | 4191 } |
3430 // Check for outer lastprivate conditional update. | 4192 // Check for outer lastprivate conditional update. |
3431 checkForLastprivateConditionalUpdate(*this, S); | 4193 checkForLastprivateConditionalUpdate(*this, S); |
3432 } | 4194 } |
4195 | |
4196 namespace { | |
4197 /// Get the list of variables declared in the context of the untied tasks. | |
4198 class CheckVarsEscapingUntiedTaskDeclContext final | |
4199 : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> { | |
4200 llvm::SmallVector<const VarDecl *, 4> PrivateDecls; | |
4201 | |
4202 public: | |
4203 explicit CheckVarsEscapingUntiedTaskDeclContext() = default; | |
4204 virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default; | |
4205 void VisitDeclStmt(const DeclStmt *S) { | |
4206 if (!S) | |
4207 return; | |
4208 // Need to privatize only local vars, static locals can be processed as is. | |
4209 for (const Decl *D : S->decls()) { | |
4210 if (const auto *VD = dyn_cast_or_null<VarDecl>(D)) | |
4211 if (VD->hasLocalStorage()) | |
4212 PrivateDecls.push_back(VD); | |
4213 } | |
4214 } | |
4215 void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; } | |
4216 void VisitCapturedStmt(const CapturedStmt *) { return; } | |
4217 void VisitLambdaExpr(const LambdaExpr *) { return; } | |
4218 void VisitBlockExpr(const BlockExpr *) { return; } | |
4219 void VisitStmt(const Stmt *S) { | |
4220 if (!S) | |
4221 return; | |
4222 for (const Stmt *Child : S->children()) | |
4223 if (Child) | |
4224 Visit(Child); | |
4225 } | |
4226 | |
4227 /// Swaps list of vars with the provided one. | |
4228 ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; } | |
4229 }; | |
4230 } // anonymous namespace | |
3433 | 4231 |
3434 void CodeGenFunction::EmitOMPTaskBasedDirective( | 4232 void CodeGenFunction::EmitOMPTaskBasedDirective( |
3435 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, | 4233 const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, |
3436 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, | 4234 const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, |
3437 OMPTaskDataTy &Data) { | 4235 OMPTaskDataTy &Data) { |
3493 ++IRef; | 4291 ++IRef; |
3494 ++IElemInitRef; | 4292 ++IElemInitRef; |
3495 } | 4293 } |
3496 } | 4294 } |
3497 // Get list of lastprivate variables (for taskloops). | 4295 // Get list of lastprivate variables (for taskloops). |
3498 llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; | 4296 llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; |
3499 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { | 4297 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { |
3500 auto IRef = C->varlist_begin(); | 4298 auto IRef = C->varlist_begin(); |
3501 auto ID = C->destination_exprs().begin(); | 4299 auto ID = C->destination_exprs().begin(); |
3502 for (const Expr *IInit : C->private_copies()) { | 4300 for (const Expr *IInit : C->private_copies()) { |
3503 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); | 4301 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
3504 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { | 4302 if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
3505 Data.LastprivateVars.push_back(*IRef); | 4303 Data.LastprivateVars.push_back(*IRef); |
3506 Data.LastprivateCopies.push_back(IInit); | 4304 Data.LastprivateCopies.push_back(IInit); |
3507 } | 4305 } |
3508 LastprivateDstsOrigs.insert( | 4306 LastprivateDstsOrigs.insert( |
3509 {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), | 4307 std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), |
3510 cast<DeclRefExpr>(*IRef)}); | 4308 cast<DeclRefExpr>(*IRef))); |
3511 ++IRef; | 4309 ++IRef; |
3512 ++ID; | 4310 ++ID; |
3513 } | 4311 } |
3514 } | 4312 } |
3515 SmallVector<const Expr *, 4> LHSs; | 4313 SmallVector<const Expr *, 4> LHSs; |
3529 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { | 4327 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { |
3530 OMPTaskDataTy::DependData &DD = | 4328 OMPTaskDataTy::DependData &DD = |
3531 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); | 4329 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); |
3532 DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); | 4330 DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); |
3533 } | 4331 } |
4332 // Get list of local vars for untied tasks. | |
4333 if (!Data.Tied) { | |
4334 CheckVarsEscapingUntiedTaskDeclContext Checker; | |
4335 Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt()); | |
4336 Data.PrivateLocals.append(Checker.getPrivateDecls().begin(), | |
4337 Checker.getPrivateDecls().end()); | |
4338 } | |
3534 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, | 4339 auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, |
3535 CapturedRegion](CodeGenFunction &CGF, | 4340 CapturedRegion](CodeGenFunction &CGF, |
3536 PrePostActionTy &Action) { | 4341 PrePostActionTy &Action) { |
4342 llvm::MapVector<CanonicalDeclPtr<const VarDecl>, | |
4343 std::pair<Address, Address>> | |
4344 UntiedLocalVars; | |
3537 // Set proper addresses for generated private copies. | 4345 // Set proper addresses for generated private copies. |
3538 OMPPrivateScope Scope(CGF); | 4346 OMPPrivateScope Scope(CGF); |
3539 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; | 4347 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; |
3540 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || | 4348 if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || |
3541 !Data.LastprivateVars.empty()) { | 4349 !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) { |
3542 llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( | |
3543 CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); | |
3544 enum { PrivatesParam = 2, CopyFnParam = 3 }; | 4350 enum { PrivatesParam = 2, CopyFnParam = 3 }; |
3545 llvm::Value *CopyFn = CGF.Builder.CreateLoad( | 4351 llvm::Value *CopyFn = CGF.Builder.CreateLoad( |
3546 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); | 4352 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); |
3547 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( | 4353 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( |
3548 CS->getCapturedDecl()->getParam(PrivatesParam))); | 4354 CS->getCapturedDecl()->getParam(PrivatesParam))); |
3549 // Map privates. | 4355 // Map privates. |
3550 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; | 4356 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; |
3551 llvm::SmallVector<llvm::Value *, 16> CallArgs; | 4357 llvm::SmallVector<llvm::Value *, 16> CallArgs; |
4358 llvm::SmallVector<llvm::Type *, 4> ParamTypes; | |
3552 CallArgs.push_back(PrivatesPtr); | 4359 CallArgs.push_back(PrivatesPtr); |
4360 ParamTypes.push_back(PrivatesPtr->getType()); | |
3553 for (const Expr *E : Data.PrivateVars) { | 4361 for (const Expr *E : Data.PrivateVars) { |
3554 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); | 4362 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3555 Address PrivatePtr = CGF.CreateMemTemp( | 4363 Address PrivatePtr = CGF.CreateMemTemp( |
3556 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); | 4364 CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); |
3557 PrivatePtrs.emplace_back(VD, PrivatePtr); | 4365 PrivatePtrs.emplace_back(VD, PrivatePtr); |
3558 CallArgs.push_back(PrivatePtr.getPointer()); | 4366 CallArgs.push_back(PrivatePtr.getPointer()); |
4367 ParamTypes.push_back(PrivatePtr.getType()); | |
3559 } | 4368 } |
3560 for (const Expr *E : Data.FirstprivateVars) { | 4369 for (const Expr *E : Data.FirstprivateVars) { |
3561 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); | 4370 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3562 Address PrivatePtr = | 4371 Address PrivatePtr = |
3563 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), | 4372 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), |
3564 ".firstpriv.ptr.addr"); | 4373 ".firstpriv.ptr.addr"); |
3565 PrivatePtrs.emplace_back(VD, PrivatePtr); | 4374 PrivatePtrs.emplace_back(VD, PrivatePtr); |
3566 FirstprivatePtrs.emplace_back(VD, PrivatePtr); | 4375 FirstprivatePtrs.emplace_back(VD, PrivatePtr); |
3567 CallArgs.push_back(PrivatePtr.getPointer()); | 4376 CallArgs.push_back(PrivatePtr.getPointer()); |
4377 ParamTypes.push_back(PrivatePtr.getType()); | |
3568 } | 4378 } |
3569 for (const Expr *E : Data.LastprivateVars) { | 4379 for (const Expr *E : Data.LastprivateVars) { |
3570 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); | 4380 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3571 Address PrivatePtr = | 4381 Address PrivatePtr = |
3572 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), | 4382 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), |
3573 ".lastpriv.ptr.addr"); | 4383 ".lastpriv.ptr.addr"); |
3574 PrivatePtrs.emplace_back(VD, PrivatePtr); | 4384 PrivatePtrs.emplace_back(VD, PrivatePtr); |
3575 CallArgs.push_back(PrivatePtr.getPointer()); | 4385 CallArgs.push_back(PrivatePtr.getPointer()); |
4386 ParamTypes.push_back(PrivatePtr.getType()); | |
3576 } | 4387 } |
4388 for (const VarDecl *VD : Data.PrivateLocals) { | |
4389 QualType Ty = VD->getType().getNonReferenceType(); | |
4390 if (VD->getType()->isLValueReferenceType()) | |
4391 Ty = CGF.getContext().getPointerType(Ty); | |
4392 if (isAllocatableDecl(VD)) | |
4393 Ty = CGF.getContext().getPointerType(Ty); | |
4394 Address PrivatePtr = CGF.CreateMemTemp( | |
4395 CGF.getContext().getPointerType(Ty), ".local.ptr.addr"); | |
4396 auto Result = UntiedLocalVars.insert( | |
4397 std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid()))); | |
4398 // If key exists update in place. | |
4399 if (Result.second == false) | |
4400 *Result.first = std::make_pair( | |
4401 VD, std::make_pair(PrivatePtr, Address::invalid())); | |
4402 CallArgs.push_back(PrivatePtr.getPointer()); | |
4403 ParamTypes.push_back(PrivatePtr.getType()); | |
4404 } | |
4405 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(), | |
4406 ParamTypes, /*isVarArg=*/false); | |
4407 CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( | |
4408 CopyFn, CopyFnTy->getPointerTo()); | |
3577 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( | 4409 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( |
3578 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); | 4410 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); |
3579 for (const auto &Pair : LastprivateDstsOrigs) { | 4411 for (const auto &Pair : LastprivateDstsOrigs) { |
3580 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); | 4412 const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl()); |
3581 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD), | 4413 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD), |
3589 } | 4421 } |
3590 for (const auto &Pair : PrivatePtrs) { | 4422 for (const auto &Pair : PrivatePtrs) { |
3591 Address Replacement(CGF.Builder.CreateLoad(Pair.second), | 4423 Address Replacement(CGF.Builder.CreateLoad(Pair.second), |
3592 CGF.getContext().getDeclAlign(Pair.first)); | 4424 CGF.getContext().getDeclAlign(Pair.first)); |
3593 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); | 4425 Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); |
4426 } | |
4427 // Adjust mapping for internal locals by mapping actual memory instead of | |
4428 // a pointer to this memory. | |
4429 for (auto &Pair : UntiedLocalVars) { | |
4430 if (isAllocatableDecl(Pair.first)) { | |
4431 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first); | |
4432 Address Replacement(Ptr, CGF.getPointerAlign()); | |
4433 Pair.second.first = Replacement; | |
4434 Ptr = CGF.Builder.CreateLoad(Replacement); | |
4435 Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first)); | |
4436 Pair.second.second = Replacement; | |
4437 } else { | |
4438 llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first); | |
4439 Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first)); | |
4440 Pair.second.first = Replacement; | |
4441 } | |
3594 } | 4442 } |
3595 } | 4443 } |
3596 if (Data.Reductions) { | 4444 if (Data.Reductions) { |
3597 OMPPrivateScope FirstprivateScope(CGF); | 4445 OMPPrivateScope FirstprivateScope(CGF); |
3598 for (const auto &Pair : FirstprivatePtrs) { | 4446 for (const auto &Pair : FirstprivatePtrs) { |
3684 [Replacement]() { return Replacement; }); | 4532 [Replacement]() { return Replacement; }); |
3685 } | 4533 } |
3686 } | 4534 } |
3687 (void)InRedScope.Privatize(); | 4535 (void)InRedScope.Privatize(); |
3688 | 4536 |
4537 CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF, | |
4538 UntiedLocalVars); | |
3689 Action.Enter(CGF); | 4539 Action.Enter(CGF); |
3690 BodyGen(CGF); | 4540 BodyGen(CGF); |
3691 }; | 4541 }; |
3692 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( | 4542 llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( |
3693 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, | 4543 S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, |
3719 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, | 4569 C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, |
3720 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); | 4570 /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); |
3721 PrivateVD->setInitStyle(VarDecl::CInit); | 4571 PrivateVD->setInitStyle(VarDecl::CInit); |
3722 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, | 4572 PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, |
3723 InitRef, /*BasePath=*/nullptr, | 4573 InitRef, /*BasePath=*/nullptr, |
3724 VK_RValue)); | 4574 VK_RValue, FPOptionsOverride())); |
3725 Data.FirstprivateVars.emplace_back(OrigRef); | 4575 Data.FirstprivateVars.emplace_back(OrigRef); |
3726 Data.FirstprivateCopies.emplace_back(PrivateRef); | 4576 Data.FirstprivateCopies.emplace_back(PrivateRef); |
3727 Data.FirstprivateInits.emplace_back(InitRef); | 4577 Data.FirstprivateInits.emplace_back(InitRef); |
3728 return OrigVD; | 4578 return OrigVD; |
3729 } | 4579 } |
3755 } | 4605 } |
3756 OMPPrivateScope TargetScope(*this); | 4606 OMPPrivateScope TargetScope(*this); |
3757 VarDecl *BPVD = nullptr; | 4607 VarDecl *BPVD = nullptr; |
3758 VarDecl *PVD = nullptr; | 4608 VarDecl *PVD = nullptr; |
3759 VarDecl *SVD = nullptr; | 4609 VarDecl *SVD = nullptr; |
4610 VarDecl *MVD = nullptr; | |
3760 if (InputInfo.NumberOfTargetItems > 0) { | 4611 if (InputInfo.NumberOfTargetItems > 0) { |
3761 auto *CD = CapturedDecl::Create( | 4612 auto *CD = CapturedDecl::Create( |
3762 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); | 4613 getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); |
3763 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); | 4614 llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); |
3764 QualType BaseAndPointersType = getContext().getConstantArrayType( | 4615 QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType( |
3765 getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal, | 4616 getContext().VoidPtrTy, ArrSize, nullptr, ArrayType::Normal, |
3766 /*IndexTypeQuals=*/0); | 4617 /*IndexTypeQuals=*/0); |
3767 BPVD = createImplicitFirstprivateForType( | 4618 BPVD = createImplicitFirstprivateForType( |
3768 getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); | 4619 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); |
3769 PVD = createImplicitFirstprivateForType( | 4620 PVD = createImplicitFirstprivateForType( |
3770 getContext(), Data, BaseAndPointersType, CD, S.getBeginLoc()); | 4621 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); |
3771 QualType SizesType = getContext().getConstantArrayType( | 4622 QualType SizesType = getContext().getConstantArrayType( |
3772 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), | 4623 getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), |
3773 ArrSize, nullptr, ArrayType::Normal, | 4624 ArrSize, nullptr, ArrayType::Normal, |
3774 /*IndexTypeQuals=*/0); | 4625 /*IndexTypeQuals=*/0); |
3775 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, | 4626 SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, |
3778 BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); | 4629 BPVD, [&InputInfo]() { return InputInfo.BasePointersArray; }); |
3779 TargetScope.addPrivate(PVD, | 4630 TargetScope.addPrivate(PVD, |
3780 [&InputInfo]() { return InputInfo.PointersArray; }); | 4631 [&InputInfo]() { return InputInfo.PointersArray; }); |
3781 TargetScope.addPrivate(SVD, | 4632 TargetScope.addPrivate(SVD, |
3782 [&InputInfo]() { return InputInfo.SizesArray; }); | 4633 [&InputInfo]() { return InputInfo.SizesArray; }); |
4634 // If there is no user-defined mapper, the mapper array will be nullptr. In | |
4635 // this case, we don't need to privatize it. | |
4636 if (!dyn_cast_or_null<llvm::ConstantPointerNull>( | |
4637 InputInfo.MappersArray.getPointer())) { | |
4638 MVD = createImplicitFirstprivateForType( | |
4639 getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); | |
4640 TargetScope.addPrivate(MVD, | |
4641 [&InputInfo]() { return InputInfo.MappersArray; }); | |
4642 } | |
3783 } | 4643 } |
3784 (void)TargetScope.Privatize(); | 4644 (void)TargetScope.Privatize(); |
3785 // Build list of dependences. | 4645 // Build list of dependences. |
3786 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { | 4646 for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { |
3787 OMPTaskDataTy::DependData &DD = | 4647 OMPTaskDataTy::DependData &DD = |
3788 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); | 4648 Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); |
3789 DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); | 4649 DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); |
3790 } | 4650 } |
3791 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, | 4651 auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, |
3792 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { | 4652 &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3793 // Set proper addresses for generated private copies. | 4653 // Set proper addresses for generated private copies. |
3794 OMPPrivateScope Scope(CGF); | 4654 OMPPrivateScope Scope(CGF); |
3795 if (!Data.FirstprivateVars.empty()) { | 4655 if (!Data.FirstprivateVars.empty()) { |
3796 llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( | |
3797 CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); | |
3798 enum { PrivatesParam = 2, CopyFnParam = 3 }; | 4656 enum { PrivatesParam = 2, CopyFnParam = 3 }; |
3799 llvm::Value *CopyFn = CGF.Builder.CreateLoad( | 4657 llvm::Value *CopyFn = CGF.Builder.CreateLoad( |
3800 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); | 4658 CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); |
3801 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( | 4659 llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar( |
3802 CS->getCapturedDecl()->getParam(PrivatesParam))); | 4660 CS->getCapturedDecl()->getParam(PrivatesParam))); |
3803 // Map privates. | 4661 // Map privates. |
3804 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; | 4662 llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; |
3805 llvm::SmallVector<llvm::Value *, 16> CallArgs; | 4663 llvm::SmallVector<llvm::Value *, 16> CallArgs; |
4664 llvm::SmallVector<llvm::Type *, 4> ParamTypes; | |
3806 CallArgs.push_back(PrivatesPtr); | 4665 CallArgs.push_back(PrivatesPtr); |
4666 ParamTypes.push_back(PrivatesPtr->getType()); | |
3807 for (const Expr *E : Data.FirstprivateVars) { | 4667 for (const Expr *E : Data.FirstprivateVars) { |
3808 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); | 4668 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3809 Address PrivatePtr = | 4669 Address PrivatePtr = |
3810 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), | 4670 CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), |
3811 ".firstpriv.ptr.addr"); | 4671 ".firstpriv.ptr.addr"); |
3812 PrivatePtrs.emplace_back(VD, PrivatePtr); | 4672 PrivatePtrs.emplace_back(VD, PrivatePtr); |
3813 CallArgs.push_back(PrivatePtr.getPointer()); | 4673 CallArgs.push_back(PrivatePtr.getPointer()); |
4674 ParamTypes.push_back(PrivatePtr.getType()); | |
3814 } | 4675 } |
4676 auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(), | |
4677 ParamTypes, /*isVarArg=*/false); | |
4678 CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( | |
4679 CopyFn, CopyFnTy->getPointerTo()); | |
3815 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( | 4680 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( |
3816 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); | 4681 CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); |
3817 for (const auto &Pair : PrivatePtrs) { | 4682 for (const auto &Pair : PrivatePtrs) { |
3818 Address Replacement(CGF.Builder.CreateLoad(Pair.second), | 4683 Address Replacement(CGF.Builder.CreateLoad(Pair.second), |
3819 CGF.getContext().getDeclAlign(Pair.first)); | 4684 CGF.getContext().getDeclAlign(Pair.first)); |
3827 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0); | 4692 CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0); |
3828 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( | 4693 InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( |
3829 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0); | 4694 CGF.GetAddrOfLocalVar(PVD), /*Index=*/0); |
3830 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( | 4695 InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( |
3831 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0); | 4696 CGF.GetAddrOfLocalVar(SVD), /*Index=*/0); |
4697 // If MVD is nullptr, the mapper array is not privatized | |
4698 if (MVD) | |
4699 InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP( | |
4700 CGF.GetAddrOfLocalVar(MVD), /*Index=*/0); | |
3832 } | 4701 } |
3833 | 4702 |
3834 Action.Enter(CGF); | 4703 Action.Enter(CGF); |
3835 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); | 4704 OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); |
3836 BodyGen(CGF); | 4705 BodyGen(CGF); |
3957 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) { | 4826 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) { |
3958 CGM.getOpenMPRuntime().emitUpdateClause( | 4827 CGM.getOpenMPRuntime().emitUpdateClause( |
3959 *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc()); | 4828 *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc()); |
3960 return; | 4829 return; |
3961 } | 4830 } |
4831 } | |
4832 | |
/// Emits code for an 'omp scan' directive \p S.
///
/// Nothing is emitted when OMPParentLoopDirectiveForScan is not set. Otherwise
/// the expressions of every reduction clause on the parent directive that
/// carries the 'inscan' modifier are collected, and one of two lowerings is
/// used:
///  - If the parent directive is 'simd' (or OpenMPSimd mode is enabled and the
///    parent is a simd directive), the reduction and the copies are emitted
///    inline in the loop body, as sketched in the comment inside that branch,
///    and the function returns.
///  - Otherwise the behavior depends on OMPFirstScanLoop: when it is set the
///    shared item is copied into the copy-array element selected by the
///    iteration variable; when it is not set the copy-array element is copied
///    back into the shared item (for an exclusive scan, the element at index
///    minus one, and nothing at all when the index is zero).
///
/// \param S The scan directive; it is queried for an 'inclusive' clause and
/// for its source range.
4833 void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) { | |
4834 if (!OMPParentLoopDirectiveForScan) | |
4835 return; | |
4836 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan; | |
4837 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>(); | |
  // Parallel per-item lists; all of them are appended to together in the loop
  // below, so index I refers to the same reduction item in each list.
4838 SmallVector<const Expr *, 4> Shareds; | |
4839 SmallVector<const Expr *, 4> Privates; | |
4840 SmallVector<const Expr *, 4> LHSs; | |
4841 SmallVector<const Expr *, 4> RHSs; | |
4842 SmallVector<const Expr *, 4> ReductionOps; | |
4843 SmallVector<const Expr *, 4> CopyOps; | |
4844 SmallVector<const Expr *, 4> CopyArrayTemps; | |
4845 SmallVector<const Expr *, 4> CopyArrayElems; | |
4846 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) { | |
  // Reduction clauses without the 'inscan' modifier are ignored here.
4847 if (C->getModifier() != OMPC_REDUCTION_inscan) | |
4848 continue; | |
4849 Shareds.append(C->varlist_begin(), C->varlist_end()); | |
4850 Privates.append(C->privates().begin(), C->privates().end()); | |
4851 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); | |
4852 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); | |
4853 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); | |
4854 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); | |
4855 CopyArrayTemps.append(C->copy_array_temps().begin(), | |
4856 C->copy_array_temps().end()); | |
4857 CopyArrayElems.append(C->copy_array_elems().begin(), | |
4858 C->copy_array_elems().end()); | |
4859 } | |
4860 if (ParentDir.getDirectiveKind() == OMPD_simd || | |
4861 (getLangOpts().OpenMPSimd && | |
4862 isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) { | |
4863 // For simd directive and simd-based directives in simd only mode, use the | |
4864 // following codegen: | |
4865 // int x = 0; | |
4866 // #pragma omp simd reduction(inscan, +: x) | |
4867 // for (..) { | |
4868 // <first part> | |
4869 // #pragma omp scan inclusive(x) | |
4870 // <second part> | |
4871 // } | |
4872 // is transformed to: | |
4873 // int x = 0; | |
4874 // for (..) { | |
4875 // int x_priv = 0; | |
4876 // <first part> | |
4877 // x = x_priv + x; | |
4878 // x_priv = x; | |
4879 // <second part> | |
4880 // } | |
4881 // and | |
4882 // int x = 0; | |
4883 // #pragma omp simd reduction(inscan, +: x) | |
4884 // for (..) { | |
4885 // <first part> | |
4886 // #pragma omp scan exclusive(x) | |
4887 // <second part> | |
4888 // } | |
4889 // to | |
4890 // int x = 0; | |
4891 // for (..) { | |
4892 // int x_priv = 0; | |
4893 // <second part> | |
4894 // int temp = x; | |
4895 // x = x_priv + x; | |
4896 // x_priv = temp; | |
4897 // <first part> | |
4898 // } | |
4899 llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce"); | |
  // Leave the code emitted so far: to the reduce block for an inclusive scan,
  // to the loop's continue block for an exclusive scan.
4900 EmitBranch(IsInclusive | |
4901 ? OMPScanReduce | |
4902 : BreakContinueStack.back().ContinueBlock.getBlock()); | |
4903 EmitBlock(OMPScanDispatch); | |
4904 { | |
4905 // New scope for correct construction/destruction of temp variables for | |
4906 // exclusive scan. | |
4907 LexicalScope Scope(*this, S.getSourceRange()); | |
4908 EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock); | |
4909 EmitBlock(OMPScanReduce); | |
4910 if (!IsInclusive) { | |
4911 // Create temp var and copy LHS value to this temp value. | |
4912 // TMP = LHS; | |
4913 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { | |
4914 const Expr *PrivateExpr = Privates[I]; | |
4915 const Expr *TempExpr = CopyArrayTemps[I]; | |
4916 EmitAutoVarDecl( | |
4917 *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl())); | |
4918 LValue DestLVal = EmitLValue(TempExpr); | |
4919 LValue SrcLVal = EmitLValue(LHSs[I]); | |
4920 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), | |
4921 SrcLVal.getAddress(*this), | |
4922 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), | |
4923 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), | |
4924 CopyOps[I]); | |
4925 } | |
4926 } | |
4927 CGM.getOpenMPRuntime().emitReduction( | |
4928 *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, | |
4929 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd}); | |
  // After the reduction, copy into RHSs[I]: from LHSs[I] for an inclusive
  // scan, from the temporary created above for an exclusive scan.
4930 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { | |
4931 const Expr *PrivateExpr = Privates[I]; | |
4932 LValue DestLVal; | |
4933 LValue SrcLVal; | |
4934 if (IsInclusive) { | |
4935 DestLVal = EmitLValue(RHSs[I]); | |
4936 SrcLVal = EmitLValue(LHSs[I]); | |
4937 } else { | |
4938 const Expr *TempExpr = CopyArrayTemps[I]; | |
4939 DestLVal = EmitLValue(RHSs[I]); | |
4940 SrcLVal = EmitLValue(TempExpr); | |
4941 } | |
4942 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), | |
4943 SrcLVal.getAddress(*this), | |
4944 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), | |
4945 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), | |
4946 CopyOps[I]); | |
4947 } | |
4948 } | |
4949 EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock); | |
4950 OMPScanExitBlock = IsInclusive | |
4951 ? BreakContinueStack.back().ContinueBlock.getBlock() | |
4952 : OMPScanReduce; | |
4953 EmitBlock(OMPAfterScanBlock); | |
4954 return; | |
4955 } | |
  // Remaining code handles parents that did not take the simd path above.
4956 if (!IsInclusive) { | |
4957 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); | |
4958 EmitBlock(OMPScanExitBlock); | |
4959 } | |
4960 if (OMPFirstScanLoop) { | |
4961 // Emit buffer[i] = red; at the end of the input phase. | |
4962 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) | |
4963 .getIterationVariable() | |
4964 ->IgnoreParenImpCasts(); | |
4965 LValue IdxLVal = EmitLValue(IVExpr); | |
4966 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); | |
4967 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); | |
4968 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { | |
4969 const Expr *PrivateExpr = Privates[I]; | |
4970 const Expr *OrigExpr = Shareds[I]; | |
4971 const Expr *CopyArrayElem = CopyArrayElems[I]; | |
  // Bind the opaque index of the subscript expression to the loaded
  // iteration-variable value for the lifetime of IdxMapping.
4972 OpaqueValueMapping IdxMapping( | |
4973 *this, | |
4974 cast<OpaqueValueExpr>( | |
4975 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), | |
4976 RValue::get(IdxVal)); | |
4977 LValue DestLVal = EmitLValue(CopyArrayElem); | |
4978 LValue SrcLVal = EmitLValue(OrigExpr); | |
4979 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), | |
4980 SrcLVal.getAddress(*this), | |
4981 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), | |
4982 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), | |
4983 CopyOps[I]); | |
4984 } | |
4985 } | |
4986 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); | |
4987 if (IsInclusive) { | |
4988 EmitBlock(OMPScanExitBlock); | |
4989 EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); | |
4990 } | |
4991 EmitBlock(OMPScanDispatch); | |
4992 if (!OMPFirstScanLoop) { | |
4993 // Emit red = buffer[i]; at the entrance to the scan phase. | |
4994 const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) | |
4995 .getIterationVariable() | |
4996 ->IgnoreParenImpCasts(); | |
4997 LValue IdxLVal = EmitLValue(IVExpr); | |
4998 llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); | |
4999 IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); | |
5000 llvm::BasicBlock *ExclusiveExitBB = nullptr; | |
5001 if (!IsInclusive) { | |
  // Exclusive scan: when the index is zero, jump straight to the exit block
  // and skip the copies; otherwise continue with the decremented index.
5002 llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec"); | |
5003 ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit"); | |
5004 llvm::Value *Cmp = Builder.CreateIsNull(IdxVal); | |
5005 Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB); | |
5006 EmitBlock(ContBB); | |
5007 // Use idx - 1 iteration for exclusive scan. | |
5008 IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1)); | |
5009 } | |
5010 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { | |
5011 const Expr *PrivateExpr = Privates[I]; | |
5012 const Expr *OrigExpr = Shareds[I]; | |
5013 const Expr *CopyArrayElem = CopyArrayElems[I]; | |
5014 OpaqueValueMapping IdxMapping( | |
5015 *this, | |
5016 cast<OpaqueValueExpr>( | |
5017 cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), | |
5018 RValue::get(IdxVal)); | |
5019 LValue SrcLVal = EmitLValue(CopyArrayElem); | |
5020 LValue DestLVal = EmitLValue(OrigExpr); | |
5021 EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), | |
5022 SrcLVal.getAddress(*this), | |
5023 cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), | |
5024 cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), | |
5025 CopyOps[I]); | |
5026 } | |
5027 if (!IsInclusive) { | |
5028 EmitBlock(ExclusiveExitBB); | |
5029 } | |
5030 } | |
  // Branch to the before-scan block when OMPFirstScanLoop and IsInclusive
  // have the same value, and to the after-scan block otherwise.
5031 EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock | |
5032 : OMPAfterScanBlock); | |
5033 EmitBlock(OMPAfterScanBlock); | |
3962 } | 5034 } |
3963 | 5035 |
3964 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, | 5036 void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, |
3965 const CodeGenLoopTy &CodeGenLoop, | 5037 const CodeGenLoopTy &CodeGenLoop, |
3966 Expr *IncExpr) { | 5038 Expr *IncExpr) { |
4210 return Fn; | 5282 return Fn; |
4211 } | 5283 } |
4212 | 5284 |
4213 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { | 5285 void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { |
4214 if (S.hasClausesOfKind<OMPDependClause>()) { | 5286 if (S.hasClausesOfKind<OMPDependClause>()) { |
4215 assert(!S.getAssociatedStmt() && | 5287 assert(!S.hasAssociatedStmt() && |
4216 "No associated statement must be in ordered depend construct."); | 5288 "No associated statement must be in ordered depend construct."); |
4217 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) | 5289 for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) |
4218 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); | 5290 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); |
4219 return; | 5291 return; |
4220 } | 5292 } |
4687 case OMPC_reduction: | 5759 case OMPC_reduction: |
4688 case OMPC_task_reduction: | 5760 case OMPC_task_reduction: |
4689 case OMPC_in_reduction: | 5761 case OMPC_in_reduction: |
4690 case OMPC_safelen: | 5762 case OMPC_safelen: |
4691 case OMPC_simdlen: | 5763 case OMPC_simdlen: |
5764 case OMPC_sizes: | |
4692 case OMPC_allocator: | 5765 case OMPC_allocator: |
4693 case OMPC_allocate: | 5766 case OMPC_allocate: |
4694 case OMPC_collapse: | 5767 case OMPC_collapse: |
4695 case OMPC_default: | 5768 case OMPC_default: |
4696 case OMPC_seq_cst: | 5769 case OMPC_seq_cst: |
4728 case OMPC_defaultmap: | 5801 case OMPC_defaultmap: |
4729 case OMPC_uniform: | 5802 case OMPC_uniform: |
4730 case OMPC_to: | 5803 case OMPC_to: |
4731 case OMPC_from: | 5804 case OMPC_from: |
4732 case OMPC_use_device_ptr: | 5805 case OMPC_use_device_ptr: |
5806 case OMPC_use_device_addr: | |
4733 case OMPC_is_device_ptr: | 5807 case OMPC_is_device_ptr: |
4734 case OMPC_unified_address: | 5808 case OMPC_unified_address: |
4735 case OMPC_unified_shared_memory: | 5809 case OMPC_unified_shared_memory: |
4736 case OMPC_reverse_offload: | 5810 case OMPC_reverse_offload: |
4737 case OMPC_dynamic_allocators: | 5811 case OMPC_dynamic_allocators: |
4744 case OMPC_detach: | 5818 case OMPC_detach: |
4745 case OMPC_inclusive: | 5819 case OMPC_inclusive: |
4746 case OMPC_exclusive: | 5820 case OMPC_exclusive: |
4747 case OMPC_uses_allocators: | 5821 case OMPC_uses_allocators: |
4748 case OMPC_affinity: | 5822 case OMPC_affinity: |
5823 case OMPC_init: | |
5824 case OMPC_inbranch: | |
5825 case OMPC_notinbranch: | |
5826 case OMPC_link: | |
5827 case OMPC_use: | |
5828 case OMPC_novariants: | |
5829 case OMPC_nocontext: | |
5830 case OMPC_filter: | |
4749 llvm_unreachable("Clause is not allowed in 'omp atomic'."); | 5831 llvm_unreachable("Clause is not allowed in 'omp atomic'."); |
4750 } | 5832 } |
4751 } | 5833 } |
4752 | 5834 |
4753 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { | 5835 void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { |
4775 // if it is first). | 5857 // if it is first). |
4776 if (C->getClauseKind() != OMPC_seq_cst && | 5858 if (C->getClauseKind() != OMPC_seq_cst && |
4777 C->getClauseKind() != OMPC_acq_rel && | 5859 C->getClauseKind() != OMPC_acq_rel && |
4778 C->getClauseKind() != OMPC_acquire && | 5860 C->getClauseKind() != OMPC_acquire && |
4779 C->getClauseKind() != OMPC_release && | 5861 C->getClauseKind() != OMPC_release && |
4780 C->getClauseKind() != OMPC_relaxed) { | 5862 C->getClauseKind() != OMPC_relaxed && C->getClauseKind() != OMPC_hint) { |
4781 Kind = C->getClauseKind(); | 5863 Kind = C->getClauseKind(); |
4782 break; | 5864 break; |
4783 } | 5865 } |
4784 } | 5866 } |
4785 if (!MemOrderingSpecified) { | 5867 if (!MemOrderingSpecified) { |
4798 AO = llvm::AtomicOrdering::Acquire; | 5880 AO = llvm::AtomicOrdering::Acquire; |
4799 } | 5881 } |
4800 } | 5882 } |
4801 } | 5883 } |
4802 | 5884 |
4803 const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers(); | 5885 LexicalScope Scope(*this, S.getSourceRange()); |
4804 if (const auto *FE = dyn_cast<FullExpr>(CS)) | 5886 EmitStopPoint(S.getAssociatedStmt()); |
4805 enterFullExpression(FE); | 5887 emitOMPAtomicExpr(*this, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(), |
4806 // Processing for statements under 'atomic capture'. | 5888 S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(), |
4807 if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { | 5889 S.getBeginLoc()); |
4808 for (const Stmt *C : Compound->body()) { | |
4809 if (const auto *FE = dyn_cast<FullExpr>(C)) | |
4810 enterFullExpression(FE); | |
4811 } | |
4812 } | |
4813 | |
4814 auto &&CodeGen = [&S, Kind, AO, CS](CodeGenFunction &CGF, | |
4815 PrePostActionTy &) { | |
4816 CGF.EmitStopPoint(CS); | |
4817 emitOMPAtomicExpr(CGF, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(), | |
4818 S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(), | |
4819 S.getBeginLoc()); | |
4820 }; | |
4821 OMPLexicalScope Scope(*this, S, OMPD_unknown); | |
4822 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); | |
4823 } | 5890 } |
4824 | 5891 |
4825 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, | 5892 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, |
4826 const OMPExecutableDirective &S, | 5893 const OMPExecutableDirective &S, |
4827 const RegionCodeGenTy &CodeGen) { | 5894 const RegionCodeGenTy &CodeGen) { |
4913 (void)PrivateScope.Privatize(); | 5980 (void)PrivateScope.Privatize(); |
4914 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) | 5981 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
4915 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); | 5982 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); |
4916 | 5983 |
4917 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); | 5984 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); |
5985 CGF.EnsureInsertPoint(); | |
4918 } | 5986 } |
4919 | 5987 |
4920 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, | 5988 void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, |
4921 StringRef ParentName, | 5989 StringRef ParentName, |
4922 const OMPTargetDirective &S) { | 5990 const OMPTargetDirective &S) { |
5325 C->getNameModifier() == OMPD_cancel) { | 6393 C->getNameModifier() == OMPD_cancel) { |
5326 IfCond = C->getCondition(); | 6394 IfCond = C->getCondition(); |
5327 break; | 6395 break; |
5328 } | 6396 } |
5329 } | 6397 } |
5330 if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { | 6398 if (CGM.getLangOpts().OpenMPIRBuilder) { |
6399 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); | |
5331 // TODO: This check is necessary as we only generate `omp parallel` through | 6400 // TODO: This check is necessary as we only generate `omp parallel` through |
5332 // the OpenMPIRBuilder for now. | 6401 // the OpenMPIRBuilder for now. |
5333 if (S.getCancelRegion() == OMPD_parallel) { | 6402 if (S.getCancelRegion() == OMPD_parallel || |
6403 S.getCancelRegion() == OMPD_sections || | |
6404 S.getCancelRegion() == OMPD_section) { | |
5334 llvm::Value *IfCondition = nullptr; | 6405 llvm::Value *IfCondition = nullptr; |
5335 if (IfCond) | 6406 if (IfCond) |
5336 IfCondition = EmitScalarExpr(IfCond, | 6407 IfCondition = EmitScalarExpr(IfCond, |
5337 /*IgnoreResultAssign=*/true); | 6408 /*IgnoreResultAssign=*/true); |
5338 return Builder.restoreIP( | 6409 return Builder.restoreIP( |
5339 OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion())); | 6410 OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion())); |
5340 } | 6411 } |
5341 } | 6412 } |
5342 | 6413 |
5343 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, | 6414 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, |
5344 S.getCancelRegion()); | 6415 S.getCancelRegion()); |
5358 Kind == OMPD_target_teams_distribute_parallel_for); | 6429 Kind == OMPD_target_teams_distribute_parallel_for); |
5359 return OMPCancelStack.getExitBlock(); | 6430 return OMPCancelStack.getExitBlock(); |
5360 } | 6431 } |
5361 | 6432 |
5362 void CodeGenFunction::EmitOMPUseDevicePtrClause( | 6433 void CodeGenFunction::EmitOMPUseDevicePtrClause( |
5363 const OMPClause &NC, OMPPrivateScope &PrivateScope, | 6434 const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope, |
5364 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { | 6435 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { |
5365 const auto &C = cast<OMPUseDevicePtrClause>(NC); | |
5366 auto OrigVarIt = C.varlist_begin(); | 6436 auto OrigVarIt = C.varlist_begin(); |
5367 auto InitIt = C.inits().begin(); | 6437 auto InitIt = C.inits().begin(); |
5368 for (const Expr *PvtVarIt : C.private_copies()) { | 6438 for (const Expr *PvtVarIt : C.private_copies()) { |
5369 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); | 6439 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*OrigVarIt)->getDecl()); |
5370 const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); | 6440 const auto *InitVD = cast<VarDecl>(cast<DeclRefExpr>(*InitIt)->getDecl()); |
5421 ++OrigVarIt; | 6491 ++OrigVarIt; |
5422 ++InitIt; | 6492 ++InitIt; |
5423 } | 6493 } |
5424 } | 6494 } |
5425 | 6495 |
6496 static const VarDecl *getBaseDecl(const Expr *Ref) { | |
6497 const Expr *Base = Ref->IgnoreParenImpCasts(); | |
6498 while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base)) | |
6499 Base = OASE->getBase()->IgnoreParenImpCasts(); | |
6500 while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base)) | |
6501 Base = ASE->getBase()->IgnoreParenImpCasts(); | |
6502 return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl()); | |
6503 } | |
6504 | |
6505 void CodeGenFunction::EmitOMPUseDeviceAddrClause( | |
6506 const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope, | |
6507 const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { | |
6508 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; | |
6509 for (const Expr *Ref : C.varlists()) { | |
6510 const VarDecl *OrigVD = getBaseDecl(Ref); | |
6511 if (!Processed.insert(OrigVD).second) | |
6512 continue; | |
6513 // In order to identify the right initializer we need to match the | |
6514 // declaration used by the mapping logic. In some cases we may get | |
6515 // OMPCapturedExprDecl that refers to the original declaration. | |
6516 const ValueDecl *MatchingVD = OrigVD; | |
6517 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { | |
6518 // OMPCapturedExprDecl are used to privatize fields of the current | |
6519 // structure. | |
6520 const auto *ME = cast<MemberExpr>(OED->getInit()); | |
6521 assert(isa<CXXThisExpr>(ME->getBase()) && | |
6522 "Base should be the current struct!"); | |
6523 MatchingVD = ME->getMemberDecl(); | |
6524 } | |
6525 | |
6526 // If we don't have information about the current list item, move on to | |
6527 // the next one. | |
6528 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); | |
6529 if (InitAddrIt == CaptureDeviceAddrMap.end()) | |
6530 continue; | |
6531 | |
6532 Address PrivAddr = InitAddrIt->getSecond(); | |
6533 // For declrefs and variable length arrays we need to load the pointer for | |
6534 // correct mapping, since the pointer to the data was passed to the runtime. | |
6535 if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) || | |
6536 MatchingVD->getType()->isArrayType()) | |
6537 PrivAddr = | |
6538 EmitLoadOfPointer(PrivAddr, getContext() | |
6539 .getPointerType(OrigVD->getType()) | |
6540 ->castAs<PointerType>()); | |
6541 llvm::Type *RealTy = | |
6542 ConvertTypeForMem(OrigVD->getType().getNonReferenceType()) | |
6543 ->getPointerTo(); | |
6544 PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy); | |
6545 | |
6546 (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; }); | |
6547 } | |
6548 } | |
6549 | |
5426 // Generate the instructions for '#pragma omp target data' directive. | 6550 // Generate the instructions for '#pragma omp target data' directive. |
5427 void CodeGenFunction::EmitOMPTargetDataDirective( | 6551 void CodeGenFunction::EmitOMPTargetDataDirective( |
5428 const OMPTargetDataDirective &S) { | 6552 const OMPTargetDataDirective &S) { |
5429 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true); | 6553 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true, |
6554 /*SeparateBeginEndCalls=*/true); | |
5430 | 6555 |
5431 // Create a pre/post action to signal the privatization of the device pointer. | 6556 // Create a pre/post action to signal the privatization of the device pointer. |
5432 // This action can be replaced by the OpenMP runtime code generation to | 6557 // This action can be replaced by the OpenMP runtime code generation to |
5433 // deactivate privatization. | 6558 // deactivate privatization. |
5434 bool PrivatizeDevicePointers = false; | 6559 bool PrivatizeDevicePointers = false; |
5465 OMPPrivateScope PrivateScope(CGF); | 6590 OMPPrivateScope PrivateScope(CGF); |
5466 // Emit all instances of the use_device_ptr clause. | 6591 // Emit all instances of the use_device_ptr clause. |
5467 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) | 6592 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) |
5468 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, | 6593 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, |
5469 Info.CaptureDeviceAddrMap); | 6594 Info.CaptureDeviceAddrMap); |
6595 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>()) | |
6596 CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope, | |
6597 Info.CaptureDeviceAddrMap); | |
5470 (void)PrivateScope.Privatize(); | 6598 (void)PrivateScope.Privatize(); |
5471 RCG(CGF); | 6599 RCG(CGF); |
5472 } else { | 6600 } else { |
6601 OMPLexicalScope Scope(CGF, S, OMPD_unknown); | |
5473 RCG(CGF); | 6602 RCG(CGF); |
5474 } | 6603 } |
5475 }; | 6604 }; |
5476 | 6605 |
5477 // Forward the provided action to the privatization codegen. | 6606 // Forward the provided action to the privatization codegen. |
5796 }, | 6925 }, |
5797 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { | 6926 [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
5798 CGF.EmitOMPInnerLoop( | 6927 CGF.EmitOMPInnerLoop( |
5799 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), | 6928 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), |
5800 [&S](CodeGenFunction &CGF) { | 6929 [&S](CodeGenFunction &CGF) { |
5801 CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); | 6930 emitOMPLoopBodyWithStopPoint(CGF, S, |
5802 CGF.EmitStopPoint(&S); | 6931 CodeGenFunction::JumpDest()); |
5803 }, | 6932 }, |
5804 [](CodeGenFunction &) {}); | 6933 [](CodeGenFunction &) {}); |
5805 }); | 6934 }); |
5806 } | 6935 } |
5807 // Emit: if (PreCond) - end. | 6936 // Emit: if (PreCond) - end. |
5947 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); | 7076 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); |
5948 } | 7077 } |
5949 | 7078 |
5950 void CodeGenFunction::EmitSimpleOMPExecutableDirective( | 7079 void CodeGenFunction::EmitSimpleOMPExecutableDirective( |
5951 const OMPExecutableDirective &D) { | 7080 const OMPExecutableDirective &D) { |
7081 if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) { | |
7082 EmitOMPScanDirective(*SD); | |
7083 return; | |
7084 } | |
5952 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) | 7085 if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) |
5953 return; | 7086 return; |
5954 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { | 7087 auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { |
5955 OMPPrivateScope GlobalsScope(CGF); | 7088 OMPPrivateScope GlobalsScope(CGF); |
5956 if (isOpenMPTaskingDirective(D.getDirectiveKind())) { | 7089 if (isOpenMPTaskingDirective(D.getDirectiveKind())) { |
5971 } | 7104 } |
5972 } | 7105 } |
5973 } | 7106 } |
5974 if (isOpenMPSimdDirective(D.getDirectiveKind())) { | 7107 if (isOpenMPSimdDirective(D.getDirectiveKind())) { |
5975 (void)GlobalsScope.Privatize(); | 7108 (void)GlobalsScope.Privatize(); |
7109 ParentLoopDirectiveForScanRegion ScanRegion(CGF, D); | |
5976 emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action); | 7110 emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action); |
5977 } else { | 7111 } else { |
5978 if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) { | 7112 if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) { |
5979 for (const Expr *E : LD->counters()) { | 7113 for (const Expr *E : LD->counters()) { |
5980 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); | 7114 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
5990 } | 7124 } |
5991 } | 7125 } |
5992 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) { | 7126 for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) { |
5993 if (!C->getNumForLoops()) | 7127 if (!C->getNumForLoops()) |
5994 continue; | 7128 continue; |
5995 for (unsigned I = LD->getCollapsedNumber(), | 7129 for (unsigned I = LD->getLoopsNumber(), |
5996 E = C->getLoopNumIterations().size(); | 7130 E = C->getLoopNumIterations().size(); |
5997 I < E; ++I) { | 7131 I < E; ++I) { |
5998 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( | 7132 if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( |
5999 cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) { | 7133 cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) { |
6000 // Emit only those that were not explicitly referenced in clauses. | 7134 // Emit only those that were not explicitly referenced in clauses. |
6006 } | 7140 } |
6007 (void)GlobalsScope.Privatize(); | 7141 (void)GlobalsScope.Privatize(); |
6008 CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt()); | 7142 CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt()); |
6009 } | 7143 } |
6010 }; | 7144 }; |
6011 { | 7145 if (D.getDirectiveKind() == OMPD_atomic || |
7146 D.getDirectiveKind() == OMPD_critical || | |
7147 D.getDirectiveKind() == OMPD_section || | |
7148 D.getDirectiveKind() == OMPD_master || | |
7149 D.getDirectiveKind() == OMPD_masked) { | |
7150 EmitStmt(D.getAssociatedStmt()); | |
7151 } else { | |
6012 auto LPCRegion = | 7152 auto LPCRegion = |
6013 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D); | 7153 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D); |
6014 OMPSimdLexicalScope Scope(*this, D); | 7154 OMPSimdLexicalScope Scope(*this, D); |
6015 CGM.getOpenMPRuntime().emitInlinedDirective( | 7155 CGM.getOpenMPRuntime().emitInlinedDirective( |
6016 *this, | 7156 *this, |