150
|
1 //===------ LoopGeneratorsKMP.cpp - IR helper to create loops -------------===//
|
|
2 //
|
|
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
4 // See https://llvm.org/LICENSE.txt for license information.
|
|
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
6 //
|
|
7 //===----------------------------------------------------------------------===//
|
|
8 //
|
|
9 // This file contains functions to create parallel loops as LLVM-IR.
|
|
10 //
|
|
11 //===----------------------------------------------------------------------===//
|
|
12
|
|
13 #include "polly/CodeGen/LoopGeneratorsKMP.h"
|
|
14 #include "llvm/IR/Dominators.h"
|
|
15 #include "llvm/IR/Module.h"
|
|
16
|
|
17 using namespace llvm;
|
|
18 using namespace polly;
|
|
19
|
|
20 void ParallelLoopGeneratorKMP::createCallSpawnThreads(Value *SubFn,
|
|
21 Value *SubFnParam,
|
|
22 Value *LB, Value *UB,
|
|
23 Value *Stride) {
|
|
24 const std::string Name = "__kmpc_fork_call";
|
|
25 Function *F = M->getFunction(Name);
|
207
|
26 Type *KMPCMicroTy = StructType::getTypeByName(M->getContext(), "kmpc_micro");
|
150
|
27
|
|
28 if (!KMPCMicroTy) {
|
|
29 // void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...)
|
|
30 Type *MicroParams[] = {Builder.getInt32Ty()->getPointerTo(),
|
|
31 Builder.getInt32Ty()->getPointerTo()};
|
|
32
|
|
33 KMPCMicroTy = FunctionType::get(Builder.getVoidTy(), MicroParams, true);
|
|
34 }
|
|
35
|
|
36 // If F is not available, declare it.
|
|
37 if (!F) {
|
207
|
38 StructType *IdentTy =
|
|
39 StructType::getTypeByName(M->getContext(), "struct.ident_t");
|
150
|
40
|
|
41 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
|
42 Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty(),
|
|
43 KMPCMicroTy->getPointerTo()};
|
|
44
|
|
45 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, true);
|
|
46 F = Function::Create(Ty, Linkage, Name, M);
|
|
47 }
|
|
48
|
|
49 Value *Task = Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
50 SubFn, KMPCMicroTy->getPointerTo());
|
|
51
|
|
52 Value *Args[] = {SourceLocationInfo,
|
|
53 Builder.getInt32(4) /* Number of arguments (w/o Task) */,
|
|
54 Task,
|
|
55 LB,
|
|
56 UB,
|
|
57 Stride,
|
|
58 SubFnParam};
|
|
59
|
|
60 Builder.CreateCall(F, Args);
|
|
61 }
|
|
62
|
173
|
63 void ParallelLoopGeneratorKMP::deployParallelExecution(Function *SubFn,
|
150
|
64 Value *SubFnParam,
|
|
65 Value *LB, Value *UB,
|
|
66 Value *Stride) {
|
|
67 // Inform OpenMP runtime about the number of threads if greater than zero
|
|
68 if (PollyNumThreads > 0) {
|
|
69 Value *GlobalThreadID = createCallGlobalThreadNum();
|
|
70 createCallPushNumThreads(GlobalThreadID, Builder.getInt32(PollyNumThreads));
|
|
71 }
|
|
72
|
|
73 // Tell the runtime we start a parallel loop
|
|
74 createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
|
|
75 }
|
|
76
|
|
77 Function *ParallelLoopGeneratorKMP::prepareSubFnDefinition(Function *F) const {
|
|
78 std::vector<Type *> Arguments = {Builder.getInt32Ty()->getPointerTo(),
|
|
79 Builder.getInt32Ty()->getPointerTo(),
|
|
80 LongType,
|
|
81 LongType,
|
|
82 LongType,
|
|
83 Builder.getInt8PtrTy()};
|
|
84
|
|
85 FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
|
|
86 Function *SubFn = Function::Create(FT, Function::InternalLinkage,
|
|
87 F->getName() + "_polly_subfn", M);
|
|
88 // Name the function's arguments
|
|
89 Function::arg_iterator AI = SubFn->arg_begin();
|
|
90 AI->setName("polly.kmpc.global_tid");
|
|
91 std::advance(AI, 1);
|
|
92 AI->setName("polly.kmpc.bound_tid");
|
|
93 std::advance(AI, 1);
|
|
94 AI->setName("polly.kmpc.lb");
|
|
95 std::advance(AI, 1);
|
|
96 AI->setName("polly.kmpc.ub");
|
|
97 std::advance(AI, 1);
|
|
98 AI->setName("polly.kmpc.inc");
|
|
99 std::advance(AI, 1);
|
|
100 AI->setName("polly.kmpc.shared");
|
|
101
|
|
102 return SubFn;
|
|
103 }
|
|
104
|
|
105 // Create a subfunction of the following (preliminary) structure:
|
|
106 //
|
|
107 // PrevBB
|
|
108 // |
|
|
109 // v
|
|
110 // HeaderBB
|
|
111 // / | _____
|
|
112 // / v v |
|
|
113 // / PreHeaderBB |
|
|
114 // | | |
|
|
115 // | v |
|
|
116 // | CheckNextBB |
|
|
117 // \ | \_____/
|
|
118 // \ |
|
|
119 // v v
|
|
120 // ExitBB
|
|
121 //
|
|
122 // HeaderBB will hold allocations, loading of variables and kmp-init calls.
|
|
123 // CheckNextBB will check for more work (dynamic / static chunked) or will be
|
|
124 // empty (static non chunked).
|
|
125 // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB.
|
|
126 // PreHeaderBB loads the new boundaries (& will lead to the loop body later on).
|
|
127 // Just like CheckNextBB: PreHeaderBB is (preliminary) empty in the static non
|
|
128 // chunked scheduling case. ExitBB marks the end of the parallel execution.
|
|
129 // The possibly empty BasicBlocks will automatically be removed.
|
|
130 std::tuple<Value *, Function *>
|
|
131 ParallelLoopGeneratorKMP::createSubFn(Value *SequentialLoopStride,
|
|
132 AllocaInst *StructData,
|
|
133 SetVector<Value *> Data, ValueMapT &Map) {
|
|
134 Function *SubFn = createSubFnDefinition();
|
|
135 LLVMContext &Context = SubFn->getContext();
|
|
136
|
|
137 // Store the previous basic block.
|
|
138 BasicBlock *PrevBB = Builder.GetInsertBlock();
|
|
139
|
|
140 // Create basic blocks.
|
|
141 BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
|
|
142 BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
|
|
143 BasicBlock *CheckNextBB =
|
|
144 BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
|
|
145 BasicBlock *PreHeaderBB =
|
|
146 BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
|
|
147
|
|
148 DT.addNewBlock(HeaderBB, PrevBB);
|
|
149 DT.addNewBlock(ExitBB, HeaderBB);
|
|
150 DT.addNewBlock(CheckNextBB, HeaderBB);
|
|
151 DT.addNewBlock(PreHeaderBB, HeaderBB);
|
|
152
|
|
153 // Fill up basic block HeaderBB.
|
|
154 Builder.SetInsertPoint(HeaderBB);
|
|
155 Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
|
|
156 Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
|
|
157 Value *IsLastPtr = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr,
|
|
158 "polly.par.lastIterPtr");
|
|
159 Value *StridePtr =
|
|
160 Builder.CreateAlloca(LongType, nullptr, "polly.par.StridePtr");
|
|
161
|
|
162 // Get iterator for retrieving the previously defined parameters.
|
|
163 Function::arg_iterator AI = SubFn->arg_begin();
|
|
164 // First argument holds "global thread ID".
|
|
165 Value *IDPtr = &*AI;
|
|
166 // Skip "bound thread ID" since it is not used (but had to be defined).
|
|
167 std::advance(AI, 2);
|
|
168 // Move iterator to: LB, UB, Stride, Shared variable struct.
|
|
169 Value *LB = &*AI;
|
|
170 std::advance(AI, 1);
|
|
171 Value *UB = &*AI;
|
|
172 std::advance(AI, 1);
|
|
173 Value *Stride = &*AI;
|
|
174 std::advance(AI, 1);
|
|
175 Value *Shared = &*AI;
|
|
176
|
|
177 Value *UserContext = Builder.CreateBitCast(Shared, StructData->getType(),
|
|
178 "polly.par.userContext");
|
|
179
|
|
180 extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext,
|
|
181 Map);
|
|
182
|
|
183 const auto Alignment = llvm::Align(is64BitArch() ? 8 : 4);
|
207
|
184 Value *ID = Builder.CreateAlignedLoad(Builder.getInt32Ty(), IDPtr, Alignment,
|
|
185 "polly.par.global_tid");
|
150
|
186
|
|
187 Builder.CreateAlignedStore(LB, LBPtr, Alignment);
|
|
188 Builder.CreateAlignedStore(UB, UBPtr, Alignment);
|
|
189 Builder.CreateAlignedStore(Builder.getInt32(0), IsLastPtr, Alignment);
|
|
190 Builder.CreateAlignedStore(Stride, StridePtr, Alignment);
|
|
191
|
|
192 // Subtract one as the upper bound provided by openmp is a < comparison
|
|
193 // whereas the codegenForSequential function creates a <= comparison.
|
|
194 Value *AdjustedUB = Builder.CreateAdd(UB, ConstantInt::get(LongType, -1),
|
|
195 "polly.indvar.UBAdjusted");
|
|
196
|
|
197 Value *ChunkSize =
|
|
198 ConstantInt::get(LongType, std::max<int>(PollyChunkSize, 1));
|
|
199
|
|
200 OMPGeneralSchedulingType Scheduling =
|
|
201 getSchedType(PollyChunkSize, PollyScheduling);
|
|
202
|
|
203 switch (Scheduling) {
|
|
204 case OMPGeneralSchedulingType::Dynamic:
|
|
205 case OMPGeneralSchedulingType::Guided:
|
|
206 case OMPGeneralSchedulingType::Runtime:
|
|
207 // "DYNAMIC" scheduling types are handled below (including 'runtime')
|
|
208 {
|
|
209 UB = AdjustedUB;
|
|
210 createCallDispatchInit(ID, LB, UB, Stride, ChunkSize);
|
|
211 Value *HasWork =
|
|
212 createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr);
|
|
213 Value *HasIteration =
|
|
214 Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork,
|
|
215 Builder.getInt32(1), "polly.hasIteration");
|
|
216 Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
|
|
217
|
|
218 Builder.SetInsertPoint(CheckNextBB);
|
|
219 HasWork = createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr);
|
|
220 HasIteration =
|
|
221 Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork,
|
|
222 Builder.getInt32(1), "polly.hasWork");
|
|
223 Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
|
|
224
|
|
225 Builder.SetInsertPoint(PreHeaderBB);
|
207
|
226 LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment,
|
|
227 "polly.indvar.LB");
|
|
228 UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment,
|
|
229 "polly.indvar.UB");
|
150
|
230 }
|
|
231 break;
|
|
232 case OMPGeneralSchedulingType::StaticChunked:
|
|
233 case OMPGeneralSchedulingType::StaticNonChunked:
|
|
234 // "STATIC" scheduling types are handled below
|
|
235 {
|
|
236 Builder.CreateAlignedStore(AdjustedUB, UBPtr, Alignment);
|
|
237 createCallStaticInit(ID, IsLastPtr, LBPtr, UBPtr, StridePtr, ChunkSize);
|
|
238
|
207
|
239 Value *ChunkedStride = Builder.CreateAlignedLoad(
|
|
240 LongType, StridePtr, Alignment, "polly.kmpc.stride");
|
150
|
241
|
207
|
242 LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment,
|
|
243 "polly.indvar.LB");
|
|
244 UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment,
|
|
245 "polly.indvar.UB.temp");
|
150
|
246
|
|
247 Value *UBInRange =
|
|
248 Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLE, UB, AdjustedUB,
|
|
249 "polly.indvar.UB.inRange");
|
|
250 UB = Builder.CreateSelect(UBInRange, UB, AdjustedUB, "polly.indvar.UB");
|
|
251 Builder.CreateAlignedStore(UB, UBPtr, Alignment);
|
|
252
|
|
253 Value *HasIteration = Builder.CreateICmp(
|
|
254 llvm::CmpInst::Predicate::ICMP_SLE, LB, UB, "polly.hasIteration");
|
|
255 Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
|
|
256
|
|
257 if (Scheduling == OMPGeneralSchedulingType::StaticChunked) {
|
|
258 Builder.SetInsertPoint(PreHeaderBB);
|
207
|
259 LB = Builder.CreateAlignedLoad(LongType, LBPtr, Alignment,
|
150
|
260 "polly.indvar.LB.entry");
|
207
|
261 UB = Builder.CreateAlignedLoad(LongType, UBPtr, Alignment,
|
150
|
262 "polly.indvar.UB.entry");
|
|
263 }
|
|
264
|
|
265 Builder.SetInsertPoint(CheckNextBB);
|
|
266
|
|
267 if (Scheduling == OMPGeneralSchedulingType::StaticChunked) {
|
|
268 Value *NextLB =
|
|
269 Builder.CreateAdd(LB, ChunkedStride, "polly.indvar.nextLB");
|
|
270 Value *NextUB = Builder.CreateAdd(UB, ChunkedStride);
|
|
271
|
|
272 Value *NextUBOutOfBounds =
|
|
273 Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SGT, NextUB,
|
|
274 AdjustedUB, "polly.indvar.nextUB.outOfBounds");
|
|
275 NextUB = Builder.CreateSelect(NextUBOutOfBounds, AdjustedUB, NextUB,
|
|
276 "polly.indvar.nextUB");
|
|
277
|
|
278 Builder.CreateAlignedStore(NextLB, LBPtr, Alignment);
|
|
279 Builder.CreateAlignedStore(NextUB, UBPtr, Alignment);
|
|
280
|
|
281 Value *HasWork =
|
|
282 Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLE, NextLB,
|
|
283 AdjustedUB, "polly.hasWork");
|
|
284 Builder.CreateCondBr(HasWork, PreHeaderBB, ExitBB);
|
|
285 } else {
|
|
286 Builder.CreateBr(ExitBB);
|
|
287 }
|
|
288
|
|
289 Builder.SetInsertPoint(PreHeaderBB);
|
|
290 }
|
|
291 break;
|
|
292 }
|
|
293
|
|
294 Builder.CreateBr(CheckNextBB);
|
|
295 Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
|
|
296 BasicBlock *AfterBB;
|
|
297 Value *IV = createLoop(LB, UB, SequentialLoopStride, Builder, LI, DT, AfterBB,
|
|
298 ICmpInst::ICMP_SLE, nullptr, true,
|
|
299 /* UseGuard */ false);
|
|
300
|
|
301 BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
|
|
302
|
|
303 // Add code to terminate this subfunction.
|
|
304 Builder.SetInsertPoint(ExitBB);
|
|
305 // Static (i.e. non-dynamic) scheduling types, are terminated with a fini-call
|
|
306 if (Scheduling == OMPGeneralSchedulingType::StaticChunked ||
|
|
307 Scheduling == OMPGeneralSchedulingType::StaticNonChunked) {
|
|
308 createCallStaticFini(ID);
|
|
309 }
|
|
310 Builder.CreateRetVoid();
|
|
311 Builder.SetInsertPoint(&*LoopBody);
|
|
312
|
|
313 return std::make_tuple(IV, SubFn);
|
|
314 }
|
|
315
|
|
316 Value *ParallelLoopGeneratorKMP::createCallGlobalThreadNum() {
|
|
317 const std::string Name = "__kmpc_global_thread_num";
|
|
318 Function *F = M->getFunction(Name);
|
|
319
|
|
320 // If F is not available, declare it.
|
|
321 if (!F) {
|
207
|
322 StructType *IdentTy =
|
|
323 StructType::getTypeByName(M->getContext(), "struct.ident_t");
|
150
|
324
|
|
325 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
|
326 Type *Params[] = {IdentTy->getPointerTo()};
|
|
327
|
|
328 FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Params, false);
|
|
329 F = Function::Create(Ty, Linkage, Name, M);
|
|
330 }
|
|
331
|
|
332 return Builder.CreateCall(F, {SourceLocationInfo});
|
|
333 }
|
|
334
|
|
335 void ParallelLoopGeneratorKMP::createCallPushNumThreads(Value *GlobalThreadID,
|
|
336 Value *NumThreads) {
|
|
337 const std::string Name = "__kmpc_push_num_threads";
|
|
338 Function *F = M->getFunction(Name);
|
|
339
|
|
340 // If F is not available, declare it.
|
|
341 if (!F) {
|
207
|
342 StructType *IdentTy =
|
|
343 StructType::getTypeByName(M->getContext(), "struct.ident_t");
|
150
|
344
|
|
345 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
|
346 Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty(),
|
|
347 Builder.getInt32Ty()};
|
|
348
|
|
349 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
|
|
350 F = Function::Create(Ty, Linkage, Name, M);
|
|
351 }
|
|
352
|
|
353 Value *Args[] = {SourceLocationInfo, GlobalThreadID, NumThreads};
|
|
354
|
|
355 Builder.CreateCall(F, Args);
|
|
356 }
|
|
357
|
|
358 void ParallelLoopGeneratorKMP::createCallStaticInit(Value *GlobalThreadID,
|
|
359 Value *IsLastPtr,
|
|
360 Value *LBPtr, Value *UBPtr,
|
|
361 Value *StridePtr,
|
|
362 Value *ChunkSize) {
|
|
363 const std::string Name =
|
|
364 is64BitArch() ? "__kmpc_for_static_init_8" : "__kmpc_for_static_init_4";
|
|
365 Function *F = M->getFunction(Name);
|
207
|
366 StructType *IdentTy =
|
|
367 StructType::getTypeByName(M->getContext(), "struct.ident_t");
|
150
|
368
|
|
369 // If F is not available, declare it.
|
|
370 if (!F) {
|
|
371 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
|
372
|
|
373 Type *Params[] = {IdentTy->getPointerTo(),
|
|
374 Builder.getInt32Ty(),
|
|
375 Builder.getInt32Ty(),
|
|
376 Builder.getInt32Ty()->getPointerTo(),
|
|
377 LongType->getPointerTo(),
|
|
378 LongType->getPointerTo(),
|
|
379 LongType->getPointerTo(),
|
|
380 LongType,
|
|
381 LongType};
|
|
382
|
|
383 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
|
|
384 F = Function::Create(Ty, Linkage, Name, M);
|
|
385 }
|
|
386
|
|
387 // The parameter 'ChunkSize' will hold strictly positive integer values,
|
|
388 // regardless of PollyChunkSize's value
|
|
389 Value *Args[] = {
|
|
390 SourceLocationInfo,
|
|
391 GlobalThreadID,
|
|
392 Builder.getInt32(int(getSchedType(PollyChunkSize, PollyScheduling))),
|
|
393 IsLastPtr,
|
|
394 LBPtr,
|
|
395 UBPtr,
|
|
396 StridePtr,
|
|
397 ConstantInt::get(LongType, 1),
|
|
398 ChunkSize};
|
|
399
|
|
400 Builder.CreateCall(F, Args);
|
|
401 }
|
|
402
|
|
403 void ParallelLoopGeneratorKMP::createCallStaticFini(Value *GlobalThreadID) {
|
|
404 const std::string Name = "__kmpc_for_static_fini";
|
|
405 Function *F = M->getFunction(Name);
|
207
|
406 StructType *IdentTy =
|
|
407 StructType::getTypeByName(M->getContext(), "struct.ident_t");
|
150
|
408
|
|
409 // If F is not available, declare it.
|
|
410 if (!F) {
|
|
411 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
|
412 Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty()};
|
|
413 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
|
|
414 F = Function::Create(Ty, Linkage, Name, M);
|
|
415 }
|
|
416
|
|
417 Value *Args[] = {SourceLocationInfo, GlobalThreadID};
|
|
418
|
|
419 Builder.CreateCall(F, Args);
|
|
420 }
|
|
421
|
|
422 void ParallelLoopGeneratorKMP::createCallDispatchInit(Value *GlobalThreadID,
|
|
423 Value *LB, Value *UB,
|
|
424 Value *Inc,
|
|
425 Value *ChunkSize) {
|
|
426 const std::string Name =
|
|
427 is64BitArch() ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_4";
|
|
428 Function *F = M->getFunction(Name);
|
207
|
429 StructType *IdentTy =
|
|
430 StructType::getTypeByName(M->getContext(), "struct.ident_t");
|
150
|
431
|
|
432 // If F is not available, declare it.
|
|
433 if (!F) {
|
|
434 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
|
435
|
|
436 Type *Params[] = {IdentTy->getPointerTo(),
|
|
437 Builder.getInt32Ty(),
|
|
438 Builder.getInt32Ty(),
|
|
439 LongType,
|
|
440 LongType,
|
|
441 LongType,
|
|
442 LongType};
|
|
443
|
|
444 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
|
|
445 F = Function::Create(Ty, Linkage, Name, M);
|
|
446 }
|
|
447
|
|
448 // The parameter 'ChunkSize' will hold strictly positive integer values,
|
|
449 // regardless of PollyChunkSize's value
|
|
450 Value *Args[] = {
|
|
451 SourceLocationInfo,
|
|
452 GlobalThreadID,
|
|
453 Builder.getInt32(int(getSchedType(PollyChunkSize, PollyScheduling))),
|
|
454 LB,
|
|
455 UB,
|
|
456 Inc,
|
|
457 ChunkSize};
|
|
458
|
|
459 Builder.CreateCall(F, Args);
|
|
460 }
|
|
461
|
|
462 Value *ParallelLoopGeneratorKMP::createCallDispatchNext(Value *GlobalThreadID,
|
|
463 Value *IsLastPtr,
|
|
464 Value *LBPtr,
|
|
465 Value *UBPtr,
|
|
466 Value *StridePtr) {
|
|
467 const std::string Name =
|
|
468 is64BitArch() ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_4";
|
|
469 Function *F = M->getFunction(Name);
|
207
|
470 StructType *IdentTy =
|
|
471 StructType::getTypeByName(M->getContext(), "struct.ident_t");
|
150
|
472
|
|
473 // If F is not available, declare it.
|
|
474 if (!F) {
|
|
475 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
|
|
476
|
|
477 Type *Params[] = {IdentTy->getPointerTo(),
|
|
478 Builder.getInt32Ty(),
|
|
479 Builder.getInt32Ty()->getPointerTo(),
|
|
480 LongType->getPointerTo(),
|
|
481 LongType->getPointerTo(),
|
|
482 LongType->getPointerTo()};
|
|
483
|
|
484 FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Params, false);
|
|
485 F = Function::Create(Ty, Linkage, Name, M);
|
|
486 }
|
|
487
|
|
488 Value *Args[] = {SourceLocationInfo, GlobalThreadID, IsLastPtr, LBPtr, UBPtr,
|
|
489 StridePtr};
|
|
490
|
|
491 return Builder.CreateCall(F, Args);
|
|
492 }
|
|
493
|
|
494 // TODO: This function currently creates a source location dummy. It might be
|
|
495 // necessary to (actually) provide information, in the future.
|
|
496 GlobalVariable *ParallelLoopGeneratorKMP::createSourceLocation() {
|
|
497 const std::string LocName = ".loc.dummy";
|
|
498 GlobalVariable *SourceLocDummy = M->getGlobalVariable(LocName);
|
|
499
|
|
500 if (SourceLocDummy == nullptr) {
|
|
501 const std::string StructName = "struct.ident_t";
|
207
|
502 StructType *IdentTy =
|
|
503 StructType::getTypeByName(M->getContext(), StructName);
|
150
|
504
|
|
505 // If the ident_t StructType is not available, declare it.
|
|
506 // in LLVM-IR: ident_t = type { i32, i32, i32, i32, i8* }
|
|
507 if (!IdentTy) {
|
|
508 Type *LocMembers[] = {Builder.getInt32Ty(), Builder.getInt32Ty(),
|
|
509 Builder.getInt32Ty(), Builder.getInt32Ty(),
|
|
510 Builder.getInt8PtrTy()};
|
|
511
|
|
512 IdentTy =
|
|
513 StructType::create(M->getContext(), LocMembers, StructName, false);
|
|
514 }
|
|
515
|
|
516 const auto ArrayType =
|
|
517 llvm::ArrayType::get(Builder.getInt8Ty(), /* Length */ 23);
|
|
518
|
|
519 // Global Variable Definitions
|
|
520 GlobalVariable *StrVar = new GlobalVariable(
|
|
521 *M, ArrayType, true, GlobalValue::PrivateLinkage, 0, ".str.ident");
|
|
522 StrVar->setAlignment(llvm::Align(1));
|
|
523
|
|
524 SourceLocDummy = new GlobalVariable(
|
|
525 *M, IdentTy, true, GlobalValue::PrivateLinkage, nullptr, LocName);
|
|
526 SourceLocDummy->setAlignment(llvm::Align(8));
|
|
527
|
|
528 // Constant Definitions
|
|
529 Constant *InitStr = ConstantDataArray::getString(
|
|
530 M->getContext(), "Source location dummy.", true);
|
|
531
|
|
532 Constant *StrPtr = static_cast<Constant *>(Builder.CreateInBoundsGEP(
|
|
533 ArrayType, StrVar, {Builder.getInt32(0), Builder.getInt32(0)}));
|
|
534
|
|
535 Constant *LocInitStruct = ConstantStruct::get(
|
|
536 IdentTy, {Builder.getInt32(0), Builder.getInt32(0), Builder.getInt32(0),
|
|
537 Builder.getInt32(0), StrPtr});
|
|
538
|
|
539 // Initialize variables
|
|
540 StrVar->setInitializer(InitStr);
|
|
541 SourceLocDummy->setInitializer(LocInitStruct);
|
|
542 }
|
|
543
|
|
544 return SourceLocDummy;
|
|
545 }
|
|
546
|
|
547 bool ParallelLoopGeneratorKMP::is64BitArch() {
|
|
548 return (LongType->getIntegerBitWidth() == 64);
|
|
549 }
|
|
550
|
|
551 OMPGeneralSchedulingType ParallelLoopGeneratorKMP::getSchedType(
|
|
552 int ChunkSize, OMPGeneralSchedulingType Scheduling) const {
|
|
553 if (ChunkSize == 0 && Scheduling == OMPGeneralSchedulingType::StaticChunked)
|
|
554 return OMPGeneralSchedulingType::StaticNonChunked;
|
|
555
|
|
556 return Scheduling;
|
|
557 }
|