annotate clang/lib/CodeGen/CGCUDANV.cpp @ 221:79ff65ed7e25

LLVM12 Original
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Tue, 15 Jun 2021 19:15:29 +0900
parents 0572611fdcc8
children 5f17cb93ff66
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
150
anatofuz
parents:
diff changeset
1 //===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===//
anatofuz
parents:
diff changeset
2 //
anatofuz
parents:
diff changeset
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
anatofuz
parents:
diff changeset
4 // See https://llvm.org/LICENSE.txt for license information.
anatofuz
parents:
diff changeset
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
anatofuz
parents:
diff changeset
6 //
anatofuz
parents:
diff changeset
7 //===----------------------------------------------------------------------===//
anatofuz
parents:
diff changeset
8 //
anatofuz
parents:
diff changeset
9 // This provides a class for CUDA code generation targeting the NVIDIA CUDA
anatofuz
parents:
diff changeset
10 // runtime library.
anatofuz
parents:
diff changeset
11 //
anatofuz
parents:
diff changeset
12 //===----------------------------------------------------------------------===//
anatofuz
parents:
diff changeset
13
anatofuz
parents:
diff changeset
14 #include "CGCUDARuntime.h"
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
15 #include "CGCXXABI.h"
150
anatofuz
parents:
diff changeset
16 #include "CodeGenFunction.h"
anatofuz
parents:
diff changeset
17 #include "CodeGenModule.h"
anatofuz
parents:
diff changeset
18 #include "clang/AST/Decl.h"
anatofuz
parents:
diff changeset
19 #include "clang/Basic/Cuda.h"
anatofuz
parents:
diff changeset
20 #include "clang/CodeGen/CodeGenABITypes.h"
anatofuz
parents:
diff changeset
21 #include "clang/CodeGen/ConstantInitBuilder.h"
anatofuz
parents:
diff changeset
22 #include "llvm/IR/BasicBlock.h"
anatofuz
parents:
diff changeset
23 #include "llvm/IR/Constants.h"
anatofuz
parents:
diff changeset
24 #include "llvm/IR/DerivedTypes.h"
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
25 #include "llvm/IR/ReplaceConstant.h"
150
anatofuz
parents:
diff changeset
26 #include "llvm/Support/Format.h"
anatofuz
parents:
diff changeset
27
anatofuz
parents:
diff changeset
28 using namespace clang;
anatofuz
parents:
diff changeset
29 using namespace CodeGen;
anatofuz
parents:
diff changeset
30
anatofuz
parents:
diff changeset
31 namespace {
anatofuz
parents:
diff changeset
32 constexpr unsigned CudaFatMagic = 0x466243b1;
anatofuz
parents:
diff changeset
33 constexpr unsigned HIPFatMagic = 0x48495046; // "HIPF"
anatofuz
parents:
diff changeset
34
anatofuz
parents:
diff changeset
35 class CGNVCUDARuntime : public CGCUDARuntime {
anatofuz
parents:
diff changeset
36
anatofuz
parents:
diff changeset
37 private:
anatofuz
parents:
diff changeset
38 llvm::IntegerType *IntTy, *SizeTy;
anatofuz
parents:
diff changeset
39 llvm::Type *VoidTy;
anatofuz
parents:
diff changeset
40 llvm::PointerType *CharPtrTy, *VoidPtrTy, *VoidPtrPtrTy;
anatofuz
parents:
diff changeset
41
anatofuz
parents:
diff changeset
42 /// Convenience reference to LLVM Context
anatofuz
parents:
diff changeset
43 llvm::LLVMContext &Context;
anatofuz
parents:
diff changeset
44 /// Convenience reference to the current module
anatofuz
parents:
diff changeset
45 llvm::Module &TheModule;
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
46 /// Keeps track of kernel launch stubs and handles emitted in this module
150
anatofuz
parents:
diff changeset
47 struct KernelInfo {
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
48 llvm::Function *Kernel; // stub function to help launch kernel
150
anatofuz
parents:
diff changeset
49 const Decl *D;
anatofuz
parents:
diff changeset
50 };
anatofuz
parents:
diff changeset
51 llvm::SmallVector<KernelInfo, 16> EmittedKernels;
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
52 // Map a device stub function to a symbol for identifying kernel in host code.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
53 // For CUDA, the symbol for identifying the kernel is the same as the device
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
54 // stub function. For HIP, they are different.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
55 llvm::DenseMap<llvm::Function *, llvm::GlobalValue *> KernelHandles;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
56 // Map a kernel handle to the kernel stub.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
57 llvm::DenseMap<llvm::GlobalValue *, llvm::Function *> KernelStubs;
150
anatofuz
parents:
diff changeset
58 struct VarInfo {
anatofuz
parents:
diff changeset
59 llvm::GlobalVariable *Var;
anatofuz
parents:
diff changeset
60 const VarDecl *D;
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
61 DeviceVarFlags Flags;
150
anatofuz
parents:
diff changeset
62 };
anatofuz
parents:
diff changeset
63 llvm::SmallVector<VarInfo, 16> DeviceVars;
anatofuz
parents:
diff changeset
64 /// Keeps track of variable containing handle of GPU binary. Populated by
anatofuz
parents:
diff changeset
65 /// ModuleCtorFunction() and used to create corresponding cleanup calls in
anatofuz
parents:
diff changeset
66 /// ModuleDtorFunction()
anatofuz
parents:
diff changeset
67 llvm::GlobalVariable *GpuBinaryHandle = nullptr;
anatofuz
parents:
diff changeset
68 /// Whether we generate relocatable device code.
anatofuz
parents:
diff changeset
69 bool RelocatableDeviceCode;
anatofuz
parents:
diff changeset
70 /// Mangle context for device.
anatofuz
parents:
diff changeset
71 std::unique_ptr<MangleContext> DeviceMC;
anatofuz
parents:
diff changeset
72
anatofuz
parents:
diff changeset
73 llvm::FunctionCallee getSetupArgumentFn() const;
anatofuz
parents:
diff changeset
74 llvm::FunctionCallee getLaunchFn() const;
anatofuz
parents:
diff changeset
75
anatofuz
parents:
diff changeset
76 llvm::FunctionType *getRegisterGlobalsFnTy() const;
anatofuz
parents:
diff changeset
77 llvm::FunctionType *getCallbackFnTy() const;
anatofuz
parents:
diff changeset
78 llvm::FunctionType *getRegisterLinkedBinaryFnTy() const;
anatofuz
parents:
diff changeset
79 std::string addPrefixToName(StringRef FuncName) const;
anatofuz
parents:
diff changeset
80 std::string addUnderscoredPrefixToName(StringRef FuncName) const;
anatofuz
parents:
diff changeset
81
anatofuz
parents:
diff changeset
82 /// Creates a function to register all kernel stubs generated in this module.
anatofuz
parents:
diff changeset
83 llvm::Function *makeRegisterGlobalsFn();
anatofuz
parents:
diff changeset
84
anatofuz
parents:
diff changeset
85 /// Helper function that generates a constant string and returns a pointer to
anatofuz
parents:
diff changeset
86 /// the start of the string. The result of this function can be used anywhere
anatofuz
parents:
diff changeset
87 /// where the C code specifies const char*.
anatofuz
parents:
diff changeset
88 llvm::Constant *makeConstantString(const std::string &Str,
anatofuz
parents:
diff changeset
89 const std::string &Name = "",
anatofuz
parents:
diff changeset
90 const std::string &SectionName = "",
anatofuz
parents:
diff changeset
91 unsigned Alignment = 0) {
anatofuz
parents:
diff changeset
92 llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
anatofuz
parents:
diff changeset
93 llvm::ConstantInt::get(SizeTy, 0)};
anatofuz
parents:
diff changeset
94 auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());
anatofuz
parents:
diff changeset
95 llvm::GlobalVariable *GV =
anatofuz
parents:
diff changeset
96 cast<llvm::GlobalVariable>(ConstStr.getPointer());
anatofuz
parents:
diff changeset
97 if (!SectionName.empty()) {
anatofuz
parents:
diff changeset
98 GV->setSection(SectionName);
anatofuz
parents:
diff changeset
99 // Mark the address as used which make sure that this section isn't
anatofuz
parents:
diff changeset
100 // merged and we will really have it in the object file.
anatofuz
parents:
diff changeset
101 GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
anatofuz
parents:
diff changeset
102 }
anatofuz
parents:
diff changeset
103 if (Alignment)
anatofuz
parents:
diff changeset
104 GV->setAlignment(llvm::Align(Alignment));
anatofuz
parents:
diff changeset
105
anatofuz
parents:
diff changeset
106 return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
anatofuz
parents:
diff changeset
107 ConstStr.getPointer(), Zeros);
anatofuz
parents:
diff changeset
108 }
anatofuz
parents:
diff changeset
109
anatofuz
parents:
diff changeset
110 /// Helper function that generates an empty dummy function returning void.
anatofuz
parents:
diff changeset
111 llvm::Function *makeDummyFunction(llvm::FunctionType *FnTy) {
anatofuz
parents:
diff changeset
112 assert(FnTy->getReturnType()->isVoidTy() &&
anatofuz
parents:
diff changeset
113 "Can only generate dummy functions returning void!");
anatofuz
parents:
diff changeset
114 llvm::Function *DummyFunc = llvm::Function::Create(
anatofuz
parents:
diff changeset
115 FnTy, llvm::GlobalValue::InternalLinkage, "dummy", &TheModule);
anatofuz
parents:
diff changeset
116
anatofuz
parents:
diff changeset
117 llvm::BasicBlock *DummyBlock =
anatofuz
parents:
diff changeset
118 llvm::BasicBlock::Create(Context, "", DummyFunc);
anatofuz
parents:
diff changeset
119 CGBuilderTy FuncBuilder(CGM, Context);
anatofuz
parents:
diff changeset
120 FuncBuilder.SetInsertPoint(DummyBlock);
anatofuz
parents:
diff changeset
121 FuncBuilder.CreateRetVoid();
anatofuz
parents:
diff changeset
122
anatofuz
parents:
diff changeset
123 return DummyFunc;
anatofuz
parents:
diff changeset
124 }
anatofuz
parents:
diff changeset
125
anatofuz
parents:
diff changeset
126 void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args);
anatofuz
parents:
diff changeset
127 void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
128 std::string getDeviceSideName(const NamedDecl *ND) override;
150
anatofuz
parents:
diff changeset
129
anatofuz
parents:
diff changeset
130 void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
131 bool Extern, bool Constant) {
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
132 DeviceVars.push_back({&Var,
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
133 VD,
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
134 {DeviceVarFlags::Variable, Extern, Constant,
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
135 VD->hasAttr<HIPManagedAttr>(),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
136 /*Normalized*/ false, 0}});
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
137 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
138 void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
139 bool Extern, int Type) {
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
140 DeviceVars.push_back({&Var,
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
141 VD,
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
142 {DeviceVarFlags::Surface, Extern, /*Constant*/ false,
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
143 /*Managed*/ false,
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
144 /*Normalized*/ false, Type}});
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
145 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
146 void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
147 bool Extern, int Type, bool Normalized) {
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
148 DeviceVars.push_back({&Var,
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
149 VD,
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
150 {DeviceVarFlags::Texture, Extern, /*Constant*/ false,
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
151 /*Managed*/ false, Normalized, Type}});
150
anatofuz
parents:
diff changeset
152 }
anatofuz
parents:
diff changeset
153
anatofuz
parents:
diff changeset
154 /// Creates module constructor function
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
155 llvm::Function *makeModuleCtorFunction();
150
anatofuz
parents:
diff changeset
156 /// Creates module destructor function
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
157 llvm::Function *makeModuleDtorFunction();
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
158 /// Transform managed variables for device compilation.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
159 void transformManagedVars();
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
160
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
161 public:
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
162 CGNVCUDARuntime(CodeGenModule &CGM);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
163
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
164 llvm::GlobalValue *getKernelHandle(llvm::Function *F, GlobalDecl GD) override;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
165 llvm::Function *getKernelStub(llvm::GlobalValue *Handle) override {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
166 auto Loc = KernelStubs.find(Handle);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
167 assert(Loc != KernelStubs.end());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
168 return Loc->second;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
169 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
170 void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
171 void handleVarRegistration(const VarDecl *VD,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
172 llvm::GlobalVariable &Var) override;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
173 void
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
174 internalizeDeviceSideVar(const VarDecl *D,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
175 llvm::GlobalValue::LinkageTypes &Linkage) override;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
176
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
177 llvm::Function *finalizeModule() override;
150
anatofuz
parents:
diff changeset
178 };
anatofuz
parents:
diff changeset
179
anatofuz
parents:
diff changeset
180 }
anatofuz
parents:
diff changeset
181
anatofuz
parents:
diff changeset
182 std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName) const {
anatofuz
parents:
diff changeset
183 if (CGM.getLangOpts().HIP)
anatofuz
parents:
diff changeset
184 return ((Twine("hip") + Twine(FuncName)).str());
anatofuz
parents:
diff changeset
185 return ((Twine("cuda") + Twine(FuncName)).str());
anatofuz
parents:
diff changeset
186 }
anatofuz
parents:
diff changeset
187 std::string
anatofuz
parents:
diff changeset
188 CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const {
anatofuz
parents:
diff changeset
189 if (CGM.getLangOpts().HIP)
anatofuz
parents:
diff changeset
190 return ((Twine("__hip") + Twine(FuncName)).str());
anatofuz
parents:
diff changeset
191 return ((Twine("__cuda") + Twine(FuncName)).str());
anatofuz
parents:
diff changeset
192 }
anatofuz
parents:
diff changeset
193
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
194 static std::unique_ptr<MangleContext> InitDeviceMC(CodeGenModule &CGM) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
195 // If the host and device have different C++ ABIs, mark it as the device
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
196 // mangle context so that the mangling needs to retrieve the additional
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
197 // device lambda mangling number instead of the regular host one.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
198 if (CGM.getContext().getAuxTargetInfo() &&
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
199 CGM.getContext().getTargetInfo().getCXXABI().isMicrosoft() &&
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
200 CGM.getContext().getAuxTargetInfo()->getCXXABI().isItaniumFamily()) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
201 return std::unique_ptr<MangleContext>(
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
202 CGM.getContext().createDeviceMangleContext(
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
203 *CGM.getContext().getAuxTargetInfo()));
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
204 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
205
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
206 return std::unique_ptr<MangleContext>(CGM.getContext().createMangleContext(
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
207 CGM.getContext().getAuxTargetInfo()));
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
208 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
209
150
anatofuz
parents:
diff changeset
210 CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
anatofuz
parents:
diff changeset
211 : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
anatofuz
parents:
diff changeset
212 TheModule(CGM.getModule()),
anatofuz
parents:
diff changeset
213 RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode),
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
214 DeviceMC(InitDeviceMC(CGM)) {
150
anatofuz
parents:
diff changeset
215 CodeGen::CodeGenTypes &Types = CGM.getTypes();
anatofuz
parents:
diff changeset
216 ASTContext &Ctx = CGM.getContext();
anatofuz
parents:
diff changeset
217
anatofuz
parents:
diff changeset
218 IntTy = CGM.IntTy;
anatofuz
parents:
diff changeset
219 SizeTy = CGM.SizeTy;
anatofuz
parents:
diff changeset
220 VoidTy = CGM.VoidTy;
anatofuz
parents:
diff changeset
221
anatofuz
parents:
diff changeset
222 CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
anatofuz
parents:
diff changeset
223 VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
anatofuz
parents:
diff changeset
224 VoidPtrPtrTy = VoidPtrTy->getPointerTo();
anatofuz
parents:
diff changeset
225 }
anatofuz
parents:
diff changeset
226
anatofuz
parents:
diff changeset
227 llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const {
anatofuz
parents:
diff changeset
228 // cudaError_t cudaSetupArgument(void *, size_t, size_t)
anatofuz
parents:
diff changeset
229 llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy};
anatofuz
parents:
diff changeset
230 return CGM.CreateRuntimeFunction(
anatofuz
parents:
diff changeset
231 llvm::FunctionType::get(IntTy, Params, false),
anatofuz
parents:
diff changeset
232 addPrefixToName("SetupArgument"));
anatofuz
parents:
diff changeset
233 }
anatofuz
parents:
diff changeset
234
anatofuz
parents:
diff changeset
235 llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const {
anatofuz
parents:
diff changeset
236 if (CGM.getLangOpts().HIP) {
anatofuz
parents:
diff changeset
237 // hipError_t hipLaunchByPtr(char *);
anatofuz
parents:
diff changeset
238 return CGM.CreateRuntimeFunction(
anatofuz
parents:
diff changeset
239 llvm::FunctionType::get(IntTy, CharPtrTy, false), "hipLaunchByPtr");
anatofuz
parents:
diff changeset
240 } else {
anatofuz
parents:
diff changeset
241 // cudaError_t cudaLaunch(char *);
anatofuz
parents:
diff changeset
242 return CGM.CreateRuntimeFunction(
anatofuz
parents:
diff changeset
243 llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
anatofuz
parents:
diff changeset
244 }
anatofuz
parents:
diff changeset
245 }
anatofuz
parents:
diff changeset
246
anatofuz
parents:
diff changeset
247 llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const {
anatofuz
parents:
diff changeset
248 return llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false);
anatofuz
parents:
diff changeset
249 }
anatofuz
parents:
diff changeset
250
anatofuz
parents:
diff changeset
251 llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const {
anatofuz
parents:
diff changeset
252 return llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
anatofuz
parents:
diff changeset
253 }
anatofuz
parents:
diff changeset
254
anatofuz
parents:
diff changeset
255 llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
anatofuz
parents:
diff changeset
256 auto CallbackFnTy = getCallbackFnTy();
anatofuz
parents:
diff changeset
257 auto RegisterGlobalsFnTy = getRegisterGlobalsFnTy();
anatofuz
parents:
diff changeset
258 llvm::Type *Params[] = {RegisterGlobalsFnTy->getPointerTo(), VoidPtrTy,
anatofuz
parents:
diff changeset
259 VoidPtrTy, CallbackFnTy->getPointerTo()};
anatofuz
parents:
diff changeset
260 return llvm::FunctionType::get(VoidTy, Params, false);
anatofuz
parents:
diff changeset
261 }
anatofuz
parents:
diff changeset
262
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
263 std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
264 GlobalDecl GD;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
265 // D could be either a kernel or a variable.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
266 if (auto *FD = dyn_cast<FunctionDecl>(ND))
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
267 GD = GlobalDecl(FD, KernelReferenceKind::Kernel);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
268 else
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
269 GD = GlobalDecl(ND);
150
anatofuz
parents:
diff changeset
270 std::string DeviceSideName;
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
271 MangleContext *MC;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
272 if (CGM.getLangOpts().CUDAIsDevice)
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
273 MC = &CGM.getCXXABI().getMangleContext();
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
274 else
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
275 MC = DeviceMC.get();
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
276 if (MC->shouldMangleDeclName(ND)) {
150
anatofuz
parents:
diff changeset
277 SmallString<256> Buffer;
anatofuz
parents:
diff changeset
278 llvm::raw_svector_ostream Out(Buffer);
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
279 MC->mangleName(GD, Out);
150
anatofuz
parents:
diff changeset
280 DeviceSideName = std::string(Out.str());
anatofuz
parents:
diff changeset
281 } else
anatofuz
parents:
diff changeset
282 DeviceSideName = std::string(ND->getIdentifier()->getName());
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
283
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
284 // Make unique name for device side static file-scope variable for HIP.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
285 if (CGM.getContext().shouldExternalizeStaticVar(ND) &&
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
286 CGM.getLangOpts().GPURelocatableDeviceCode &&
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
287 !CGM.getLangOpts().CUID.empty()) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
288 SmallString<256> Buffer;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
289 llvm::raw_svector_ostream Out(Buffer);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
290 Out << DeviceSideName;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
291 CGM.printPostfixForExternalizedStaticVar(Out);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
292 DeviceSideName = std::string(Out.str());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
293 }
150
anatofuz
parents:
diff changeset
294 return DeviceSideName;
anatofuz
parents:
diff changeset
295 }
anatofuz
parents:
diff changeset
296
anatofuz
parents:
diff changeset
297 void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
anatofuz
parents:
diff changeset
298 FunctionArgList &Args) {
anatofuz
parents:
diff changeset
299 EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
300 if (auto *GV = dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.CurFn])) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
301 GV->setLinkage(CGF.CurFn->getLinkage());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
302 GV->setInitializer(CGF.CurFn);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
303 }
150
anatofuz
parents:
diff changeset
304 if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
anatofuz
parents:
diff changeset
305 CudaFeature::CUDA_USES_NEW_LAUNCH) ||
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
306 (CGF.getLangOpts().HIP && CGF.getLangOpts().HIPUseNewLaunchAPI))
150
anatofuz
parents:
diff changeset
307 emitDeviceStubBodyNew(CGF, Args);
anatofuz
parents:
diff changeset
308 else
anatofuz
parents:
diff changeset
309 emitDeviceStubBodyLegacy(CGF, Args);
anatofuz
parents:
diff changeset
310 }
anatofuz
parents:
diff changeset
311
anatofuz
parents:
diff changeset
312 // CUDA 9.0+ uses new way to launch kernels. Parameters are packed in a local
anatofuz
parents:
diff changeset
313 // array and kernels are launched using cudaLaunchKernel().
anatofuz
parents:
diff changeset
314 void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
anatofuz
parents:
diff changeset
315 FunctionArgList &Args) {
anatofuz
parents:
diff changeset
316 // Build the shadow stack entry at the very start of the function.
anatofuz
parents:
diff changeset
317
anatofuz
parents:
diff changeset
318 // Calculate amount of space we will need for all arguments. If we have no
anatofuz
parents:
diff changeset
319 // args, allocate a single pointer so we still have a valid pointer to the
anatofuz
parents:
diff changeset
320 // argument array that we can pass to runtime, even if it will be unused.
anatofuz
parents:
diff changeset
321 Address KernelArgs = CGF.CreateTempAlloca(
anatofuz
parents:
diff changeset
322 VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args",
anatofuz
parents:
diff changeset
323 llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size())));
anatofuz
parents:
diff changeset
324 // Store pointers to the arguments in a locally allocated launch_args.
anatofuz
parents:
diff changeset
325 for (unsigned i = 0; i < Args.size(); ++i) {
anatofuz
parents:
diff changeset
326 llvm::Value* VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer();
anatofuz
parents:
diff changeset
327 llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy);
anatofuz
parents:
diff changeset
328 CGF.Builder.CreateDefaultAlignedStore(
anatofuz
parents:
diff changeset
329 VoidVarPtr, CGF.Builder.CreateConstGEP1_32(KernelArgs.getPointer(), i));
anatofuz
parents:
diff changeset
330 }
anatofuz
parents:
diff changeset
331
anatofuz
parents:
diff changeset
332 llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
anatofuz
parents:
diff changeset
333
anatofuz
parents:
diff changeset
334 // Lookup cudaLaunchKernel/hipLaunchKernel function.
anatofuz
parents:
diff changeset
335 // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
anatofuz
parents:
diff changeset
336 // void **args, size_t sharedMem,
anatofuz
parents:
diff changeset
337 // cudaStream_t stream);
anatofuz
parents:
diff changeset
338 // hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
anatofuz
parents:
diff changeset
339 // void **args, size_t sharedMem,
anatofuz
parents:
diff changeset
340 // hipStream_t stream);
anatofuz
parents:
diff changeset
341 TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
anatofuz
parents:
diff changeset
342 DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
anatofuz
parents:
diff changeset
343 auto LaunchKernelName = addPrefixToName("LaunchKernel");
anatofuz
parents:
diff changeset
344 IdentifierInfo &cudaLaunchKernelII =
anatofuz
parents:
diff changeset
345 CGM.getContext().Idents.get(LaunchKernelName);
anatofuz
parents:
diff changeset
346 FunctionDecl *cudaLaunchKernelFD = nullptr;
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
347 for (auto *Result : DC->lookup(&cudaLaunchKernelII)) {
150
anatofuz
parents:
diff changeset
348 if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result))
anatofuz
parents:
diff changeset
349 cudaLaunchKernelFD = FD;
anatofuz
parents:
diff changeset
350 }
anatofuz
parents:
diff changeset
351
anatofuz
parents:
diff changeset
352 if (cudaLaunchKernelFD == nullptr) {
anatofuz
parents:
diff changeset
353 CGM.Error(CGF.CurFuncDecl->getLocation(),
anatofuz
parents:
diff changeset
354 "Can't find declaration for " + LaunchKernelName);
anatofuz
parents:
diff changeset
355 return;
anatofuz
parents:
diff changeset
356 }
anatofuz
parents:
diff changeset
357 // Create temporary dim3 grid_dim, block_dim.
anatofuz
parents:
diff changeset
358 ParmVarDecl *GridDimParam = cudaLaunchKernelFD->getParamDecl(1);
anatofuz
parents:
diff changeset
359 QualType Dim3Ty = GridDimParam->getType();
anatofuz
parents:
diff changeset
360 Address GridDim =
anatofuz
parents:
diff changeset
361 CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "grid_dim");
anatofuz
parents:
diff changeset
362 Address BlockDim =
anatofuz
parents:
diff changeset
363 CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "block_dim");
anatofuz
parents:
diff changeset
364 Address ShmemSize =
anatofuz
parents:
diff changeset
365 CGF.CreateTempAlloca(SizeTy, CGM.getSizeAlign(), "shmem_size");
anatofuz
parents:
diff changeset
366 Address Stream =
anatofuz
parents:
diff changeset
367 CGF.CreateTempAlloca(VoidPtrTy, CGM.getPointerAlign(), "stream");
anatofuz
parents:
diff changeset
368 llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction(
anatofuz
parents:
diff changeset
369 llvm::FunctionType::get(IntTy,
anatofuz
parents:
diff changeset
370 {/*gridDim=*/GridDim.getType(),
anatofuz
parents:
diff changeset
371 /*blockDim=*/BlockDim.getType(),
anatofuz
parents:
diff changeset
372 /*ShmemSize=*/ShmemSize.getType(),
anatofuz
parents:
diff changeset
373 /*Stream=*/Stream.getType()},
anatofuz
parents:
diff changeset
374 /*isVarArg=*/false),
anatofuz
parents:
diff changeset
375 addUnderscoredPrefixToName("PopCallConfiguration"));
anatofuz
parents:
diff changeset
376
anatofuz
parents:
diff changeset
377 CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn,
anatofuz
parents:
diff changeset
378 {GridDim.getPointer(), BlockDim.getPointer(),
anatofuz
parents:
diff changeset
379 ShmemSize.getPointer(), Stream.getPointer()});
anatofuz
parents:
diff changeset
380
anatofuz
parents:
diff changeset
381 // Emit the call to cudaLaunch
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
382 llvm::Value *Kernel =
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
383 CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], VoidPtrTy);
150
anatofuz
parents:
diff changeset
384 CallArgList LaunchKernelArgs;
anatofuz
parents:
diff changeset
385 LaunchKernelArgs.add(RValue::get(Kernel),
anatofuz
parents:
diff changeset
386 cudaLaunchKernelFD->getParamDecl(0)->getType());
anatofuz
parents:
diff changeset
387 LaunchKernelArgs.add(RValue::getAggregate(GridDim), Dim3Ty);
anatofuz
parents:
diff changeset
388 LaunchKernelArgs.add(RValue::getAggregate(BlockDim), Dim3Ty);
anatofuz
parents:
diff changeset
389 LaunchKernelArgs.add(RValue::get(KernelArgs.getPointer()),
anatofuz
parents:
diff changeset
390 cudaLaunchKernelFD->getParamDecl(3)->getType());
anatofuz
parents:
diff changeset
391 LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(ShmemSize)),
anatofuz
parents:
diff changeset
392 cudaLaunchKernelFD->getParamDecl(4)->getType());
anatofuz
parents:
diff changeset
393 LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(Stream)),
anatofuz
parents:
diff changeset
394 cudaLaunchKernelFD->getParamDecl(5)->getType());
anatofuz
parents:
diff changeset
395
anatofuz
parents:
diff changeset
396 QualType QT = cudaLaunchKernelFD->getType();
anatofuz
parents:
diff changeset
397 QualType CQT = QT.getCanonicalType();
anatofuz
parents:
diff changeset
398 llvm::Type *Ty = CGM.getTypes().ConvertType(CQT);
anatofuz
parents:
diff changeset
399 llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>(Ty);
anatofuz
parents:
diff changeset
400
anatofuz
parents:
diff changeset
401 const CGFunctionInfo &FI =
anatofuz
parents:
diff changeset
402 CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD);
anatofuz
parents:
diff changeset
403 llvm::FunctionCallee cudaLaunchKernelFn =
anatofuz
parents:
diff changeset
404 CGM.CreateRuntimeFunction(FTy, LaunchKernelName);
anatofuz
parents:
diff changeset
405 CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(),
anatofuz
parents:
diff changeset
406 LaunchKernelArgs);
anatofuz
parents:
diff changeset
407 CGF.EmitBranch(EndBlock);
anatofuz
parents:
diff changeset
408
anatofuz
parents:
diff changeset
409 CGF.EmitBlock(EndBlock);
anatofuz
parents:
diff changeset
410 }
anatofuz
parents:
diff changeset
411
anatofuz
parents:
diff changeset
412 void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
anatofuz
parents:
diff changeset
413 FunctionArgList &Args) {
anatofuz
parents:
diff changeset
414 // Emit a call to cudaSetupArgument for each arg in Args.
anatofuz
parents:
diff changeset
415 llvm::FunctionCallee cudaSetupArgFn = getSetupArgumentFn();
anatofuz
parents:
diff changeset
416 llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
anatofuz
parents:
diff changeset
417 CharUnits Offset = CharUnits::Zero();
anatofuz
parents:
diff changeset
418 for (const VarDecl *A : Args) {
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
419 auto TInfo = CGM.getContext().getTypeInfoInChars(A->getType());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
420 Offset = Offset.alignTo(TInfo.Align);
150
anatofuz
parents:
diff changeset
421 llvm::Value *Args[] = {
anatofuz
parents:
diff changeset
422 CGF.Builder.CreatePointerCast(CGF.GetAddrOfLocalVar(A).getPointer(),
anatofuz
parents:
diff changeset
423 VoidPtrTy),
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
424 llvm::ConstantInt::get(SizeTy, TInfo.Width.getQuantity()),
150
anatofuz
parents:
diff changeset
425 llvm::ConstantInt::get(SizeTy, Offset.getQuantity()),
anatofuz
parents:
diff changeset
426 };
anatofuz
parents:
diff changeset
427 llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args);
anatofuz
parents:
diff changeset
428 llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
anatofuz
parents:
diff changeset
429 llvm::Value *CBZero = CGF.Builder.CreateICmpEQ(CB, Zero);
anatofuz
parents:
diff changeset
430 llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
anatofuz
parents:
diff changeset
431 CGF.Builder.CreateCondBr(CBZero, NextBlock, EndBlock);
anatofuz
parents:
diff changeset
432 CGF.EmitBlock(NextBlock);
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
433 Offset += TInfo.Width;
150
anatofuz
parents:
diff changeset
434 }
anatofuz
parents:
diff changeset
435
anatofuz
parents:
diff changeset
436 // Emit the call to cudaLaunch
anatofuz
parents:
diff changeset
437 llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
438 llvm::Value *Arg =
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
439 CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], CharPtrTy);
150
anatofuz
parents:
diff changeset
440 CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
anatofuz
parents:
diff changeset
441 CGF.EmitBranch(EndBlock);
anatofuz
parents:
diff changeset
442
anatofuz
parents:
diff changeset
443 CGF.EmitBlock(EndBlock);
anatofuz
parents:
diff changeset
444 }
anatofuz
parents:
diff changeset
445
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
446 // Replace the original variable Var with the address loaded from variable
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
447 // ManagedVar populated by HIP runtime.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
448 static void replaceManagedVar(llvm::GlobalVariable *Var,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
449 llvm::GlobalVariable *ManagedVar) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
450 SmallVector<SmallVector<llvm::User *, 8>, 8> WorkList;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
451 for (auto &&VarUse : Var->uses()) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
452 WorkList.push_back({VarUse.getUser()});
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
453 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
454 while (!WorkList.empty()) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
455 auto &&WorkItem = WorkList.pop_back_val();
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
456 auto *U = WorkItem.back();
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
457 if (isa<llvm::ConstantExpr>(U)) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
458 for (auto &&UU : U->uses()) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
459 WorkItem.push_back(UU.getUser());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
460 WorkList.push_back(WorkItem);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
461 WorkItem.pop_back();
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
462 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
463 continue;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
464 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
465 if (auto *I = dyn_cast<llvm::Instruction>(U)) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
466 llvm::Value *OldV = Var;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
467 llvm::Instruction *NewV =
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
468 new llvm::LoadInst(Var->getType(), ManagedVar, "ld.managed", false,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
469 llvm::Align(Var->getAlignment()), I);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
470 WorkItem.pop_back();
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
471 // Replace constant expressions directly or indirectly using the managed
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
472 // variable with instructions.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
473 for (auto &&Op : WorkItem) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
474 auto *CE = cast<llvm::ConstantExpr>(Op);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
475 auto *NewInst = llvm::createReplacementInstr(CE, I);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
476 NewInst->replaceUsesOfWith(OldV, NewV);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
477 OldV = CE;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
478 NewV = NewInst;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
479 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
480 I->replaceUsesOfWith(OldV, NewV);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
481 } else {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
482 llvm_unreachable("Invalid use of managed variable");
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
483 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
484 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
485 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
486
150
anatofuz
parents:
diff changeset
487 /// Creates a function that sets up state on the host side for CUDA objects that
anatofuz
parents:
diff changeset
488 /// have a presence on both the host and device sides. Specifically, registers
anatofuz
parents:
diff changeset
489 /// the host side of kernel functions and device global variables with the CUDA
anatofuz
parents:
diff changeset
490 /// runtime.
anatofuz
parents:
diff changeset
491 /// \code
anatofuz
parents:
diff changeset
492 /// void __cuda_register_globals(void** GpuBinaryHandle) {
anatofuz
parents:
diff changeset
493 /// __cudaRegisterFunction(GpuBinaryHandle,Kernel0,...);
anatofuz
parents:
diff changeset
494 /// ...
anatofuz
parents:
diff changeset
495 /// __cudaRegisterFunction(GpuBinaryHandle,KernelM,...);
anatofuz
parents:
diff changeset
496 /// __cudaRegisterVar(GpuBinaryHandle, GlobalVar0, ...);
anatofuz
parents:
diff changeset
497 /// ...
anatofuz
parents:
diff changeset
498 /// __cudaRegisterVar(GpuBinaryHandle, GlobalVarN, ...);
anatofuz
parents:
diff changeset
499 /// }
anatofuz
parents:
diff changeset
500 /// \endcode
anatofuz
parents:
diff changeset
501 llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
anatofuz
parents:
diff changeset
502 // No need to register anything
anatofuz
parents:
diff changeset
503 if (EmittedKernels.empty() && DeviceVars.empty())
anatofuz
parents:
diff changeset
504 return nullptr;
anatofuz
parents:
diff changeset
505
anatofuz
parents:
diff changeset
506 llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
anatofuz
parents:
diff changeset
507 getRegisterGlobalsFnTy(), llvm::GlobalValue::InternalLinkage,
anatofuz
parents:
diff changeset
508 addUnderscoredPrefixToName("_register_globals"), &TheModule);
anatofuz
parents:
diff changeset
509 llvm::BasicBlock *EntryBB =
anatofuz
parents:
diff changeset
510 llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc);
anatofuz
parents:
diff changeset
511 CGBuilderTy Builder(CGM, Context);
anatofuz
parents:
diff changeset
512 Builder.SetInsertPoint(EntryBB);
anatofuz
parents:
diff changeset
513
anatofuz
parents:
diff changeset
514 // void __cudaRegisterFunction(void **, const char *, char *, const char *,
anatofuz
parents:
diff changeset
515 // int, uint3*, uint3*, dim3*, dim3*, int*)
anatofuz
parents:
diff changeset
516 llvm::Type *RegisterFuncParams[] = {
anatofuz
parents:
diff changeset
517 VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy,
anatofuz
parents:
diff changeset
518 VoidPtrTy, VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()};
anatofuz
parents:
diff changeset
519 llvm::FunctionCallee RegisterFunc = CGM.CreateRuntimeFunction(
anatofuz
parents:
diff changeset
520 llvm::FunctionType::get(IntTy, RegisterFuncParams, false),
anatofuz
parents:
diff changeset
521 addUnderscoredPrefixToName("RegisterFunction"));
anatofuz
parents:
diff changeset
522
anatofuz
parents:
diff changeset
523 // Extract GpuBinaryHandle passed as the first argument passed to
anatofuz
parents:
diff changeset
524 // __cuda_register_globals() and generate __cudaRegisterFunction() call for
anatofuz
parents:
diff changeset
525 // each emitted kernel.
anatofuz
parents:
diff changeset
526 llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin();
anatofuz
parents:
diff changeset
527 for (auto &&I : EmittedKernels) {
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
528 llvm::Constant *KernelName =
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
529 makeConstantString(getDeviceSideName(cast<NamedDecl>(I.D)));
150
anatofuz
parents:
diff changeset
530 llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
anatofuz
parents:
diff changeset
531 llvm::Value *Args[] = {
anatofuz
parents:
diff changeset
532 &GpuBinaryHandlePtr,
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
533 Builder.CreateBitCast(KernelHandles[I.Kernel], VoidPtrTy),
150
anatofuz
parents:
diff changeset
534 KernelName,
anatofuz
parents:
diff changeset
535 KernelName,
anatofuz
parents:
diff changeset
536 llvm::ConstantInt::get(IntTy, -1),
anatofuz
parents:
diff changeset
537 NullPtr,
anatofuz
parents:
diff changeset
538 NullPtr,
anatofuz
parents:
diff changeset
539 NullPtr,
anatofuz
parents:
diff changeset
540 NullPtr,
anatofuz
parents:
diff changeset
541 llvm::ConstantPointerNull::get(IntTy->getPointerTo())};
anatofuz
parents:
diff changeset
542 Builder.CreateCall(RegisterFunc, Args);
anatofuz
parents:
diff changeset
543 }
anatofuz
parents:
diff changeset
544
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
545 llvm::Type *VarSizeTy = IntTy;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
546 // For HIP or CUDA 9.0+, device variable size is type of `size_t`.
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
547 if (CGM.getLangOpts().HIP ||
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
548 ToCudaVersion(CGM.getTarget().getSDKVersion()) >= CudaVersion::CUDA_90)
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
549 VarSizeTy = SizeTy;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
550
150
anatofuz
parents:
diff changeset
551 // void __cudaRegisterVar(void **, char *, char *, const char *,
anatofuz
parents:
diff changeset
552 // int, int, int, int)
anatofuz
parents:
diff changeset
553 llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
554 CharPtrTy, IntTy, VarSizeTy,
150
anatofuz
parents:
diff changeset
555 IntTy, IntTy};
anatofuz
parents:
diff changeset
556 llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction(
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
557 llvm::FunctionType::get(VoidTy, RegisterVarParams, false),
150
anatofuz
parents:
diff changeset
558 addUnderscoredPrefixToName("RegisterVar"));
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
559 // void __hipRegisterManagedVar(void **, char *, char *, const char *,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
560 // size_t, unsigned)
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
561 llvm::Type *RegisterManagedVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
562 CharPtrTy, VarSizeTy, IntTy};
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
563 llvm::FunctionCallee RegisterManagedVar = CGM.CreateRuntimeFunction(
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
564 llvm::FunctionType::get(VoidTy, RegisterManagedVarParams, false),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
565 addUnderscoredPrefixToName("RegisterManagedVar"));
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
566 // void __cudaRegisterSurface(void **, const struct surfaceReference *,
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
567 // const void **, const char *, int, int);
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
568 llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction(
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
569 llvm::FunctionType::get(
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
570 VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy},
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
571 false),
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
572 addUnderscoredPrefixToName("RegisterSurface"));
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
573 // void __cudaRegisterTexture(void **, const struct textureReference *,
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
574 // const void **, const char *, int, int, int)
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
575 llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction(
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
576 llvm::FunctionType::get(
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
577 VoidTy,
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
578 {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy},
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
579 false),
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
580 addUnderscoredPrefixToName("RegisterTexture"));
150
anatofuz
parents:
diff changeset
581 for (auto &&Info : DeviceVars) {
anatofuz
parents:
diff changeset
582 llvm::GlobalVariable *Var = Info.Var;
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
583 assert((!Var->isDeclaration() || Info.Flags.isManaged()) &&
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
584 "External variables should not show up here, except HIP managed "
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
585 "variables");
150
anatofuz
parents:
diff changeset
586 llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D));
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
587 switch (Info.Flags.getKind()) {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
588 case DeviceVarFlags::Variable: {
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
589 uint64_t VarSize =
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
590 CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
591 if (Info.Flags.isManaged()) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
592 auto ManagedVar = new llvm::GlobalVariable(
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
593 CGM.getModule(), Var->getType(),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
594 /*isConstant=*/false, Var->getLinkage(),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
595 /*Init=*/Var->isDeclaration()
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
596 ? nullptr
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
597 : llvm::ConstantPointerNull::get(Var->getType()),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
598 /*Name=*/"", /*InsertBefore=*/nullptr,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
599 llvm::GlobalVariable::NotThreadLocal);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
600 ManagedVar->setDSOLocal(Var->isDSOLocal());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
601 ManagedVar->setVisibility(Var->getVisibility());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
602 ManagedVar->setExternallyInitialized(true);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
603 ManagedVar->takeName(Var);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
604 Var->setName(Twine(ManagedVar->getName() + ".managed"));
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
605 replaceManagedVar(Var, ManagedVar);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
606 llvm::Value *Args[] = {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
607 &GpuBinaryHandlePtr,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
608 Builder.CreateBitCast(ManagedVar, VoidPtrTy),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
609 Builder.CreateBitCast(Var, VoidPtrTy),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
610 VarName,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
611 llvm::ConstantInt::get(VarSizeTy, VarSize),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
612 llvm::ConstantInt::get(IntTy, Var->getAlignment())};
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
613 if (!Var->isDeclaration())
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
614 Builder.CreateCall(RegisterManagedVar, Args);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
615 } else {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
616 llvm::Value *Args[] = {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
617 &GpuBinaryHandlePtr,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
618 Builder.CreateBitCast(Var, VoidPtrTy),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
619 VarName,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
620 VarName,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
621 llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
622 llvm::ConstantInt::get(VarSizeTy, VarSize),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
623 llvm::ConstantInt::get(IntTy, Info.Flags.isConstant()),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
624 llvm::ConstantInt::get(IntTy, 0)};
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
625 Builder.CreateCall(RegisterVar, Args);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
626 }
173
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
627 break;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
628 }
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
629 case DeviceVarFlags::Surface:
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
630 Builder.CreateCall(
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
631 RegisterSurf,
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
632 {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName,
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
633 VarName, llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
634 llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
635 break;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
636 case DeviceVarFlags::Texture:
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
637 Builder.CreateCall(
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
638 RegisterTex,
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
639 {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName,
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
640 VarName, llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
641 llvm::ConstantInt::get(IntTy, Info.Flags.isNormalized()),
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
642 llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
643 break;
0572611fdcc8 reorgnization done
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 150
diff changeset
644 }
150
anatofuz
parents:
diff changeset
645 }
anatofuz
parents:
diff changeset
646
anatofuz
parents:
diff changeset
647 Builder.CreateRetVoid();
anatofuz
parents:
diff changeset
648 return RegisterKernelsFunc;
anatofuz
parents:
diff changeset
649 }
anatofuz
parents:
diff changeset
650
anatofuz
parents:
diff changeset
651 /// Creates a global constructor function for the module:
anatofuz
parents:
diff changeset
652 ///
anatofuz
parents:
diff changeset
653 /// For CUDA:
anatofuz
parents:
diff changeset
654 /// \code
anatofuz
parents:
diff changeset
655 /// void __cuda_module_ctor(void*) {
anatofuz
parents:
diff changeset
656 /// Handle = __cudaRegisterFatBinary(GpuBinaryBlob);
anatofuz
parents:
diff changeset
657 /// __cuda_register_globals(Handle);
anatofuz
parents:
diff changeset
658 /// }
anatofuz
parents:
diff changeset
659 /// \endcode
anatofuz
parents:
diff changeset
660 ///
anatofuz
parents:
diff changeset
661 /// For HIP:
anatofuz
parents:
diff changeset
662 /// \code
anatofuz
parents:
diff changeset
663 /// void __hip_module_ctor(void*) {
anatofuz
parents:
diff changeset
664 /// if (__hip_gpubin_handle == 0) {
anatofuz
parents:
diff changeset
665 /// __hip_gpubin_handle = __hipRegisterFatBinary(GpuBinaryBlob);
anatofuz
parents:
diff changeset
666 /// __hip_register_globals(__hip_gpubin_handle);
anatofuz
parents:
diff changeset
667 /// }
anatofuz
parents:
diff changeset
668 /// }
anatofuz
parents:
diff changeset
669 /// \endcode
anatofuz
parents:
diff changeset
670 llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
anatofuz
parents:
diff changeset
671 bool IsHIP = CGM.getLangOpts().HIP;
anatofuz
parents:
diff changeset
672 bool IsCUDA = CGM.getLangOpts().CUDA;
anatofuz
parents:
diff changeset
673 // No need to generate ctors/dtors if there is no GPU binary.
anatofuz
parents:
diff changeset
674 StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName;
anatofuz
parents:
diff changeset
675 if (CudaGpuBinaryFileName.empty() && !IsHIP)
anatofuz
parents:
diff changeset
676 return nullptr;
anatofuz
parents:
diff changeset
677 if ((IsHIP || (IsCUDA && !RelocatableDeviceCode)) && EmittedKernels.empty() &&
anatofuz
parents:
diff changeset
678 DeviceVars.empty())
anatofuz
parents:
diff changeset
679 return nullptr;
anatofuz
parents:
diff changeset
680
anatofuz
parents:
diff changeset
681 // void __{cuda|hip}_register_globals(void* handle);
anatofuz
parents:
diff changeset
682 llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
anatofuz
parents:
diff changeset
683 // We always need a function to pass in as callback. Create a dummy
anatofuz
parents:
diff changeset
684 // implementation if we don't need to register anything.
anatofuz
parents:
diff changeset
685 if (RelocatableDeviceCode && !RegisterGlobalsFunc)
anatofuz
parents:
diff changeset
686 RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy());
anatofuz
parents:
diff changeset
687
anatofuz
parents:
diff changeset
688 // void ** __{cuda|hip}RegisterFatBinary(void *);
anatofuz
parents:
diff changeset
689 llvm::FunctionCallee RegisterFatbinFunc = CGM.CreateRuntimeFunction(
anatofuz
parents:
diff changeset
690 llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
anatofuz
parents:
diff changeset
691 addUnderscoredPrefixToName("RegisterFatBinary"));
anatofuz
parents:
diff changeset
692 // struct { int magic, int version, void * gpu_binary, void * dont_care };
anatofuz
parents:
diff changeset
693 llvm::StructType *FatbinWrapperTy =
anatofuz
parents:
diff changeset
694 llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);
anatofuz
parents:
diff changeset
695
anatofuz
parents:
diff changeset
696 // Register GPU binary with the CUDA runtime, store returned handle in a
anatofuz
parents:
diff changeset
697 // global variable and save a reference in GpuBinaryHandle to be cleaned up
anatofuz
parents:
diff changeset
698 // in destructor on exit. Then associate all known kernels with the GPU binary
anatofuz
parents:
diff changeset
699 // handle so CUDA runtime can figure out what to call on the GPU side.
anatofuz
parents:
diff changeset
700 std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary = nullptr;
anatofuz
parents:
diff changeset
701 if (!CudaGpuBinaryFileName.empty()) {
anatofuz
parents:
diff changeset
702 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr =
anatofuz
parents:
diff changeset
703 llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName);
anatofuz
parents:
diff changeset
704 if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {
anatofuz
parents:
diff changeset
705 CGM.getDiags().Report(diag::err_cannot_open_file)
anatofuz
parents:
diff changeset
706 << CudaGpuBinaryFileName << EC.message();
anatofuz
parents:
diff changeset
707 return nullptr;
anatofuz
parents:
diff changeset
708 }
anatofuz
parents:
diff changeset
709 CudaGpuBinary = std::move(CudaGpuBinaryOrErr.get());
anatofuz
parents:
diff changeset
710 }
anatofuz
parents:
diff changeset
711
anatofuz
parents:
diff changeset
712 llvm::Function *ModuleCtorFunc = llvm::Function::Create(
anatofuz
parents:
diff changeset
713 llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
anatofuz
parents:
diff changeset
714 llvm::GlobalValue::InternalLinkage,
anatofuz
parents:
diff changeset
715 addUnderscoredPrefixToName("_module_ctor"), &TheModule);
anatofuz
parents:
diff changeset
716 llvm::BasicBlock *CtorEntryBB =
anatofuz
parents:
diff changeset
717 llvm::BasicBlock::Create(Context, "entry", ModuleCtorFunc);
anatofuz
parents:
diff changeset
718 CGBuilderTy CtorBuilder(CGM, Context);
anatofuz
parents:
diff changeset
719
anatofuz
parents:
diff changeset
720 CtorBuilder.SetInsertPoint(CtorEntryBB);
anatofuz
parents:
diff changeset
721
anatofuz
parents:
diff changeset
722 const char *FatbinConstantName;
anatofuz
parents:
diff changeset
723 const char *FatbinSectionName;
anatofuz
parents:
diff changeset
724 const char *ModuleIDSectionName;
anatofuz
parents:
diff changeset
725 StringRef ModuleIDPrefix;
anatofuz
parents:
diff changeset
726 llvm::Constant *FatBinStr;
anatofuz
parents:
diff changeset
727 unsigned FatMagic;
anatofuz
parents:
diff changeset
728 if (IsHIP) {
anatofuz
parents:
diff changeset
729 FatbinConstantName = ".hip_fatbin";
anatofuz
parents:
diff changeset
730 FatbinSectionName = ".hipFatBinSegment";
anatofuz
parents:
diff changeset
731
anatofuz
parents:
diff changeset
732 ModuleIDSectionName = "__hip_module_id";
anatofuz
parents:
diff changeset
733 ModuleIDPrefix = "__hip_";
anatofuz
parents:
diff changeset
734
anatofuz
parents:
diff changeset
735 if (CudaGpuBinary) {
anatofuz
parents:
diff changeset
736 // If fatbin is available from early finalization, create a string
anatofuz
parents:
diff changeset
737 // literal containing the fat binary loaded from the given file.
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
738 const unsigned HIPCodeObjectAlign = 4096;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
739 FatBinStr =
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
740 makeConstantString(std::string(CudaGpuBinary->getBuffer()), "",
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
741 FatbinConstantName, HIPCodeObjectAlign);
150
anatofuz
parents:
diff changeset
742 } else {
anatofuz
parents:
diff changeset
743 // If fatbin is not available, create an external symbol
anatofuz
parents:
diff changeset
744 // __hip_fatbin in section .hip_fatbin. The external symbol is supposed
anatofuz
parents:
diff changeset
745 // to contain the fat binary but will be populated somewhere else,
anatofuz
parents:
diff changeset
746 // e.g. by lld through link script.
anatofuz
parents:
diff changeset
747 FatBinStr = new llvm::GlobalVariable(
anatofuz
parents:
diff changeset
748 CGM.getModule(), CGM.Int8Ty,
anatofuz
parents:
diff changeset
749 /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr,
anatofuz
parents:
diff changeset
750 "__hip_fatbin", nullptr,
anatofuz
parents:
diff changeset
751 llvm::GlobalVariable::NotThreadLocal);
anatofuz
parents:
diff changeset
752 cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
anatofuz
parents:
diff changeset
753 }
anatofuz
parents:
diff changeset
754
anatofuz
parents:
diff changeset
755 FatMagic = HIPFatMagic;
anatofuz
parents:
diff changeset
756 } else {
anatofuz
parents:
diff changeset
757 if (RelocatableDeviceCode)
anatofuz
parents:
diff changeset
758 FatbinConstantName = CGM.getTriple().isMacOSX()
anatofuz
parents:
diff changeset
759 ? "__NV_CUDA,__nv_relfatbin"
anatofuz
parents:
diff changeset
760 : "__nv_relfatbin";
anatofuz
parents:
diff changeset
761 else
anatofuz
parents:
diff changeset
762 FatbinConstantName =
anatofuz
parents:
diff changeset
763 CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
anatofuz
parents:
diff changeset
764 // NVIDIA's cuobjdump looks for fatbins in this section.
anatofuz
parents:
diff changeset
765 FatbinSectionName =
anatofuz
parents:
diff changeset
766 CGM.getTriple().isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment";
anatofuz
parents:
diff changeset
767
anatofuz
parents:
diff changeset
768 ModuleIDSectionName = CGM.getTriple().isMacOSX()
anatofuz
parents:
diff changeset
769 ? "__NV_CUDA,__nv_module_id"
anatofuz
parents:
diff changeset
770 : "__nv_module_id";
anatofuz
parents:
diff changeset
771 ModuleIDPrefix = "__nv_";
anatofuz
parents:
diff changeset
772
anatofuz
parents:
diff changeset
773 // For CUDA, create a string literal containing the fat binary loaded from
anatofuz
parents:
diff changeset
774 // the given file.
anatofuz
parents:
diff changeset
775 FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()), "",
anatofuz
parents:
diff changeset
776 FatbinConstantName, 8);
anatofuz
parents:
diff changeset
777 FatMagic = CudaFatMagic;
anatofuz
parents:
diff changeset
778 }
anatofuz
parents:
diff changeset
779
anatofuz
parents:
diff changeset
780 // Create initialized wrapper structure that points to the loaded GPU binary
anatofuz
parents:
diff changeset
781 ConstantInitBuilder Builder(CGM);
anatofuz
parents:
diff changeset
782 auto Values = Builder.beginStruct(FatbinWrapperTy);
anatofuz
parents:
diff changeset
783 // Fatbin wrapper magic.
anatofuz
parents:
diff changeset
784 Values.addInt(IntTy, FatMagic);
anatofuz
parents:
diff changeset
785 // Fatbin version.
anatofuz
parents:
diff changeset
786 Values.addInt(IntTy, 1);
anatofuz
parents:
diff changeset
787 // Data.
anatofuz
parents:
diff changeset
788 Values.add(FatBinStr);
anatofuz
parents:
diff changeset
789 // Unused in fatbin v1.
anatofuz
parents:
diff changeset
790 Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
anatofuz
parents:
diff changeset
791 llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
anatofuz
parents:
diff changeset
792 addUnderscoredPrefixToName("_fatbin_wrapper"), CGM.getPointerAlign(),
anatofuz
parents:
diff changeset
793 /*constant*/ true);
anatofuz
parents:
diff changeset
794 FatbinWrapper->setSection(FatbinSectionName);
anatofuz
parents:
diff changeset
795
anatofuz
parents:
diff changeset
796 // There is only one HIP fat binary per linked module, however there are
anatofuz
parents:
diff changeset
797 // multiple constructor functions. Make sure the fat binary is registered
anatofuz
parents:
diff changeset
798 // only once. The constructor functions are executed by the dynamic loader
anatofuz
parents:
diff changeset
799 // before the program gains control. The dynamic loader cannot execute the
anatofuz
parents:
diff changeset
800 // constructor functions concurrently since doing that would not guarantee
anatofuz
parents:
diff changeset
801 // thread safety of the loaded program. Therefore we can assume sequential
anatofuz
parents:
diff changeset
802 // execution of constructor functions here.
anatofuz
parents:
diff changeset
803 if (IsHIP) {
anatofuz
parents:
diff changeset
804 auto Linkage = CudaGpuBinary ? llvm::GlobalValue::InternalLinkage :
anatofuz
parents:
diff changeset
805 llvm::GlobalValue::LinkOnceAnyLinkage;
anatofuz
parents:
diff changeset
806 llvm::BasicBlock *IfBlock =
anatofuz
parents:
diff changeset
807 llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc);
anatofuz
parents:
diff changeset
808 llvm::BasicBlock *ExitBlock =
anatofuz
parents:
diff changeset
809 llvm::BasicBlock::Create(Context, "exit", ModuleCtorFunc);
anatofuz
parents:
diff changeset
810 // The name, size, and initialization pattern of this variable is part
anatofuz
parents:
diff changeset
811 // of HIP ABI.
anatofuz
parents:
diff changeset
812 GpuBinaryHandle = new llvm::GlobalVariable(
anatofuz
parents:
diff changeset
813 TheModule, VoidPtrPtrTy, /*isConstant=*/false,
anatofuz
parents:
diff changeset
814 Linkage,
anatofuz
parents:
diff changeset
815 /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
anatofuz
parents:
diff changeset
816 "__hip_gpubin_handle");
anatofuz
parents:
diff changeset
817 GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
anatofuz
parents:
diff changeset
818 // Prevent the weak symbol in different shared libraries being merged.
anatofuz
parents:
diff changeset
819 if (Linkage != llvm::GlobalValue::InternalLinkage)
anatofuz
parents:
diff changeset
820 GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
anatofuz
parents:
diff changeset
821 Address GpuBinaryAddr(
anatofuz
parents:
diff changeset
822 GpuBinaryHandle,
anatofuz
parents:
diff changeset
823 CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));
anatofuz
parents:
diff changeset
824 {
anatofuz
parents:
diff changeset
825 auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
anatofuz
parents:
diff changeset
826 llvm::Constant *Zero =
anatofuz
parents:
diff changeset
827 llvm::Constant::getNullValue(HandleValue->getType());
anatofuz
parents:
diff changeset
828 llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue, Zero);
anatofuz
parents:
diff changeset
829 CtorBuilder.CreateCondBr(EQZero, IfBlock, ExitBlock);
anatofuz
parents:
diff changeset
830 }
anatofuz
parents:
diff changeset
831 {
anatofuz
parents:
diff changeset
832 CtorBuilder.SetInsertPoint(IfBlock);
anatofuz
parents:
diff changeset
833 // GpuBinaryHandle = __hipRegisterFatBinary(&FatbinWrapper);
anatofuz
parents:
diff changeset
834 llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
anatofuz
parents:
diff changeset
835 RegisterFatbinFunc,
anatofuz
parents:
diff changeset
836 CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
anatofuz
parents:
diff changeset
837 CtorBuilder.CreateStore(RegisterFatbinCall, GpuBinaryAddr);
anatofuz
parents:
diff changeset
838 CtorBuilder.CreateBr(ExitBlock);
anatofuz
parents:
diff changeset
839 }
anatofuz
parents:
diff changeset
840 {
anatofuz
parents:
diff changeset
841 CtorBuilder.SetInsertPoint(ExitBlock);
anatofuz
parents:
diff changeset
842 // Call __hip_register_globals(GpuBinaryHandle);
anatofuz
parents:
diff changeset
843 if (RegisterGlobalsFunc) {
anatofuz
parents:
diff changeset
844 auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
anatofuz
parents:
diff changeset
845 CtorBuilder.CreateCall(RegisterGlobalsFunc, HandleValue);
anatofuz
parents:
diff changeset
846 }
anatofuz
parents:
diff changeset
847 }
anatofuz
parents:
diff changeset
848 } else if (!RelocatableDeviceCode) {
anatofuz
parents:
diff changeset
849 // Register binary with CUDA runtime. This is substantially different in
anatofuz
parents:
diff changeset
850 // default mode vs. separate compilation!
anatofuz
parents:
diff changeset
851 // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
anatofuz
parents:
diff changeset
852 llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
anatofuz
parents:
diff changeset
853 RegisterFatbinFunc,
anatofuz
parents:
diff changeset
854 CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
anatofuz
parents:
diff changeset
855 GpuBinaryHandle = new llvm::GlobalVariable(
anatofuz
parents:
diff changeset
856 TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
anatofuz
parents:
diff changeset
857 llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
anatofuz
parents:
diff changeset
858 GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
anatofuz
parents:
diff changeset
859 CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
anatofuz
parents:
diff changeset
860 CGM.getPointerAlign());
anatofuz
parents:
diff changeset
861
anatofuz
parents:
diff changeset
862 // Call __cuda_register_globals(GpuBinaryHandle);
anatofuz
parents:
diff changeset
863 if (RegisterGlobalsFunc)
anatofuz
parents:
diff changeset
864 CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);
anatofuz
parents:
diff changeset
865
anatofuz
parents:
diff changeset
866 // Call __cudaRegisterFatBinaryEnd(Handle) if this CUDA version needs it.
anatofuz
parents:
diff changeset
867 if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
anatofuz
parents:
diff changeset
868 CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) {
anatofuz
parents:
diff changeset
869 // void __cudaRegisterFatBinaryEnd(void **);
anatofuz
parents:
diff changeset
870 llvm::FunctionCallee RegisterFatbinEndFunc = CGM.CreateRuntimeFunction(
anatofuz
parents:
diff changeset
871 llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
anatofuz
parents:
diff changeset
872 "__cudaRegisterFatBinaryEnd");
anatofuz
parents:
diff changeset
873 CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall);
anatofuz
parents:
diff changeset
874 }
anatofuz
parents:
diff changeset
875 } else {
anatofuz
parents:
diff changeset
876 // Generate a unique module ID.
anatofuz
parents:
diff changeset
877 SmallString<64> ModuleID;
anatofuz
parents:
diff changeset
878 llvm::raw_svector_ostream OS(ModuleID);
anatofuz
parents:
diff changeset
879 OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID());
anatofuz
parents:
diff changeset
880 llvm::Constant *ModuleIDConstant = makeConstantString(
anatofuz
parents:
diff changeset
881 std::string(ModuleID.str()), "", ModuleIDSectionName, 32);
anatofuz
parents:
diff changeset
882
anatofuz
parents:
diff changeset
883 // Create an alias for the FatbinWrapper that nvcc will look for.
anatofuz
parents:
diff changeset
884 llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage,
anatofuz
parents:
diff changeset
885 Twine("__fatbinwrap") + ModuleID, FatbinWrapper);
anatofuz
parents:
diff changeset
886
anatofuz
parents:
diff changeset
887 // void __cudaRegisterLinkedBinary%ModuleID%(void (*)(void *), void *,
anatofuz
parents:
diff changeset
888 // void *, void (*)(void **))
anatofuz
parents:
diff changeset
889 SmallString<128> RegisterLinkedBinaryName("__cudaRegisterLinkedBinary");
anatofuz
parents:
diff changeset
890 RegisterLinkedBinaryName += ModuleID;
anatofuz
parents:
diff changeset
891 llvm::FunctionCallee RegisterLinkedBinaryFunc = CGM.CreateRuntimeFunction(
anatofuz
parents:
diff changeset
892 getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName);
anatofuz
parents:
diff changeset
893
anatofuz
parents:
diff changeset
894 assert(RegisterGlobalsFunc && "Expecting at least dummy function!");
anatofuz
parents:
diff changeset
895 llvm::Value *Args[] = {RegisterGlobalsFunc,
anatofuz
parents:
diff changeset
896 CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy),
anatofuz
parents:
diff changeset
897 ModuleIDConstant,
anatofuz
parents:
diff changeset
898 makeDummyFunction(getCallbackFnTy())};
anatofuz
parents:
diff changeset
899 CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args);
anatofuz
parents:
diff changeset
900 }
anatofuz
parents:
diff changeset
901
anatofuz
parents:
diff changeset
902 // Create destructor and register it with atexit() the way NVCC does it. Doing
anatofuz
parents:
diff changeset
903 // it during regular destructor phase worked in CUDA before 9.2 but results in
anatofuz
parents:
diff changeset
904 // double-free in 9.2.
anatofuz
parents:
diff changeset
905 if (llvm::Function *CleanupFn = makeModuleDtorFunction()) {
anatofuz
parents:
diff changeset
906 // extern "C" int atexit(void (*f)(void));
anatofuz
parents:
diff changeset
907 llvm::FunctionType *AtExitTy =
anatofuz
parents:
diff changeset
908 llvm::FunctionType::get(IntTy, CleanupFn->getType(), false);
anatofuz
parents:
diff changeset
909 llvm::FunctionCallee AtExitFunc =
anatofuz
parents:
diff changeset
910 CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(),
anatofuz
parents:
diff changeset
911 /*Local=*/true);
anatofuz
parents:
diff changeset
912 CtorBuilder.CreateCall(AtExitFunc, CleanupFn);
anatofuz
parents:
diff changeset
913 }
anatofuz
parents:
diff changeset
914
anatofuz
parents:
diff changeset
915 CtorBuilder.CreateRetVoid();
anatofuz
parents:
diff changeset
916 return ModuleCtorFunc;
anatofuz
parents:
diff changeset
917 }
anatofuz
parents:
diff changeset
918
anatofuz
parents:
diff changeset
919 /// Creates a global destructor function that unregisters the GPU code blob
anatofuz
parents:
diff changeset
920 /// registered by constructor.
anatofuz
parents:
diff changeset
921 ///
anatofuz
parents:
diff changeset
922 /// For CUDA:
anatofuz
parents:
diff changeset
923 /// \code
anatofuz
parents:
diff changeset
924 /// void __cuda_module_dtor(void*) {
anatofuz
parents:
diff changeset
925 /// __cudaUnregisterFatBinary(Handle);
anatofuz
parents:
diff changeset
926 /// }
anatofuz
parents:
diff changeset
927 /// \endcode
anatofuz
parents:
diff changeset
928 ///
anatofuz
parents:
diff changeset
929 /// For HIP:
anatofuz
parents:
diff changeset
930 /// \code
anatofuz
parents:
diff changeset
931 /// void __hip_module_dtor(void*) {
anatofuz
parents:
diff changeset
932 /// if (__hip_gpubin_handle) {
anatofuz
parents:
diff changeset
933 /// __hipUnregisterFatBinary(__hip_gpubin_handle);
anatofuz
parents:
diff changeset
934 /// __hip_gpubin_handle = 0;
anatofuz
parents:
diff changeset
935 /// }
anatofuz
parents:
diff changeset
936 /// }
anatofuz
parents:
diff changeset
937 /// \endcode
anatofuz
parents:
diff changeset
938 llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
anatofuz
parents:
diff changeset
939 // No need for destructor if we don't have a handle to unregister.
anatofuz
parents:
diff changeset
940 if (!GpuBinaryHandle)
anatofuz
parents:
diff changeset
941 return nullptr;
anatofuz
parents:
diff changeset
942
anatofuz
parents:
diff changeset
943 // void __cudaUnregisterFatBinary(void ** handle);
anatofuz
parents:
diff changeset
944 llvm::FunctionCallee UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
anatofuz
parents:
diff changeset
945 llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
anatofuz
parents:
diff changeset
946 addUnderscoredPrefixToName("UnregisterFatBinary"));
anatofuz
parents:
diff changeset
947
anatofuz
parents:
diff changeset
948 llvm::Function *ModuleDtorFunc = llvm::Function::Create(
anatofuz
parents:
diff changeset
949 llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
anatofuz
parents:
diff changeset
950 llvm::GlobalValue::InternalLinkage,
anatofuz
parents:
diff changeset
951 addUnderscoredPrefixToName("_module_dtor"), &TheModule);
anatofuz
parents:
diff changeset
952
anatofuz
parents:
diff changeset
953 llvm::BasicBlock *DtorEntryBB =
anatofuz
parents:
diff changeset
954 llvm::BasicBlock::Create(Context, "entry", ModuleDtorFunc);
anatofuz
parents:
diff changeset
955 CGBuilderTy DtorBuilder(CGM, Context);
anatofuz
parents:
diff changeset
956 DtorBuilder.SetInsertPoint(DtorEntryBB);
anatofuz
parents:
diff changeset
957
anatofuz
parents:
diff changeset
958 Address GpuBinaryAddr(GpuBinaryHandle, CharUnits::fromQuantity(
anatofuz
parents:
diff changeset
959 GpuBinaryHandle->getAlignment()));
anatofuz
parents:
diff changeset
960 auto HandleValue = DtorBuilder.CreateLoad(GpuBinaryAddr);
anatofuz
parents:
diff changeset
961 // There is only one HIP fat binary per linked module, however there are
anatofuz
parents:
diff changeset
962 // multiple destructor functions. Make sure the fat binary is unregistered
anatofuz
parents:
diff changeset
963 // only once.
anatofuz
parents:
diff changeset
964 if (CGM.getLangOpts().HIP) {
anatofuz
parents:
diff changeset
965 llvm::BasicBlock *IfBlock =
anatofuz
parents:
diff changeset
966 llvm::BasicBlock::Create(Context, "if", ModuleDtorFunc);
anatofuz
parents:
diff changeset
967 llvm::BasicBlock *ExitBlock =
anatofuz
parents:
diff changeset
968 llvm::BasicBlock::Create(Context, "exit", ModuleDtorFunc);
anatofuz
parents:
diff changeset
969 llvm::Constant *Zero = llvm::Constant::getNullValue(HandleValue->getType());
anatofuz
parents:
diff changeset
970 llvm::Value *NEZero = DtorBuilder.CreateICmpNE(HandleValue, Zero);
anatofuz
parents:
diff changeset
971 DtorBuilder.CreateCondBr(NEZero, IfBlock, ExitBlock);
anatofuz
parents:
diff changeset
972
anatofuz
parents:
diff changeset
973 DtorBuilder.SetInsertPoint(IfBlock);
anatofuz
parents:
diff changeset
974 DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
anatofuz
parents:
diff changeset
975 DtorBuilder.CreateStore(Zero, GpuBinaryAddr);
anatofuz
parents:
diff changeset
976 DtorBuilder.CreateBr(ExitBlock);
anatofuz
parents:
diff changeset
977
anatofuz
parents:
diff changeset
978 DtorBuilder.SetInsertPoint(ExitBlock);
anatofuz
parents:
diff changeset
979 } else {
anatofuz
parents:
diff changeset
980 DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
anatofuz
parents:
diff changeset
981 }
anatofuz
parents:
diff changeset
982 DtorBuilder.CreateRetVoid();
anatofuz
parents:
diff changeset
983 return ModuleDtorFunc;
anatofuz
parents:
diff changeset
984 }
anatofuz
parents:
diff changeset
985
anatofuz
parents:
diff changeset
986 CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
anatofuz
parents:
diff changeset
987 return new CGNVCUDARuntime(CGM);
anatofuz
parents:
diff changeset
988 }
221
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
989
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
990 void CGNVCUDARuntime::internalizeDeviceSideVar(
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
991 const VarDecl *D, llvm::GlobalValue::LinkageTypes &Linkage) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
992 // For -fno-gpu-rdc, host-side shadows of external declarations of device-side
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
993 // global variables become internal definitions. These have to be internal in
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
994 // order to prevent name conflicts with global host variables with the same
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
995 // name in a different TUs.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
996 //
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
997 // For -fgpu-rdc, the shadow variables should not be internalized because
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
998 // they may be accessed by different TU.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
999 if (CGM.getLangOpts().GPURelocatableDeviceCode)
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1000 return;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1001
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1002 // __shared__ variables are odd. Shadows do get created, but
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1003 // they are not registered with the CUDA runtime, so they
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1004 // can't really be used to access their device-side
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1005 // counterparts. It's not clear yet whether it's nvcc's bug or
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1006 // a feature, but we've got to do the same for compatibility.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1007 if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1008 D->hasAttr<CUDASharedAttr>() ||
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1009 D->getType()->isCUDADeviceBuiltinSurfaceType() ||
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1010 D->getType()->isCUDADeviceBuiltinTextureType()) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1011 Linkage = llvm::GlobalValue::InternalLinkage;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1012 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1013 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1014
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1015 void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1016 llvm::GlobalVariable &GV) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1017 if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1018 // Shadow variables and their properties must be registered with CUDA
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1019 // runtime. Skip Extern global variables, which will be registered in
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1020 // the TU where they are defined.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1021 //
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1022 // Don't register a C++17 inline variable. The local symbol can be
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1023 // discarded and referencing a discarded local symbol from outside the
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1024 // comdat (__cuda_register_globals) is disallowed by the ELF spec.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1025 //
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1026 // HIP managed variables need to be always recorded in device and host
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1027 // compilations for transformation.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1028 //
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1029 // HIP managed variables and variables in CUDADeviceVarODRUsedByHost are
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1030 // added to llvm.compiler-used, therefore they are safe to be registered.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1031 if ((!D->hasExternalStorage() && !D->isInline()) ||
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1032 CGM.getContext().CUDADeviceVarODRUsedByHost.contains(D) ||
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1033 D->hasAttr<HIPManagedAttr>()) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1034 registerDeviceVar(D, GV, !D->hasDefinition(),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1035 D->hasAttr<CUDAConstantAttr>());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1036 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1037 } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1038 D->getType()->isCUDADeviceBuiltinTextureType()) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1039 // Builtin surfaces and textures and their template arguments are
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1040 // also registered with CUDA runtime.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1041 const auto *TD = cast<ClassTemplateSpecializationDecl>(
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1042 D->getType()->castAs<RecordType>()->getDecl());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1043 const TemplateArgumentList &Args = TD->getTemplateArgs();
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1044 if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1045 assert(Args.size() == 2 &&
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1046 "Unexpected number of template arguments of CUDA device "
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1047 "builtin surface type.");
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1048 auto SurfType = Args[1].getAsIntegral();
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1049 if (!D->hasExternalStorage())
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1050 registerDeviceSurf(D, GV, !D->hasDefinition(), SurfType.getSExtValue());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1051 } else {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1052 assert(Args.size() == 3 &&
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1053 "Unexpected number of template arguments of CUDA device "
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1054 "builtin texture type.");
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1055 auto TexType = Args[1].getAsIntegral();
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1056 auto Normalized = Args[2].getAsIntegral();
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1057 if (!D->hasExternalStorage())
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1058 registerDeviceTex(D, GV, !D->hasDefinition(), TexType.getSExtValue(),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1059 Normalized.getZExtValue());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1060 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1061 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1062 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1063
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1064 // Transform managed variables to pointers to managed variables in device code.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1065 // Each use of the original managed variable is replaced by a load from the
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1066 // transformed managed variable. The transformed managed variable contains
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1067 // the address of managed memory which will be allocated by the runtime.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1068 void CGNVCUDARuntime::transformManagedVars() {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1069 for (auto &&Info : DeviceVars) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1070 llvm::GlobalVariable *Var = Info.Var;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1071 if (Info.Flags.getKind() == DeviceVarFlags::Variable &&
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1072 Info.Flags.isManaged()) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1073 auto ManagedVar = new llvm::GlobalVariable(
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1074 CGM.getModule(), Var->getType(),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1075 /*isConstant=*/false, Var->getLinkage(),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1076 /*Init=*/Var->isDeclaration()
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1077 ? nullptr
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1078 : llvm::ConstantPointerNull::get(Var->getType()),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1079 /*Name=*/"", /*InsertBefore=*/nullptr,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1080 llvm::GlobalVariable::NotThreadLocal,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1081 CGM.getContext().getTargetAddressSpace(LangAS::cuda_device));
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1082 ManagedVar->setDSOLocal(Var->isDSOLocal());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1083 ManagedVar->setVisibility(Var->getVisibility());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1084 ManagedVar->setExternallyInitialized(true);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1085 replaceManagedVar(Var, ManagedVar);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1086 ManagedVar->takeName(Var);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1087 Var->setName(Twine(ManagedVar->getName()) + ".managed");
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1088 // Keep managed variables even if they are not used in device code since
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1089 // they need to be allocated by the runtime.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1090 if (!Var->isDeclaration()) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1091 assert(!ManagedVar->isDeclaration());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1092 CGM.addCompilerUsedGlobal(Var);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1093 CGM.addCompilerUsedGlobal(ManagedVar);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1094 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1095 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1096 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1097 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1098
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1099 // Returns module constructor to be added.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1100 llvm::Function *CGNVCUDARuntime::finalizeModule() {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1101 if (CGM.getLangOpts().CUDAIsDevice) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1102 transformManagedVars();
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1103
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1104 // Mark ODR-used device variables as compiler used to prevent it from being
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1105 // eliminated by optimization. This is necessary for device variables
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1106 // ODR-used by host functions. Sema correctly marks them as ODR-used no
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1107 // matter whether they are ODR-used by device or host functions.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1108 //
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1109 // We do not need to do this if the variable has used attribute since it
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1110 // has already been added.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1111 //
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1112 // Static device variables have been externalized at this point, therefore
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1113 // variables with LLVM private or internal linkage need not be added.
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1114 for (auto &&Info : DeviceVars) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1115 auto Kind = Info.Flags.getKind();
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1116 if (!Info.Var->isDeclaration() &&
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1117 !llvm::GlobalValue::isLocalLinkage(Info.Var->getLinkage()) &&
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1118 (Kind == DeviceVarFlags::Variable ||
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1119 Kind == DeviceVarFlags::Surface ||
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1120 Kind == DeviceVarFlags::Texture) &&
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1121 Info.D->isUsed() && !Info.D->hasAttr<UsedAttr>()) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1122 CGM.addCompilerUsedGlobal(Info.Var);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1123 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1124 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1125 return nullptr;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1126 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1127 return makeModuleCtorFunction();
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1128 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1129
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1130 llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1131 GlobalDecl GD) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1132 auto Loc = KernelHandles.find(F);
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1133 if (Loc != KernelHandles.end())
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1134 return Loc->second;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1135
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1136 if (!CGM.getLangOpts().HIP) {
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1137 KernelHandles[F] = F;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1138 KernelStubs[F] = F;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1139 return F;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1140 }
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1141
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1142 auto *Var = new llvm::GlobalVariable(
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1143 TheModule, F->getType(), /*isConstant=*/true, F->getLinkage(),
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1144 /*Initializer=*/nullptr,
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1145 CGM.getMangledName(
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1146 GD.getWithKernelReferenceKind(KernelReferenceKind::Kernel)));
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1147 Var->setAlignment(CGM.getPointerAlign().getAsAlign());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1148 Var->setDSOLocal(F->isDSOLocal());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1149 Var->setVisibility(F->getVisibility());
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1150 KernelHandles[F] = Var;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1151 KernelStubs[Var] = F;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1152 return Var;
79ff65ed7e25 LLVM12 Original
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents: 173
diff changeset
1153 }