//===----- CGCUDANV.cpp - Interface to NVIDIA CUDA Runtime ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for CUDA code generation targeting the NVIDIA CUDA
// runtime library.
//
//===----------------------------------------------------------------------===//

#include "CGCUDARuntime.h"
#include "CGCXXABI.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "clang/AST/Decl.h"
#include "clang/Basic/Cuda.h"
#include "clang/CodeGen/CodeGenABITypes.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/Support/Format.h"

using namespace clang;
using namespace CodeGen;

namespace {
constexpr unsigned CudaFatMagic = 0x466243b1;
constexpr unsigned HIPFatMagic = 0x48495046; // "HIPF"

class CGNVCUDARuntime : public CGCUDARuntime {

private:
  llvm::IntegerType *IntTy, *SizeTy;
  llvm::Type *VoidTy;
  llvm::PointerType *CharPtrTy, *VoidPtrTy, *VoidPtrPtrTy;

  /// Convenience reference to LLVM Context
  llvm::LLVMContext &Context;
  /// Convenience reference to the current module
  llvm::Module &TheModule;
  /// Keeps track of kernel launch stubs and handles emitted in this module
  struct KernelInfo {
    llvm::Function *Kernel; // stub function to help launch kernel
    const Decl *D;
  };
  llvm::SmallVector<KernelInfo, 16> EmittedKernels;
  // Map a device stub function to a symbol for identifying the kernel in host
  // code. For CUDA, the symbol for identifying the kernel is the same as the
  // device stub function. For HIP, they are different.
  llvm::DenseMap<llvm::Function *, llvm::GlobalValue *> KernelHandles;
  // Map a kernel handle to the kernel stub.
  llvm::DenseMap<llvm::GlobalValue *, llvm::Function *> KernelStubs;
  struct VarInfo {
    llvm::GlobalVariable *Var;
    const VarDecl *D;
    DeviceVarFlags Flags;
  };
  llvm::SmallVector<VarInfo, 16> DeviceVars;
  /// Keeps track of the variable containing the handle of the GPU binary.
  /// Populated by ModuleCtorFunction() and used to create corresponding
  /// cleanup calls in ModuleDtorFunction().
  llvm::GlobalVariable *GpuBinaryHandle = nullptr;
  /// Whether we generate relocatable device code.
  bool RelocatableDeviceCode;
  /// Mangle context for device.
  std::unique_ptr<MangleContext> DeviceMC;

  llvm::FunctionCallee getSetupArgumentFn() const;
  llvm::FunctionCallee getLaunchFn() const;

  llvm::FunctionType *getRegisterGlobalsFnTy() const;
  llvm::FunctionType *getCallbackFnTy() const;
  llvm::FunctionType *getRegisterLinkedBinaryFnTy() const;
  std::string addPrefixToName(StringRef FuncName) const;
  std::string addUnderscoredPrefixToName(StringRef FuncName) const;

  /// Creates a function to register all kernel stubs generated in this module.
  llvm::Function *makeRegisterGlobalsFn();

  /// Helper function that generates a constant string and returns a pointer to
  /// the start of the string. The result can be used wherever the C code
  /// expects a const char*.
  llvm::Constant *makeConstantString(const std::string &Str,
                                     const std::string &Name = "",
                                     const std::string &SectionName = "",
                                     unsigned Alignment = 0) {
    llvm::Constant *Zeros[] = {llvm::ConstantInt::get(SizeTy, 0),
                               llvm::ConstantInt::get(SizeTy, 0)};
    auto ConstStr = CGM.GetAddrOfConstantCString(Str, Name.c_str());
    llvm::GlobalVariable *GV =
        cast<llvm::GlobalVariable>(ConstStr.getPointer());
    if (!SectionName.empty()) {
      GV->setSection(SectionName);
      // Mark the address as used, which makes sure this section isn't
      // merged away and really ends up in the object file.
      GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::None);
    }
    if (Alignment)
      GV->setAlignment(llvm::Align(Alignment));

    return llvm::ConstantExpr::getGetElementPtr(ConstStr.getElementType(),
                                                ConstStr.getPointer(), Zeros);
  }

  /// Helper function that generates an empty dummy function returning void.
  llvm::Function *makeDummyFunction(llvm::FunctionType *FnTy) {
    assert(FnTy->getReturnType()->isVoidTy() &&
           "Can only generate dummy functions returning void!");
    llvm::Function *DummyFunc = llvm::Function::Create(
        FnTy, llvm::GlobalValue::InternalLinkage, "dummy", &TheModule);

    llvm::BasicBlock *DummyBlock =
        llvm::BasicBlock::Create(Context, "", DummyFunc);
    CGBuilderTy FuncBuilder(CGM, Context);
    FuncBuilder.SetInsertPoint(DummyBlock);
    FuncBuilder.CreateRetVoid();

    return DummyFunc;
  }

  void emitDeviceStubBodyLegacy(CodeGenFunction &CGF, FunctionArgList &Args);
  void emitDeviceStubBodyNew(CodeGenFunction &CGF, FunctionArgList &Args);
  std::string getDeviceSideName(const NamedDecl *ND) override;

  void registerDeviceVar(const VarDecl *VD, llvm::GlobalVariable &Var,
                         bool Extern, bool Constant) {
    DeviceVars.push_back({&Var,
                          VD,
                          {DeviceVarFlags::Variable, Extern, Constant,
                           VD->hasAttr<HIPManagedAttr>(),
                           /*Normalized*/ false, 0}});
  }
  void registerDeviceSurf(const VarDecl *VD, llvm::GlobalVariable &Var,
                          bool Extern, int Type) {
    DeviceVars.push_back({&Var,
                          VD,
                          {DeviceVarFlags::Surface, Extern, /*Constant*/ false,
                           /*Managed*/ false,
                           /*Normalized*/ false, Type}});
  }
  void registerDeviceTex(const VarDecl *VD, llvm::GlobalVariable &Var,
                         bool Extern, int Type, bool Normalized) {
    DeviceVars.push_back({&Var,
                          VD,
                          {DeviceVarFlags::Texture, Extern, /*Constant*/ false,
                           /*Managed*/ false, Normalized, Type}});
  }

  /// Creates module constructor function
  llvm::Function *makeModuleCtorFunction();
  /// Creates module destructor function
  llvm::Function *makeModuleDtorFunction();
  /// Transform managed variables for device compilation.
  void transformManagedVars();

public:
  CGNVCUDARuntime(CodeGenModule &CGM);

  llvm::GlobalValue *getKernelHandle(llvm::Function *F, GlobalDecl GD) override;
  llvm::Function *getKernelStub(llvm::GlobalValue *Handle) override {
    auto Loc = KernelStubs.find(Handle);
    assert(Loc != KernelStubs.end());
    return Loc->second;
  }
  void emitDeviceStub(CodeGenFunction &CGF, FunctionArgList &Args) override;
  void handleVarRegistration(const VarDecl *VD,
                             llvm::GlobalVariable &Var) override;
  void
  internalizeDeviceSideVar(const VarDecl *D,
                           llvm::GlobalValue::LinkageTypes &Linkage) override;

  llvm::Function *finalizeModule() override;
};

} // end anonymous namespace

std::string CGNVCUDARuntime::addPrefixToName(StringRef FuncName) const {
  if (CGM.getLangOpts().HIP)
    return ((Twine("hip") + Twine(FuncName)).str());
  return ((Twine("cuda") + Twine(FuncName)).str());
}
std::string
CGNVCUDARuntime::addUnderscoredPrefixToName(StringRef FuncName) const {
  if (CGM.getLangOpts().HIP)
    return ((Twine("__hip") + Twine(FuncName)).str());
  return ((Twine("__cuda") + Twine(FuncName)).str());
}

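// For example, addPrefixToName("LaunchKernel") yields "cudaLaunchKernel" for
// CUDA and "hipLaunchKernel" for HIP; addUnderscoredPrefixToName("RegisterVar")
// yields "__cudaRegisterVar" / "__hipRegisterVar".
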
static std::unique_ptr<MangleContext> InitDeviceMC(CodeGenModule &CGM) {
  // If the host and device have different C++ ABIs, create a device mangle
  // context so that mangling retrieves the device-side lambda mangling number
  // instead of the regular host one.
  if (CGM.getContext().getAuxTargetInfo() &&
      CGM.getContext().getTargetInfo().getCXXABI().isMicrosoft() &&
      CGM.getContext().getAuxTargetInfo()->getCXXABI().isItaniumFamily()) {
    return std::unique_ptr<MangleContext>(
        CGM.getContext().createDeviceMangleContext(
            *CGM.getContext().getAuxTargetInfo()));
  }

  return std::unique_ptr<MangleContext>(CGM.getContext().createMangleContext(
      CGM.getContext().getAuxTargetInfo()));
}

CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
    : CGCUDARuntime(CGM), Context(CGM.getLLVMContext()),
      TheModule(CGM.getModule()),
      RelocatableDeviceCode(CGM.getLangOpts().GPURelocatableDeviceCode),
      DeviceMC(InitDeviceMC(CGM)) {
  CodeGen::CodeGenTypes &Types = CGM.getTypes();
  ASTContext &Ctx = CGM.getContext();

  IntTy = CGM.IntTy;
  SizeTy = CGM.SizeTy;
  VoidTy = CGM.VoidTy;

  CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
  VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
  VoidPtrPtrTy = VoidPtrTy->getPointerTo();
}

llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const {
  // cudaError_t cudaSetupArgument(void *, size_t, size_t)
  llvm::Type *Params[] = {VoidPtrTy, SizeTy, SizeTy};
  return CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(IntTy, Params, false),
      addPrefixToName("SetupArgument"));
}

llvm::FunctionCallee CGNVCUDARuntime::getLaunchFn() const {
  if (CGM.getLangOpts().HIP) {
    // hipError_t hipLaunchByPtr(char *);
    return CGM.CreateRuntimeFunction(
        llvm::FunctionType::get(IntTy, CharPtrTy, false), "hipLaunchByPtr");
  } else {
    // cudaError_t cudaLaunch(char *);
    return CGM.CreateRuntimeFunction(
        llvm::FunctionType::get(IntTy, CharPtrTy, false), "cudaLaunch");
  }
}

llvm::FunctionType *CGNVCUDARuntime::getRegisterGlobalsFnTy() const {
  return llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false);
}

llvm::FunctionType *CGNVCUDARuntime::getCallbackFnTy() const {
  return llvm::FunctionType::get(VoidTy, VoidPtrTy, false);
}

llvm::FunctionType *CGNVCUDARuntime::getRegisterLinkedBinaryFnTy() const {
  auto CallbackFnTy = getCallbackFnTy();
  auto RegisterGlobalsFnTy = getRegisterGlobalsFnTy();
  llvm::Type *Params[] = {RegisterGlobalsFnTy->getPointerTo(), VoidPtrTy,
                          VoidPtrTy, CallbackFnTy->getPointerTo()};
  return llvm::FunctionType::get(VoidTy, Params, false);
}

std::string CGNVCUDARuntime::getDeviceSideName(const NamedDecl *ND) {
  GlobalDecl GD;
  // ND could be either a kernel or a variable.
  if (auto *FD = dyn_cast<FunctionDecl>(ND))
    GD = GlobalDecl(FD, KernelReferenceKind::Kernel);
  else
    GD = GlobalDecl(ND);
  std::string DeviceSideName;
  MangleContext *MC;
  if (CGM.getLangOpts().CUDAIsDevice)
    MC = &CGM.getCXXABI().getMangleContext();
  else
    MC = DeviceMC.get();
  if (MC->shouldMangleDeclName(ND)) {
    SmallString<256> Buffer;
    llvm::raw_svector_ostream Out(Buffer);
    MC->mangleName(GD, Out);
    DeviceSideName = std::string(Out.str());
  } else
    DeviceSideName = std::string(ND->getIdentifier()->getName());

  // Make a unique name for the device-side static file-scope variable for HIP.
  if (CGM.getContext().shouldExternalizeStaticVar(ND) &&
      CGM.getLangOpts().GPURelocatableDeviceCode &&
      !CGM.getLangOpts().CUID.empty()) {
    SmallString<256> Buffer;
    llvm::raw_svector_ostream Out(Buffer);
    Out << DeviceSideName;
    CGM.printPostfixForExternalizedStaticVar(Out);
    DeviceSideName = std::string(Out.str());
  }
  return DeviceSideName;
}

void CGNVCUDARuntime::emitDeviceStub(CodeGenFunction &CGF,
                                     FunctionArgList &Args) {
  EmittedKernels.push_back({CGF.CurFn, CGF.CurFuncDecl});
  if (auto *GV = dyn_cast<llvm::GlobalVariable>(KernelHandles[CGF.CurFn])) {
    GV->setLinkage(CGF.CurFn->getLinkage());
    GV->setInitializer(CGF.CurFn);
  }
  if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
                         CudaFeature::CUDA_USES_NEW_LAUNCH) ||
      (CGF.getLangOpts().HIP && CGF.getLangOpts().HIPUseNewLaunchAPI))
    emitDeviceStubBodyNew(CGF, Args);
  else
    emitDeviceStubBodyLegacy(CGF, Args);
}

// CUDA 9.0+ uses a new way to launch kernels: parameters are packed in a local
// array and kernels are launched using cudaLaunchKernel().
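//
// A rough sketch of the stub body emitted below, with illustrative names (the
// actual entry points get the cuda/hip prefix via addPrefixToName):
//   void *kernel_args[max(1, N)] = {&arg0, ..., &argN-1};
//   dim3 grid_dim, block_dim; size_t shmem_size; void *stream;
//   __cudaPopCallConfiguration(&grid_dim, &block_dim, &shmem_size, &stream);
//   cudaLaunchKernel((void *)kernel_handle, grid_dim, block_dim, kernel_args,
//                    shmem_size, stream);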
void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
                                            FunctionArgList &Args) {
  // Calculate the amount of space we will need for all arguments. If we have
  // no args, allocate a single pointer so we still have a valid pointer to the
  // argument array that we can pass to the runtime, even if it will be unused.
  Address KernelArgs = CGF.CreateTempAlloca(
      VoidPtrTy, CharUnits::fromQuantity(16), "kernel_args",
      llvm::ConstantInt::get(SizeTy, std::max<size_t>(1, Args.size())));
  // Store pointers to the arguments in a locally allocated launch_args.
  for (unsigned i = 0; i < Args.size(); ++i) {
    llvm::Value *VarPtr = CGF.GetAddrOfLocalVar(Args[i]).getPointer();
    llvm::Value *VoidVarPtr = CGF.Builder.CreatePointerCast(VarPtr, VoidPtrTy);
    CGF.Builder.CreateDefaultAlignedStore(
        VoidVarPtr, CGF.Builder.CreateConstGEP1_32(KernelArgs.getPointer(), i));
  }

  llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");

  // Look up the cudaLaunchKernel/hipLaunchKernel function.
  // cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
  //                              void **args, size_t sharedMem,
  //                              cudaStream_t stream);
  // hipError_t hipLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
  //                            void **args, size_t sharedMem,
  //                            hipStream_t stream);
  TranslationUnitDecl *TUDecl = CGM.getContext().getTranslationUnitDecl();
  DeclContext *DC = TranslationUnitDecl::castToDeclContext(TUDecl);
  auto LaunchKernelName = addPrefixToName("LaunchKernel");
  IdentifierInfo &cudaLaunchKernelII =
      CGM.getContext().Idents.get(LaunchKernelName);
  FunctionDecl *cudaLaunchKernelFD = nullptr;
  for (auto *Result : DC->lookup(&cudaLaunchKernelII)) {
    if (FunctionDecl *FD = dyn_cast<FunctionDecl>(Result))
      cudaLaunchKernelFD = FD;
  }

  if (cudaLaunchKernelFD == nullptr) {
    CGM.Error(CGF.CurFuncDecl->getLocation(),
              "Can't find declaration for " + LaunchKernelName);
    return;
  }
  // Create temporary dim3 grid_dim, block_dim.
  ParmVarDecl *GridDimParam = cudaLaunchKernelFD->getParamDecl(1);
  QualType Dim3Ty = GridDimParam->getType();
  Address GridDim =
      CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "grid_dim");
  Address BlockDim =
      CGF.CreateMemTemp(Dim3Ty, CharUnits::fromQuantity(8), "block_dim");
  Address ShmemSize =
      CGF.CreateTempAlloca(SizeTy, CGM.getSizeAlign(), "shmem_size");
  Address Stream =
      CGF.CreateTempAlloca(VoidPtrTy, CGM.getPointerAlign(), "stream");
  llvm::FunctionCallee cudaPopConfigFn = CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(IntTy,
                              {/*gridDim=*/GridDim.getType(),
                               /*blockDim=*/BlockDim.getType(),
                               /*ShmemSize=*/ShmemSize.getType(),
                               /*Stream=*/Stream.getType()},
                              /*isVarArg=*/false),
      addUnderscoredPrefixToName("PopCallConfiguration"));

  CGF.EmitRuntimeCallOrInvoke(cudaPopConfigFn,
                              {GridDim.getPointer(), BlockDim.getPointer(),
                               ShmemSize.getPointer(), Stream.getPointer()});

  // Emit the call to cudaLaunch
  llvm::Value *Kernel =
      CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], VoidPtrTy);
  CallArgList LaunchKernelArgs;
  LaunchKernelArgs.add(RValue::get(Kernel),
                       cudaLaunchKernelFD->getParamDecl(0)->getType());
  LaunchKernelArgs.add(RValue::getAggregate(GridDim), Dim3Ty);
  LaunchKernelArgs.add(RValue::getAggregate(BlockDim), Dim3Ty);
  LaunchKernelArgs.add(RValue::get(KernelArgs.getPointer()),
                       cudaLaunchKernelFD->getParamDecl(3)->getType());
  LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(ShmemSize)),
                       cudaLaunchKernelFD->getParamDecl(4)->getType());
  LaunchKernelArgs.add(RValue::get(CGF.Builder.CreateLoad(Stream)),
                       cudaLaunchKernelFD->getParamDecl(5)->getType());

  QualType QT = cudaLaunchKernelFD->getType();
  QualType CQT = QT.getCanonicalType();
  llvm::Type *Ty = CGM.getTypes().ConvertType(CQT);
  llvm::FunctionType *FTy = dyn_cast<llvm::FunctionType>(Ty);

  const CGFunctionInfo &FI =
      CGM.getTypes().arrangeFunctionDeclaration(cudaLaunchKernelFD);
  llvm::FunctionCallee cudaLaunchKernelFn =
      CGM.CreateRuntimeFunction(FTy, LaunchKernelName);
  CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(),
               LaunchKernelArgs);
  CGF.EmitBranch(EndBlock);

  CGF.EmitBlock(EndBlock);
}

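// Legacy (pre-9.0) launch: each kernel argument is copied into the pending
// launch configuration with cudaSetupArgument at its device-side offset, and
// cudaLaunch/hipLaunchByPtr then fires the kernel. Schematically:
//   if (cudaSetupArgument(&arg0, sizeof(arg0), offset0) == 0)
//     if (cudaSetupArgument(&arg1, sizeof(arg1), offset1) == 0)
//       cudaLaunch((char *)kernel_handle);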
void CGNVCUDARuntime::emitDeviceStubBodyLegacy(CodeGenFunction &CGF,
                                               FunctionArgList &Args) {
  // Emit a call to cudaSetupArgument for each arg in Args.
  llvm::FunctionCallee cudaSetupArgFn = getSetupArgumentFn();
  llvm::BasicBlock *EndBlock = CGF.createBasicBlock("setup.end");
  CharUnits Offset = CharUnits::Zero();
  for (const VarDecl *A : Args) {
    auto TInfo = CGM.getContext().getTypeInfoInChars(A->getType());
    Offset = Offset.alignTo(TInfo.Align);
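    // e.g. for (char, double), the char lands at offset 0 and the double is
    // bumped up to offset 8, matching the device-side parameter layout.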
    llvm::Value *Args[] = {
        CGF.Builder.CreatePointerCast(CGF.GetAddrOfLocalVar(A).getPointer(),
                                      VoidPtrTy),
        llvm::ConstantInt::get(SizeTy, TInfo.Width.getQuantity()),
        llvm::ConstantInt::get(SizeTy, Offset.getQuantity()),
    };
    llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(cudaSetupArgFn, Args);
    llvm::Constant *Zero = llvm::ConstantInt::get(IntTy, 0);
    llvm::Value *CBZero = CGF.Builder.CreateICmpEQ(CB, Zero);
    llvm::BasicBlock *NextBlock = CGF.createBasicBlock("setup.next");
    CGF.Builder.CreateCondBr(CBZero, NextBlock, EndBlock);
    CGF.EmitBlock(NextBlock);
    Offset += TInfo.Width;
  }

  // Emit the call to cudaLaunch
  llvm::FunctionCallee cudaLaunchFn = getLaunchFn();
  llvm::Value *Arg =
      CGF.Builder.CreatePointerCast(KernelHandles[CGF.CurFn], CharPtrTy);
  CGF.EmitRuntimeCallOrInvoke(cudaLaunchFn, Arg);
  CGF.EmitBranch(EndBlock);

  CGF.EmitBlock(EndBlock);
}

// Replace the original variable Var with the address loaded from the variable
// ManagedVar, which is populated by the HIP runtime.
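// Schematically (illustrative names), a use of Var such as
//   store i32 1, i32* @var
// becomes a load of the managed address followed by the original access:
//   %ld.managed = load i32*, i32** @var.managed
//   store i32 1, i32* %ld.managed
// Uses inside constant expressions are first rewritten as instructions.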
static void replaceManagedVar(llvm::GlobalVariable *Var,
                              llvm::GlobalVariable *ManagedVar) {
  SmallVector<SmallVector<llvm::User *, 8>, 8> WorkList;
  for (auto &&VarUse : Var->uses()) {
    WorkList.push_back({VarUse.getUser()});
  }
  while (!WorkList.empty()) {
    auto &&WorkItem = WorkList.pop_back_val();
    auto *U = WorkItem.back();
    if (isa<llvm::ConstantExpr>(U)) {
      for (auto &&UU : U->uses()) {
        WorkItem.push_back(UU.getUser());
        WorkList.push_back(WorkItem);
        WorkItem.pop_back();
      }
      continue;
    }
    if (auto *I = dyn_cast<llvm::Instruction>(U)) {
      llvm::Value *OldV = Var;
      llvm::Instruction *NewV =
          new llvm::LoadInst(Var->getType(), ManagedVar, "ld.managed", false,
                             llvm::Align(Var->getAlignment()), I);
      WorkItem.pop_back();
      // Replace constant expressions directly or indirectly using the managed
      // variable with instructions.
      for (auto &&Op : WorkItem) {
        auto *CE = cast<llvm::ConstantExpr>(Op);
        auto *NewInst = llvm::createReplacementInstr(CE, I);
        NewInst->replaceUsesOfWith(OldV, NewV);
        OldV = CE;
        NewV = NewInst;
      }
      I->replaceUsesOfWith(OldV, NewV);
    } else {
      llvm_unreachable("Invalid use of managed variable");
    }
  }
}

/// Creates a function that sets up state on the host side for CUDA objects that
/// have a presence on both the host and device sides. Specifically, registers
/// the host side of kernel functions and device global variables with the CUDA
/// runtime.
/// \code
/// void __cuda_register_globals(void** GpuBinaryHandle) {
///   __cudaRegisterFunction(GpuBinaryHandle,Kernel0,...);
///   ...
///   __cudaRegisterFunction(GpuBinaryHandle,KernelM,...);
///   __cudaRegisterVar(GpuBinaryHandle, GlobalVar0, ...);
///   ...
///   __cudaRegisterVar(GpuBinaryHandle, GlobalVarN, ...);
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeRegisterGlobalsFn() {
  // No need to register anything
  if (EmittedKernels.empty() && DeviceVars.empty())
    return nullptr;

  llvm::Function *RegisterKernelsFunc = llvm::Function::Create(
      getRegisterGlobalsFnTy(), llvm::GlobalValue::InternalLinkage,
      addUnderscoredPrefixToName("_register_globals"), &TheModule);
  llvm::BasicBlock *EntryBB =
      llvm::BasicBlock::Create(Context, "entry", RegisterKernelsFunc);
  CGBuilderTy Builder(CGM, Context);
  Builder.SetInsertPoint(EntryBB);

  // void __cudaRegisterFunction(void **, const char *, char *, const char *,
  //                             int, uint3*, uint3*, dim3*, dim3*, int*)
  llvm::Type *RegisterFuncParams[] = {
      VoidPtrPtrTy, CharPtrTy, CharPtrTy, CharPtrTy, IntTy,
      VoidPtrTy,    VoidPtrTy, VoidPtrTy, VoidPtrTy, IntTy->getPointerTo()};
  llvm::FunctionCallee RegisterFunc = CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(IntTy, RegisterFuncParams, false),
      addUnderscoredPrefixToName("RegisterFunction"));

  // Extract GpuBinaryHandle, passed as the first argument to
  // __cuda_register_globals(), and generate a __cudaRegisterFunction() call
  // for each emitted kernel.
  llvm::Argument &GpuBinaryHandlePtr = *RegisterKernelsFunc->arg_begin();
  for (auto &&I : EmittedKernels) {
    llvm::Constant *KernelName =
        makeConstantString(getDeviceSideName(cast<NamedDecl>(I.D)));
    llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy);
    llvm::Value *Args[] = {
        &GpuBinaryHandlePtr,
        Builder.CreateBitCast(KernelHandles[I.Kernel], VoidPtrTy),
        KernelName,
        KernelName,
        llvm::ConstantInt::get(IntTy, -1),
        NullPtr,
        NullPtr,
        NullPtr,
        NullPtr,
        llvm::ConstantPointerNull::get(IntTy->getPointerTo())};
    Builder.CreateCall(RegisterFunc, Args);
  }

  llvm::Type *VarSizeTy = IntTy;
  // For HIP or CUDA 9.0+, the device variable size parameter is of type
  // `size_t`.
  if (CGM.getLangOpts().HIP ||
      ToCudaVersion(CGM.getTarget().getSDKVersion()) >= CudaVersion::CUDA_90)
    VarSizeTy = SizeTy;

  // void __cudaRegisterVar(void **, char *, char *, const char *,
  //                        int, int, int, int)
  llvm::Type *RegisterVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
                                     CharPtrTy,    IntTy,     VarSizeTy,
                                     IntTy,        IntTy};
  llvm::FunctionCallee RegisterVar = CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(VoidTy, RegisterVarParams, false),
      addUnderscoredPrefixToName("RegisterVar"));
  // void __hipRegisterManagedVar(void **, char *, char *, const char *,
  //                              size_t, unsigned)
  llvm::Type *RegisterManagedVarParams[] = {VoidPtrPtrTy, CharPtrTy, CharPtrTy,
                                            CharPtrTy,    VarSizeTy, IntTy};
  llvm::FunctionCallee RegisterManagedVar = CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(VoidTy, RegisterManagedVarParams, false),
      addUnderscoredPrefixToName("RegisterManagedVar"));
  // void __cudaRegisterSurface(void **, const struct surfaceReference *,
  //                            const void **, const char *, int, int);
  llvm::FunctionCallee RegisterSurf = CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(
          VoidTy, {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy},
          false),
      addUnderscoredPrefixToName("RegisterSurface"));
  // void __cudaRegisterTexture(void **, const struct textureReference *,
  //                            const void **, const char *, int, int, int)
  llvm::FunctionCallee RegisterTex = CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(
          VoidTy,
          {VoidPtrPtrTy, VoidPtrTy, CharPtrTy, CharPtrTy, IntTy, IntTy, IntTy},
          false),
      addUnderscoredPrefixToName("RegisterTexture"));
  for (auto &&Info : DeviceVars) {
    llvm::GlobalVariable *Var = Info.Var;
    assert((!Var->isDeclaration() || Info.Flags.isManaged()) &&
           "External variables should not show up here, except HIP managed "
           "variables");
    llvm::Constant *VarName = makeConstantString(getDeviceSideName(Info.D));
    switch (Info.Flags.getKind()) {
    case DeviceVarFlags::Variable: {
      uint64_t VarSize =
          CGM.getDataLayout().getTypeAllocSize(Var->getValueType());
      if (Info.Flags.isManaged()) {
        auto ManagedVar = new llvm::GlobalVariable(
            CGM.getModule(), Var->getType(),
            /*isConstant=*/false, Var->getLinkage(),
            /*Init=*/Var->isDeclaration()
                ? nullptr
                : llvm::ConstantPointerNull::get(Var->getType()),
            /*Name=*/"", /*InsertBefore=*/nullptr,
            llvm::GlobalVariable::NotThreadLocal);
        ManagedVar->setDSOLocal(Var->isDSOLocal());
        ManagedVar->setVisibility(Var->getVisibility());
        ManagedVar->setExternallyInitialized(true);
        ManagedVar->takeName(Var);
        Var->setName(Twine(ManagedVar->getName() + ".managed"));
        replaceManagedVar(Var, ManagedVar);
        llvm::Value *Args[] = {
            &GpuBinaryHandlePtr,
            Builder.CreateBitCast(ManagedVar, VoidPtrTy),
            Builder.CreateBitCast(Var, VoidPtrTy),
            VarName,
            llvm::ConstantInt::get(VarSizeTy, VarSize),
            llvm::ConstantInt::get(IntTy, Var->getAlignment())};
        if (!Var->isDeclaration())
          Builder.CreateCall(RegisterManagedVar, Args);
      } else {
        llvm::Value *Args[] = {
            &GpuBinaryHandlePtr,
            Builder.CreateBitCast(Var, VoidPtrTy),
            VarName,
            VarName,
            llvm::ConstantInt::get(IntTy, Info.Flags.isExtern()),
            llvm::ConstantInt::get(VarSizeTy, VarSize),
            llvm::ConstantInt::get(IntTy, Info.Flags.isConstant()),
            llvm::ConstantInt::get(IntTy, 0)};
        Builder.CreateCall(RegisterVar, Args);
      }
      break;
    }
    case DeviceVarFlags::Surface:
      Builder.CreateCall(
          RegisterSurf,
          {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName,
           VarName, llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
           llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
      break;
    case DeviceVarFlags::Texture:
      Builder.CreateCall(
          RegisterTex,
          {&GpuBinaryHandlePtr, Builder.CreateBitCast(Var, VoidPtrTy), VarName,
           VarName, llvm::ConstantInt::get(IntTy, Info.Flags.getSurfTexType()),
           llvm::ConstantInt::get(IntTy, Info.Flags.isNormalized()),
           llvm::ConstantInt::get(IntTy, Info.Flags.isExtern())});
      break;
    }
  }

  Builder.CreateRetVoid();
  return RegisterKernelsFunc;
}

/// Creates a global constructor function for the module:
///
/// For CUDA:
/// \code
/// void __cuda_module_ctor(void*) {
///     Handle = __cudaRegisterFatBinary(GpuBinaryBlob);
///     __cuda_register_globals(Handle);
/// }
/// \endcode
///
/// For HIP:
/// \code
/// void __hip_module_ctor(void*) {
///     if (__hip_gpubin_handle == 0) {
///         __hip_gpubin_handle = __hipRegisterFatBinary(GpuBinaryBlob);
///         __hip_register_globals(__hip_gpubin_handle);
///     }
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
  bool IsHIP = CGM.getLangOpts().HIP;
  bool IsCUDA = CGM.getLangOpts().CUDA;
  // No need to generate ctors/dtors if there is no GPU binary.
  StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName;
  if (CudaGpuBinaryFileName.empty() && !IsHIP)
    return nullptr;
  if ((IsHIP || (IsCUDA && !RelocatableDeviceCode)) && EmittedKernels.empty() &&
      DeviceVars.empty())
    return nullptr;

  // void __{cuda|hip}_register_globals(void* handle);
  llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
  // We always need a function to pass in as callback. Create a dummy
  // implementation if we don't need to register anything.
  if (RelocatableDeviceCode && !RegisterGlobalsFunc)
    RegisterGlobalsFunc = makeDummyFunction(getRegisterGlobalsFnTy());

  // void ** __{cuda|hip}RegisterFatBinary(void *);
  llvm::FunctionCallee RegisterFatbinFunc = CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(VoidPtrPtrTy, VoidPtrTy, false),
      addUnderscoredPrefixToName("RegisterFatBinary"));
  // struct { int magic, int version, void * gpu_binary, void * dont_care };
  llvm::StructType *FatbinWrapperTy =
      llvm::StructType::get(IntTy, IntTy, VoidPtrTy, VoidPtrTy);

  // Register the GPU binary with the CUDA runtime, store the returned handle
  // in a global variable, and save a reference in GpuBinaryHandle to be
  // cleaned up in the destructor on exit. Then associate all known kernels
  // with the GPU binary handle so the CUDA runtime can figure out what to
  // call on the GPU side.
  std::unique_ptr<llvm::MemoryBuffer> CudaGpuBinary = nullptr;
  if (!CudaGpuBinaryFileName.empty()) {
    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CudaGpuBinaryOrErr =
        llvm::MemoryBuffer::getFileOrSTDIN(CudaGpuBinaryFileName);
    if (std::error_code EC = CudaGpuBinaryOrErr.getError()) {
      CGM.getDiags().Report(diag::err_cannot_open_file)
          << CudaGpuBinaryFileName << EC.message();
      return nullptr;
    }
    CudaGpuBinary = std::move(CudaGpuBinaryOrErr.get());
  }

  llvm::Function *ModuleCtorFunc = llvm::Function::Create(
      llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
      llvm::GlobalValue::InternalLinkage,
      addUnderscoredPrefixToName("_module_ctor"), &TheModule);
  llvm::BasicBlock *CtorEntryBB =
      llvm::BasicBlock::Create(Context, "entry", ModuleCtorFunc);
  CGBuilderTy CtorBuilder(CGM, Context);

  CtorBuilder.SetInsertPoint(CtorEntryBB);

  const char *FatbinConstantName;
  const char *FatbinSectionName;
  const char *ModuleIDSectionName;
  StringRef ModuleIDPrefix;
  llvm::Constant *FatBinStr;
  unsigned FatMagic;
  if (IsHIP) {
    FatbinConstantName = ".hip_fatbin";
    FatbinSectionName = ".hipFatBinSegment";

    ModuleIDSectionName = "__hip_module_id";
    ModuleIDPrefix = "__hip_";

    if (CudaGpuBinary) {
      // If the fatbin is available from early finalization, create a string
      // literal containing the fat binary loaded from the given file.
      const unsigned HIPCodeObjectAlign = 4096;
      FatBinStr =
          makeConstantString(std::string(CudaGpuBinary->getBuffer()), "",
                             FatbinConstantName, HIPCodeObjectAlign);
    } else {
      // If the fatbin is not available, create an external symbol
      // __hip_fatbin in section .hip_fatbin. The external symbol is supposed
      // to contain the fat binary but will be populated somewhere else,
      // e.g. by lld through a linker script.
      FatBinStr = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty,
          /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, nullptr,
          "__hip_fatbin", nullptr,
          llvm::GlobalVariable::NotThreadLocal);
      cast<llvm::GlobalVariable>(FatBinStr)->setSection(FatbinConstantName);
    }

    FatMagic = HIPFatMagic;
  } else {
    if (RelocatableDeviceCode)
      FatbinConstantName = CGM.getTriple().isMacOSX()
                               ? "__NV_CUDA,__nv_relfatbin"
                               : "__nv_relfatbin";
    else
      FatbinConstantName =
          CGM.getTriple().isMacOSX() ? "__NV_CUDA,__nv_fatbin" : ".nv_fatbin";
    // NVIDIA's cuobjdump looks for fatbins in this section.
    FatbinSectionName =
        CGM.getTriple().isMacOSX() ? "__NV_CUDA,__fatbin" : ".nvFatBinSegment";

    ModuleIDSectionName = CGM.getTriple().isMacOSX()
                              ? "__NV_CUDA,__nv_module_id"
                              : "__nv_module_id";
    ModuleIDPrefix = "__nv_";

    // For CUDA, create a string literal containing the fat binary loaded from
    // the given file.
    FatBinStr = makeConstantString(std::string(CudaGpuBinary->getBuffer()), "",
                                   FatbinConstantName, 8);
    FatMagic = CudaFatMagic;
  }

  // Create the initialized wrapper structure that points to the loaded GPU
  // binary.
  ConstantInitBuilder Builder(CGM);
  auto Values = Builder.beginStruct(FatbinWrapperTy);
  // Fatbin wrapper magic.
  Values.addInt(IntTy, FatMagic);
  // Fatbin version.
  Values.addInt(IntTy, 1);
  // Data.
  Values.add(FatBinStr);
  // Unused in fatbin v1.
  Values.add(llvm::ConstantPointerNull::get(VoidPtrTy));
  llvm::GlobalVariable *FatbinWrapper = Values.finishAndCreateGlobal(
      addUnderscoredPrefixToName("_fatbin_wrapper"), CGM.getPointerAlign(),
      /*constant*/ true);
  FatbinWrapper->setSection(FatbinSectionName);

  // There is only one HIP fat binary per linked module, however there are
  // multiple constructor functions. Make sure the fat binary is registered
  // only once. The constructor functions are executed by the dynamic loader
  // before the program gains control. The dynamic loader cannot execute the
  // constructor functions concurrently since doing that would not guarantee
  // thread safety of the loaded program. Therefore we can assume sequential
  // execution of constructor functions here.
  if (IsHIP) {
    auto Linkage = CudaGpuBinary ? llvm::GlobalValue::InternalLinkage
                                 : llvm::GlobalValue::LinkOnceAnyLinkage;
    llvm::BasicBlock *IfBlock =
        llvm::BasicBlock::Create(Context, "if", ModuleCtorFunc);
    llvm::BasicBlock *ExitBlock =
        llvm::BasicBlock::Create(Context, "exit", ModuleCtorFunc);
    // The name, size, and initialization pattern of this variable are part
    // of the HIP ABI.
    GpuBinaryHandle = new llvm::GlobalVariable(
        TheModule, VoidPtrPtrTy, /*isConstant=*/false, Linkage,
        /*Initializer=*/llvm::ConstantPointerNull::get(VoidPtrPtrTy),
        "__hip_gpubin_handle");
    GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
    // Prevent the weak symbol in different shared libraries from being merged.
    if (Linkage != llvm::GlobalValue::InternalLinkage)
      GpuBinaryHandle->setVisibility(llvm::GlobalValue::HiddenVisibility);
    Address GpuBinaryAddr(
        GpuBinaryHandle,
        CharUnits::fromQuantity(GpuBinaryHandle->getAlignment()));
    {
      auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
      llvm::Constant *Zero =
          llvm::Constant::getNullValue(HandleValue->getType());
      llvm::Value *EQZero = CtorBuilder.CreateICmpEQ(HandleValue, Zero);
      CtorBuilder.CreateCondBr(EQZero, IfBlock, ExitBlock);
    }
    {
      CtorBuilder.SetInsertPoint(IfBlock);
      // GpuBinaryHandle = __hipRegisterFatBinary(&FatbinWrapper);
      llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
          RegisterFatbinFunc,
          CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
      CtorBuilder.CreateStore(RegisterFatbinCall, GpuBinaryAddr);
      CtorBuilder.CreateBr(ExitBlock);
    }
    {
      CtorBuilder.SetInsertPoint(ExitBlock);
      // Call __hip_register_globals(GpuBinaryHandle);
      if (RegisterGlobalsFunc) {
        auto HandleValue = CtorBuilder.CreateLoad(GpuBinaryAddr);
        CtorBuilder.CreateCall(RegisterGlobalsFunc, HandleValue);
      }
    }
  } else if (!RelocatableDeviceCode) {
    // Register the binary with the CUDA runtime. This is substantially
    // different in default mode vs. separate compilation!
    // GpuBinaryHandle = __cudaRegisterFatBinary(&FatbinWrapper);
    llvm::CallInst *RegisterFatbinCall = CtorBuilder.CreateCall(
        RegisterFatbinFunc,
        CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy));
    GpuBinaryHandle = new llvm::GlobalVariable(
        TheModule, VoidPtrPtrTy, false, llvm::GlobalValue::InternalLinkage,
        llvm::ConstantPointerNull::get(VoidPtrPtrTy), "__cuda_gpubin_handle");
    GpuBinaryHandle->setAlignment(CGM.getPointerAlign().getAsAlign());
    CtorBuilder.CreateAlignedStore(RegisterFatbinCall, GpuBinaryHandle,
                                   CGM.getPointerAlign());

    // Call __cuda_register_globals(GpuBinaryHandle);
    if (RegisterGlobalsFunc)
      CtorBuilder.CreateCall(RegisterGlobalsFunc, RegisterFatbinCall);

    // Call __cudaRegisterFatBinaryEnd(Handle) if this CUDA version needs it.
    if (CudaFeatureEnabled(CGM.getTarget().getSDKVersion(),
                           CudaFeature::CUDA_USES_FATBIN_REGISTER_END)) {
      // void __cudaRegisterFatBinaryEnd(void **);
      llvm::FunctionCallee RegisterFatbinEndFunc = CGM.CreateRuntimeFunction(
          llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
          "__cudaRegisterFatBinaryEnd");
      CtorBuilder.CreateCall(RegisterFatbinEndFunc, RegisterFatbinCall);
    }
  } else {
    // Generate a unique module ID.
    SmallString<64> ModuleID;
    llvm::raw_svector_ostream OS(ModuleID);
    OS << ModuleIDPrefix << llvm::format("%" PRIx64, FatbinWrapper->getGUID());
    llvm::Constant *ModuleIDConstant = makeConstantString(
        std::string(ModuleID.str()), "", ModuleIDSectionName, 32);

    // Create an alias for the FatbinWrapper that nvcc will look for.
    llvm::GlobalAlias::create(llvm::GlobalValue::ExternalLinkage,
                              Twine("__fatbinwrap") + ModuleID, FatbinWrapper);

    // void __cudaRegisterLinkedBinary%ModuleID%(void (*)(void *), void *,
    //                                           void *, void (*)(void **))
    SmallString<128> RegisterLinkedBinaryName("__cudaRegisterLinkedBinary");
    RegisterLinkedBinaryName += ModuleID;
    llvm::FunctionCallee RegisterLinkedBinaryFunc = CGM.CreateRuntimeFunction(
        getRegisterLinkedBinaryFnTy(), RegisterLinkedBinaryName);

    assert(RegisterGlobalsFunc && "Expecting at least dummy function!");
    llvm::Value *Args[] = {RegisterGlobalsFunc,
                           CtorBuilder.CreateBitCast(FatbinWrapper, VoidPtrTy),
                           ModuleIDConstant,
                           makeDummyFunction(getCallbackFnTy())};
    CtorBuilder.CreateCall(RegisterLinkedBinaryFunc, Args);
  }

  // Create the destructor and register it with atexit() the way NVCC does it.
  // Doing it during the regular destructor phase worked in CUDA before 9.2,
  // but results in a double-free in 9.2.
  if (llvm::Function *CleanupFn = makeModuleDtorFunction()) {
    // extern "C" int atexit(void (*f)(void));
    llvm::FunctionType *AtExitTy =
        llvm::FunctionType::get(IntTy, CleanupFn->getType(), false);
    llvm::FunctionCallee AtExitFunc =
        CGM.CreateRuntimeFunction(AtExitTy, "atexit", llvm::AttributeList(),
                                  /*Local=*/true);
    CtorBuilder.CreateCall(AtExitFunc, CleanupFn);
  }

  CtorBuilder.CreateRetVoid();
  return ModuleCtorFunc;
}

/// Creates a global destructor function that unregisters the GPU code blob
/// registered by the constructor.
///
/// For CUDA:
/// \code
/// void __cuda_module_dtor(void*) {
///     __cudaUnregisterFatBinary(Handle);
/// }
/// \endcode
///
/// For HIP:
/// \code
/// void __hip_module_dtor(void*) {
///     if (__hip_gpubin_handle) {
///         __hipUnregisterFatBinary(__hip_gpubin_handle);
///         __hip_gpubin_handle = 0;
///     }
/// }
/// \endcode
llvm::Function *CGNVCUDARuntime::makeModuleDtorFunction() {
  // No need for a destructor if we don't have a handle to unregister.
  if (!GpuBinaryHandle)
    return nullptr;

  // void __cudaUnregisterFatBinary(void ** handle);
  llvm::FunctionCallee UnregisterFatbinFunc = CGM.CreateRuntimeFunction(
      llvm::FunctionType::get(VoidTy, VoidPtrPtrTy, false),
      addUnderscoredPrefixToName("UnregisterFatBinary"));

  llvm::Function *ModuleDtorFunc = llvm::Function::Create(
      llvm::FunctionType::get(VoidTy, VoidPtrTy, false),
      llvm::GlobalValue::InternalLinkage,
      addUnderscoredPrefixToName("_module_dtor"), &TheModule);

  llvm::BasicBlock *DtorEntryBB =
      llvm::BasicBlock::Create(Context, "entry", ModuleDtorFunc);
  CGBuilderTy DtorBuilder(CGM, Context);
  DtorBuilder.SetInsertPoint(DtorEntryBB);

  Address GpuBinaryAddr(GpuBinaryHandle, CharUnits::fromQuantity(
                                             GpuBinaryHandle->getAlignment()));
  auto HandleValue = DtorBuilder.CreateLoad(GpuBinaryAddr);
  // There is only one HIP fat binary per linked module, however there are
  // multiple destructor functions. Make sure the fat binary is unregistered
  // only once.
  if (CGM.getLangOpts().HIP) {
    llvm::BasicBlock *IfBlock =
        llvm::BasicBlock::Create(Context, "if", ModuleDtorFunc);
    llvm::BasicBlock *ExitBlock =
        llvm::BasicBlock::Create(Context, "exit", ModuleDtorFunc);
    llvm::Constant *Zero = llvm::Constant::getNullValue(HandleValue->getType());
    llvm::Value *NEZero = DtorBuilder.CreateICmpNE(HandleValue, Zero);
    DtorBuilder.CreateCondBr(NEZero, IfBlock, ExitBlock);

    DtorBuilder.SetInsertPoint(IfBlock);
    DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
    DtorBuilder.CreateStore(Zero, GpuBinaryAddr);
    DtorBuilder.CreateBr(ExitBlock);

    DtorBuilder.SetInsertPoint(ExitBlock);
  } else {
    DtorBuilder.CreateCall(UnregisterFatbinFunc, HandleValue);
  }
  DtorBuilder.CreateRetVoid();
  return ModuleDtorFunc;
}

CGCUDARuntime *CodeGen::CreateNVCUDARuntime(CodeGenModule &CGM) {
  return new CGNVCUDARuntime(CGM);
}

void CGNVCUDARuntime::internalizeDeviceSideVar(
    const VarDecl *D, llvm::GlobalValue::LinkageTypes &Linkage) {
  // For -fno-gpu-rdc, host-side shadows of external declarations of
  // device-side global variables become internal definitions. These have to
  // be internal in order to prevent name conflicts with global host variables
  // with the same name in different TUs.
  //
  // For -fgpu-rdc, the shadow variables should not be internalized because
  // they may be accessed by a different TU.
  if (CGM.getLangOpts().GPURelocatableDeviceCode)
    return;

  // __shared__ variables are odd. Shadows do get created, but
  // they are not registered with the CUDA runtime, so they
  // can't really be used to access their device-side
  // counterparts. It's not clear yet whether it's nvcc's bug or
  // a feature, but we've got to do the same for compatibility.
  if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>() ||
      D->hasAttr<CUDASharedAttr>() ||
      D->getType()->isCUDADeviceBuiltinSurfaceType() ||
      D->getType()->isCUDADeviceBuiltinTextureType()) {
    Linkage = llvm::GlobalValue::InternalLinkage;
  }
}

void CGNVCUDARuntime::handleVarRegistration(const VarDecl *D,
                                            llvm::GlobalVariable &GV) {
  if (D->hasAttr<CUDADeviceAttr>() || D->hasAttr<CUDAConstantAttr>()) {
    // Shadow variables and their properties must be registered with the CUDA
    // runtime. Skip extern global variables, which will be registered in
    // the TU where they are defined.
    //
    // Don't register a C++17 inline variable. The local symbol can be
    // discarded and referencing a discarded local symbol from outside the
    // comdat (__cuda_register_globals) is disallowed by the ELF spec.
    //
    // HIP managed variables need to be always recorded in device and host
    // compilations for transformation.
    //
    // HIP managed variables and variables in CUDADeviceVarODRUsedByHost are
    // added to llvm.compiler-used, therefore they are safe to be registered.
    if ((!D->hasExternalStorage() && !D->isInline()) ||
        CGM.getContext().CUDADeviceVarODRUsedByHost.contains(D) ||
        D->hasAttr<HIPManagedAttr>()) {
      registerDeviceVar(D, GV, !D->hasDefinition(),
                        D->hasAttr<CUDAConstantAttr>());
    }
  } else if (D->getType()->isCUDADeviceBuiltinSurfaceType() ||
             D->getType()->isCUDADeviceBuiltinTextureType()) {
    // Builtin surfaces and textures and their template arguments are
    // also registered with the CUDA runtime.
    const auto *TD = cast<ClassTemplateSpecializationDecl>(
        D->getType()->castAs<RecordType>()->getDecl());
    const TemplateArgumentList &Args = TD->getTemplateArgs();
    if (TD->hasAttr<CUDADeviceBuiltinSurfaceTypeAttr>()) {
      assert(Args.size() == 2 &&
             "Unexpected number of template arguments of CUDA device "
             "builtin surface type.");
      auto SurfType = Args[1].getAsIntegral();
      if (!D->hasExternalStorage())
        registerDeviceSurf(D, GV, !D->hasDefinition(), SurfType.getSExtValue());
    } else {
      assert(Args.size() == 3 &&
             "Unexpected number of template arguments of CUDA device "
             "builtin texture type.");
      auto TexType = Args[1].getAsIntegral();
      auto Normalized = Args[2].getAsIntegral();
      if (!D->hasExternalStorage())
        registerDeviceTex(D, GV, !D->hasDefinition(), TexType.getSExtValue(),
                          Normalized.getZExtValue());
    }
  }
}

// Transform managed variables to pointers to managed variables in device code.
// Each use of the original managed variable is replaced by a load from the
// transformed managed variable. The transformed managed variable contains
// the address of managed memory, which will be allocated by the runtime.
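// For a managed variable
//   __managed__ int x;
// the device-side module ends up with, schematically,
//   @x         = externally_initialized global i32* null  ; pointer slot
//   @x.managed = global i32 ...                           ; original data
// and each use of x loads the runtime-provided pointer from @x first.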
void CGNVCUDARuntime::transformManagedVars() {
  for (auto &&Info : DeviceVars) {
    llvm::GlobalVariable *Var = Info.Var;
    if (Info.Flags.getKind() == DeviceVarFlags::Variable &&
        Info.Flags.isManaged()) {
      auto ManagedVar = new llvm::GlobalVariable(
          CGM.getModule(), Var->getType(),
          /*isConstant=*/false, Var->getLinkage(),
          /*Init=*/Var->isDeclaration()
              ? nullptr
              : llvm::ConstantPointerNull::get(Var->getType()),
          /*Name=*/"", /*InsertBefore=*/nullptr,
          llvm::GlobalVariable::NotThreadLocal,
          CGM.getContext().getTargetAddressSpace(LangAS::cuda_device));
      ManagedVar->setDSOLocal(Var->isDSOLocal());
      ManagedVar->setVisibility(Var->getVisibility());
      ManagedVar->setExternallyInitialized(true);
      replaceManagedVar(Var, ManagedVar);
      ManagedVar->takeName(Var);
      Var->setName(Twine(ManagedVar->getName()) + ".managed");
      // Keep managed variables even if they are not used in device code,
      // since they need to be allocated by the runtime.
      if (!Var->isDeclaration()) {
        assert(!ManagedVar->isDeclaration());
        CGM.addCompilerUsedGlobal(Var);
        CGM.addCompilerUsedGlobal(ManagedVar);
      }
    }
  }
}

// Returns the module constructor to be added.
llvm::Function *CGNVCUDARuntime::finalizeModule() {
  if (CGM.getLangOpts().CUDAIsDevice) {
    transformManagedVars();

    // Mark ODR-used device variables as compiler used to prevent them from
    // being eliminated by optimization. This is necessary for device variables
    // ODR-used by host functions. Sema correctly marks them as ODR-used no
    // matter whether they are ODR-used by device or host functions.
    //
    // We do not need to do this if the variable has the used attribute, since
    // it has already been added.
    //
    // Static device variables have been externalized at this point, therefore
    // variables with LLVM private or internal linkage need not be added.
    for (auto &&Info : DeviceVars) {
      auto Kind = Info.Flags.getKind();
      if (!Info.Var->isDeclaration() &&
          !llvm::GlobalValue::isLocalLinkage(Info.Var->getLinkage()) &&
          (Kind == DeviceVarFlags::Variable ||
           Kind == DeviceVarFlags::Surface ||
           Kind == DeviceVarFlags::Texture) &&
          Info.D->isUsed() && !Info.D->hasAttr<UsedAttr>()) {
        CGM.addCompilerUsedGlobal(Info.Var);
      }
    }
    return nullptr;
  }
  return makeModuleCtorFunction();
}

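// Returns the symbol the host uses to identify the kernel. For CUDA this is
// the stub function itself; for HIP a separate constant global is created
// whose name is the kernel's own mangled name. Illustratively, for
// __global__ void foo(), the stub is _Z18__device_stub__foov and the handle
// global is _Z3foov, which launch and registration code reference instead.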
llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
                                                    GlobalDecl GD) {
  auto Loc = KernelHandles.find(F);
  if (Loc != KernelHandles.end())
    return Loc->second;

  if (!CGM.getLangOpts().HIP) {
    KernelHandles[F] = F;
    KernelStubs[F] = F;
    return F;
  }

  auto *Var = new llvm::GlobalVariable(
      TheModule, F->getType(), /*isConstant=*/true, F->getLinkage(),
      /*Initializer=*/nullptr,
      CGM.getMangledName(
          GD.getWithKernelReferenceKind(KernelReferenceKind::Kernel)));
  Var->setAlignment(CGM.getPointerAlign().getAsAlign());
  Var->setDSOLocal(F->isDSOLocal());
  Var->setVisibility(F->getVisibility());
  KernelHandles[F] = Var;
  KernelStubs[Var] = F;
  return Var;
}
|