Mercurial > hg > CbC > CbC_llvm
annotate lld/ELF/InputFiles.cpp @ 266:00f31e85ec16 default tip
Added tag current for changeset 31d058e83c98
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 14 Oct 2023 10:13:55 +0900 |
parents | 1f2b6ac9f198 |
children |
rev | line source |
---|---|
150 | 1 //===- InputFiles.cpp -----------------------------------------------------===// |
2 // | |
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |
4 // See https://llvm.org/LICENSE.txt for license information. | |
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |
6 // | |
7 //===----------------------------------------------------------------------===// | |
8 | |
9 #include "InputFiles.h" | |
236 | 10 #include "Config.h" |
11 #include "DWARF.h" | |
150 | 12 #include "Driver.h" |
13 #include "InputSection.h" | |
14 #include "LinkerScript.h" | |
15 #include "SymbolTable.h" | |
16 #include "Symbols.h" | |
17 #include "SyntheticSections.h" | |
236 | 18 #include "Target.h" |
19 #include "lld/Common/CommonLinkerContext.h" | |
150 | 20 #include "lld/Common/DWARF.h" |
236 | 21 #include "llvm/ADT/CachedHashString.h" |
150 | 22 #include "llvm/ADT/STLExtras.h" |
23 #include "llvm/LTO/LTO.h" | |
236 | 24 #include "llvm/Object/IRObjectFile.h" |
150 | 25 #include "llvm/Support/ARMAttributeParser.h" |
26 #include "llvm/Support/ARMBuildAttributes.h" | |
27 #include "llvm/Support/Endian.h" | |
236 | 28 #include "llvm/Support/FileSystem.h" |
150 | 29 #include "llvm/Support/Path.h" |
221 | 30 #include "llvm/Support/RISCVAttributeParser.h" |
150 | 31 #include "llvm/Support/TarWriter.h" |
32 #include "llvm/Support/raw_ostream.h" | |
33 | |
34 using namespace llvm; | |
35 using namespace llvm::ELF; | |
36 using namespace llvm::object; | |
37 using namespace llvm::sys; | |
38 using namespace llvm::sys::fs; | |
39 using namespace llvm::support::endian; | |
173 | 40 using namespace lld; |
41 using namespace lld::elf; | |
150 | 42 |
173 | 43 bool InputFile::isInGroup; |
44 uint32_t InputFile::nextGroupId; | |
45 | |
46 std::unique_ptr<TarWriter> elf::tar; | |
47 | |
150 | 48 // Returns "<internal>", "foo.a(bar.o)" or "baz.o". |
173 | 49 std::string lld::toString(const InputFile *f) { |
236 | 50 static std::mutex mu; |
150 | 51 if (!f) |
52 return "<internal>"; | |
53 | |
236 | 54 { |
55 std::lock_guard<std::mutex> lock(mu); | |
56 if (f->toStringCache.empty()) { | |
57 if (f->archiveName.empty()) | |
58 f->toStringCache = f->getName(); | |
59 else | |
60 (f->archiveName + "(" + f->getName() + ")").toVector(f->toStringCache); | |
61 } | |
150 | 62 } |
236 | 63 return std::string(f->toStringCache); |
150 | 64 } |
65 | |
66 static ELFKind getELFKind(MemoryBufferRef mb, StringRef archiveName) { | |
67 unsigned char size; | |
68 unsigned char endian; | |
69 std::tie(size, endian) = getElfArchType(mb.getBuffer()); | |
70 | |
71 auto report = [&](StringRef msg) { | |
72 StringRef filename = mb.getBufferIdentifier(); | |
73 if (archiveName.empty()) | |
74 fatal(filename + ": " + msg); | |
75 else | |
76 fatal(archiveName + "(" + filename + "): " + msg); | |
77 }; | |
78 | |
252 | 79 if (!mb.getBuffer().starts_with(ElfMagic)) |
150 | 80 report("not an ELF file"); |
81 if (endian != ELFDATA2LSB && endian != ELFDATA2MSB) | |
82 report("corrupted ELF file: invalid data encoding"); | |
83 if (size != ELFCLASS32 && size != ELFCLASS64) | |
84 report("corrupted ELF file: invalid file class"); | |
85 | |
86 size_t bufSize = mb.getBuffer().size(); | |
87 if ((size == ELFCLASS32 && bufSize < sizeof(Elf32_Ehdr)) || | |
88 (size == ELFCLASS64 && bufSize < sizeof(Elf64_Ehdr))) | |
89 report("corrupted ELF file: file is too short"); | |
90 | |
91 if (size == ELFCLASS32) | |
92 return (endian == ELFDATA2LSB) ? ELF32LEKind : ELF32BEKind; | |
93 return (endian == ELFDATA2LSB) ? ELF64LEKind : ELF64BEKind; | |
94 } | |
95 | |
236 | 96 // For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD |
97 // flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how | |
98 // the input objects have been compiled. | |
99 static void updateARMVFPArgs(const ARMAttributeParser &attributes, | |
100 const InputFile *f) { | |
252 | 101 std::optional<unsigned> attr = |
236 | 102 attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args); |
103 if (!attr) | |
104 // If an ABI tag isn't present then it is implicitly given the value of 0 | |
105 // which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files, | |
106 // including some in glibc that don't use FP args (and should have value 3) | |
107 // don't have the attribute so we do not consider an implicit value of 0 | |
108 // as a clash. | |
109 return; | |
110 | |
111 unsigned vfpArgs = *attr; | |
112 ARMVFPArgKind arg; | |
113 switch (vfpArgs) { | |
114 case ARMBuildAttrs::BaseAAPCS: | |
115 arg = ARMVFPArgKind::Base; | |
116 break; | |
117 case ARMBuildAttrs::HardFPAAPCS: | |
118 arg = ARMVFPArgKind::VFP; | |
119 break; | |
120 case ARMBuildAttrs::ToolChainFPPCS: | |
121 // Tool chain specific convention that conforms to neither AAPCS variant. | |
122 arg = ARMVFPArgKind::ToolChain; | |
123 break; | |
124 case ARMBuildAttrs::CompatibleFPAAPCS: | |
125 // Object compatible with all conventions. | |
126 return; | |
127 default: | |
128 error(toString(f) + ": unknown Tag_ABI_VFP_args value: " + Twine(vfpArgs)); | |
129 return; | |
130 } | |
131 // Follow ld.bfd and error if there is a mix of calling conventions. | |
132 if (config->armVFPArgs != arg && config->armVFPArgs != ARMVFPArgKind::Default) | |
133 error(toString(f) + ": incompatible Tag_ABI_VFP_args"); | |
134 else | |
135 config->armVFPArgs = arg; | |
136 } | |
137 | |
138 // The ARM support in lld makes some use of instructions that are not available | |
139 // on all ARM architectures. Namely: | |
140 // - Use of BLX instruction for interworking between ARM and Thumb state. | |
141 // - Use of the extended Thumb branch encoding in relocation. | |
142 // - Use of the MOVT/MOVW instructions in Thumb Thunks. | |
143 // The ARM Attributes section contains information about the architecture chosen | |
144 // at compile time. We follow the convention that if at least one input object | |
145 // is compiled with an architecture that supports these features then lld is | |
146 // permitted to use them. | |
147 static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) { | |
252 | 148 std::optional<unsigned> attr = |
236 | 149 attributes.getAttributeValue(ARMBuildAttrs::CPU_arch); |
150 if (!attr) | |
151 return; | |
252 | 152 auto arch = *attr; |
236 | 153 switch (arch) { |
154 case ARMBuildAttrs::Pre_v4: | |
155 case ARMBuildAttrs::v4: | |
156 case ARMBuildAttrs::v4T: | |
157 // Architectures prior to v5 do not support BLX instruction | |
158 break; | |
159 case ARMBuildAttrs::v5T: | |
160 case ARMBuildAttrs::v5TE: | |
161 case ARMBuildAttrs::v5TEJ: | |
162 case ARMBuildAttrs::v6: | |
163 case ARMBuildAttrs::v6KZ: | |
164 case ARMBuildAttrs::v6K: | |
165 config->armHasBlx = true; | |
166 // Architectures used in pre-Cortex processors do not support | |
167 // The J1 = 1 J2 = 1 Thumb branch range extension, with the exception | |
168 // of Architecture v6T2 (arm1156t2-s and arm1156t2f-s) that do. | |
169 break; | |
170 default: | |
171 // All other Architectures have BLX and extended branch encoding | |
172 config->armHasBlx = true; | |
173 config->armJ1J2BranchEncoding = true; | |
174 if (arch != ARMBuildAttrs::v6_M && arch != ARMBuildAttrs::v6S_M) | |
175 // All Architectures used in Cortex processors with the exception | |
176 // of v6-M and v6S-M have the MOVT and MOVW instructions. | |
177 config->armHasMovtMovw = true; | |
178 break; | |
179 } | |
252 | 180 |
181 // Only ARMv8-M or later architectures have CMSE support. | |
182 std::optional<unsigned> profile = | |
183 attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile); | |
184 if (!profile) | |
185 return; | |
186 if (arch >= ARMBuildAttrs::CPUArch::v8_M_Base && | |
187 profile == ARMBuildAttrs::MicroControllerProfile) | |
188 config->armCMSESupport = true; | |
236 | 189 } |
190 | |
150 | 191 InputFile::InputFile(Kind k, MemoryBufferRef m) |
192 : mb(m), groupId(nextGroupId), fileKind(k) { | |
193 // All files within the same --{start,end}-group get the same group ID. | |
194 // Otherwise, a new file will get a new group ID. | |
195 if (!isInGroup) | |
196 ++nextGroupId; | |
197 } | |
198 | |
252 | 199 std::optional<MemoryBufferRef> elf::readFile(StringRef path) { |
221 | 200 llvm::TimeTraceScope timeScope("Load input files", path); |
201 | |
150 | 202 // The --chroot option changes our virtual root directory. |
203 // This is useful when you are dealing with files created by --reproduce. | |
252 | 204 if (!config->chroot.empty() && path.starts_with("/")) |
236 | 205 path = saver().save(config->chroot + path); |
150 | 206 |
252 | 207 bool remapped = false; |
208 auto it = config->remapInputs.find(path); | |
209 if (it != config->remapInputs.end()) { | |
210 path = it->second; | |
211 remapped = true; | |
212 } else { | |
213 for (const auto &[pat, toFile] : config->remapInputsWildcards) { | |
214 if (pat.match(path)) { | |
215 path = toFile; | |
216 remapped = true; | |
217 break; | |
218 } | |
219 } | |
220 } | |
221 if (remapped) { | |
222 // Use /dev/null to indicate an input file that should be ignored. Change | |
223 // the path to NUL on Windows. | |
224 #ifdef _WIN32 | |
225 if (path == "/dev/null") | |
226 path = "NUL"; | |
227 #endif | |
228 } | |
229 | |
150 | 230 log(path); |
221 | 231 config->dependencyFiles.insert(llvm::CachedHashString(path)); |
150 | 232 |
221 | 233 auto mbOrErr = MemoryBuffer::getFile(path, /*IsText=*/false, |
234 /*RequiresNullTerminator=*/false); | |
150 | 235 if (auto ec = mbOrErr.getError()) { |
236 error("cannot open " + path + ": " + ec.message()); | |
252 | 237 return std::nullopt; |
150 | 238 } |
239 | |
236 | 240 MemoryBufferRef mbref = (*mbOrErr)->getMemBufferRef(); |
241 ctx.memoryBuffers.push_back(std::move(*mbOrErr)); // take MB ownership | |
150 | 242 |
243 if (tar) | |
244 tar->append(relativeToRoot(path), mbref.getBuffer()); | |
245 return mbref; | |
246 } | |
247 | |
248 // All input object files must be for the same architecture | |
249 // (e.g. it does not make sense to link x86 object files with | |
250 // MIPS object files.) This function checks for that error. | |
251 static bool isCompatible(InputFile *file) { | |
252 if (!file->isElf() && !isa<BitcodeFile>(file)) | |
253 return true; | |
254 | |
255 if (file->ekind == config->ekind && file->emachine == config->emachine) { | |
256 if (config->emachine != EM_MIPS) | |
257 return true; | |
258 if (isMipsN32Abi(file) == config->mipsN32Abi) | |
259 return true; | |
260 } | |
261 | |
173 | 262 StringRef target = |
263 !config->bfdname.empty() ? config->bfdname : config->emulation; | |
264 if (!target.empty()) { | |
265 error(toString(file) + " is incompatible with " + target); | |
150 | 266 return false; |
267 } | |
268 | |
236 | 269 InputFile *existing = nullptr; |
270 if (!ctx.objectFiles.empty()) | |
271 existing = ctx.objectFiles[0]; | |
272 else if (!ctx.sharedFiles.empty()) | |
273 existing = ctx.sharedFiles[0]; | |
274 else if (!ctx.bitcodeFiles.empty()) | |
275 existing = ctx.bitcodeFiles[0]; | |
276 std::string with; | |
277 if (existing) | |
278 with = " with " + toString(existing); | |
279 error(toString(file) + " is incompatible" + with); | |
150 | 280 return false; |
281 } | |
282 | |
283 template <class ELFT> static void doParseFile(InputFile *file) { | |
284 if (!isCompatible(file)) | |
285 return; | |
286 | |
287 // Binary file | |
288 if (auto *f = dyn_cast<BinaryFile>(file)) { | |
236 | 289 ctx.binaryFiles.push_back(f); |
150 | 290 f->parse(); |
291 return; | |
292 } | |
293 | |
294 // Lazy object file | |
236 | 295 if (file->lazy) { |
296 if (auto *f = dyn_cast<BitcodeFile>(file)) { | |
297 ctx.lazyBitcodeFiles.push_back(f); | |
298 f->parseLazy(); | |
299 } else { | |
300 cast<ObjFile<ELFT>>(file)->parseLazy(); | |
301 } | |
150 | 302 return; |
303 } | |
304 | |
305 if (config->trace) | |
306 message(toString(file)); | |
307 | |
308 // .so file | |
309 if (auto *f = dyn_cast<SharedFile>(file)) { | |
310 f->parse<ELFT>(); | |
311 return; | |
312 } | |
313 | |
314 // LLVM bitcode file | |
315 if (auto *f = dyn_cast<BitcodeFile>(file)) { | |
236 | 316 ctx.bitcodeFiles.push_back(f); |
317 f->parse(); | |
150 | 318 return; |
319 } | |
320 | |
321 // Regular object file | |
236 | 322 ctx.objectFiles.push_back(cast<ELFFileBase>(file)); |
150 | 323 cast<ObjFile<ELFT>>(file)->parse(); |
324 } | |
325 | |
326 // Add symbols in File to the symbol table. | |
236 | 327 void elf::parseFile(InputFile *file) { invokeELFT(doParseFile, file); } |
150 | 328 |
252 | 329 template <class ELFT> static void doParseArmCMSEImportLib(InputFile *file) { |
330 cast<ObjFile<ELFT>>(file)->importCmseSymbols(); | |
331 } | |
332 | |
333 void elf::parseArmCMSEImportLib(InputFile *file) { | |
334 invokeELFT(doParseArmCMSEImportLib, file); | |
335 } | |
336 | |
150 | 337 // Concatenates arguments to construct a string representing an error location. |
338 static std::string createFileLineMsg(StringRef path, unsigned line) { | |
339 std::string filename = std::string(path::filename(path)); | |
340 std::string lineno = ":" + std::to_string(line); | |
341 if (filename == path) | |
342 return filename + lineno; | |
343 return filename + lineno + " (" + path.str() + lineno + ")"; | |
344 } | |
345 | |
346 template <class ELFT> | |
347 static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym, | |
348 InputSectionBase &sec, uint64_t offset) { | |
349 // In DWARF, functions and variables are stored to different places. | |
236 | 350 // First, look up a function for a given offset. |
252 | 351 if (std::optional<DILineInfo> info = file.getDILineInfo(&sec, offset)) |
150 | 352 return createFileLineMsg(info->FileName, info->Line); |
353 | |
236 | 354 // If it failed, look up again as a variable. |
252 | 355 if (std::optional<std::pair<std::string, unsigned>> fileLine = |
150 | 356 file.getVariableLoc(sym.getName())) |
357 return createFileLineMsg(fileLine->first, fileLine->second); | |
358 | |
359 // File.sourceFile contains STT_FILE symbol, and that is a last resort. | |
360 return std::string(file.sourceFile); | |
361 } | |
362 | |
363 std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec, | |
364 uint64_t offset) { | |
365 if (kind() != ObjKind) | |
366 return ""; | |
236 | 367 switch (ekind) { |
150 | 368 default: |
369 llvm_unreachable("Invalid kind"); | |
370 case ELF32LEKind: | |
371 return getSrcMsgAux(cast<ObjFile<ELF32LE>>(*this), sym, sec, offset); | |
372 case ELF32BEKind: | |
373 return getSrcMsgAux(cast<ObjFile<ELF32BE>>(*this), sym, sec, offset); | |
374 case ELF64LEKind: | |
375 return getSrcMsgAux(cast<ObjFile<ELF64LE>>(*this), sym, sec, offset); | |
376 case ELF64BEKind: | |
377 return getSrcMsgAux(cast<ObjFile<ELF64BE>>(*this), sym, sec, offset); | |
378 } | |
379 } | |
380 | |
221 | 381 StringRef InputFile::getNameForScript() const { |
382 if (archiveName.empty()) | |
383 return getName(); | |
384 | |
385 if (nameForScriptCache.empty()) | |
386 nameForScriptCache = (archiveName + Twine(':') + getName()).str(); | |
387 | |
388 return nameForScriptCache; | |
389 } | |
390 | |
236 | 391 // An ELF object file may contain a `.deplibs` section. If it exists, the |
392 // section contains a list of library specifiers such as `m` for libm. This | |
393 // function resolves a given name by finding the first matching library checking | |
394 // the various ways that a library can be specified to LLD. This ELF extension | |
395 // is a form of autolinking and is called `dependent libraries`. It is currently | |
396 // unique to LLVM and lld. | |
397 static void addDependentLibrary(StringRef specifier, const InputFile *f) { | |
398 if (!config->dependentLibraries) | |
399 return; | |
252 | 400 if (std::optional<std::string> s = searchLibraryBaseName(specifier)) |
236 | 401 ctx.driver.addFile(saver().save(*s), /*withLOption=*/true); |
252 | 402 else if (std::optional<std::string> s = findFromSearchPaths(specifier)) |
236 | 403 ctx.driver.addFile(saver().save(*s), /*withLOption=*/true); |
404 else if (fs::exists(specifier)) | |
405 ctx.driver.addFile(specifier, /*withLOption=*/false); | |
406 else | |
407 error(toString(f) + | |
408 ": unable to find library from dependent library specifier: " + | |
409 specifier); | |
410 } | |
411 | |
412 // Record the membership of a section group so that in the garbage collection | |
413 // pass, section group members are kept or discarded as a unit. | |
414 template <class ELFT> | |
415 static void handleSectionGroup(ArrayRef<InputSectionBase *> sections, | |
416 ArrayRef<typename ELFT::Word> entries) { | |
417 bool hasAlloc = false; | |
418 for (uint32_t index : entries.slice(1)) { | |
419 if (index >= sections.size()) | |
420 return; | |
421 if (InputSectionBase *s = sections[index]) | |
422 if (s != &InputSection::discarded && s->flags & SHF_ALLOC) | |
423 hasAlloc = true; | |
424 } | |
425 | |
426 // If any member has the SHF_ALLOC flag, the whole group is subject to garbage | |
427 // collection. See the comment in markLive(). This rule retains .debug_types | |
428 // and .rela.debug_types. | |
429 if (!hasAlloc) | |
430 return; | |
431 | |
432 // Connect the members in a circular doubly-linked list via | |
433 // nextInSectionGroup. | |
434 InputSectionBase *head; | |
435 InputSectionBase *prev = nullptr; | |
436 for (uint32_t index : entries.slice(1)) { | |
437 InputSectionBase *s = sections[index]; | |
438 if (!s || s == &InputSection::discarded) | |
439 continue; | |
440 if (prev) | |
441 prev->nextInSectionGroup = s; | |
442 else | |
443 head = s; | |
444 prev = s; | |
445 } | |
446 if (prev) | |
447 prev->nextInSectionGroup = head; | |
448 } | |
449 | |
173 | 450 template <class ELFT> DWARFCache *ObjFile<ELFT>::getDwarf() { |
451 llvm::call_once(initDwarf, [this]() { | |
452 dwarf = std::make_unique<DWARFCache>(std::make_unique<DWARFContext>( | |
453 std::make_unique<LLDDwarfObj<ELFT>>(this), "", | |
454 [&](Error err) { warn(getName() + ": " + toString(std::move(err))); }, | |
455 [&](Error warning) { | |
456 warn(getName() + ": " + toString(std::move(warning))); | |
457 })); | |
458 }); | |
459 | |
460 return dwarf.get(); | |
150 | 461 } |
462 | |
463 // Returns the pair of file name and line number describing location of data | |
464 // object (variable, array, etc) definition. | |
465 template <class ELFT> | |
252 | 466 std::optional<std::pair<std::string, unsigned>> |
150 | 467 ObjFile<ELFT>::getVariableLoc(StringRef name) { |
173 | 468 return getDwarf()->getVariableLoc(name); |
150 | 469 } |
470 | |
471 // Returns source line information for a given offset | |
472 // using DWARF debug info. | |
473 template <class ELFT> | |
252 | 474 std::optional<DILineInfo> ObjFile<ELFT>::getDILineInfo(InputSectionBase *s, |
475 uint64_t offset) { | |
150 | 476 // Detect SectionIndex for specified section. |
477 uint64_t sectionIndex = object::SectionedAddress::UndefSection; | |
478 ArrayRef<InputSectionBase *> sections = s->file->getSections(); | |
479 for (uint64_t curIndex = 0; curIndex < sections.size(); ++curIndex) { | |
480 if (s == sections[curIndex]) { | |
481 sectionIndex = curIndex; | |
482 break; | |
483 } | |
484 } | |
485 | |
173 | 486 return getDwarf()->getDILineInfo(offset, sectionIndex); |
150 | 487 } |
488 | |
236 | 489 ELFFileBase::ELFFileBase(Kind k, ELFKind ekind, MemoryBufferRef mb) |
490 : InputFile(k, mb) { | |
491 this->ekind = ekind; | |
150 | 492 } |
493 | |
494 template <typename Elf_Shdr> | |
495 static const Elf_Shdr *findSection(ArrayRef<Elf_Shdr> sections, uint32_t type) { | |
496 for (const Elf_Shdr &sec : sections) | |
497 if (sec.sh_type == type) | |
498 return &sec; | |
499 return nullptr; | |
500 } | |
501 | |
236 | 502 void ELFFileBase::init() { |
503 switch (ekind) { | |
504 case ELF32LEKind: | |
505 init<ELF32LE>(fileKind); | |
506 break; | |
507 case ELF32BEKind: | |
508 init<ELF32BE>(fileKind); | |
509 break; | |
510 case ELF64LEKind: | |
511 init<ELF64LE>(fileKind); | |
512 break; | |
513 case ELF64BEKind: | |
514 init<ELF64BE>(fileKind); | |
515 break; | |
516 default: | |
517 llvm_unreachable("getELFKind"); | |
518 } | |
519 } | |
520 | |
521 template <class ELFT> void ELFFileBase::init(InputFile::Kind k) { | |
150 | 522 using Elf_Shdr = typename ELFT::Shdr; |
523 using Elf_Sym = typename ELFT::Sym; | |
524 | |
525 // Initialize trivial attributes. | |
526 const ELFFile<ELFT> &obj = getObj<ELFT>(); | |
221 | 527 emachine = obj.getHeader().e_machine; |
528 osabi = obj.getHeader().e_ident[llvm::ELF::EI_OSABI]; | |
529 abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION]; | |
150 | 530 |
531 ArrayRef<Elf_Shdr> sections = CHECK(obj.sections(), this); | |
236 | 532 elfShdrs = sections.data(); |
533 numELFShdrs = sections.size(); | |
150 | 534 |
535 // Find a symbol table. | |
536 const Elf_Shdr *symtabSec = | |
236 | 537 findSection(sections, k == SharedKind ? SHT_DYNSYM : SHT_SYMTAB); |
150 | 538 |
539 if (!symtabSec) | |
540 return; | |
541 | |
542 // Initialize members corresponding to a symbol table. | |
543 firstGlobal = symtabSec->sh_info; | |
544 | |
545 ArrayRef<Elf_Sym> eSyms = CHECK(obj.symbols(symtabSec), this); | |
546 if (firstGlobal == 0 || firstGlobal > eSyms.size()) | |
547 fatal(toString(this) + ": invalid sh_info in symbol table"); | |
548 | |
549 elfSyms = reinterpret_cast<const void *>(eSyms.data()); | |
236 | 550 numELFSyms = uint32_t(eSyms.size()); |
150 | 551 stringTable = CHECK(obj.getStringTableForSymtab(*symtabSec, sections), this); |
552 } | |
553 | |
554 template <class ELFT> | |
555 uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const { | |
556 return CHECK( | |
221 | 557 this->getObj().getSectionIndex(sym, getELFSyms<ELFT>(), shndxTable), |
150 | 558 this); |
559 } | |
560 | |
236 | 561 template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) { |
562 object::ELFFile<ELFT> obj = this->getObj(); | |
563 // Read a section table. justSymbols is usually false. | |
564 if (this->justSymbols) { | |
565 initializeJustSymbols(); | |
566 initializeSymbols(obj); | |
567 return; | |
568 } | |
569 | |
570 // Handle dependent libraries and selection of section groups as these are not | |
571 // done in parallel. | |
572 ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>(); | |
573 StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this); | |
574 uint64_t size = objSections.size(); | |
575 sections.resize(size); | |
576 for (size_t i = 0; i != size; ++i) { | |
577 const Elf_Shdr &sec = objSections[i]; | |
578 if (sec.sh_type == SHT_LLVM_DEPENDENT_LIBRARIES && !config->relocatable) { | |
579 StringRef name = check(obj.getSectionName(sec, shstrtab)); | |
580 ArrayRef<char> data = CHECK( | |
581 this->getObj().template getSectionContentsAsArray<char>(sec), this); | |
582 if (!data.empty() && data.back() != '\0') { | |
583 error( | |
584 toString(this) + | |
585 ": corrupted dependent libraries section (unterminated string): " + | |
586 name); | |
587 } else { | |
588 for (const char *d = data.begin(), *e = data.end(); d < e;) { | |
589 StringRef s(d); | |
590 addDependentLibrary(s, this); | |
591 d += s.size() + 1; | |
592 } | |
593 } | |
594 this->sections[i] = &InputSection::discarded; | |
595 continue; | |
596 } | |
597 | |
598 if (sec.sh_type == SHT_ARM_ATTRIBUTES && config->emachine == EM_ARM) { | |
599 ARMAttributeParser attributes; | |
600 ArrayRef<uint8_t> contents = | |
601 check(this->getObj().getSectionContents(sec)); | |
602 StringRef name = check(obj.getSectionName(sec, shstrtab)); | |
603 this->sections[i] = &InputSection::discarded; | |
604 if (Error e = | |
605 attributes.parse(contents, ekind == ELF32LEKind ? support::little | |
606 : support::big)) { | |
607 InputSection isec(*this, sec, name); | |
608 warn(toString(&isec) + ": " + llvm::toString(std::move(e))); | |
609 } else { | |
610 updateSupportedARMFeatures(attributes); | |
611 updateARMVFPArgs(attributes, this); | |
150 | 612 |
236 | 613 // FIXME: Retain the first attribute section we see. The eglibc ARM |
614 // dynamic loaders require the presence of an attribute section for | |
615 // dlopen to work. In a full implementation we would merge all attribute | |
616 // sections. | |
617 if (in.attributes == nullptr) { | |
618 in.attributes = std::make_unique<InputSection>(*this, sec, name); | |
619 this->sections[i] = in.attributes.get(); | |
620 } | |
621 } | |
622 } | |
623 | |
624 if (sec.sh_type != SHT_GROUP) | |
625 continue; | |
626 StringRef signature = getShtGroupSignature(objSections, sec); | |
627 ArrayRef<Elf_Word> entries = | |
628 CHECK(obj.template getSectionContentsAsArray<Elf_Word>(sec), this); | |
629 if (entries.empty()) | |
630 fatal(toString(this) + ": empty SHT_GROUP"); | |
631 | |
632 Elf_Word flag = entries[0]; | |
633 if (flag && flag != GRP_COMDAT) | |
634 fatal(toString(this) + ": unsupported SHT_GROUP format"); | |
635 | |
636 bool keepGroup = | |
637 (flag & GRP_COMDAT) == 0 || ignoreComdats || | |
638 symtab.comdatGroups.try_emplace(CachedHashStringRef(signature), this) | |
639 .second; | |
640 if (keepGroup) { | |
641 if (config->relocatable) | |
642 this->sections[i] = createInputSection( | |
643 i, sec, check(obj.getSectionName(sec, shstrtab))); | |
644 continue; | |
645 } | |
646 | |
647 // Otherwise, discard group members. | |
648 for (uint32_t secIndex : entries.slice(1)) { | |
649 if (secIndex >= size) | |
650 fatal(toString(this) + | |
651 ": invalid section index in group: " + Twine(secIndex)); | |
652 this->sections[secIndex] = &InputSection::discarded; | |
653 } | |
654 } | |
150 | 655 |
656 // Read a symbol table. | |
236 | 657 initializeSymbols(obj); |
150 | 658 } |
659 | |
660 // Sections with SHT_GROUP and comdat bits define comdat section groups. | |
661 // They are identified and deduplicated by group name. This function | |
662 // returns a group name. | |
663 template <class ELFT> | |
664 StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections, | |
665 const Elf_Shdr &sec) { | |
666 typename ELFT::SymRange symbols = this->getELFSyms<ELFT>(); | |
667 if (sec.sh_info >= symbols.size()) | |
668 fatal(toString(this) + ": invalid symbol index"); | |
669 const typename ELFT::Sym &sym = symbols[sec.sh_info]; | |
236 | 670 return CHECK(sym.getName(this->stringTable), this); |
150 | 671 } |
672 | |
673 template <class ELFT> | |
674 bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) { | |
675 // On a regular link we don't merge sections if -O0 (default is -O1). This | |
676 // sometimes makes the linker significantly faster, although the output will | |
677 // be bigger. | |
678 // | |
679 // Doing the same for -r would create a problem as it would combine sections | |
680 // with different sh_entsize. One option would be to just copy every SHF_MERGE | |
681 // section as is to the output. While this would produce a valid ELF file with | |
682 // usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when | |
683 // they see two .debug_str. We could have separate logic for combining | |
684 // SHF_MERGE sections based both on their name and sh_entsize, but that seems | |
685 // to be more trouble than it is worth. Instead, we just use the regular (-O1) | |
686 // logic for -r. | |
687 if (config->optimize == 0 && !config->relocatable) | |
688 return false; | |
689 | |
690 // A mergeable section with size 0 is useless because they don't have | |
691 // any data to merge. A mergeable string section with size 0 can be | |
692 // argued as invalid because it doesn't end with a null character. | |
693 // We'll avoid a mess by handling them as if they were non-mergeable. | |
694 if (sec.sh_size == 0) | |
695 return false; | |
696 | |
697 // Check for sh_entsize. The ELF spec is not clear about the zero | |
698 // sh_entsize. It says that "the member [sh_entsize] contains 0 if | |
699 // the section does not hold a table of fixed-size entries". We know | |
700 // that Rust 1.13 produces a string mergeable section with a zero | |
701 // sh_entsize. Here we just accept it rather than being picky about it. | |
702 uint64_t entSize = sec.sh_entsize; | |
703 if (entSize == 0) | |
704 return false; | |
705 if (sec.sh_size % entSize) | |
706 fatal(toString(this) + ":(" + name + "): SHF_MERGE section size (" + | |
707 Twine(sec.sh_size) + ") must be a multiple of sh_entsize (" + | |
708 Twine(entSize) + ")"); | |
709 | |
173 | 710 if (sec.sh_flags & SHF_WRITE) |
150 | 711 fatal(toString(this) + ":(" + name + |
712 "): writable SHF_MERGE section is not supported"); | |
713 | |
714 return true; | |
715 } | |
716 | |
717 // This is for --just-symbols. | |
718 // | |
719 // --just-symbols is a very minor feature that allows you to link your | |
720 // output against other existing program, so that if you load both your | |
721 // program and the other program into memory, your output can refer the | |
722 // other program's symbols. | |
723 // | |
724 // When the option is given, we link "just symbols". The section table is | |
725 // initialized with null pointers. | |
726 template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() { | |
236 | 727 sections.resize(numELFShdrs); |
150 | 728 } |
729 | |
730 template <class ELFT> | |
236 | 731 void ObjFile<ELFT>::initializeSections(bool ignoreComdats, |
732 const llvm::object::ELFFile<ELFT> &obj) { | |
733 ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>(); | |
734 StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this); | |
150 | 735 uint64_t size = objSections.size(); |
236 | 736 SmallVector<ArrayRef<Elf_Word>, 0> selectedGroups; |
737 for (size_t i = 0; i != size; ++i) { | |
150 | 738 if (this->sections[i] == &InputSection::discarded) |
739 continue; | |
740 const Elf_Shdr &sec = objSections[i]; | |
741 | |
742 // SHF_EXCLUDE'ed sections are discarded by the linker. However, | |
743 // if -r is given, we'll let the final link discard such sections. | |
744 // This is compatible with GNU. | |
745 if ((sec.sh_flags & SHF_EXCLUDE) && !config->relocatable) { | |
236 | 746 if (sec.sh_type == SHT_LLVM_CALL_GRAPH_PROFILE) |
747 cgProfileSectionIndex = i; | |
150 | 748 if (sec.sh_type == SHT_LLVM_ADDRSIG) { |
749 // We ignore the address-significance table if we know that the object | |
750 // file was created by objcopy or ld -r. This is because these tools | |
751 // will reorder the symbols in the symbol table, invalidating the data | |
752 // in the address-significance table, which refers to symbols by index. | |
753 if (sec.sh_link != 0) | |
754 this->addrsigSec = &sec; | |
755 else if (config->icf == ICFLevel::Safe) | |
221 | 756 warn(toString(this) + |
757 ": --icf=safe conservatively ignores " | |
758 "SHT_LLVM_ADDRSIG [index " + | |
759 Twine(i) + | |
760 "] with sh_link=0 " | |
761 "(likely created using objcopy or ld -r)"); | |
150 | 762 } |
763 this->sections[i] = &InputSection::discarded; | |
764 continue; | |
765 } | |
766 | |
767 switch (sec.sh_type) { | |
768 case SHT_GROUP: { | |
236 | 769 if (!config->relocatable) |
770 sections[i] = &InputSection::discarded; | |
771 StringRef signature = | |
772 cantFail(this->getELFSyms<ELFT>()[sec.sh_info].getName(stringTable)); | |
150 | 773 ArrayRef<Elf_Word> entries = |
236 | 774 cantFail(obj.template getSectionContentsAsArray<Elf_Word>(sec)); |
775 if ((entries[0] & GRP_COMDAT) == 0 || ignoreComdats || | |
776 symtab.comdatGroups.find(CachedHashStringRef(signature))->second == | |
777 this) | |
150 | 778 selectedGroups.push_back(entries); |
779 break; | |
780 } | |
781 case SHT_SYMTAB_SHNDX: | |
782 shndxTable = CHECK(obj.getSHNDXTable(sec, objSections), this); | |
783 break; | |
784 case SHT_SYMTAB: | |
785 case SHT_STRTAB: | |
221 | 786 case SHT_REL: |
787 case SHT_RELA: | |
150 | 788 case SHT_NULL: |
789 break; | |
236 | 790 case SHT_LLVM_SYMPART: |
791 ctx.hasSympart.store(true, std::memory_order_relaxed); | |
792 [[fallthrough]]; | |
150 | 793 default: |
236 | 794 this->sections[i] = |
795 createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab))); | |
150 | 796 } |
797 } | |
798 | |
221 | 799 // We have a second loop. It is used to: |
800 // 1) handle SHF_LINK_ORDER sections. | |
801 // 2) create SHT_REL[A] sections. In some cases the section header index of a | |
802 // relocation section may be smaller than that of the relocated section. In | |
803 // such cases, the relocation section would attempt to reference a target | |
804 // section that has not yet been created. For simplicity, delay creation of | |
805 // relocation sections until now. | |
236 | 806 for (size_t i = 0; i != size; ++i) { |
150 | 807 if (this->sections[i] == &InputSection::discarded) |
808 continue; | |
809 const Elf_Shdr &sec = objSections[i]; | |
221 | 810 |
236 | 811 if (sec.sh_type == SHT_REL || sec.sh_type == SHT_RELA) { |
812 // Find a relocation target section and associate this section with that. | |
813 // Target may have been discarded if it is in a different section group | |
814 // and the group is discarded, even though it's a violation of the spec. | |
815 // We handle that situation gracefully by discarding dangling relocation | |
816 // sections. | |
817 const uint32_t info = sec.sh_info; | |
818 InputSectionBase *s = getRelocTarget(i, sec, info); | |
819 if (!s) | |
820 continue; | |
821 | |
822 // ELF spec allows mergeable sections with relocations, but they are rare, | |
823 // and it is in practice hard to merge such sections by contents, because | |
824 // applying relocations at end of linking changes section contents. So, we | |
825 // simply handle such sections as non-mergeable ones. Degrading like this | |
826 // is acceptable because section merging is optional. | |
827 if (auto *ms = dyn_cast<MergeInputSection>(s)) { | |
252 | 828 s = makeThreadLocal<InputSection>( |
829 ms->file, ms->flags, ms->type, ms->addralign, | |
830 ms->contentMaybeDecompress(), ms->name); | |
236 | 831 sections[info] = s; |
832 } | |
833 | |
834 if (s->relSecIdx != 0) | |
835 error( | |
836 toString(s) + | |
837 ": multiple relocation sections to one section are not supported"); | |
838 s->relSecIdx = i; | |
839 | |
840 // Relocation sections are usually removed from the output, so return | |
841 // `nullptr` for the normal case. However, if -r or --emit-relocs is | |
842 // specified, we need to copy them to the output. (Some post link analysis | |
843 // tools specify --emit-relocs to obtain the information.) | |
844 if (config->copyRelocs) { | |
845 auto *isec = makeThreadLocal<InputSection>( | |
846 *this, sec, check(obj.getSectionName(sec, shstrtab))); | |
847 // If the relocated section is discarded (due to /DISCARD/ or | |
848 // --gc-sections), the relocation section should be discarded as well. | |
849 s->dependentSections.push_back(isec); | |
850 sections[i] = isec; | |
851 } | |
852 continue; | |
853 } | |
221 | 854 |
855 // A SHF_LINK_ORDER section with sh_link=0 is handled as if it did not have | |
856 // the flag. | |
236 | 857 if (!sec.sh_link || !(sec.sh_flags & SHF_LINK_ORDER)) |
150 | 858 continue; |
859 | |
860 InputSectionBase *linkSec = nullptr; | |
236 | 861 if (sec.sh_link < size) |
150 | 862 linkSec = this->sections[sec.sh_link]; |
863 if (!linkSec) | |
864 fatal(toString(this) + ": invalid sh_link index: " + Twine(sec.sh_link)); | |
865 | |
221 | 866 // A SHF_LINK_ORDER section is discarded if its linked-to section is |
867 // discarded. | |
150 | 868 InputSection *isec = cast<InputSection>(this->sections[i]); |
869 linkSec->dependentSections.push_back(isec); | |
870 if (!isa<InputSection>(linkSec)) | |
871 error("a section " + isec->name + | |
872 " with SHF_LINK_ORDER should not refer a non-regular section: " + | |
873 toString(linkSec)); | |
874 } | |
875 | |
876 for (ArrayRef<Elf_Word> entries : selectedGroups) | |
877 handleSectionGroup<ELFT>(this->sections, entries); | |
878 } | |
879 | |
880 // If a source file is compiled with x86 hardware-assisted call flow control | |
881 // enabled, the generated object file contains feature flags indicating that | |
882 // fact. This function reads the feature flags and returns it. | |
883 // | |
884 // Essentially we want to read a single 32-bit value in this function, but this | |
885 // function is rather complicated because the value is buried deep inside a | |
886 // .note.gnu.property section. | |
887 // | |
888 // The section consists of one or more NOTE records. Each NOTE record consists | |
889 // of zero or more type-length-value fields. We want to find a field of a | |
890 // certain type. It seems a bit too much to just store a 32-bit value, perhaps | |
891 // the ABI is unnecessarily complicated. | |
221 | 892 template <class ELFT> static uint32_t readAndFeatures(const InputSection &sec) { |
150 | 893 using Elf_Nhdr = typename ELFT::Nhdr; |
894 using Elf_Note = typename ELFT::Note; | |
895 | |
896 uint32_t featuresSet = 0; | |
252 | 897 ArrayRef<uint8_t> data = sec.content(); |
221 | 898 auto reportFatal = [&](const uint8_t *place, const char *msg) { |
899 fatal(toString(sec.file) + ":(" + sec.name + "+0x" + | |
252 | 900 Twine::utohexstr(place - sec.content().data()) + "): " + msg); |
221 | 901 }; |
150 | 902 while (!data.empty()) { |
903 // Read one NOTE record. | |
904 auto *nhdr = reinterpret_cast<const Elf_Nhdr *>(data.data()); | |
252 | 905 if (data.size() < sizeof(Elf_Nhdr) || |
906 data.size() < nhdr->getSize(sec.addralign)) | |
221 | 907 reportFatal(data.data(), "data is too short"); |
150 | 908 |
909 Elf_Note note(*nhdr); | |
910 if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") { | |
252 | 911 data = data.slice(nhdr->getSize(sec.addralign)); |
150 | 912 continue; |
913 } | |
914 | |
915 uint32_t featureAndType = config->emachine == EM_AARCH64 | |
916 ? GNU_PROPERTY_AARCH64_FEATURE_1_AND | |
917 : GNU_PROPERTY_X86_FEATURE_1_AND; | |
918 | |
919 // Read a body of a NOTE record, which consists of type-length-value fields. | |
252 | 920 ArrayRef<uint8_t> desc = note.getDesc(sec.addralign); |
150 | 921 while (!desc.empty()) { |
221 | 922 const uint8_t *place = desc.data(); |
150 | 923 if (desc.size() < 8) |
221 | 924 reportFatal(place, "program property is too short"); |
925 uint32_t type = read32<ELFT::TargetEndianness>(desc.data()); | |
926 uint32_t size = read32<ELFT::TargetEndianness>(desc.data() + 4); | |
927 desc = desc.slice(8); | |
928 if (desc.size() < size) | |
929 reportFatal(place, "program property is too short"); | |
150 | 930 |
931 if (type == featureAndType) { | |
932 // We found a FEATURE_1_AND field. There may be more than one of these | |
933 // in a .note.gnu.property section, for a relocatable object we | |
934 // accumulate the bits set. | |
221 | 935 if (size < 4) |
936 reportFatal(place, "FEATURE_1_AND entry is too short"); | |
937 featuresSet |= read32<ELFT::TargetEndianness>(desc.data()); | |
150 | 938 } |
939 | |
221 | 940 // Padding is present in the note descriptor, if necessary. |
941 desc = desc.slice(alignTo<(ELFT::Is64Bits ? 8 : 4)>(size)); | |
150 | 942 } |
943 | |
944 // Go to next NOTE record to look for more FEATURE_1_AND descriptions. | |
252 | 945 data = data.slice(nhdr->getSize(sec.addralign)); |
150 | 946 } |
947 | |
948 return featuresSet; | |
949 } | |
950 | |
951 template <class ELFT> | |
236 | 952 InputSectionBase *ObjFile<ELFT>::getRelocTarget(uint32_t idx, |
953 const Elf_Shdr &sec, | |
954 uint32_t info) { | |
955 if (info < this->sections.size()) { | |
956 InputSectionBase *target = this->sections[info]; | |
221 | 957 |
236 | 958 // Strictly speaking, a relocation section must be included in the |
959 // group of the section it relocates. However, LLVM 3.3 and earlier | |
960 // would fail to do so, so we gracefully handle that case. | |
961 if (target == &InputSection::discarded) | |
150 | 962 return nullptr; |
963 | |
236 | 964 if (target != nullptr) |
965 return target; | |
150 | 966 } |
967 | |
236 | 968 error(toString(this) + Twine(": relocation section (index ") + Twine(idx) + |
969 ") has invalid sh_info (" + Twine(info) + ")"); | |
970 return nullptr; | |
971 } | |
150 | 972 |
236 | 973 // The function may be called concurrently for different input files. For |
974 // allocation, prefer makeThreadLocal which does not require holding a lock. | |
975 template <class ELFT> | |
976 InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx, | |
977 const Elf_Shdr &sec, | |
978 StringRef name) { | |
252 | 979 if (name.starts_with(".n")) { |
236 | 980 // The GNU linker uses .note.GNU-stack section as a marker indicating |
981 // that the code in the object file does not expect that the stack is | |
982 // executable (in terms of NX bit). If all input files have the marker, | |
983 // the GNU linker adds a PT_GNU_STACK segment to tells the loader to | |
984 // make the stack non-executable. Most object files have this section as | |
985 // of 2017. | |
986 // | |
987 // But making the stack non-executable is a norm today for security | |
988 // reasons. Failure to do so may result in a serious security issue. | |
989 // Therefore, we make LLD always add PT_GNU_STACK unless it is | |
990 // explicitly told to do otherwise (by -z execstack). Because the stack | |
991 // executable-ness is controlled solely by command line options, | |
992 // .note.GNU-stack sections are simply ignored. | |
993 if (name == ".note.GNU-stack") | |
994 return &InputSection::discarded; | |
150 | 995 |
236 | 996 // Object files that use processor features such as Intel Control-Flow |
997 // Enforcement (CET) or AArch64 Branch Target Identification BTI, use a | |
998 // .note.gnu.property section containing a bitfield of feature bits like the | |
999 // GNU_PROPERTY_X86_FEATURE_1_IBT flag. Read a bitmap containing the flag. | |
1000 // | |
1001 // Since we merge bitmaps from multiple object files to create a new | |
1002 // .note.gnu.property containing a single AND'ed bitmap, we discard an input | |
1003 // file's .note.gnu.property section. | |
1004 if (name == ".note.gnu.property") { | |
1005 this->andFeatures = readAndFeatures<ELFT>(InputSection(*this, sec, name)); | |
150 | 1006 return &InputSection::discarded; |
1007 } | |
1008 | |
236 | 1009 // Split stacks is a feature to support a discontiguous stack, |
1010 // commonly used in the programming language Go. For the details, | |
1011 // see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled | |
1012 // for split stack will include a .note.GNU-split-stack section. | |
1013 if (name == ".note.GNU-split-stack") { | |
1014 if (config->relocatable) { | |
1015 error( | |
1016 "cannot mix split-stack and non-split-stack in a relocatable link"); | |
1017 return &InputSection::discarded; | |
1018 } | |
1019 this->splitStack = true; | |
1020 return &InputSection::discarded; | |
1021 } | |
150 | 1022 |
236 | 1023 // An object file compiled for split stack, but where some of the |
1024 // functions were compiled with the no_split_stack_attribute will | |
1025 // include a .note.GNU-no-split-stack section. | |
1026 if (name == ".note.GNU-no-split-stack") { | |
1027 this->someNoSplitStack = true; | |
1028 return &InputSection::discarded; | |
1029 } | |
150 | 1030 |
236 | 1031 // Strip existing .note.gnu.build-id sections so that the output won't have |
1032 // more than one build-id. This is not usually a problem because input | |
1033 // object files normally don't have .build-id sections, but you can create | |
1034 // such files by "ld.{bfd,gold,lld} -r --build-id", and we want to guard | |
1035 // against it. | |
1036 if (name == ".note.gnu.build-id") | |
1037 return &InputSection::discarded; | |
1038 } | |
150 | 1039 |
1040 // The linker merges EH (exception handling) frames and creates a | |
1041 // .eh_frame_hdr section for runtime. So we handle them with a special | |
1042 // class. For relocatable outputs, they are just passed through. | |
1043 if (name == ".eh_frame" && !config->relocatable) | |
236 | 1044 return makeThreadLocal<EhInputSection>(*this, sec, name); |
150 | 1045 |
236 | 1046 if ((sec.sh_flags & SHF_MERGE) && shouldMerge(sec, name)) |
1047 return makeThreadLocal<MergeInputSection>(*this, sec, name); | |
1048 return makeThreadLocal<InputSection>(*this, sec, name); | |
150 | 1049 } |
1050 | |
252 | 1051 // Initialize symbols. symbols is a parallel array to the corresponding ELF |
1052 // symbol table. | |
236 | 1053 template <class ELFT> |
1054 void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) { | |
150 | 1055 ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); |
252 | 1056 if (numSymbols == 0) { |
1057 numSymbols = eSyms.size(); | |
1058 symbols = std::make_unique<Symbol *[]>(numSymbols); | |
1059 } | |
150 | 1060 |
236 | 1061 // Some entries have been filled by LazyObjFile. |
1062 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) | |
1063 if (!symbols[i]) | |
1064 symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this)); | |
1065 | |
1066 // Perform symbol resolution on non-local symbols. | |
1067 SmallVector<unsigned, 32> undefineds; | |
1068 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { | |
150 | 1069 const Elf_Sym &eSym = eSyms[i]; |
236 | 1070 uint32_t secIdx = eSym.st_shndx; |
1071 if (secIdx == SHN_UNDEF) { | |
1072 undefineds.push_back(i); | |
221 | 1073 continue; |
1074 } | |
1075 | |
150 | 1076 uint8_t binding = eSym.getBinding(); |
1077 uint8_t stOther = eSym.st_other; | |
1078 uint8_t type = eSym.getType(); | |
1079 uint64_t value = eSym.st_value; | |
1080 uint64_t size = eSym.st_size; | |
1081 | |
236 | 1082 Symbol *sym = symbols[i]; |
1083 sym->isUsedInRegularObj = true; | |
1084 if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) { | |
150 | 1085 if (value == 0 || value >= UINT32_MAX) |
236 | 1086 fatal(toString(this) + ": common symbol '" + sym->getName() + |
150 | 1087 "' has invalid alignment: " + Twine(value)); |
236 | 1088 hasCommonSyms = true; |
1089 sym->resolve( | |
1090 CommonSymbol{this, StringRef(), binding, stOther, type, value, size}); | |
150 | 1091 continue; |
1092 } | |
1093 | |
236 | 1094 // Handle global defined symbols. Defined::section will be set in postParse. |
1095 sym->resolve(Defined{this, StringRef(), binding, stOther, type, value, size, | |
1096 nullptr}); | |
150 | 1097 } |
221 | 1098 |
1099 // Undefined symbols (excluding those defined relative to non-prevailing | |
236 | 1100 // sections) can trigger recursive extract. Process defined symbols first so |
221 | 1101 // that the relative order between a defined symbol and an undefined symbol |
1102 // does not change the symbol resolution behavior. In addition, a set of | |
1103 // interconnected symbols will all be resolved to the same file, instead of | |
1104 // being resolved to different files. | |
232
70dce7da266c
llvm original Jul 20 16:41:34 2021
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
223
diff
changeset
|
1105 for (unsigned i : undefineds) { |
221 | 1106 const Elf_Sym &eSym = eSyms[i]; |
236 | 1107 Symbol *sym = symbols[i]; |
1108 sym->resolve(Undefined{this, StringRef(), eSym.getBinding(), eSym.st_other, | |
1109 eSym.getType()}); | |
1110 sym->isUsedInRegularObj = true; | |
1111 sym->referenced = true; | |
1112 } | |
1113 } | |
1114 | |
1115 template <class ELFT> | |
1116 void ObjFile<ELFT>::initSectionsAndLocalSyms(bool ignoreComdats) { | |
1117 if (!justSymbols) | |
1118 initializeSections(ignoreComdats, getObj()); | |
1119 | |
1120 if (!firstGlobal) | |
1121 return; | |
1122 SymbolUnion *locals = makeThreadLocalN<SymbolUnion>(firstGlobal); | |
1123 memset(locals, 0, sizeof(SymbolUnion) * firstGlobal); | |
1124 | |
1125 ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); | |
1126 for (size_t i = 0, end = firstGlobal; i != end; ++i) { | |
1127 const Elf_Sym &eSym = eSyms[i]; | |
1128 uint32_t secIdx = eSym.st_shndx; | |
1129 if (LLVM_UNLIKELY(secIdx == SHN_XINDEX)) | |
1130 secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable)); | |
1131 else if (secIdx >= SHN_LORESERVE) | |
1132 secIdx = 0; | |
1133 if (LLVM_UNLIKELY(secIdx >= sections.size())) | |
1134 fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); | |
1135 if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL)) | |
1136 error(toString(this) + ": non-local symbol (" + Twine(i) + | |
1137 ") found at index < .symtab's sh_info (" + Twine(end) + ")"); | |
1138 | |
1139 InputSectionBase *sec = sections[secIdx]; | |
1140 uint8_t type = eSym.getType(); | |
1141 if (type == STT_FILE) | |
1142 sourceFile = CHECK(eSym.getName(stringTable), this); | |
1143 if (LLVM_UNLIKELY(stringTable.size() <= eSym.st_name)) | |
1144 fatal(toString(this) + ": invalid symbol name offset"); | |
1145 StringRef name(stringTable.data() + eSym.st_name); | |
1146 | |
1147 symbols[i] = reinterpret_cast<Symbol *>(locals + i); | |
1148 if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded) | |
1149 new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type, | |
1150 /*discardedSecIdx=*/secIdx); | |
1151 else | |
1152 new (symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type, | |
1153 eSym.st_value, eSym.st_size, sec); | |
1154 symbols[i]->partition = 1; | |
1155 symbols[i]->isUsedInRegularObj = true; | |
221 | 1156 } |
150 | 1157 } |
1158 | |
236 | 1159 // Called after all ObjFile::parse is called for all ObjFiles. This checks |
1160 // duplicate symbols and may do symbol property merge in the future. | |
1161 template <class ELFT> void ObjFile<ELFT>::postParse() { | |
1162 static std::mutex mu; | |
1163 ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>(); | |
1164 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { | |
1165 const Elf_Sym &eSym = eSyms[i]; | |
1166 Symbol &sym = *symbols[i]; | |
1167 uint32_t secIdx = eSym.st_shndx; | |
1168 uint8_t binding = eSym.getBinding(); | |
1169 if (LLVM_UNLIKELY(binding != STB_GLOBAL && binding != STB_WEAK && | |
1170 binding != STB_GNU_UNIQUE)) | |
1171 errorOrWarn(toString(this) + ": symbol (" + Twine(i) + | |
1172 ") has invalid binding: " + Twine((int)binding)); | |
150 | 1173 |
236 | 1174 // st_value of STT_TLS represents the assigned offset, not the actual |
1175 // address which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can | |
1176 // only be referenced by special TLS relocations. It is usually an error if | |
1177 // a STT_TLS symbol is replaced by a non-STT_TLS symbol, vice versa. | |
1178 if (LLVM_UNLIKELY(sym.isTls()) && eSym.getType() != STT_TLS && | |
1179 eSym.getType() != STT_NOTYPE) | |
1180 errorOrWarn("TLS attribute mismatch: " + toString(sym) + "\n>>> in " + | |
1181 toString(sym.file) + "\n>>> in " + toString(this)); | |
221 | 1182 |
236 | 1183 // Handle non-COMMON defined symbol below. !sym.file allows a symbol |
1184 // assignment to redefine a symbol without an error. | |
1185 if (!sym.file || !sym.isDefined() || secIdx == SHN_UNDEF || | |
1186 secIdx == SHN_COMMON) | |
1187 continue; | |
150 | 1188 |
236 | 1189 if (LLVM_UNLIKELY(secIdx == SHN_XINDEX)) |
1190 secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable)); | |
1191 else if (secIdx >= SHN_LORESERVE) | |
1192 secIdx = 0; | |
1193 if (LLVM_UNLIKELY(secIdx >= sections.size())) | |
1194 fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); | |
1195 InputSectionBase *sec = sections[secIdx]; | |
1196 if (sec == &InputSection::discarded) { | |
1197 if (sym.traced) { | |
1198 printTraceSymbol(Undefined{this, sym.getName(), sym.binding, | |
1199 sym.stOther, sym.type, secIdx}, | |
1200 sym.getName()); | |
1201 } | |
1202 if (sym.file == this) { | |
1203 std::lock_guard<std::mutex> lock(mu); | |
1204 ctx.nonPrevailingSyms.emplace_back(&sym, secIdx); | |
1205 } | |
1206 continue; | |
1207 } | |
150 | 1208 |
236 | 1209 if (sym.file == this) { |
1210 cast<Defined>(sym).section = sec; | |
1211 continue; | |
1212 } | |
150 | 1213 |
236 | 1214 if (sym.binding == STB_WEAK || binding == STB_WEAK) |
1215 continue; | |
1216 std::lock_guard<std::mutex> lock(mu); | |
1217 ctx.duplicates.push_back({&sym, this, sec, eSym.st_value}); | |
1218 } | |
150 | 1219 } |
1220 | |
221 | 1221 // The handling of tentative definitions (COMMON symbols) in archives is murky. |
1222 // A tentative definition will be promoted to a global definition if there are | |
1223 // no non-tentative definitions to dominate it. When we hold a tentative | |
1224 // definition to a symbol and are inspecting archive members for inclusion | |
1225 // there are 2 ways we can proceed: | |
1226 // | |
1227 // 1) Consider the tentative definition a 'real' definition (ie promotion from | |
1228 // tentative to real definition has already happened) and not inspect | |
1229 // archive members for Global/Weak definitions to replace the tentative | |
1230 // definition. An archive member would only be included if it satisfies some | |
1231 // other undefined symbol. This is the behavior Gold uses. | |
1232 // | |
1233 // 2) Consider the tentative definition as still undefined (ie the promotion to | |
1234 // a real definition happens only after all symbol resolution is done). | |
223 | 1235 // The linker searches archive members for STB_GLOBAL definitions to |
221 | 1236 // replace the tentative definition with. This is the behavior used by |
1237 // GNU ld. | |
1238 // | |
1239 // The second behavior is inherited from SysVR4, which based it on the FORTRAN | |
1240 // COMMON BLOCK model. This behavior is needed for proper initialization in old | |
1241 // (pre F90) FORTRAN code that is packaged into an archive. | |
1242 // | |
1243 // The following functions search archive members for definitions to replace | |
1244 // tentative definitions (implementing behavior 2). | |
1245 static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName, | |
1246 StringRef archiveName) { | |
1247 IRSymtabFile symtabFile = check(readIRSymtab(mb)); | |
1248 for (const irsymtab::Reader::SymbolRef &sym : | |
1249 symtabFile.TheReader.symbols()) { | |
1250 if (sym.isGlobal() && sym.getName() == symName) | |
223 | 1251 return !sym.isUndefined() && !sym.isWeak() && !sym.isCommon(); |
221 | 1252 } |
1253 return false; | |
1254 } | |
1255 | |
1256 template <class ELFT> | |
236 | 1257 static bool isNonCommonDef(ELFKind ekind, MemoryBufferRef mb, StringRef symName, |
221 | 1258 StringRef archiveName) { |
236 | 1259 ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(ekind, mb, archiveName); |
1260 obj->init(); | |
221 | 1261 StringRef stringtable = obj->getStringTable(); |
1262 | |
1263 for (auto sym : obj->template getGlobalELFSyms<ELFT>()) { | |
1264 Expected<StringRef> name = sym.getName(stringtable); | |
1265 if (name && name.get() == symName) | |
223 | 1266 return sym.isDefined() && sym.getBinding() == STB_GLOBAL && |
1267 !sym.isCommon(); | |
221 | 1268 } |
1269 return false; | |
1270 } | |
1271 | |
1272 static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName, | |
1273 StringRef archiveName) { | |
1274 switch (getELFKind(mb, archiveName)) { | |
1275 case ELF32LEKind: | |
236 | 1276 return isNonCommonDef<ELF32LE>(ELF32LEKind, mb, symName, archiveName); |
221 | 1277 case ELF32BEKind: |
236 | 1278 return isNonCommonDef<ELF32BE>(ELF32BEKind, mb, symName, archiveName); |
221 | 1279 case ELF64LEKind: |
236 | 1280 return isNonCommonDef<ELF64LE>(ELF64LEKind, mb, symName, archiveName); |
221 | 1281 case ELF64BEKind: |
236 | 1282 return isNonCommonDef<ELF64BE>(ELF64BEKind, mb, symName, archiveName); |
221 | 1283 default: |
1284 llvm_unreachable("getELFKind"); | |
1285 } | |
1286 } | |
1287 | |
236 | 1288 unsigned SharedFile::vernauxNum; |
221 | 1289 |
236 | 1290 SharedFile::SharedFile(MemoryBufferRef m, StringRef defaultSoName) |
1291 : ELFFileBase(SharedKind, getELFKind(m, ""), m), soName(defaultSoName), | |
1292 isNeeded(!config->asNeeded) {} | |
150 | 1293 |
1294 // Parse the version definitions in the object file if present, and return a | |
1295 // vector whose nth element contains a pointer to the Elf_Verdef for version | |
1296 // identifier n. Version identifiers that are not definitions map to nullptr. | |
1297 template <typename ELFT> | |
236 | 1298 static SmallVector<const void *, 0> |
1299 parseVerdefs(const uint8_t *base, const typename ELFT::Shdr *sec) { | |
150 | 1300 if (!sec) |
1301 return {}; | |
1302 | |
1303 // Build the Verdefs array by following the chain of Elf_Verdef objects | |
1304 // from the start of the .gnu.version_d section. | |
236 | 1305 SmallVector<const void *, 0> verdefs; |
150 | 1306 const uint8_t *verdef = base + sec->sh_offset; |
236 | 1307 for (unsigned i = 0, e = sec->sh_info; i != e; ++i) { |
150 | 1308 auto *curVerdef = reinterpret_cast<const typename ELFT::Verdef *>(verdef); |
1309 verdef += curVerdef->vd_next; | |
1310 unsigned verdefIndex = curVerdef->vd_ndx; | |
236 | 1311 if (verdefIndex >= verdefs.size()) |
1312 verdefs.resize(verdefIndex + 1); | |
150 | 1313 verdefs[verdefIndex] = curVerdef; |
1314 } | |
1315 return verdefs; | |
1316 } | |
1317 | |
173 | 1318 // Parse SHT_GNU_verneed to properly set the name of a versioned undefined |
1319 // symbol. We detect fatal issues which would cause vulnerabilities, but do not | |
1320 // implement sophisticated error checking like in llvm-readobj because the value | |
1321 // of such diagnostics is low. | |
1322 template <typename ELFT> | |
1323 std::vector<uint32_t> SharedFile::parseVerneed(const ELFFile<ELFT> &obj, | |
1324 const typename ELFT::Shdr *sec) { | |
1325 if (!sec) | |
1326 return {}; | |
1327 std::vector<uint32_t> verneeds; | |
221 | 1328 ArrayRef<uint8_t> data = CHECK(obj.getSectionContents(*sec), this); |
173 | 1329 const uint8_t *verneedBuf = data.begin(); |
1330 for (unsigned i = 0; i != sec->sh_info; ++i) { | |
221 | 1331 if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end()) |
173 | 1332 fatal(toString(this) + " has an invalid Verneed"); |
1333 auto *vn = reinterpret_cast<const typename ELFT::Verneed *>(verneedBuf); | |
1334 const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux; | |
1335 for (unsigned j = 0; j != vn->vn_cnt; ++j) { | |
221 | 1336 if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end()) |
173 | 1337 fatal(toString(this) + " has an invalid Vernaux"); |
1338 auto *aux = reinterpret_cast<const typename ELFT::Vernaux *>(vernauxBuf); | |
1339 if (aux->vna_name >= this->stringTable.size()) | |
1340 fatal(toString(this) + " has a Vernaux with an invalid vna_name"); | |
1341 uint16_t version = aux->vna_other & VERSYM_VERSION; | |
1342 if (version >= verneeds.size()) | |
1343 verneeds.resize(version + 1); | |
1344 verneeds[version] = aux->vna_name; | |
1345 vernauxBuf += aux->vna_next; | |
1346 } | |
1347 verneedBuf += vn->vn_next; | |
1348 } | |
1349 return verneeds; | |
1350 } | |
1351 | |
150 | 1352 // We do not usually care about alignments of data in shared object |
1353 // files because the loader takes care of it. However, if we promote a | |
1354 // DSO symbol to point to .bss due to copy relocation, we need to keep | |
1355 // the original alignment requirements. We infer it in this function. | |
1356 template <typename ELFT> | |
1357 static uint64_t getAlignment(ArrayRef<typename ELFT::Shdr> sections, | |
1358 const typename ELFT::Sym &sym) { | |
1359 uint64_t ret = UINT64_MAX; | |
1360 if (sym.st_value) | |
252 | 1361 ret = 1ULL << llvm::countr_zero((uint64_t)sym.st_value); |
150 | 1362 if (0 < sym.st_shndx && sym.st_shndx < sections.size()) |
1363 ret = std::min<uint64_t>(ret, sections[sym.st_shndx].sh_addralign); | |
1364 return (ret > UINT32_MAX) ? 0 : ret; | |
1365 } | |
1366 | |
1367 // Fully parse the shared object file. | |
1368 // | |
1369 // This function parses symbol versions. If a DSO has version information, | |
1370 // the file has a ".gnu.version_d" section which contains symbol version | |
1371 // definitions. Each symbol is associated to one version through a table in | |
1372 // ".gnu.version" section. That table is a parallel array for the symbol | |
1373 // table, and each table entry contains an index in ".gnu.version_d". | |
1374 // | |
1375 // The special index 0 is reserved for VER_NDX_LOCAL and 1 is for | |
1376 // VER_NDX_GLOBAL. There's no table entry for these special versions in | |
1377 // ".gnu.version_d". | |
1378 // | |
1379 // The file format for symbol versioning is perhaps a bit more complicated | |
1380 // than necessary, but you can easily understand the code if you wrap your | |
1381 // head around the data structure described above. | |
1382 template <class ELFT> void SharedFile::parse() { | |
1383 using Elf_Dyn = typename ELFT::Dyn; | |
1384 using Elf_Shdr = typename ELFT::Shdr; | |
1385 using Elf_Sym = typename ELFT::Sym; | |
1386 using Elf_Verdef = typename ELFT::Verdef; | |
1387 using Elf_Versym = typename ELFT::Versym; | |
1388 | |
1389 ArrayRef<Elf_Dyn> dynamicTags; | |
1390 const ELFFile<ELFT> obj = this->getObj<ELFT>(); | |
236 | 1391 ArrayRef<Elf_Shdr> sections = getELFShdrs<ELFT>(); |
150 | 1392 |
1393 const Elf_Shdr *versymSec = nullptr; | |
1394 const Elf_Shdr *verdefSec = nullptr; | |
173 | 1395 const Elf_Shdr *verneedSec = nullptr; |
150 | 1396 |
1397 // Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d. | |
1398 for (const Elf_Shdr &sec : sections) { | |
1399 switch (sec.sh_type) { | |
1400 default: | |
1401 continue; | |
1402 case SHT_DYNAMIC: | |
1403 dynamicTags = | |
221 | 1404 CHECK(obj.template getSectionContentsAsArray<Elf_Dyn>(sec), this); |
150 | 1405 break; |
1406 case SHT_GNU_versym: | |
1407 versymSec = &sec; | |
1408 break; | |
1409 case SHT_GNU_verdef: | |
1410 verdefSec = &sec; | |
1411 break; | |
173 | 1412 case SHT_GNU_verneed: |
1413 verneedSec = &sec; | |
1414 break; | |
150 | 1415 } |
1416 } | |
1417 | |
1418 if (versymSec && numELFSyms == 0) { | |
1419 error("SHT_GNU_versym should be associated with symbol table"); | |
1420 return; | |
1421 } | |
1422 | |
1423 // Search for a DT_SONAME tag to initialize this->soName. | |
1424 for (const Elf_Dyn &dyn : dynamicTags) { | |
1425 if (dyn.d_tag == DT_NEEDED) { | |
1426 uint64_t val = dyn.getVal(); | |
1427 if (val >= this->stringTable.size()) | |
1428 fatal(toString(this) + ": invalid DT_NEEDED entry"); | |
1429 dtNeeded.push_back(this->stringTable.data() + val); | |
1430 } else if (dyn.d_tag == DT_SONAME) { | |
1431 uint64_t val = dyn.getVal(); | |
1432 if (val >= this->stringTable.size()) | |
1433 fatal(toString(this) + ": invalid DT_SONAME entry"); | |
1434 soName = this->stringTable.data() + val; | |
1435 } | |
1436 } | |
1437 | |
1438 // DSOs are uniquified not by filename but by soname. | |
236 | 1439 DenseMap<CachedHashStringRef, SharedFile *>::iterator it; |
150 | 1440 bool wasInserted; |
236 | 1441 std::tie(it, wasInserted) = |
1442 symtab.soNames.try_emplace(CachedHashStringRef(soName), this); | |
150 | 1443 |
1444 // If a DSO appears more than once on the command line with and without | |
1445 // --as-needed, --no-as-needed takes precedence over --as-needed because a | |
1446 // user can add an extra DSO with --no-as-needed to force it to be added to | |
1447 // the dependency list. | |
1448 it->second->isNeeded |= isNeeded; | |
1449 if (!wasInserted) | |
1450 return; | |
1451 | |
236 | 1452 ctx.sharedFiles.push_back(this); |
150 | 1453 |
1454 verdefs = parseVerdefs<ELFT>(obj.base(), verdefSec); | |
173 | 1455 std::vector<uint32_t> verneeds = parseVerneed<ELFT>(obj, verneedSec); |
150 | 1456 |
1457 // Parse ".gnu.version" section which is a parallel array for the symbol | |
1458 // table. If a given file doesn't have a ".gnu.version" section, we use | |
1459 // VER_NDX_GLOBAL. | |
1460 size_t size = numELFSyms - firstGlobal; | |
173 | 1461 std::vector<uint16_t> versyms(size, VER_NDX_GLOBAL); |
150 | 1462 if (versymSec) { |
1463 ArrayRef<Elf_Versym> versym = | |
221 | 1464 CHECK(obj.template getSectionContentsAsArray<Elf_Versym>(*versymSec), |
150 | 1465 this) |
1466 .slice(firstGlobal); | |
1467 for (size_t i = 0; i < size; ++i) | |
1468 versyms[i] = versym[i].vs_index; | |
1469 } | |
1470 | |
1471 // System libraries can have a lot of symbols with versions. Using a | |
1472 // fixed buffer for computing the versions name (foo@ver) can save a | |
1473 // lot of allocations. | |
1474 SmallString<0> versionedNameBuffer; | |
1475 | |
1476 // Add symbols to the symbol table. | |
1477 ArrayRef<Elf_Sym> syms = this->getGlobalELFSyms<ELFT>(); | |
236 | 1478 for (size_t i = 0, e = syms.size(); i != e; ++i) { |
150 | 1479 const Elf_Sym &sym = syms[i]; |
1480 | |
1481 // ELF spec requires that all local symbols precede weak or global | |
1482 // symbols in each symbol table, and the index of first non-local symbol | |
1483 // is stored to sh_info. If a local symbol appears after some non-local | |
1484 // symbol, that's a violation of the spec. | |
236 | 1485 StringRef name = CHECK(sym.getName(stringTable), this); |
150 | 1486 if (sym.getBinding() == STB_LOCAL) { |
236 | 1487 errorOrWarn(toString(this) + ": invalid local symbol '" + name + |
1488 "' in global part of symbol table"); | |
150 | 1489 continue; |
1490 } | |
1491 | |
236 | 1492 const uint16_t ver = versyms[i], idx = ver & ~VERSYM_HIDDEN; |
150 | 1493 if (sym.isUndefined()) { |
173 | 1494 // For unversioned undefined symbols, VER_NDX_GLOBAL makes more sense but |
1495 // as of binutils 2.34, GNU ld produces VER_NDX_LOCAL. | |
236 | 1496 if (ver != VER_NDX_LOCAL && ver != VER_NDX_GLOBAL) { |
173 | 1497 if (idx >= verneeds.size()) { |
1498 error("corrupt input file: version need index " + Twine(idx) + | |
1499 " for symbol " + name + " is out of bounds\n>>> defined in " + | |
1500 toString(this)); | |
1501 continue; | |
1502 } | |
236 | 1503 StringRef verName = stringTable.data() + verneeds[idx]; |
173 | 1504 versionedNameBuffer.clear(); |
236 | 1505 name = saver().save( |
1506 (name + "@" + verName).toStringRef(versionedNameBuffer)); | |
173 | 1507 } |
236 | 1508 Symbol *s = symtab.addSymbol( |
150 | 1509 Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()}); |
1510 s->exportDynamic = true; | |
236 | 1511 if (s->isUndefined() && sym.getBinding() != STB_WEAK && |
221 | 1512 config->unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore) |
1513 requiredSymbols.push_back(s); | |
150 | 1514 continue; |
1515 } | |
1516 | |
236 | 1517 if (ver == VER_NDX_LOCAL || |
1518 (ver != VER_NDX_GLOBAL && idx >= verdefs.size())) { | |
1519 // In GNU ld < 2.31 (before 3be08ea4728b56d35e136af4e6fd3086ade17764), the | |
1520 // MIPS port puts _gp_disp symbol into DSO files and incorrectly assigns | |
1521 // VER_NDX_LOCAL. Workaround this bug. | |
1522 if (config->emachine == EM_MIPS && name == "_gp_disp") | |
1523 continue; | |
150 | 1524 error("corrupt input file: version definition index " + Twine(idx) + |
1525 " for symbol " + name + " is out of bounds\n>>> defined in " + | |
1526 toString(this)); | |
1527 continue; | |
1528 } | |
1529 | |
236 | 1530 uint32_t alignment = getAlignment<ELFT>(sections, sym); |
1531 if (ver == idx) { | |
1532 auto *s = symtab.addSymbol( | |
1533 SharedSymbol{*this, name, sym.getBinding(), sym.st_other, | |
1534 sym.getType(), sym.st_value, sym.st_size, alignment}); | |
1535 if (s->file == this) | |
1536 s->verdefIndex = ver; | |
1537 } | |
1538 | |
1539 // Also add the symbol with the versioned name to handle undefined symbols | |
1540 // with explicit versions. | |
1541 if (ver == VER_NDX_GLOBAL) | |
1542 continue; | |
1543 | |
150 | 1544 StringRef verName = |
236 | 1545 stringTable.data() + |
150 | 1546 reinterpret_cast<const Elf_Verdef *>(verdefs[idx])->getAux()->vda_name; |
1547 versionedNameBuffer.clear(); | |
1548 name = (name + "@" + verName).toStringRef(versionedNameBuffer); | |
236 | 1549 auto *s = symtab.addSymbol( |
1550 SharedSymbol{*this, saver().save(name), sym.getBinding(), sym.st_other, | |
1551 sym.getType(), sym.st_value, sym.st_size, alignment}); | |
1552 if (s->file == this) | |
1553 s->verdefIndex = idx; | |
150 | 1554 } |
1555 } | |
1556 | |
1557 static ELFKind getBitcodeELFKind(const Triple &t) { | |
1558 if (t.isLittleEndian()) | |
1559 return t.isArch64Bit() ? ELF64LEKind : ELF32LEKind; | |
1560 return t.isArch64Bit() ? ELF64BEKind : ELF32BEKind; | |
1561 } | |
1562 | |
221 | 1563 static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) { |
150 | 1564 switch (t.getArch()) { |
1565 case Triple::aarch64: | |
221 | 1566 case Triple::aarch64_be: |
150 | 1567 return EM_AARCH64; |
1568 case Triple::amdgcn: | |
1569 case Triple::r600: | |
1570 return EM_AMDGPU; | |
1571 case Triple::arm: | |
1572 case Triple::thumb: | |
1573 return EM_ARM; | |
1574 case Triple::avr: | |
1575 return EM_AVR; | |
236 | 1576 case Triple::hexagon: |
1577 return EM_HEXAGON; | |
252 | 1578 case Triple::loongarch32: |
1579 case Triple::loongarch64: | |
1580 return EM_LOONGARCH; | |
150 | 1581 case Triple::mips: |
1582 case Triple::mipsel: | |
1583 case Triple::mips64: | |
1584 case Triple::mips64el: | |
1585 return EM_MIPS; | |
1586 case Triple::msp430: | |
1587 return EM_MSP430; | |
1588 case Triple::ppc: | |
221 | 1589 case Triple::ppcle: |
150 | 1590 return EM_PPC; |
1591 case Triple::ppc64: | |
1592 case Triple::ppc64le: | |
1593 return EM_PPC64; | |
1594 case Triple::riscv32: | |
1595 case Triple::riscv64: | |
1596 return EM_RISCV; | |
1597 case Triple::x86: | |
1598 return t.isOSIAMCU() ? EM_IAMCU : EM_386; | |
1599 case Triple::x86_64: | |
1600 return EM_X86_64; | |
1601 default: | |
1602 error(path + ": could not infer e_machine from bitcode target triple " + | |
1603 t.str()); | |
1604 return EM_NONE; | |
1605 } | |
1606 } | |
1607 | |
221 | 1608 static uint8_t getOsAbi(const Triple &t) { |
1609 switch (t.getOS()) { | |
1610 case Triple::AMDHSA: | |
1611 return ELF::ELFOSABI_AMDGPU_HSA; | |
1612 case Triple::AMDPAL: | |
1613 return ELF::ELFOSABI_AMDGPU_PAL; | |
1614 case Triple::Mesa3D: | |
1615 return ELF::ELFOSABI_AMDGPU_MESA3D; | |
1616 default: | |
1617 return ELF::ELFOSABI_NONE; | |
1618 } | |
1619 } | |
1620 | |
150 | 1621 BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName, |
236 | 1622 uint64_t offsetInArchive, bool lazy) |
150 | 1623 : InputFile(BitcodeKind, mb) { |
236 | 1624 this->archiveName = archiveName; |
1625 this->lazy = lazy; | |
150 | 1626 |
1627 std::string path = mb.getBufferIdentifier().str(); | |
1628 if (config->thinLTOIndexOnly) | |
1629 path = replaceThinLTOSuffix(mb.getBufferIdentifier()); | |
1630 | |
1631 // ThinLTO assumes that all MemoryBufferRefs given to it have a unique | |
1632 // name. If two archives define two members with the same name, this | |
1633 // causes a collision which result in only one of the objects being taken | |
1634 // into consideration at LTO time (which very likely causes undefined | |
1635 // symbols later in the link stage). So we append file offset to make | |
1636 // filename unique. | |
236 | 1637 StringRef name = archiveName.empty() |
1638 ? saver().save(path) | |
1639 : saver().save(archiveName + "(" + path::filename(path) + | |
1640 " at " + utostr(offsetInArchive) + ")"); | |
150 | 1641 MemoryBufferRef mbref(mb.getBuffer(), name); |
1642 | |
1643 obj = CHECK(lto::InputFile::create(mbref), this); | |
1644 | |
1645 Triple t(obj->getTargetTriple()); | |
1646 ekind = getBitcodeELFKind(t); | |
1647 emachine = getBitcodeMachineKind(mb.getBufferIdentifier(), t); | |
221 | 1648 osabi = getOsAbi(t); |
150 | 1649 } |
1650 | |
1651 static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) { | |
1652 switch (gvVisibility) { | |
1653 case GlobalValue::DefaultVisibility: | |
1654 return STV_DEFAULT; | |
1655 case GlobalValue::HiddenVisibility: | |
1656 return STV_HIDDEN; | |
1657 case GlobalValue::ProtectedVisibility: | |
1658 return STV_PROTECTED; | |
1659 } | |
1660 llvm_unreachable("unknown visibility"); | |
1661 } | |
1662 | |
236 | 1663 static void |
1664 createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats, | |
1665 const lto::InputFile::Symbol &objSym, BitcodeFile &f) { | |
150 | 1666 uint8_t binding = objSym.isWeak() ? STB_WEAK : STB_GLOBAL; |
1667 uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE; | |
1668 uint8_t visibility = mapVisibility(objSym.getVisibility()); | |
236 | 1669 |
1670 if (!sym) | |
1671 sym = symtab.insert(saver().save(objSym.getName())); | |
150 | 1672 |
1673 int c = objSym.getComdatIndex(); | |
1674 if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) { | |
236 | 1675 Undefined newSym(&f, StringRef(), binding, visibility, type); |
1676 sym->resolve(newSym); | |
1677 sym->referenced = true; | |
1678 return; | |
150 | 1679 } |
1680 | |
236 | 1681 if (objSym.isCommon()) { |
1682 sym->resolve(CommonSymbol{&f, StringRef(), binding, visibility, STT_OBJECT, | |
1683 objSym.getCommonAlignment(), | |
1684 objSym.getCommonSize()}); | |
1685 } else { | |
1686 Defined newSym(&f, StringRef(), binding, visibility, type, 0, 0, nullptr); | |
1687 if (objSym.canBeOmittedFromSymbolTable()) | |
1688 newSym.exportDynamic = false; | |
1689 sym->resolve(newSym); | |
1690 } | |
150 | 1691 } |
1692 | |
236 | 1693 void BitcodeFile::parse() { |
232
70dce7da266c
llvm original Jul 20 16:41:34 2021
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
223
diff
changeset
|
1694 for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) { |
150 | 1695 keptComdats.push_back( |
232
70dce7da266c
llvm original Jul 20 16:41:34 2021
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
223
diff
changeset
|
1696 s.second == Comdat::NoDeduplicate || |
236 | 1697 symtab.comdatGroups.try_emplace(CachedHashStringRef(s.first), this) |
232
70dce7da266c
llvm original Jul 20 16:41:34 2021
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
223
diff
changeset
|
1698 .second); |
70dce7da266c
llvm original Jul 20 16:41:34 2021
Shinji KONO <kono@ie.u-ryukyu.ac.jp>
parents:
223
diff
changeset
|
1699 } |
150 | 1700 |
252 | 1701 if (numSymbols == 0) { |
1702 numSymbols = obj->symbols().size(); | |
1703 symbols = std::make_unique<Symbol *[]>(numSymbols); | |
1704 } | |
236 | 1705 // Process defined symbols first. See the comment in |
1706 // ObjFile<ELFT>::initializeSymbols. | |
1707 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) | |
1708 if (!irSym.isUndefined()) | |
1709 createBitcodeSymbol(symbols[i], keptComdats, irSym, *this); | |
1710 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) | |
1711 if (irSym.isUndefined()) | |
1712 createBitcodeSymbol(symbols[i], keptComdats, irSym, *this); | |
150 | 1713 |
1714 for (auto l : obj->getDependentLibraries()) | |
1715 addDependentLibrary(l, this); | |
1716 } | |
1717 | |
236 | 1718 void BitcodeFile::parseLazy() { |
252 | 1719 numSymbols = obj->symbols().size(); |
1720 symbols = std::make_unique<Symbol *[]>(numSymbols); | |
236 | 1721 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) |
1722 if (!irSym.isUndefined()) { | |
1723 auto *sym = symtab.insert(saver().save(irSym.getName())); | |
1724 sym->resolve(LazyObject{*this}); | |
1725 symbols[i] = sym; | |
1726 } | |
1727 } | |
1728 | |
1729 void BitcodeFile::postParse() { | |
1730 for (auto [i, irSym] : llvm::enumerate(obj->symbols())) { | |
1731 const Symbol &sym = *symbols[i]; | |
1732 if (sym.file == this || !sym.isDefined() || irSym.isUndefined() || | |
1733 irSym.isCommon() || irSym.isWeak()) | |
1734 continue; | |
1735 int c = irSym.getComdatIndex(); | |
1736 if (c != -1 && !keptComdats[c]) | |
1737 continue; | |
1738 reportDuplicate(sym, this, nullptr, 0); | |
1739 } | |
1740 } | |
1741 | |
150 | 1742 void BinaryFile::parse() { |
1743 ArrayRef<uint8_t> data = arrayRefFromStringRef(mb.getBuffer()); | |
1744 auto *section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, | |
1745 8, data, ".data"); | |
1746 sections.push_back(section); | |
1747 | |
1748 // For each input file foo that is embedded to a result as a binary | |
1749 // blob, we define _binary_foo_{start,end,size} symbols, so that | |
1750 // user programs can access blobs by name. Non-alphanumeric | |
1751 // characters in a filename are replaced with underscore. | |
1752 std::string s = "_binary_" + mb.getBufferIdentifier().str(); | |
252 | 1753 for (char &c : s) |
1754 if (!isAlnum(c)) | |
1755 c = '_'; | |
150 | 1756 |
236 | 1757 llvm::StringSaver &saver = lld::saver(); |
1758 | |
1759 symtab.addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_start"), | |
1760 STB_GLOBAL, STV_DEFAULT, STT_OBJECT, 0, 0, | |
1761 section}); | |
1762 symtab.addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_end"), | |
1763 STB_GLOBAL, STV_DEFAULT, STT_OBJECT, | |
1764 data.size(), 0, section}); | |
1765 symtab.addAndCheckDuplicate(Defined{nullptr, saver.save(s + "_size"), | |
1766 STB_GLOBAL, STV_DEFAULT, STT_OBJECT, | |
1767 data.size(), 0, nullptr}); | |
150 | 1768 } |
1769 | |
236 | 1770 ELFFileBase *elf::createObjFile(MemoryBufferRef mb, StringRef archiveName, |
1771 bool lazy) { | |
1772 ELFFileBase *f; | |
150 | 1773 switch (getELFKind(mb, archiveName)) { |
1774 case ELF32LEKind: | |
236 | 1775 f = make<ObjFile<ELF32LE>>(ELF32LEKind, mb, archiveName); |
1776 break; | |
150 | 1777 case ELF32BEKind: |
236 | 1778 f = make<ObjFile<ELF32BE>>(ELF32BEKind, mb, archiveName); |
1779 break; | |
150 | 1780 case ELF64LEKind: |
236 | 1781 f = make<ObjFile<ELF64LE>>(ELF64LEKind, mb, archiveName); |
1782 break; | |
150 | 1783 case ELF64BEKind: |
236 | 1784 f = make<ObjFile<ELF64BE>>(ELF64BEKind, mb, archiveName); |
1785 break; | |
150 | 1786 default: |
1787 llvm_unreachable("getELFKind"); | |
1788 } | |
236 | 1789 f->init(); |
1790 f->lazy = lazy; | |
1791 return f; | |
150 | 1792 } |
1793 | |
236 | 1794 template <class ELFT> void ObjFile<ELFT>::parseLazy() { |
1795 const ArrayRef<typename ELFT::Sym> eSyms = this->getELFSyms<ELFT>(); | |
252 | 1796 numSymbols = eSyms.size(); |
1797 symbols = std::make_unique<Symbol *[]>(numSymbols); | |
150 | 1798 |
236 | 1799 // resolve() may trigger this->extract() if an existing symbol is an undefined |
1800 // symbol. If that happens, this function has served its purpose, and we can | |
1801 // exit from the loop early. | |
252 | 1802 for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { |
1803 if (eSyms[i].st_shndx == SHN_UNDEF) | |
1804 continue; | |
1805 symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this)); | |
1806 symbols[i]->resolve(LazyObject{*this}); | |
1807 if (!lazy) | |
1808 break; | |
1809 } | |
150 | 1810 } |
1811 | |
236 | 1812 bool InputFile::shouldExtractForCommon(StringRef name) { |
1813 if (isa<BitcodeFile>(this)) | |
221 | 1814 return isBitcodeNonCommonDef(mb, name, archiveName); |
1815 | |
1816 return isNonCommonDef(mb, name, archiveName); | |
1817 } | |
1818 | |
173 | 1819 std::string elf::replaceThinLTOSuffix(StringRef path) { |
252 | 1820 auto [suffix, repl] = config->thinLTOObjectSuffixReplace; |
150 | 1821 if (path.consume_back(suffix)) |
1822 return (path + repl).str(); | |
1823 return std::string(path); | |
1824 } | |
1825 | |
173 | 1826 template class elf::ObjFile<ELF32LE>; |
1827 template class elf::ObjFile<ELF32BE>; | |
1828 template class elf::ObjFile<ELF64LE>; | |
1829 template class elf::ObjFile<ELF64BE>; | |
150 | 1830 |
1831 template void SharedFile::parse<ELF32LE>(); | |
1832 template void SharedFile::parse<ELF32BE>(); | |
1833 template void SharedFile::parse<ELF64LE>(); | |
1834 template void SharedFile::parse<ELF64BE>(); |