Mercurial > hg > CbC > CbC_llvm
comparison lld/MachO/InputFiles.cpp @ 207:2e18cbf3894f
LLVM12
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 08 Jun 2021 06:07:14 +0900 |
parents | 0572611fdcc8 |
children | 5f17cb93ff66 |
comparison
equal
deleted
inserted
replaced
173:0572611fdcc8 | 207:2e18cbf3894f |
---|---|
41 // | 41 // |
42 //===----------------------------------------------------------------------===// | 42 //===----------------------------------------------------------------------===// |
43 | 43 |
44 #include "InputFiles.h" | 44 #include "InputFiles.h" |
45 #include "Config.h" | 45 #include "Config.h" |
46 #include "Driver.h" | |
47 #include "Dwarf.h" | |
46 #include "ExportTrie.h" | 48 #include "ExportTrie.h" |
47 #include "InputSection.h" | 49 #include "InputSection.h" |
50 #include "MachOStructs.h" | |
51 #include "ObjC.h" | |
48 #include "OutputSection.h" | 52 #include "OutputSection.h" |
53 #include "OutputSegment.h" | |
49 #include "SymbolTable.h" | 54 #include "SymbolTable.h" |
50 #include "Symbols.h" | 55 #include "Symbols.h" |
51 #include "Target.h" | 56 #include "Target.h" |
52 | 57 |
58 #include "lld/Common/DWARF.h" | |
53 #include "lld/Common/ErrorHandler.h" | 59 #include "lld/Common/ErrorHandler.h" |
54 #include "lld/Common/Memory.h" | 60 #include "lld/Common/Memory.h" |
61 #include "lld/Common/Reproduce.h" | |
62 #include "llvm/ADT/iterator.h" | |
55 #include "llvm/BinaryFormat/MachO.h" | 63 #include "llvm/BinaryFormat/MachO.h" |
64 #include "llvm/LTO/LTO.h" | |
56 #include "llvm/Support/Endian.h" | 65 #include "llvm/Support/Endian.h" |
57 #include "llvm/Support/MemoryBuffer.h" | 66 #include "llvm/Support/MemoryBuffer.h" |
58 #include "llvm/Support/Path.h" | 67 #include "llvm/Support/Path.h" |
68 #include "llvm/Support/TarWriter.h" | |
69 #include "llvm/TextAPI/Architecture.h" | |
70 #include "llvm/TextAPI/InterfaceFile.h" | |
59 | 71 |
60 using namespace llvm; | 72 using namespace llvm; |
61 using namespace llvm::MachO; | 73 using namespace llvm::MachO; |
62 using namespace llvm::support::endian; | 74 using namespace llvm::support::endian; |
63 using namespace llvm::sys; | 75 using namespace llvm::sys; |
64 using namespace lld; | 76 using namespace lld; |
65 using namespace lld::macho; | 77 using namespace lld::macho; |
66 | 78 |
67 std::vector<InputFile *> macho::inputFiles; | 79 // Returns "<internal>", "foo.a(bar.o)", or "baz.o". |
80 std::string lld::toString(const InputFile *f) { | |
81 if (!f) | |
82 return "<internal>"; | |
83 | |
84 // Multiple dylibs can be defined in one .tbd file. | |
85 if (auto dylibFile = dyn_cast<DylibFile>(f)) | |
86 if (f->getName().endswith(".tbd")) | |
87 return (f->getName() + "(" + dylibFile->installName + ")").str(); | |
88 | |
89 if (f->archiveName.empty()) | |
90 return std::string(f->getName()); | |
91 return (f->archiveName + "(" + path::filename(f->getName()) + ")").str(); | |
92 } | |
93 | |
94 SetVector<InputFile *> macho::inputFiles; | |
95 std::unique_ptr<TarWriter> macho::tar; | |
96 int InputFile::idCount = 0; | |
97 | |
98 static VersionTuple decodeVersion(uint32_t version) { | |
99 unsigned major = version >> 16; | |
100 unsigned minor = (version >> 8) & 0xffu; | |
101 unsigned subMinor = version & 0xffu; | |
102 return VersionTuple(major, minor, subMinor); | |
103 } | |
104 | |
105 static std::vector<PlatformInfo> getPlatformInfos(const InputFile *input) { | |
106 if (!isa<ObjFile>(input) && !isa<DylibFile>(input)) | |
107 return {}; | |
108 | |
109 const char *hdr = input->mb.getBufferStart(); | |
110 | |
111 std::vector<PlatformInfo> platformInfos; | |
112 for (auto *cmd : findCommands<build_version_command>(hdr, LC_BUILD_VERSION)) { | |
113 PlatformInfo info; | |
114 info.target.Platform = static_cast<PlatformKind>(cmd->platform); | |
115 info.minimum = decodeVersion(cmd->minos); | |
116 platformInfos.emplace_back(std::move(info)); | |
117 } | |
118 for (auto *cmd : findCommands<version_min_command>( | |
119 hdr, LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, | |
120 LC_VERSION_MIN_TVOS, LC_VERSION_MIN_WATCHOS)) { | |
121 PlatformInfo info; | |
122 switch (cmd->cmd) { | |
123 case LC_VERSION_MIN_MACOSX: | |
124 info.target.Platform = PlatformKind::macOS; | |
125 break; | |
126 case LC_VERSION_MIN_IPHONEOS: | |
127 info.target.Platform = PlatformKind::iOS; | |
128 break; | |
129 case LC_VERSION_MIN_TVOS: | |
130 info.target.Platform = PlatformKind::tvOS; | |
131 break; | |
132 case LC_VERSION_MIN_WATCHOS: | |
133 info.target.Platform = PlatformKind::watchOS; | |
134 break; | |
135 } | |
136 info.minimum = decodeVersion(cmd->version); | |
137 platformInfos.emplace_back(std::move(info)); | |
138 } | |
139 | |
140 return platformInfos; | |
141 } | |
142 | |
143 static PlatformKind removeSimulator(PlatformKind platform) { | |
144 // Mapping of simulator platforms to their non-simulator counterparts. | |
145 static const std::map<PlatformKind, PlatformKind> platformMap = { | |
146 {PlatformKind::iOSSimulator, PlatformKind::iOS}, | |
147 {PlatformKind::tvOSSimulator, PlatformKind::tvOS}, | |
148 {PlatformKind::watchOSSimulator, PlatformKind::watchOS}}; | |
149 | |
150 auto iter = platformMap.find(platform); | |
151 if (iter == platformMap.end()) | |
152 return platform; | |
153 return iter->second; | |
154 } | |
155 | |
156 static bool checkCompatibility(const InputFile *input) { | |
157 std::vector<PlatformInfo> platformInfos = getPlatformInfos(input); | |
158 if (platformInfos.empty()) | |
159 return true; | |
160 | |
161 auto it = find_if(platformInfos, [&](const PlatformInfo &info) { | |
162 return removeSimulator(info.target.Platform) == | |
163 removeSimulator(config->platform()); | |
164 }); | |
165 if (it == platformInfos.end()) { | |
166 std::string platformNames; | |
167 raw_string_ostream os(platformNames); | |
168 interleave( | |
169 platformInfos, os, | |
170 [&](const PlatformInfo &info) { | |
171 os << getPlatformName(info.target.Platform); | |
172 }, | |
173 "/"); | |
174 error(toString(input) + " has platform " + platformNames + | |
175 Twine(", which is different from target platform ") + | |
176 getPlatformName(config->platform())); | |
177 return false; | |
178 } | |
179 | |
180 if (it->minimum <= config->platformInfo.minimum) | |
181 return true; | |
182 | |
183 error(toString(input) + " has version " + it->minimum.getAsString() + | |
184 ", which is newer than target minimum of " + | |
185 config->platformInfo.minimum.getAsString()); | |
186 return false; | |
187 } | |
68 | 188 |
69 // Open a given file path and return it as a memory-mapped file. | 189 // Open a given file path and return it as a memory-mapped file. |
70 Optional<MemoryBufferRef> macho::readFile(StringRef path) { | 190 Optional<MemoryBufferRef> macho::readFile(StringRef path) { |
71 // Open a file. | 191 ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = MemoryBuffer::getFile(path); |
72 auto mbOrErr = MemoryBuffer::getFile(path); | 192 if (std::error_code ec = mbOrErr.getError()) { |
73 if (auto ec = mbOrErr.getError()) { | |
74 error("cannot open " + path + ": " + ec.message()); | 193 error("cannot open " + path + ": " + ec.message()); |
75 return None; | 194 return None; |
76 } | 195 } |
77 | 196 |
78 std::unique_ptr<MemoryBuffer> &mb = *mbOrErr; | 197 std::unique_ptr<MemoryBuffer> &mb = *mbOrErr; |
79 MemoryBufferRef mbref = mb->getMemBufferRef(); | 198 MemoryBufferRef mbref = mb->getMemBufferRef(); |
80 make<std::unique_ptr<MemoryBuffer>>(std::move(mb)); // take mb ownership | 199 make<std::unique_ptr<MemoryBuffer>>(std::move(mb)); // take mb ownership |
81 | 200 |
82 // If this is a regular non-fat file, return it. | 201 // If this is a regular non-fat file, return it. |
83 const char *buf = mbref.getBufferStart(); | 202 const char *buf = mbref.getBufferStart(); |
84 auto *hdr = reinterpret_cast<const MachO::fat_header *>(buf); | 203 const auto *hdr = reinterpret_cast<const fat_header *>(buf); |
85 if (read32be(&hdr->magic) != MachO::FAT_MAGIC) | 204 if (mbref.getBufferSize() < sizeof(uint32_t) || |
205 read32be(&hdr->magic) != FAT_MAGIC) { | |
206 if (tar) | |
207 tar->append(relativeToRoot(path), mbref.getBuffer()); | |
86 return mbref; | 208 return mbref; |
87 | 209 } |
88 // Object files and archive files may be fat files, which contains | 210 |
89 // multiple real files for different CPU ISAs. Here, we search for a | 211 // Object files and archive files may be fat files, which contain multiple |
90 // file that matches with the current link target and returns it as | 212 // real files for different CPU ISAs. Here, we search for a file that matches |
91 // a MemoryBufferRef. | 213 // with the current link target and return it as a MemoryBufferRef. |
92 auto *arch = reinterpret_cast<const MachO::fat_arch *>(buf + sizeof(*hdr)); | 214 const auto *arch = reinterpret_cast<const fat_arch *>(buf + sizeof(*hdr)); |
93 | 215 |
94 for (uint32_t i = 0, n = read32be(&hdr->nfat_arch); i < n; ++i) { | 216 for (uint32_t i = 0, n = read32be(&hdr->nfat_arch); i < n; ++i) { |
95 if (reinterpret_cast<const char *>(arch + i + 1) > | 217 if (reinterpret_cast<const char *>(arch + i + 1) > |
96 buf + mbref.getBufferSize()) { | 218 buf + mbref.getBufferSize()) { |
97 error(path + ": fat_arch struct extends beyond end of file"); | 219 error(path + ": fat_arch struct extends beyond end of file"); |
98 return None; | 220 return None; |
99 } | 221 } |
100 | 222 |
101 if (read32be(&arch[i].cputype) != target->cpuType || | 223 if (read32be(&arch[i].cputype) != static_cast<uint32_t>(target->cpuType) || |
102 read32be(&arch[i].cpusubtype) != target->cpuSubtype) | 224 read32be(&arch[i].cpusubtype) != target->cpuSubtype) |
103 continue; | 225 continue; |
104 | 226 |
105 uint32_t offset = read32be(&arch[i].offset); | 227 uint32_t offset = read32be(&arch[i].offset); |
106 uint32_t size = read32be(&arch[i].size); | 228 uint32_t size = read32be(&arch[i].size); |
107 if (offset + size > mbref.getBufferSize()) | 229 if (offset + size > mbref.getBufferSize()) |
108 error(path + ": slice extends beyond end of file"); | 230 error(path + ": slice extends beyond end of file"); |
231 if (tar) | |
232 tar->append(relativeToRoot(path), mbref.getBuffer()); | |
109 return MemoryBufferRef(StringRef(buf + offset, size), path.copy(bAlloc)); | 233 return MemoryBufferRef(StringRef(buf + offset, size), path.copy(bAlloc)); |
110 } | 234 } |
111 | 235 |
112 error("unable to find matching architecture in " + path); | 236 error("unable to find matching architecture in " + path); |
113 return None; | 237 return None; |
114 } | 238 } |
115 | 239 |
116 static const load_command *findCommand(const mach_header_64 *hdr, | 240 InputFile::InputFile(Kind kind, const InterfaceFile &interface) |
117 uint32_t type) { | 241 : id(idCount++), fileKind(kind), name(saver.save(interface.getPath())) {} |
118 const uint8_t *p = | 242 |
119 reinterpret_cast<const uint8_t *>(hdr) + sizeof(mach_header_64); | 243 template <class Section> |
120 | 244 void ObjFile::parseSections(ArrayRef<Section> sections) { |
121 for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) { | |
122 auto *cmd = reinterpret_cast<const load_command *>(p); | |
123 if (cmd->cmd == type) | |
124 return cmd; | |
125 p += cmd->cmdsize; | |
126 } | |
127 return nullptr; | |
128 } | |
129 | |
130 void InputFile::parseSections(ArrayRef<section_64> sections) { | |
131 subsections.reserve(sections.size()); | 245 subsections.reserve(sections.size()); |
132 auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); | 246 auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); |
133 | 247 |
134 for (const section_64 &sec : sections) { | 248 for (const Section &sec : sections) { |
135 InputSection *isec = make<InputSection>(); | 249 InputSection *isec = make<InputSection>(); |
136 isec->file = this; | 250 isec->file = this; |
137 isec->name = StringRef(sec.sectname, strnlen(sec.sectname, 16)); | 251 isec->name = |
138 isec->segname = StringRef(sec.segname, strnlen(sec.segname, 16)); | 252 StringRef(sec.sectname, strnlen(sec.sectname, sizeof(sec.sectname))); |
139 isec->data = {buf + sec.offset, static_cast<size_t>(sec.size)}; | 253 isec->segname = |
254 StringRef(sec.segname, strnlen(sec.segname, sizeof(sec.segname))); | |
255 isec->data = {isZeroFill(sec.flags) ? nullptr : buf + sec.offset, | |
256 static_cast<size_t>(sec.size)}; | |
140 if (sec.align >= 32) | 257 if (sec.align >= 32) |
141 error("alignment " + std::to_string(sec.align) + " of section " + | 258 error("alignment " + std::to_string(sec.align) + " of section " + |
142 isec->name + " is too large"); | 259 isec->name + " is too large"); |
143 else | 260 else |
144 isec->align = 1 << sec.align; | 261 isec->align = 1 << sec.align; |
145 isec->flags = sec.flags; | 262 isec->flags = sec.flags; |
146 subsections.push_back({{0, isec}}); | 263 |
264 if (!(isDebugSection(isec->flags) && | |
265 isec->segname == segment_names::dwarf)) { | |
266 subsections.push_back({{0, isec}}); | |
267 } else { | |
268 // Instead of emitting DWARF sections, we emit STABS symbols to the | |
269 // object files that contain them. We filter them out early to avoid | |
270 // parsing their relocations unnecessarily. But we must still push an | |
271 // empty map to ensure the indices line up for the remaining sections. | |
272 subsections.push_back({}); | |
273 debugSections.push_back(isec); | |
274 } | |
147 } | 275 } |
148 } | 276 } |
149 | 277 |
150 // Find the subsection corresponding to the greatest section offset that is <= | 278 // Find the subsection corresponding to the greatest section offset that is <= |
151 // that of the given offset. | 279 // that of the given offset. |
153 // offset: an offset relative to the start of the original InputSection (before | 281 // offset: an offset relative to the start of the original InputSection (before |
154 // any subsection splitting has occurred). It will be updated to represent the | 282 // any subsection splitting has occurred). It will be updated to represent the |
155 // same location as an offset relative to the start of the containing | 283 // same location as an offset relative to the start of the containing |
156 // subsection. | 284 // subsection. |
157 static InputSection *findContainingSubsection(SubsectionMap &map, | 285 static InputSection *findContainingSubsection(SubsectionMap &map, |
158 uint32_t *offset) { | 286 uint64_t *offset) { |
159 auto it = std::prev(map.upper_bound(*offset)); | 287 auto it = std::prev(llvm::upper_bound( |
160 *offset -= it->first; | 288 map, *offset, [](uint64_t value, SubsectionEntry subsecEntry) { |
161 return it->second; | 289 return value < subsecEntry.offset; |
162 } | 290 })); |
163 | 291 *offset -= it->offset; |
164 void InputFile::parseRelocations(const section_64 &sec, | 292 return it->isec; |
165 SubsectionMap &subsecMap) { | 293 } |
294 | |
295 template <class Section> | |
296 static bool validateRelocationInfo(InputFile *file, const Section &sec, | |
297 relocation_info rel) { | |
298 const RelocAttrs &relocAttrs = target->getRelocAttrs(rel.r_type); | |
299 bool valid = true; | |
300 auto message = [relocAttrs, file, sec, rel, &valid](const Twine &diagnostic) { | |
301 valid = false; | |
302 return (relocAttrs.name + " relocation " + diagnostic + " at offset " + | |
303 std::to_string(rel.r_address) + " of " + sec.segname + "," + | |
304 sec.sectname + " in " + toString(file)) | |
305 .str(); | |
306 }; | |
307 | |
308 if (!relocAttrs.hasAttr(RelocAttrBits::LOCAL) && !rel.r_extern) | |
309 error(message("must be extern")); | |
310 if (relocAttrs.hasAttr(RelocAttrBits::PCREL) != rel.r_pcrel) | |
311 error(message(Twine("must ") + (rel.r_pcrel ? "not " : "") + | |
312 "be PC-relative")); | |
313 if (isThreadLocalVariables(sec.flags) && | |
314 !relocAttrs.hasAttr(RelocAttrBits::UNSIGNED)) | |
315 error(message("not allowed in thread-local section, must be UNSIGNED")); | |
316 if (rel.r_length < 2 || rel.r_length > 3 || | |
317 !relocAttrs.hasAttr(static_cast<RelocAttrBits>(1 << rel.r_length))) { | |
318 static SmallVector<StringRef, 4> widths{"0", "4", "8", "4 or 8"}; | |
319 error(message("has width " + std::to_string(1 << rel.r_length) + | |
320 " bytes, but must be " + | |
321 widths[(static_cast<int>(relocAttrs.bits) >> 2) & 3] + | |
322 " bytes")); | |
323 } | |
324 return valid; | |
325 } | |
326 | |
327 template <class Section> | |
328 void ObjFile::parseRelocations(ArrayRef<Section> sectionHeaders, | |
329 const Section &sec, SubsectionMap &subsecMap) { | |
166 auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); | 330 auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); |
167 ArrayRef<any_relocation_info> relInfos( | 331 ArrayRef<relocation_info> relInfos( |
168 reinterpret_cast<const any_relocation_info *>(buf + sec.reloff), | 332 reinterpret_cast<const relocation_info *>(buf + sec.reloff), sec.nreloc); |
169 sec.nreloc); | 333 |
170 | 334 for (size_t i = 0; i < relInfos.size(); i++) { |
171 for (const any_relocation_info &anyRel : relInfos) { | 335 // Paired relocations serve as Mach-O's method for attaching a |
172 if (anyRel.r_word0 & R_SCATTERED) | 336 // supplemental datum to a primary relocation record. ELF does not |
337 // need them because the *_RELOC_RELA records contain the extra | |
338 // addend field, vs. *_RELOC_REL which omit the addend. | |
339 // | |
340 // The {X86_64,ARM64}_RELOC_SUBTRACTOR record holds the subtrahend, | |
341 // and the paired *_RELOC_UNSIGNED record holds the minuend. The | |
342 // datum for each is a symbolic address. The result is the offset | |
343 // between two addresses. | |
344 // | |
345 // The ARM64_RELOC_ADDEND record holds the addend, and the paired | |
346 // ARM64_RELOC_BRANCH26 or ARM64_RELOC_PAGE21/PAGEOFF12 holds the | |
347 // base symbolic address. | |
348 // | |
349 // Note: X86 does not use *_RELOC_ADDEND because it can embed an | |
350 // addend into the instruction stream. On X86, a relocatable address | |
351 // field always occupies an entire contiguous sequence of byte(s), | |
352 // so there is no need to merge opcode bits with address | |
353 // bits. Therefore, it's easy and convenient to store addends in the | |
354 // instruction-stream bytes that would otherwise contain zeroes. By | |
355 // contrast, RISC ISAs such as ARM64 mix opcode bits with | |
356 // address bits so that bitwise arithmetic is necessary to extract | |
357 // and insert them. Storing addends in the instruction stream is | |
358 // possible, but inconvenient and more costly at link time. | |
359 | |
360 int64_t pairedAddend = 0; | |
361 relocation_info relInfo = relInfos[i]; | |
362 if (target->hasAttr(relInfo.r_type, RelocAttrBits::ADDEND)) { | |
363 pairedAddend = SignExtend64<24>(relInfo.r_symbolnum); | |
364 relInfo = relInfos[++i]; | |
365 } | |
366 assert(i < relInfos.size()); | |
367 if (!validateRelocationInfo(this, sec, relInfo)) | |
368 continue; | |
369 if (relInfo.r_address & R_SCATTERED) | |
173 fatal("TODO: Scattered relocations not supported"); | 370 fatal("TODO: Scattered relocations not supported"); |
174 | 371 |
175 auto rel = reinterpret_cast<const relocation_info &>(anyRel); | 372 bool isSubtrahend = |
176 | 373 target->hasAttr(relInfo.r_type, RelocAttrBits::SUBTRAHEND); |
374 int64_t embeddedAddend = target->getEmbeddedAddend(mb, sec.offset, relInfo); | |
375 assert(!(embeddedAddend && pairedAddend)); | |
376 int64_t totalAddend = pairedAddend + embeddedAddend; | |
177 Reloc r; | 377 Reloc r; |
178 r.type = rel.r_type; | 378 r.type = relInfo.r_type; |
179 r.pcrel = rel.r_pcrel; | 379 r.pcrel = relInfo.r_pcrel; |
180 uint32_t secRelOffset = rel.r_address; | 380 r.length = relInfo.r_length; |
181 uint64_t rawAddend = | 381 r.offset = relInfo.r_address; |
182 target->getImplicitAddend(buf + sec.offset + secRelOffset, r.type); | 382 if (relInfo.r_extern) { |
183 | 383 r.referent = symbols[relInfo.r_symbolnum]; |
184 if (rel.r_extern) { | 384 r.addend = isSubtrahend ? 0 : totalAddend; |
185 r.target = symbols[rel.r_symbolnum]; | |
186 r.addend = rawAddend; | |
187 } else { | 385 } else { |
188 if (!rel.r_pcrel) | 386 assert(!isSubtrahend); |
189 fatal("TODO: Only pcrel section relocations are supported"); | 387 const Section &referentSec = sectionHeaders[relInfo.r_symbolnum - 1]; |
190 | 388 uint64_t referentOffset; |
191 if (rel.r_symbolnum == 0 || rel.r_symbolnum > subsections.size()) | 389 if (relInfo.r_pcrel) { |
192 fatal("invalid section index in relocation for offset " + | 390 // The implicit addend for pcrel section relocations is the pcrel offset |
193 std::to_string(r.offset) + " in section " + sec.sectname + | 391 // in terms of the addresses in the input file. Here we adjust it so |
194 " of " + getName()); | 392 // that it describes the offset from the start of the referent section. |
195 | 393 // FIXME This logic was written around x86_64 behavior -- ARM64 doesn't |
196 SubsectionMap &targetSubsecMap = subsections[rel.r_symbolnum - 1]; | 394 // have pcrel section relocations. We may want to factor this out into |
197 const section_64 &targetSec = sectionHeaders[rel.r_symbolnum - 1]; | 395 // the arch-specific .cpp file. |
198 // The implicit addend for pcrel section relocations is the pcrel offset | 396 assert(target->hasAttr(r.type, RelocAttrBits::BYTE4)); |
199 // in terms of the addresses in the input file. Here we adjust it so that | 397 referentOffset = |
200 // it describes the offset from the start of the target section. | 398 sec.addr + relInfo.r_address + 4 + totalAddend - referentSec.addr; |
201 // TODO: Figure out what to do for non-pcrel section relocations. | 399 } else { |
202 // TODO: The offset of 4 is probably not right for ARM64, nor for | 400 // The addend for a non-pcrel relocation is its absolute address. |
203 // relocations with r_length != 2. | 401 referentOffset = totalAddend - referentSec.addr; |
204 uint32_t targetOffset = | 402 } |
205 sec.addr + secRelOffset + 4 + rawAddend - targetSec.addr; | 403 SubsectionMap &referentSubsecMap = subsections[relInfo.r_symbolnum - 1]; |
206 r.target = findContainingSubsection(targetSubsecMap, &targetOffset); | 404 r.referent = findContainingSubsection(referentSubsecMap, &referentOffset); |
207 r.addend = targetOffset; | 405 r.addend = referentOffset; |
208 } | 406 } |
209 | 407 |
210 InputSection *subsec = findContainingSubsection(subsecMap, &secRelOffset); | 408 InputSection *subsec = findContainingSubsection(subsecMap, &r.offset); |
211 r.offset = secRelOffset; | |
212 subsec->relocs.push_back(r); | 409 subsec->relocs.push_back(r); |
213 } | 410 |
214 } | 411 if (isSubtrahend) { |
215 | 412 relocation_info minuendInfo = relInfos[++i]; |
216 void InputFile::parseSymbols(ArrayRef<nlist_64> nList, const char *strtab, | 413 // SUBTRACTOR relocations should always be followed by an UNSIGNED one |
217 bool subsectionsViaSymbols) { | 414 // attached to the same address. |
218 // resize(), not reserve(), because we are going to create N_ALT_ENTRY symbols | 415 assert(target->hasAttr(minuendInfo.r_type, RelocAttrBits::UNSIGNED) && |
219 // out-of-sequence. | 416 relInfo.r_address == minuendInfo.r_address); |
417 Reloc p; | |
418 p.type = minuendInfo.r_type; | |
419 if (minuendInfo.r_extern) { | |
420 p.referent = symbols[minuendInfo.r_symbolnum]; | |
421 p.addend = totalAddend; | |
422 } else { | |
423 uint64_t referentOffset = | |
424 totalAddend - sectionHeaders[minuendInfo.r_symbolnum - 1].addr; | |
425 SubsectionMap &referentSubsecMap = | |
426 subsections[minuendInfo.r_symbolnum - 1]; | |
427 p.referent = | |
428 findContainingSubsection(referentSubsecMap, &referentOffset); | |
429 p.addend = referentOffset; | |
430 } | |
431 subsec->relocs.push_back(p); | |
432 } | |
433 } | |
434 } | |
435 | |
436 template <class NList> | |
437 static macho::Symbol *createDefined(const NList &sym, StringRef name, | |
438 InputSection *isec, uint64_t value, | |
439 uint64_t size) { | |
440 // Symbol scope is determined by sym.n_type & (N_EXT | N_PEXT): | |
441 // N_EXT: Global symbols. These go in the symbol table during the link, | |
442 // and also in the export table of the output so that the dynamic | |
443 // linker sees them. | |
444 // N_EXT | N_PEXT: Linkage unit (think: dylib) scoped. These go in the | |
445 // symbol table during the link so that duplicates are | |
446 // either reported (for non-weak symbols) or merged | |
447 // (for weak symbols), but they do not go in the export | |
448 // table of the output. | |
449 // N_PEXT: Does not occur in input files in practice, | |
450 // a private extern must be external. | |
451 // 0: Translation-unit scoped. These are not in the symbol table during | |
452 // link, and not in the export table of the output either. | |
453 | |
454 bool isWeakDefCanBeHidden = | |
455 (sym.n_desc & (N_WEAK_DEF | N_WEAK_REF)) == (N_WEAK_DEF | N_WEAK_REF); | |
456 | |
457 if (sym.n_type & (N_EXT | N_PEXT)) { | |
458 assert((sym.n_type & N_EXT) && "invalid input"); | |
459 bool isPrivateExtern = sym.n_type & N_PEXT; | |
460 | |
461 // lld's behavior for merging symbols is slightly different from ld64: | |
462 // ld64 picks the winning symbol based on several criteria (see | |
463 // pickBetweenRegularAtoms() in ld64's SymbolTable.cpp), while lld | |
464 // just merges metadata and keeps the contents of the first symbol | |
465 // with that name (see SymbolTable::addDefined). For: | |
466 // * inline function F in a TU built with -fvisibility-inlines-hidden | |
467 // * and inline function F in another TU built without that flag | |
468 // ld64 will pick the one from the file built without | |
469 // -fvisibility-inlines-hidden. | |
470 // lld will instead pick the one listed first on the link command line and | |
471 // give it visibility as if the function was built without | |
472 // -fvisibility-inlines-hidden. | |
473 // If both functions have the same contents, this will have the same | |
474 // behavior. If not, it won't, but the input had an ODR violation in | |
475 // that case. | |
476 // | |
477 // Similarly, merging a symbol | |
478 // that's isPrivateExtern and not isWeakDefCanBeHidden with one | |
479 // that's not isPrivateExtern but isWeakDefCanBeHidden technically | |
480 // should produce one | |
481 // that's not isPrivateExtern but isWeakDefCanBeHidden. That matters | |
482 // with ld64's semantics, because it means the non-private-extern | |
483 // definition will continue to take priority if more private extern | |
484 // definitions are encountered. With lld's semantics there's no observable | |
485 // difference between a symbol that's isWeakDefCanBeHidden or one that's | |
486 // privateExtern -- neither makes it into the dynamic symbol table. So just | |
487 // promote isWeakDefCanBeHidden to isPrivateExtern here. | |
488 if (isWeakDefCanBeHidden) | |
489 isPrivateExtern = true; | |
490 | |
491 return symtab->addDefined( | |
492 name, isec->file, isec, value, size, sym.n_desc & N_WEAK_DEF, | |
493 isPrivateExtern, sym.n_desc & N_ARM_THUMB_DEF, | |
494 sym.n_desc & REFERENCED_DYNAMICALLY, sym.n_desc & N_NO_DEAD_STRIP); | |
495 } | |
496 | |
497 assert(!isWeakDefCanBeHidden && | |
498 "weak_def_can_be_hidden on already-hidden symbol?"); | |
499 return make<Defined>( | |
500 name, isec->file, isec, value, size, sym.n_desc & N_WEAK_DEF, | |
501 /*isExternal=*/false, /*isPrivateExtern=*/false, | |
502 sym.n_desc & N_ARM_THUMB_DEF, sym.n_desc & REFERENCED_DYNAMICALLY, | |
503 sym.n_desc & N_NO_DEAD_STRIP); | |
504 } | |
505 | |
506 // Absolute symbols are defined symbols that do not have an associated | |
507 // InputSection. They cannot be weak. | |
508 template <class NList> | |
509 static macho::Symbol *createAbsolute(const NList &sym, InputFile *file, | |
510 StringRef name) { | |
511 if (sym.n_type & (N_EXT | N_PEXT)) { | |
512 assert((sym.n_type & N_EXT) && "invalid input"); | |
513 return symtab->addDefined(name, file, nullptr, sym.n_value, /*size=*/0, | |
514 /*isWeakDef=*/false, sym.n_type & N_PEXT, | |
515 sym.n_desc & N_ARM_THUMB_DEF, | |
516 /*isReferencedDynamically=*/false, | |
517 sym.n_desc & N_NO_DEAD_STRIP); | |
518 } | |
519 return make<Defined>(name, file, nullptr, sym.n_value, /*size=*/0, | |
520 /*isWeakDef=*/false, | |
521 /*isExternal=*/false, /*isPrivateExtern=*/false, | |
522 sym.n_desc & N_ARM_THUMB_DEF, | |
523 /*isReferencedDynamically=*/false, | |
524 sym.n_desc & N_NO_DEAD_STRIP); | |
525 } | |
526 | |
527 template <class NList> | |
528 macho::Symbol *ObjFile::parseNonSectionSymbol(const NList &sym, | |
529 StringRef name) { | |
530 uint8_t type = sym.n_type & N_TYPE; | |
531 switch (type) { | |
532 case N_UNDF: | |
533 return sym.n_value == 0 | |
534 ? symtab->addUndefined(name, this, sym.n_desc & N_WEAK_REF) | |
535 : symtab->addCommon(name, this, sym.n_value, | |
536 1 << GET_COMM_ALIGN(sym.n_desc), | |
537 sym.n_type & N_PEXT); | |
538 case N_ABS: | |
539 return createAbsolute(sym, this, name); | |
540 case N_PBUD: | |
541 case N_INDR: | |
542 error("TODO: support symbols of type " + std::to_string(type)); | |
543 return nullptr; | |
544 case N_SECT: | |
545 llvm_unreachable( | |
546 "N_SECT symbols should not be passed to parseNonSectionSymbol"); | |
547 default: | |
548 llvm_unreachable("invalid symbol type"); | |
549 } | |
550 } | |
551 | |
552 template <class LP> | |
553 void ObjFile::parseSymbols(ArrayRef<typename LP::section> sectionHeaders, | |
554 ArrayRef<typename LP::nlist> nList, | |
555 const char *strtab, bool subsectionsViaSymbols) { | |
556 using NList = typename LP::nlist; | |
557 | |
558 // Groups indices of the symbols by the sections that contain them. | |
559 std::vector<std::vector<uint32_t>> symbolsBySection(subsections.size()); | |
220 symbols.resize(nList.size()); | 560 symbols.resize(nList.size()); |
221 std::vector<size_t> altEntrySymIdxs; | 561 for (uint32_t i = 0; i < nList.size(); ++i) { |
222 | 562 const NList &sym = nList[i]; |
223 auto createDefined = [&](const nlist_64 &sym, InputSection *isec, | |
224 uint32_t value) -> Symbol * { | |
225 StringRef name = strtab + sym.n_strx; | 563 StringRef name = strtab + sym.n_strx; |
226 if (sym.n_type & N_EXT) | 564 if ((sym.n_type & N_TYPE) == N_SECT) { |
227 // Global defined symbol | 565 SubsectionMap &subsecMap = subsections[sym.n_sect - 1]; |
228 return symtab->addDefined(name, isec, value); | 566 // parseSections() may have chosen not to parse this section. |
229 else | 567 if (subsecMap.empty()) |
230 // Local defined symbol | 568 continue; |
231 return make<Defined>(name, isec, value); | 569 symbolsBySection[sym.n_sect - 1].push_back(i); |
232 }; | 570 } else { |
233 | 571 symbols[i] = parseNonSectionSymbol(sym, name); |
234 for (size_t i = 0, n = nList.size(); i < n; ++i) { | 572 } |
235 const nlist_64 &sym = nList[i]; | 573 } |
236 | 574 |
237 // Undefined symbol | 575 // Calculate symbol sizes and create subsections by splitting the sections |
238 if (!sym.n_sect) { | 576 // along symbol boundaries. |
577 for (size_t i = 0; i < subsections.size(); ++i) { | |
578 SubsectionMap &subsecMap = subsections[i]; | |
579 if (subsecMap.empty()) | |
580 continue; | |
581 | |
582 std::vector<uint32_t> &symbolIndices = symbolsBySection[i]; | |
583 llvm::sort(symbolIndices, [&](uint32_t lhs, uint32_t rhs) { | |
584 return nList[lhs].n_value < nList[rhs].n_value; | |
585 }); | |
586 uint64_t sectionAddr = sectionHeaders[i].addr; | |
587 uint32_t sectionAlign = 1u << sectionHeaders[i].align; | |
588 | |
589 // We populate subsecMap by repeatedly splitting the last (highest address) | |
590 // subsection. | |
591 SubsectionEntry subsecEntry = subsecMap.back(); | |
592 for (size_t j = 0; j < symbolIndices.size(); ++j) { | |
593 uint32_t symIndex = symbolIndices[j]; | |
594 const NList &sym = nList[symIndex]; | |
239 StringRef name = strtab + sym.n_strx; | 595 StringRef name = strtab + sym.n_strx; |
240 symbols[i] = symtab->addUndefined(name); | 596 InputSection *isec = subsecEntry.isec; |
241 continue; | 597 |
242 } | 598 uint64_t subsecAddr = sectionAddr + subsecEntry.offset; |
243 | 599 uint64_t symbolOffset = sym.n_value - subsecAddr; |
244 const section_64 &sec = sectionHeaders[sym.n_sect - 1]; | 600 uint64_t symbolSize = |
245 SubsectionMap &subsecMap = subsections[sym.n_sect - 1]; | 601 j + 1 < symbolIndices.size() |
246 uint64_t offset = sym.n_value - sec.addr; | 602 ? nList[symbolIndices[j + 1]].n_value - sym.n_value |
247 | 603 : isec->data.size() - symbolOffset; |
248 // If the input file does not use subsections-via-symbols, all symbols can | 604 // There are 3 cases where we do not need to create a new subsection: |
249 // use the same subsection. Otherwise, we must split the sections along | 605 // 1. If the input file does not use subsections-via-symbols. |
250 // symbol boundaries. | 606 // 2. Multiple symbols at the same address only induce one subsection. |
251 if (!subsectionsViaSymbols) { | 607 // (The symbolOffset == 0 check covers both this case as well as |
252 symbols[i] = createDefined(sym, subsecMap[0], offset); | 608 // the first loop iteration.) |
253 continue; | 609 // 3. Alternative entry points do not induce new subsections. |
254 } | 610 if (!subsectionsViaSymbols || symbolOffset == 0 || |
255 | 611 sym.n_desc & N_ALT_ENTRY) { |
256 // nList entries aren't necessarily arranged in address order. Therefore, | 612 symbols[symIndex] = |
257 // we can't create alt-entry symbols at this point because a later symbol | 613 createDefined(sym, name, isec, symbolOffset, symbolSize); |
258 // may split its section, which may affect which subsection the alt-entry | 614 continue; |
259 // symbol is assigned to. So we need to handle them in a second pass below. | 615 } |
260 if (sym.n_desc & N_ALT_ENTRY) { | 616 |
261 altEntrySymIdxs.push_back(i); | 617 auto *nextIsec = make<InputSection>(*isec); |
262 continue; | 618 nextIsec->data = isec->data.slice(symbolOffset); |
263 } | 619 nextIsec->numRefs = 0; |
264 | 620 nextIsec->wasCoalesced = false; |
265 // Find the subsection corresponding to the greatest section offset that is | 621 isec->data = isec->data.slice(0, symbolOffset); |
266 // <= that of the current symbol. The subsection that we find either needs | 622 |
267 // to be used directly or split in two. | 623 // By construction, the symbol will be at offset zero in the new |
268 uint32_t firstSize = offset; | 624 // subsection. |
269 InputSection *firstIsec = findContainingSubsection(subsecMap, &firstSize); | 625 symbols[symIndex] = |
270 | 626 createDefined(sym, name, nextIsec, /*value=*/0, symbolSize); |
271 if (firstSize == 0) { | 627 // TODO: ld64 appears to preserve the original alignment as well as each |
272 // Alias of an existing symbol, or the first symbol in the section. These | 628 // subsection's offset from the last aligned address. We should consider |
273 // are handled by reusing the existing section. | 629 // emulating that behavior. |
274 symbols[i] = createDefined(sym, firstIsec, 0); | 630 nextIsec->align = MinAlign(sectionAlign, sym.n_value); |
275 continue; | 631 subsecMap.push_back({sym.n_value - sectionAddr, nextIsec}); |
276 } | 632 subsecEntry = subsecMap.back(); |
277 | 633 } |
278 // We saw a symbol definition at a new offset. Split the section into two | 634 } |
279 // subsections. The new symbol uses the second subsection. | 635 } |
280 auto *secondIsec = make<InputSection>(*firstIsec); | 636 |
281 secondIsec->data = firstIsec->data.slice(firstSize); | 637 OpaqueFile::OpaqueFile(MemoryBufferRef mb, StringRef segName, |
282 firstIsec->data = firstIsec->data.slice(0, firstSize); | 638 StringRef sectName) |
283 // TODO: ld64 appears to preserve the original alignment as well as each | 639 : InputFile(OpaqueKind, mb) { |
284 // subsection's offset from the last aligned address. We should consider | 640 InputSection *isec = make<InputSection>(); |
285 // emulating that behavior. | 641 isec->file = this; |
286 secondIsec->align = MinAlign(firstIsec->align, offset); | 642 isec->name = sectName.take_front(16); |
287 | 643 isec->segname = segName.take_front(16); |
288 subsecMap[offset] = secondIsec; | 644 const auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); |
289 // By construction, the symbol will be at offset zero in the new section. | 645 isec->data = {buf, mb.getBufferSize()}; |
290 symbols[i] = createDefined(sym, secondIsec, 0); | 646 isec->live = true; |
291 } | 647 subsections.push_back({{0, isec}}); |
292 | 648 } |
293 for (size_t idx : altEntrySymIdxs) { | 649 |
294 const nlist_64 &sym = nList[idx]; | 650 ObjFile::ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName) |
295 SubsectionMap &subsecMap = subsections[sym.n_sect - 1]; | 651 : InputFile(ObjKind, mb), modTime(modTime) { |
296 uint32_t off = sym.n_value - sectionHeaders[sym.n_sect - 1].addr; | 652 this->archiveName = std::string(archiveName); |
297 InputSection *subsec = findContainingSubsection(subsecMap, &off); | 653 if (target->wordSize == 8) |
298 symbols[idx] = createDefined(sym, subsec, off); | 654 parse<LP64>(); |
299 } | 655 else |
300 } | 656 parse<ILP32>(); |
301 | 657 } |
302 ObjFile::ObjFile(MemoryBufferRef mb) : InputFile(ObjKind, mb) { | 658 |
659 template <class LP> void ObjFile::parse() { | |
660 using Header = typename LP::mach_header; | |
661 using SegmentCommand = typename LP::segment_command; | |
662 using Section = typename LP::section; | |
663 using NList = typename LP::nlist; | |
664 | |
303 auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); | 665 auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); |
304 auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart()); | 666 auto *hdr = reinterpret_cast<const Header *>(mb.getBufferStart()); |
305 | 667 |
306 if (const load_command *cmd = findCommand(hdr, LC_SEGMENT_64)) { | 668 Architecture arch = getArchitectureFromCpuType(hdr->cputype, hdr->cpusubtype); |
307 auto *c = reinterpret_cast<const segment_command_64 *>(cmd); | 669 if (arch != config->arch()) { |
308 sectionHeaders = ArrayRef<section_64>{ | 670 error(toString(this) + " has architecture " + getArchitectureName(arch) + |
309 reinterpret_cast<const section_64 *>(c + 1), c->nsects}; | 671 " which is incompatible with target architecture " + |
672 getArchitectureName(config->arch())); | |
673 return; | |
674 } | |
675 | |
676 if (!checkCompatibility(this)) | |
677 return; | |
678 | |
679 if (const load_command *cmd = findCommand(hdr, LC_LINKER_OPTION)) { | |
680 auto *c = reinterpret_cast<const linker_option_command *>(cmd); | |
681 StringRef data{reinterpret_cast<const char *>(c + 1), | |
682 c->cmdsize - sizeof(linker_option_command)}; | |
683 parseLCLinkerOption(this, c->count, data); | |
684 } | |
685 | |
686 ArrayRef<Section> sectionHeaders; | |
687 if (const load_command *cmd = findCommand(hdr, LP::segmentLCType)) { | |
688 auto *c = reinterpret_cast<const SegmentCommand *>(cmd); | |
689 sectionHeaders = | |
690 ArrayRef<Section>{reinterpret_cast<const Section *>(c + 1), c->nsects}; | |
310 parseSections(sectionHeaders); | 691 parseSections(sectionHeaders); |
311 } | 692 } |
312 | 693 |
313 // TODO: Error on missing LC_SYMTAB? | 694 // TODO: Error on missing LC_SYMTAB? |
314 if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) { | 695 if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) { |
315 auto *c = reinterpret_cast<const symtab_command *>(cmd); | 696 auto *c = reinterpret_cast<const symtab_command *>(cmd); |
316 ArrayRef<nlist_64> nList( | 697 ArrayRef<NList> nList(reinterpret_cast<const NList *>(buf + c->symoff), |
317 reinterpret_cast<const nlist_64 *>(buf + c->symoff), c->nsyms); | 698 c->nsyms); |
318 const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff; | 699 const char *strtab = reinterpret_cast<const char *>(buf) + c->stroff; |
319 bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS; | 700 bool subsectionsViaSymbols = hdr->flags & MH_SUBSECTIONS_VIA_SYMBOLS; |
320 parseSymbols(nList, strtab, subsectionsViaSymbols); | 701 parseSymbols<LP>(sectionHeaders, nList, strtab, subsectionsViaSymbols); |
321 } | 702 } |
322 | 703 |
323 // The relocations may refer to the symbols, so we parse them after we have | 704 // The relocations may refer to the symbols, so we parse them after we have |
324 // parsed all the symbols. | 705 // parsed all the symbols. |
325 for (size_t i = 0, n = subsections.size(); i < n; ++i) | 706 for (size_t i = 0, n = subsections.size(); i < n; ++i) |
326 parseRelocations(sectionHeaders[i], subsections[i]); | 707 if (!subsections[i].empty()) |
327 } | 708 parseRelocations(sectionHeaders, sectionHeaders[i], subsections[i]); |
328 | 709 |
329 DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella) | 710 parseDebugInfo(); |
330 : InputFile(DylibKind, mb) { | 711 } |
712 | |
713 void ObjFile::parseDebugInfo() { | |
714 std::unique_ptr<DwarfObject> dObj = DwarfObject::create(this); | |
715 if (!dObj) | |
716 return; | |
717 | |
718 auto *ctx = make<DWARFContext>( | |
719 std::move(dObj), "", | |
720 [&](Error err) { | |
721 warn(toString(this) + ": " + toString(std::move(err))); | |
722 }, | |
723 [&](Error warning) { | |
724 warn(toString(this) + ": " + toString(std::move(warning))); | |
725 }); | |
726 | |
727 // TODO: Since object files can contain a lot of DWARF info, we should verify | |
728 // that we are parsing just the info we need | |
729 const DWARFContext::compile_unit_range &units = ctx->compile_units(); | |
730 // FIXME: There can be more than one compile unit per object file. See | |
731 // PR48637. | |
732 auto it = units.begin(); | |
733 compileUnit = it->get(); | |
734 } | |
735 | |
736 // The path can point to either a dylib or a .tbd file. | |
737 static DylibFile *loadDylib(StringRef path, DylibFile *umbrella) { | |
738 Optional<MemoryBufferRef> mbref = readFile(path); | |
739 if (!mbref) { | |
740 error("could not read dylib file at " + path); | |
741 return nullptr; | |
742 } | |
743 return loadDylib(*mbref, umbrella); | |
744 } | |
745 | |
746 // TBD files are parsed into a series of TAPI documents (InterfaceFiles), with | |
747 // the first document storing child pointers to the rest of them. When we are | |
748 // processing a given TBD file, we store that top-level document in | |
749 // currentTopLevelTapi. When processing re-exports, we search its children for | |
750 // potentially matching documents in the same TBD file. Note that the children | |
751 // themselves don't point to further documents, i.e. this is a two-level tree. | |
752 // | |
753 // Re-exports can either refer to on-disk files, or to documents within .tbd | |
754 // files. | |
755 static DylibFile *findDylib(StringRef path, DylibFile *umbrella, | |
756 const InterfaceFile *currentTopLevelTapi) { | |
757 if (path::is_absolute(path, path::Style::posix)) | |
758 for (StringRef root : config->systemLibraryRoots) | |
759 if (Optional<std::string> dylibPath = | |
760 resolveDylibPath((root + path).str())) | |
761 return loadDylib(*dylibPath, umbrella); | |
762 | |
763 // TODO: Handle -dylib_file | |
764 | |
765 SmallString<128> newPath; | |
766 if (config->outputType == MH_EXECUTE && | |
767 path.consume_front("@executable_path/")) { | |
768 // ld64 allows overriding this with the undocumented flag -executable_path. | |
769 // lld doesn't currently implement that flag. | |
770 path::append(newPath, sys::path::parent_path(config->outputFile), path); | |
771 path = newPath; | |
772 } else if (path.consume_front("@loader_path/")) { | |
773 path::append(newPath, sys::path::parent_path(umbrella->getName()), path); | |
774 path = newPath; | |
775 } else if (path.startswith("@rpath/")) { | |
776 for (StringRef rpath : umbrella->rpaths) { | |
777 newPath.clear(); | |
778 if (rpath.consume_front("@loader_path/")) | |
779 path::append(newPath, sys::path::parent_path(umbrella->getName())); | |
780 path::append(newPath, rpath, path.drop_front(strlen("@rpath/"))); | |
781 if (Optional<std::string> dylibPath = resolveDylibPath(newPath)) | |
782 return loadDylib(*dylibPath, umbrella); | |
783 } | |
784 } | |
785 | |
786 if (currentTopLevelTapi) { | |
787 for (InterfaceFile &child : | |
788 make_pointee_range(currentTopLevelTapi->documents())) { | |
789 assert(child.documents().empty()); | |
790 if (path == child.getInstallName()) { | |
791 auto file = make<DylibFile>(child, umbrella); | |
792 file->parseReexports(child); | |
793 return file; | |
794 } | |
795 } | |
796 } | |
797 | |
798 if (Optional<std::string> dylibPath = resolveDylibPath(path)) | |
799 return loadDylib(*dylibPath, umbrella); | |
800 | |
801 return nullptr; | |
802 } | |
803 | |
804 // If a re-exported dylib is public (lives in /usr/lib or | |
805 // /System/Library/Frameworks), then it is considered implicitly linked: we | |
806 // should bind to its symbols directly instead of via the re-exporting umbrella | |
807 // library. | |
808 static bool isImplicitlyLinked(StringRef path) { | |
809 if (!config->implicitDylibs) | |
810 return false; | |
811 | |
812 if (path::parent_path(path) == "/usr/lib") | |
813 return true; | |
814 | |
815 // Match /System/Library/Frameworks/$FOO.framework/**/$FOO | |
816 if (path.consume_front("/System/Library/Frameworks/")) { | |
817 StringRef frameworkName = path.take_until([](char c) { return c == '.'; }); | |
818 return path::filename(path) == frameworkName; | |
819 } | |
820 | |
821 return false; | |
822 } | |
823 | |
824 static void loadReexport(StringRef path, DylibFile *umbrella, | |
825 const InterfaceFile *currentTopLevelTapi) { | |
826 DylibFile *reexport = findDylib(path, umbrella, currentTopLevelTapi); | |
827 if (!reexport) | |
828 error("unable to locate re-export with install name " + path); | |
829 else if (isImplicitlyLinked(path)) | |
830 inputFiles.insert(reexport); | |
831 } | |
832 | |
833 DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella, | |
834 bool isBundleLoader) | |
835 : InputFile(DylibKind, mb), refState(RefState::Unreferenced), | |
836 isBundleLoader(isBundleLoader) { | |
837 assert(!isBundleLoader || !umbrella); | |
331 if (umbrella == nullptr) | 838 if (umbrella == nullptr) |
332 umbrella = this; | 839 umbrella = this; |
840 this->umbrella = umbrella; | |
333 | 841 |
334 auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); | 842 auto *buf = reinterpret_cast<const uint8_t *>(mb.getBufferStart()); |
335 auto *hdr = reinterpret_cast<const mach_header_64 *>(mb.getBufferStart()); | 843 auto *hdr = reinterpret_cast<const mach_header *>(mb.getBufferStart()); |
336 | 844 |
337 // Initialize dylibName. | 845 // Initialize installName. |
338 if (const load_command *cmd = findCommand(hdr, LC_ID_DYLIB)) { | 846 if (const load_command *cmd = findCommand(hdr, LC_ID_DYLIB)) { |
339 auto *c = reinterpret_cast<const dylib_command *>(cmd); | 847 auto *c = reinterpret_cast<const dylib_command *>(cmd); |
340 dylibName = reinterpret_cast<const char *>(cmd) + read32le(&c->dylib.name); | 848 currentVersion = read32le(&c->dylib.current_version); |
341 } else { | 849 compatibilityVersion = read32le(&c->dylib.compatibility_version); |
342 error("dylib " + getName() + " missing LC_ID_DYLIB load command"); | 850 installName = |
343 return; | 851 reinterpret_cast<const char *>(cmd) + read32le(&c->dylib.name); |
852 } else if (!isBundleLoader) { | |
853 // macho_executable and macho_bundle don't have LC_ID_DYLIB, | |
854 // so it's OK. | |
855 error("dylib " + toString(this) + " missing LC_ID_DYLIB load command"); | |
856 return; | |
857 } | |
858 | |
859 if (config->printEachFile) | |
860 message(toString(this)); | |
861 | |
862 deadStrippable = hdr->flags & MH_DEAD_STRIPPABLE_DYLIB; | |
863 | |
864 if (!checkCompatibility(this)) | |
865 return; | |
866 | |
867 for (auto *cmd : findCommands<rpath_command>(hdr, LC_RPATH)) { | |
868 StringRef rpath{reinterpret_cast<const char *>(cmd) + cmd->path}; | |
869 rpaths.push_back(rpath); | |
344 } | 870 } |
345 | 871 |
346 // Initialize symbols. | 872 // Initialize symbols. |
873 exportingFile = isImplicitlyLinked(installName) ? this : this->umbrella; | |
347 if (const load_command *cmd = findCommand(hdr, LC_DYLD_INFO_ONLY)) { | 874 if (const load_command *cmd = findCommand(hdr, LC_DYLD_INFO_ONLY)) { |
348 auto *c = reinterpret_cast<const dyld_info_command *>(cmd); | 875 auto *c = reinterpret_cast<const dyld_info_command *>(cmd); |
349 parseTrie(buf + c->export_off, c->export_size, | 876 parseTrie(buf + c->export_off, c->export_size, |
350 [&](const Twine &name, uint64_t flags) { | 877 [&](const Twine &name, uint64_t flags) { |
351 symbols.push_back(symtab->addDylib(saver.save(name), umbrella)); | 878 StringRef savedName = saver.save(name); |
879 if (handleLDSymbol(savedName)) | |
880 return; | |
881 bool isWeakDef = flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; | |
882 bool isTlv = flags & EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL; | |
883 symbols.push_back(symtab->addDylib(savedName, exportingFile, | |
884 isWeakDef, isTlv)); | |
352 }); | 885 }); |
353 } else { | 886 } else { |
354 error("LC_DYLD_INFO_ONLY not found in " + getName()); | 887 error("LC_DYLD_INFO_ONLY not found in " + toString(this)); |
355 return; | 888 return; |
356 } | 889 } |
357 | 890 } |
358 if (hdr->flags & MH_NO_REEXPORTED_DYLIBS) | 891 |
359 return; | 892 void DylibFile::parseLoadCommands(MemoryBufferRef mb) { |
360 | 893 auto *hdr = reinterpret_cast<const mach_header *>(mb.getBufferStart()); |
361 const uint8_t *p = | 894 const uint8_t *p = reinterpret_cast<const uint8_t *>(mb.getBufferStart()) + |
362 reinterpret_cast<const uint8_t *>(hdr) + sizeof(mach_header_64); | 895 target->headerSize; |
363 for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) { | 896 for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) { |
364 auto *cmd = reinterpret_cast<const load_command *>(p); | 897 auto *cmd = reinterpret_cast<const load_command *>(p); |
365 p += cmd->cmdsize; | 898 p += cmd->cmdsize; |
366 if (cmd->cmd != LC_REEXPORT_DYLIB) | 899 |
900 if (!(hdr->flags & MH_NO_REEXPORTED_DYLIBS) && | |
901 cmd->cmd == LC_REEXPORT_DYLIB) { | |
902 const auto *c = reinterpret_cast<const dylib_command *>(cmd); | |
903 StringRef reexportPath = | |
904 reinterpret_cast<const char *>(c) + read32le(&c->dylib.name); | |
905 loadReexport(reexportPath, exportingFile, nullptr); | |
906 } | |
907 | |
908 // FIXME: What about LC_LOAD_UPWARD_DYLIB, LC_LAZY_LOAD_DYLIB, | |
909 // LC_LOAD_WEAK_DYLIB, LC_REEXPORT_DYLIB (..are reexports from dylibs with | |
910 // MH_NO_REEXPORTED_DYLIBS loaded for -flat_namespace)? | |
911 if (config->namespaceKind == NamespaceKind::flat && | |
912 cmd->cmd == LC_LOAD_DYLIB) { | |
913 const auto *c = reinterpret_cast<const dylib_command *>(cmd); | |
914 StringRef dylibPath = | |
915 reinterpret_cast<const char *>(c) + read32le(&c->dylib.name); | |
916 DylibFile *dylib = findDylib(dylibPath, umbrella, nullptr); | |
917 if (!dylib) | |
918 error(Twine("unable to locate library '") + dylibPath + | |
919 "' loaded from '" + toString(this) + "' for -flat_namespace"); | |
920 } | |
921 } | |
922 } | |
923 | |
924 // Some versions of XCode ship with .tbd files that don't have the right | |
925 // platform settings. | |
926 static constexpr std::array<StringRef, 3> skipPlatformChecks{ | |
927 "/usr/lib/system/libsystem_kernel.dylib", | |
928 "/usr/lib/system/libsystem_platform.dylib", | |
929 "/usr/lib/system/libsystem_pthread.dylib"}; | |
930 | |
931 DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella, | |
932 bool isBundleLoader) | |
933 : InputFile(DylibKind, interface), refState(RefState::Unreferenced), | |
934 isBundleLoader(isBundleLoader) { | |
935 // FIXME: Add test for the missing TBD code path. | |
936 | |
937 if (umbrella == nullptr) | |
938 umbrella = this; | |
939 this->umbrella = umbrella; | |
940 | |
941 installName = saver.save(interface.getInstallName()); | |
942 compatibilityVersion = interface.getCompatibilityVersion().rawValue(); | |
943 currentVersion = interface.getCurrentVersion().rawValue(); | |
944 | |
945 if (config->printEachFile) | |
946 message(toString(this)); | |
947 | |
948 if (!is_contained(skipPlatformChecks, installName) && | |
949 !is_contained(interface.targets(), config->platformInfo.target)) { | |
950 error(toString(this) + " is incompatible with " + | |
951 std::string(config->platformInfo.target)); | |
952 return; | |
953 } | |
954 | |
955 exportingFile = isImplicitlyLinked(installName) ? this : umbrella; | |
956 auto addSymbol = [&](const Twine &name) -> void { | |
957 symbols.push_back(symtab->addDylib(saver.save(name), exportingFile, | |
958 /*isWeakDef=*/false, | |
959 /*isTlv=*/false)); | |
960 }; | |
961 // TODO(compnerd) filter out symbols based on the target platform | |
962 // TODO: handle weak defs, thread locals | |
963 for (const auto *symbol : interface.symbols()) { | |
964 if (!symbol->getArchitectures().has(config->arch())) | |
367 continue; | 965 continue; |
368 | 966 |
369 auto *c = reinterpret_cast<const dylib_command *>(cmd); | 967 if (handleLDSymbol(symbol->getName())) |
370 StringRef reexportPath = | 968 continue; |
371 reinterpret_cast<const char *>(c) + read32le(&c->dylib.name); | 969 |
372 // TODO: Expand @loader_path, @executable_path etc in reexportPath | 970 switch (symbol->getKind()) { |
373 Optional<MemoryBufferRef> buffer = readFile(reexportPath); | 971 case SymbolKind::GlobalSymbol: |
374 if (!buffer) { | 972 addSymbol(symbol->getName()); |
375 error("unable to read re-exported dylib at " + reexportPath); | 973 break; |
974 case SymbolKind::ObjectiveCClass: | |
975 // XXX ld64 only creates these symbols when -ObjC is passed in. We may | |
976 // want to emulate that. | |
977 addSymbol(objc::klass + symbol->getName()); | |
978 addSymbol(objc::metaclass + symbol->getName()); | |
979 break; | |
980 case SymbolKind::ObjectiveCClassEHType: | |
981 addSymbol(objc::ehtype + symbol->getName()); | |
982 break; | |
983 case SymbolKind::ObjectiveCInstanceVariable: | |
984 addSymbol(objc::ivar + symbol->getName()); | |
985 break; | |
986 } | |
987 } | |
988 } | |
989 | |
990 void DylibFile::parseReexports(const InterfaceFile &interface) { | |
991 const InterfaceFile *topLevel = | |
992 interface.getParent() == nullptr ? &interface : interface.getParent(); | |
993 for (InterfaceFileRef intfRef : interface.reexportedLibraries()) { | |
994 InterfaceFile::const_target_range targets = intfRef.targets(); | |
995 if (is_contained(skipPlatformChecks, intfRef.getInstallName()) || | |
996 is_contained(targets, config->platformInfo.target)) | |
997 loadReexport(intfRef.getInstallName(), exportingFile, topLevel); | |
998 } | |
999 } | |
1000 | |
1001 // $ld$ symbols modify the properties/behavior of the library (e.g. its install | |
1002 // name, compatibility version or hide/add symbols) for specific target | |
1003 // versions. | |
1004 bool DylibFile::handleLDSymbol(StringRef originalName) { | |
1005 if (!originalName.startswith("$ld$")) | |
1006 return false; | |
1007 | |
1008 StringRef action; | |
1009 StringRef name; | |
1010 std::tie(action, name) = originalName.drop_front(strlen("$ld$")).split('$'); | |
1011 if (action == "previous") | |
1012 handleLDPreviousSymbol(name, originalName); | |
1013 else if (action == "install_name") | |
1014 handleLDInstallNameSymbol(name, originalName); | |
1015 return true; | |
1016 } | |
1017 | |
1018 void DylibFile::handleLDPreviousSymbol(StringRef name, StringRef originalName) { | |
1019 // originalName: $ld$ previous $ <installname> $ <compatversion> $ | |
1020 // <platformstr> $ <startversion> $ <endversion> $ <symbol-name> $ | |
1021 StringRef installName; | |
1022 StringRef compatVersion; | |
1023 StringRef platformStr; | |
1024 StringRef startVersion; | |
1025 StringRef endVersion; | |
1026 StringRef symbolName; | |
1027 StringRef rest; | |
1028 | |
1029 std::tie(installName, name) = name.split('$'); | |
1030 std::tie(compatVersion, name) = name.split('$'); | |
1031 std::tie(platformStr, name) = name.split('$'); | |
1032 std::tie(startVersion, name) = name.split('$'); | |
1033 std::tie(endVersion, name) = name.split('$'); | |
1034 std::tie(symbolName, rest) = name.split('$'); | |
1035 // TODO: ld64 contains some logic for non-empty symbolName as well. | |
1036 if (!symbolName.empty()) | |
1037 return; | |
1038 unsigned platform; | |
1039 if (platformStr.getAsInteger(10, platform) || | |
1040 platform != static_cast<unsigned>(config->platform())) | |
1041 return; | |
1042 | |
1043 VersionTuple start; | |
1044 if (start.tryParse(startVersion)) { | |
1045 warn("failed to parse start version, symbol '" + originalName + | |
1046 "' ignored"); | |
1047 return; | |
1048 } | |
1049 VersionTuple end; | |
1050 if (end.tryParse(endVersion)) { | |
1051 warn("failed to parse end version, symbol '" + originalName + "' ignored"); | |
1052 return; | |
1053 } | |
1054 if (config->platformInfo.minimum < start || | |
1055 config->platformInfo.minimum >= end) | |
1056 return; | |
1057 | |
1058 this->installName = saver.save(installName); | |
1059 | |
1060 if (!compatVersion.empty()) { | |
1061 VersionTuple cVersion; | |
1062 if (cVersion.tryParse(compatVersion)) { | |
1063 warn("failed to parse compatibility version, symbol '" + originalName + | |
1064 "' ignored"); | |
376 return; | 1065 return; |
377 } | 1066 } |
378 reexported.push_back(make<DylibFile>(*buffer, umbrella)); | 1067 compatibilityVersion = encodeVersion(cVersion); |
379 } | 1068 } |
380 } | 1069 } |
381 | 1070 |
382 DylibFile::DylibFile() : InputFile(DylibKind, MemoryBufferRef()) {} | 1071 void DylibFile::handleLDInstallNameSymbol(StringRef name, |
383 | 1072 StringRef originalName) { |
384 DylibFile *DylibFile::createLibSystemMock() { | 1073 // originalName: $ld$ install_name $ os<version> $ install_name |
385 auto *file = make<DylibFile>(); | 1074 StringRef condition, installName; |
386 file->mb = MemoryBufferRef("", "/usr/lib/libSystem.B.dylib"); | 1075 std::tie(condition, installName) = name.split('$'); |
387 file->dylibName = "/usr/lib/libSystem.B.dylib"; | 1076 VersionTuple version; |
388 file->symbols.push_back(symtab->addDylib("dyld_stub_binder", file)); | 1077 if (!condition.consume_front("os") || version.tryParse(condition)) |
389 return file; | 1078 warn("failed to parse os version, symbol '" + originalName + "' ignored"); |
390 } | 1079 else if (version == config->platformInfo.minimum) |
391 | 1080 this->installName = saver.save(installName); |
392 ArchiveFile::ArchiveFile(std::unique_ptr<llvm::object::Archive> &&f) | 1081 } |
1082 | |
1083 ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f) | |
393 : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)) { | 1084 : InputFile(ArchiveKind, f->getMemoryBufferRef()), file(std::move(f)) { |
394 for (const object::Archive::Symbol &sym : file->symbols()) | 1085 for (const object::Archive::Symbol &sym : file->symbols()) |
395 symtab->addLazy(sym.getName(), this, sym); | 1086 symtab->addLazy(sym.getName(), this, sym); |
396 } | 1087 } |
397 | 1088 |
398 void ArchiveFile::fetch(const object::Archive::Symbol &sym) { | 1089 void ArchiveFile::fetch(const object::Archive::Symbol &sym) { |
399 object::Archive::Child c = | 1090 object::Archive::Child c = |
400 CHECK(sym.getMember(), toString(this) + | 1091 CHECK(sym.getMember(), toString(this) + |
401 ": could not get the member for symbol " + | 1092 ": could not get the member for symbol " + |
402 sym.getName()); | 1093 toMachOString(sym)); |
403 | 1094 |
404 if (!seen.insert(c.getChildOffset()).second) | 1095 if (!seen.insert(c.getChildOffset()).second) |
405 return; | 1096 return; |
406 | 1097 |
407 MemoryBufferRef mb = | 1098 MemoryBufferRef mb = |
408 CHECK(c.getMemoryBufferRef(), | 1099 CHECK(c.getMemoryBufferRef(), |
409 toString(this) + | 1100 toString(this) + |
410 ": could not get the buffer for the member defining symbol " + | 1101 ": could not get the buffer for the member defining symbol " + |
411 sym.getName()); | 1102 toMachOString(sym)); |
412 auto file = make<ObjFile>(mb); | 1103 |
413 symbols.insert(symbols.end(), file->symbols.begin(), file->symbols.end()); | 1104 if (tar && c.getParent()->isThin()) |
414 subsections.insert(subsections.end(), file->subsections.begin(), | 1105 tar->append(relativeToRoot(CHECK(c.getFullName(), this)), mb.getBuffer()); |
415 file->subsections.end()); | 1106 |
416 } | 1107 uint32_t modTime = toTimeT( |
417 | 1108 CHECK(c.getLastModified(), toString(this) + |
418 // Returns "<internal>" or "baz.o". | 1109 ": could not get the modification time " |
419 std::string lld::toString(const InputFile *file) { | 1110 "for the member defining symbol " + |
420 return file ? std::string(file->getName()) : "<internal>"; | 1111 toMachOString(sym))); |
421 } | 1112 |
1113 // `sym` is owned by a LazySym, which will be replace<>()d by make<ObjFile> | |
1114 // and become invalid after that call. Copy it to the stack so we can refer | |
1115 // to it later. | |
1116 const object::Archive::Symbol symCopy = sym; | |
1117 | |
1118 if (Optional<InputFile *> file = | |
1119 loadArchiveMember(mb, modTime, getName(), /*objCOnly=*/false)) { | |
1120 inputFiles.insert(*file); | |
1121 // ld64 doesn't demangle sym here even with -demangle. | |
1122 // Match that: intentionally don't call toMachOString(). | |
1123 printArchiveMemberLoad(symCopy.getName(), *file); | |
1124 } | |
1125 } | |
1126 | |
1127 static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym, | |
1128 BitcodeFile &file) { | |
1129 StringRef name = saver.save(objSym.getName()); | |
1130 | |
1131 // TODO: support weak references | |
1132 if (objSym.isUndefined()) | |
1133 return symtab->addUndefined(name, &file, /*isWeakRef=*/false); | |
1134 | |
1135 assert(!objSym.isCommon() && "TODO: support common symbols in LTO"); | |
1136 | |
1137 // TODO: Write a test demonstrating why computing isPrivateExtern before | |
1138 // LTO compilation is important. | |
1139 bool isPrivateExtern = false; | |
1140 switch (objSym.getVisibility()) { | |
1141 case GlobalValue::HiddenVisibility: | |
1142 isPrivateExtern = true; | |
1143 break; | |
1144 case GlobalValue::ProtectedVisibility: | |
1145 error(name + " has protected visibility, which is not supported by Mach-O"); | |
1146 break; | |
1147 case GlobalValue::DefaultVisibility: | |
1148 break; | |
1149 } | |
1150 | |
1151 return symtab->addDefined(name, &file, /*isec=*/nullptr, /*value=*/0, | |
1152 /*size=*/0, objSym.isWeak(), isPrivateExtern, | |
1153 /*isThumb=*/false, | |
1154 /*isReferencedDynamically=*/false, | |
1155 /*noDeadStrip=*/false); | |
1156 } | |
1157 | |
1158 BitcodeFile::BitcodeFile(MemoryBufferRef mbref) | |
1159 : InputFile(BitcodeKind, mbref) { | |
1160 obj = check(lto::InputFile::create(mbref)); | |
1161 | |
1162 // Convert LTO Symbols to LLD Symbols in order to perform resolution. The | |
1163 // "winning" symbol will then be marked as Prevailing at LTO compilation | |
1164 // time. | |
1165 for (const lto::InputFile::Symbol &objSym : obj->symbols()) | |
1166 symbols.push_back(createBitcodeSymbol(objSym, *this)); | |
1167 } | |
1168 | |
1169 template void ObjFile::parse<LP64>(); |