[lld-macho] Support archives without index (#132942)

This is roughly a port of https://reviews.llvm.org/D117284. Like in that
change, archives without indices are treated as a collection of lazy
object files (as in `--start-lib`/`--end-lib`).

Porting the ELF follow-up to convert *all* archives to the lazy object
code path (https://reviews.llvm.org/D119074) is a natural next step, but
we would need to ensure the assertions about memory use hold for Mach-O.

NB: without an index, we can't do the part of the `-ObjC` scan where we
check for Objective-C symbols directly. We *can* still check for
`__objc` sections, so I wonder how much of a problem this actually is,
since I'm not sure how the "symbols but no sections" case can appear in
the wild.
This commit is contained in:
Leonard Grey 2025-04-10 14:33:56 -04:00 committed by GitHub
parent 589e1c73d0
commit 8bea91f677
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 117 additions and 55 deletions

View File

@ -314,8 +314,6 @@ static InputFile *addFile(StringRef path, LoadType loadType,
std::unique_ptr<object::Archive> archive = CHECK(
object::Archive::create(mbref), path + ": failed to parse archive");
if (!archive->isEmpty() && !archive->hasSymbolTable())
error(path + ": archive has no index; run ranlib to add one");
file = make<ArchiveFile>(std::move(archive), isForceHidden);
if (tar && file->getArchive().isThin())
@ -362,9 +360,11 @@ static InputFile *addFile(StringRef path, LoadType loadType,
": Archive::children failed: " + toString(std::move(e)));
}
} else if (isCommandLineLoad && config->forceLoadObjC) {
for (const object::Archive::Symbol &sym : file->getArchive().symbols())
if (sym.getName().starts_with(objc::symbol_names::klass))
file->fetch(sym);
if (file->getArchive().hasSymbolTable()) {
for (const object::Archive::Symbol &sym : file->getArchive().symbols())
if (sym.getName().starts_with(objc::symbol_names::klass))
file->fetch(sym);
}
// TODO: no need to look for ObjC sections for a given archive member if
// we already found that it contains an ObjC symbol.
@ -394,7 +394,6 @@ static InputFile *addFile(StringRef path, LoadType loadType,
": Archive::children failed: " + toString(std::move(e)));
}
}
file->addLazySymbols();
loadedArchives[path] = ArchiveFileInfo{file, isCommandLineLoad};
newFile = file;

View File

@ -2159,9 +2159,31 @@ ArchiveFile::ArchiveFile(std::unique_ptr<object::Archive> &&f, bool forceHidden)
void ArchiveFile::addLazySymbols() {
// Avoid calling getMemoryBufferRef() on zero-symbol archive
// since that crashes.
if (file->isEmpty() || file->getNumberOfSymbols() == 0)
if (file->isEmpty() ||
(file->hasSymbolTable() && file->getNumberOfSymbols() == 0))
return;
if (!file->hasSymbolTable()) {
  // No index: there are no archive-level symbols to register, so treat each
  // child as a lazy object file instead.
  Error e = Error::success();
  for (const object::Archive::Child &c : file->children(e)) {
    // Check `seen` but don't insert so a future eager load can still happen.
    if (seen.contains(c.getChildOffset()))
      continue;
    // Skip children whose offset is already recorded in `seenLazy`.
    if (!seenLazy.insert(c.getChildOffset()).second)
      continue;
    // Named `member` (not `file`) so the archive member variable `file` is
    // not shadowed inside the loop.
    Expected<InputFile *> member = childToObjectFile(c, /*lazy=*/true);
    if (!member) {
      error(toString(this) +
            ": couldn't process child: " + toString(member.takeError()));
      // error() reports and returns; the failed Expected must not be
      // dereferenced, so move on to the next child.
      continue;
    }
    inputFiles.insert(*member);
  }
  if (e)
    error(toString(this) +
          ": Archive::children failed: " + toString(std::move(e)));
  return;
}
Error err = Error::success();
auto child = file->child_begin(err);
// Ignore the I/O error here - will be reported later.
@ -2191,16 +2213,17 @@ void ArchiveFile::addLazySymbols() {
static Expected<InputFile *>
loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
uint64_t offsetInArchive, bool forceHidden, bool compatArch) {
uint64_t offsetInArchive, bool forceHidden, bool compatArch,
bool lazy) {
if (config->zeroModTime)
modTime = 0;
switch (identify_magic(mb.getBuffer())) {
case file_magic::macho_object:
return make<ObjFile>(mb, modTime, archiveName, /*lazy=*/false, forceHidden,
return make<ObjFile>(mb, modTime, archiveName, lazy, forceHidden,
compatArch);
case file_magic::bitcode:
return make<BitcodeFile>(mb, archiveName, offsetInArchive, /*lazy=*/false,
return make<BitcodeFile>(mb, archiveName, offsetInArchive, lazy,
forceHidden, compatArch);
default:
return createStringError(inconvertibleErrorCode(),
@ -2212,19 +2235,7 @@ loadArchiveMember(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
Error ArchiveFile::fetch(const object::Archive::Child &c, StringRef reason) {
if (!seen.insert(c.getChildOffset()).second)
return Error::success();
Expected<MemoryBufferRef> mb = c.getMemoryBufferRef();
if (!mb)
return mb.takeError();
Expected<TimePoint<std::chrono::seconds>> modTime = c.getLastModified();
if (!modTime)
return modTime.takeError();
Expected<InputFile *> file =
loadArchiveMember(*mb, toTimeT(*modTime), getName(), c.getChildOffset(),
forceHidden, compatArch);
auto file = childToObjectFile(c, /*lazy=*/false);
if (!file)
return file.takeError();
@ -2251,6 +2262,21 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
toMachOString(symCopy) + ": " + toString(std::move(e)));
}
// Converts archive member `c` into an input file by handing its buffer,
// modification time and offset to loadArchiveMember(). `lazy` is forwarded
// unchanged. Returns the error from reading the member's buffer or its
// last-modified time if either fails.
Expected<InputFile *>
ArchiveFile::childToObjectFile(const llvm::object::Archive::Child &c,
                               bool lazy) {
  Expected<MemoryBufferRef> memberBuf = c.getMemoryBufferRef();
  if (!memberBuf)
    return memberBuf.takeError();

  Expected<TimePoint<std::chrono::seconds>> lastModified =
      c.getLastModified();
  if (!lastModified)
    return lastModified.takeError();

  const auto childOffset = c.getChildOffset();
  return loadArchiveMember(*memberBuf, toTimeT(*lastModified), getName(),
                           childOffset, forceHidden, compatArch, lazy);
}
static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
BitcodeFile &file) {
StringRef name = saver().save(objSym.getName());

View File

@ -297,10 +297,13 @@ public:
static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
private:
Expected<InputFile *> childToObjectFile(const llvm::object::Archive::Child &c,
bool lazy);
std::unique_ptr<llvm::object::Archive> file;
// Keep track of children fetched from the archive by tracking
// which address offsets have been fetched already.
llvm::DenseSet<uint64_t> seen;
llvm::DenseSet<uint64_t> seenLazy;
// Load all symbols with hidden visibility (-load_hidden).
bool forceHidden;
};

View File

@ -0,0 +1,23 @@
; REQUIRES: x86
; RUN: rm -rf %t; split-file %s %t
; RUN: llvm-as %t/lib.ll -o %t/lib.o
; RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main.o %t/main.s
; RUN: llvm-ar rcST %t/lib.a %t/lib.o
; RUN: %lld %t/main.o %t/lib.a -o %t/out
;--- main.s
.global _main
_main:
call _foo
mov $0, %rax
ret
;--- lib.ll
target triple = "x86_64-apple-darwin"
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
define void @foo() {
entry:
ret void
}

View File

@ -0,0 +1,43 @@
# REQUIRES: x86
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/main.o %t/main.s
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/lib.o %t/lib.s
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos -o %t/lib2.o %t/lib2.s
# RUN: llvm-ar crST %t/lib.a %t/lib.o %t/lib2.o
# RUN: %lld %t/main.o %t/lib.a -o %t/out
# RUN: llvm-nm %t/out | FileCheck %s
# CHECK-NOT: T _bar
# CHECK: T _foo
## Test that every kind of eager load mechanism still works.
# RUN: %lld %t/main.o %t/lib.a -all_load -o %t/all_load
# RUN: llvm-nm %t/all_load | FileCheck %s --check-prefix FORCED-LOAD
# RUN: %lld %t/main.o -force_load %t/lib.a -o %t/force_load
# RUN: llvm-nm %t/force_load | FileCheck %s --check-prefix FORCED-LOAD
# RUN: %lld %t/main.o %t/lib.a -ObjC -o %t/objc
# RUN: llvm-nm %t/objc | FileCheck %s --check-prefix FORCED-LOAD
# FORCED-LOAD: T _bar
#--- lib.s
.global _foo
_foo:
ret
#--- lib2.s
.section __DATA,__objc_catlist
.quad 0x1234
.section __TEXT,__text
.global _bar
_bar:
ret
#--- main.s
.global _main
_main:
call _foo
mov $0, %rax
ret

View File

@ -1,32 +0,0 @@
# REQUIRES: x86
# RUN: rm -rf %t; split-file %s %t
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/2.s -o %t/2.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/3.s -o %t/3.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/4.s -o %t/4.o
# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/main.o
# RUN: llvm-ar rcS %t/test.a %t/2.o %t/3.o %t/4.o
# RUN: not %lld %t/test.o %t/test.a -o /dev/null 2>&1 | FileCheck %s
# CHECK: error: {{.*}}.a: archive has no index; run ranlib to add one
#--- 2.s
.globl _boo
_boo:
ret
#--- 3.s
.globl _bar
_bar:
ret
#--- 4.s
.globl _undefined, _unused
_unused:
ret
#--- main.s
.global _main
_main:
mov $0, %rax
ret