From 1c05c6183da836d148eb3f558e8f8e0194f39eed Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Fri, 11 Apr 2025 15:53:25 +0200 Subject: [PATCH] [LLD][COFF] Swap the meaning of symtab and hybridSymtab in hybrid images (#135093) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Originally, the intent behind symtab was to represent the symbol table seen in the PE header (without applying ARM64X relocations). However, in most cases outside of `writeHeader()`, the code references either both symbol tables or only the EC one, for example, `mainSymtab` in `linkerMain()` maps to `hybridSymtab` on ARM64X. MSVC's link.exe allows pure ARM64EC images to include native ARM64 files. This patch prepares LLD to support the same, which will require `hybridSymtab` to be available even for ARM64EC. At that point, `writeHeader()` will need to use the EC symbol table, and the original reasoning for keeping it in `hybridSymtab` no longer applies. Given this, it seems cleaner to treat the EC symbol table as the “main” one, assigning it to `symtab`, and use `hybridSymtab` for the native symbol table instead. Since `writeHeader()` will need to be conditional anyway, this change simplifies the rest of the code by allowing other parts to consistently treat `ctx.symtab` as the main symbol table. As a further simplification, this also allows us to eliminate `symtabEC` and use `symtab` directly; I’ll submit that as a separate PR. The map file now uses the EC symbol table for printed entry points and exports, matching MSVC behavior. --- lld/COFF/COFFLinkerContext.h | 7 +++-- lld/COFF/Chunks.cpp | 10 +++--- lld/COFF/Driver.cpp | 35 +++++++++------------ lld/COFF/InputFiles.cpp | 1 + lld/COFF/Writer.cpp | 54 ++++++++++++++++---------------- lld/test/COFF/arm64x-map.s | 60 ++++++++++++++++++++++++++++++++++++ 6 files changed, 111 insertions(+), 56 deletions(-) create mode 100644 lld/test/COFF/arm64x-map.s diff --git a/lld/COFF/COFFLinkerContext.h b/lld/COFF/COFFLinkerContext.h index 8322f829d405..613184d05d81 100644 --- a/lld/COFF/COFFLinkerContext.h +++ b/lld/COFF/COFFLinkerContext.h @@ -32,7 +32,7 @@ public: SymbolTable symtab; COFFOptTable optTable; - // A hybrid ARM64EC symbol table on ARM64X target. + // A native ARM64 symbol table on ARM64X target. std::optional hybridSymtab; // Pointer to the ARM64EC symbol table: either symtab for an ARM64EC target or @@ -41,16 +41,17 @@ public: // Returns the appropriate symbol table for the specified machine type. SymbolTable &getSymtab(llvm::COFF::MachineTypes machine) { - if (hybridSymtab && (machine == ARM64EC || machine == AMD64)) + if (hybridSymtab && machine == ARM64) return *hybridSymtab; return symtab; } // Invoke the specified callback for each symbol table. void forEachSymtab(std::function f) { - f(symtab); + // If present, process the native symbol table first. if (hybridSymtab) f(*hybridSymtab); + f(symtab); } std::vector objFileInstances; diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index 3494d1ba0ac0..392015892385 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -570,14 +570,14 @@ void SectionChunk::getBaserels(std::vector *res) { // to match the value in the EC load config, which is expected to be // a relocatable pointer to the __chpe_metadata symbol. COFFLinkerContext &ctx = file->symtab.ctx; - if (ctx.hybridSymtab && ctx.symtab.loadConfigSym && - ctx.symtab.loadConfigSym->getChunk() == this && - ctx.hybridSymtab->loadConfigSym && - ctx.symtab.loadConfigSize >= + if (ctx.hybridSymtab && ctx.hybridSymtab->loadConfigSym && + ctx.hybridSymtab->loadConfigSym->getChunk() == this && + ctx.symtab.loadConfigSym && + ctx.hybridSymtab->loadConfigSize >= offsetof(coff_load_configuration64, CHPEMetadataPointer) + sizeof(coff_load_configuration64::CHPEMetadataPointer)) res->emplace_back( - ctx.symtab.loadConfigSym->getRVA() + + ctx.hybridSymtab->loadConfigSym->getRVA() + offsetof(coff_load_configuration64, CHPEMetadataPointer), IMAGE_REL_BASED_DIR64); } diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 7aa13bdce488..d9e70a5be326 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -662,9 +662,9 @@ void LinkerDriver::setMachine(MachineTypes machine) { if (machine == ARM64EC) ctx.symtabEC = &ctx.symtab; } else { - ctx.symtab.machine = ARM64; - ctx.hybridSymtab.emplace(ctx, ARM64EC); - ctx.symtabEC = &*ctx.hybridSymtab; + ctx.symtab.machine = ARM64EC; + ctx.hybridSymtab.emplace(ctx, ARM64); + ctx.symtabEC = &ctx.symtab; } addWinSysRootLibSearchPaths(); @@ -981,12 +981,9 @@ void LinkerDriver::createImportLibrary(bool asLib) { } }; - if (ctx.hybridSymtab) { - getExports(ctx.symtab, nativeExports); - getExports(*ctx.hybridSymtab, exports); - } else { - getExports(ctx.symtab, exports); - } + getExports(ctx.symtab, exports); + if (ctx.hybridSymtab) + getExports(*ctx.hybridSymtab, nativeExports); std::string libName = getImportName(asLib); std::string path = getImplibPath(); @@ -1818,10 +1815,6 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { } } - // Most of main arguments apply either to both or only to EC symbol table on - // ARM64X target. - SymbolTable &mainSymtab = ctx.hybridSymtab ? *ctx.hybridSymtab : ctx.symtab; - // Handle /nodefaultlib: { llvm::TimeTraceScope timeScope2("Nodefaultlib"); @@ -1903,11 +1896,11 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { // Handle /alternatename for (auto *arg : args.filtered(OPT_alternatename)) - mainSymtab.parseAlternateName(arg->getValue()); + ctx.symtab.parseAlternateName(arg->getValue()); // Handle /include for (auto *arg : args.filtered(OPT_incl)) - mainSymtab.addGCRoot(arg->getValue()); + ctx.symtab.addGCRoot(arg->getValue()); // Handle /implib if (auto *arg = args.getLastArg(OPT_implib)) @@ -2056,7 +2049,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { // Handle /aligncomm for (auto *arg : args.filtered(OPT_aligncomm)) - mainSymtab.parseAligncomm(arg->getValue()); + ctx.symtab.parseAligncomm(arg->getValue()); // Handle /manifestdependency. for (auto *arg : args.filtered(OPT_manifestdependency)) @@ -2307,19 +2300,19 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { if (!e.extName.empty() && !isDecorated(e.extName)) e.extName = saver().save("_" + e.extName); } - mainSymtab.exports.push_back(e); + ctx.symtab.exports.push_back(e); } } // Handle /def if (auto *arg = args.getLastArg(OPT_deffile)) { // parseModuleDefs mutates Config object. - mainSymtab.parseModuleDefs(arg->getValue()); + ctx.symtab.parseModuleDefs(arg->getValue()); if (ctx.hybridSymtab) { // MSVC ignores the /defArm64Native argument on non-ARM64X targets. // It is also ignored if the /def option is not specified. if (auto *arg = args.getLastArg(OPT_defarm64native)) - ctx.symtab.parseModuleDefs(arg->getValue()); + ctx.hybridSymtab->parseModuleDefs(arg->getValue()); } } @@ -2336,7 +2329,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { // and after the early return when just writing an import library. if (config->subsystem == IMAGE_SUBSYSTEM_UNKNOWN) { llvm::TimeTraceScope timeScope("Infer subsystem"); - config->subsystem = mainSymtab.inferSubsystem(); + config->subsystem = ctx.symtab.inferSubsystem(); if (config->subsystem == IMAGE_SUBSYSTEM_UNKNOWN) Fatal(ctx) << "subsystem must be defined"; } @@ -2702,7 +2695,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { // Handle /output-def (MinGW specific). if (auto *arg = args.getLastArg(OPT_output_def)) - writeDefFile(ctx, arg->getValue(), mainSymtab.exports); + writeDefFile(ctx, arg->getValue(), ctx.symtab.exports); // Set extra alignment for .comm symbols ctx.forEachSymtab([&](SymbolTable &symtab) { diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp index 256bec6bb636..7916e3fd4ea1 100644 --- a/lld/COFF/InputFiles.cpp +++ b/lld/COFF/InputFiles.cpp @@ -139,6 +139,7 @@ void ArchiveFile::parse() { // Read both EC and native symbols on ARM64X. if (!ctx.hybridSymtab) return; + archiveSymtab = &*ctx.hybridSymtab; } else if (ctx.hybridSymtab) { // If the ECSYMBOLS section is missing in the archive, the archive could // be either a native-only ARM64 or x86_64 archive. Check the machine type diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 6ed1f884a963..31c686761d74 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -1359,14 +1359,14 @@ void Writer::createExportTable() { for (auto chunk : edataSec->chunks) { if (chunk->getMachine() != ARM64) { - ctx.hybridSymtab->edataStart = chunk; - ctx.hybridSymtab->edataEnd = edataSec->chunks.back(); + ctx.symtab.edataStart = chunk; + ctx.symtab.edataEnd = edataSec->chunks.back(); break; } - if (!ctx.symtab.edataStart) - ctx.symtab.edataStart = chunk; - ctx.symtab.edataEnd = chunk; + if (!ctx.hybridSymtab->edataStart) + ctx.hybridSymtab->edataStart = chunk; + ctx.hybridSymtab->edataEnd = chunk; } } } @@ -1760,7 +1760,8 @@ template void Writer::writeHeader() { assert(coffHeaderOffset == buf - buffer->getBufferStart()); auto *coff = reinterpret_cast(buf); buf += sizeof(*coff); - coff->Machine = ctx.symtab.isEC() ? AMD64 : ctx.symtab.machine; + SymbolTable &symtab = ctx.hybridSymtab ? *ctx.hybridSymtab : ctx.symtab; + coff->Machine = symtab.isEC() ? AMD64 : symtab.machine; coff->NumberOfSections = ctx.outputSections.size(); coff->Characteristics = IMAGE_FILE_EXECUTABLE_IMAGE; if (config->largeAddressAware) @@ -1807,7 +1808,7 @@ template void Writer::writeHeader() { pe->SizeOfImage = sizeOfImage; pe->SizeOfHeaders = sizeOfHeaders; if (!config->noEntry) { - Defined *entry = cast(ctx.symtab.entry); + Defined *entry = cast(symtab.entry); pe->AddressOfEntryPoint = entry->getRVA(); // Pointer to thumb code must have the LSB set, so adjust it. if (config->machine == ARMNT) @@ -1851,11 +1852,11 @@ template void Writer::writeHeader() { dataDirOffset64 == buf - buffer->getBufferStart()); auto *dir = reinterpret_cast(buf); buf += sizeof(*dir) * numberOfDataDirectory; - if (ctx.symtab.edataStart) { - dir[EXPORT_TABLE].RelativeVirtualAddress = ctx.symtab.edataStart->getRVA(); - dir[EXPORT_TABLE].Size = ctx.symtab.edataEnd->getRVA() + - ctx.symtab.edataEnd->getSize() - - ctx.symtab.edataStart->getRVA(); + if (symtab.edataStart) { + dir[EXPORT_TABLE].RelativeVirtualAddress = symtab.edataStart->getRVA(); + dir[EXPORT_TABLE].Size = symtab.edataEnd->getRVA() + + symtab.edataEnd->getSize() - + symtab.edataStart->getRVA(); } if (importTableStart) { dir[IMPORT_TABLE].RelativeVirtualAddress = importTableStart->getRVA(); @@ -1886,7 +1887,7 @@ template void Writer::writeHeader() { dir[BASE_RELOCATION_TABLE].RelativeVirtualAddress = relocSec->getRVA(); dir[BASE_RELOCATION_TABLE].Size = relocSize; } - if (Symbol *sym = ctx.symtab.findUnderscore("_tls_used")) { + if (Symbol *sym = symtab.findUnderscore("_tls_used")) { if (Defined *b = dyn_cast(sym)) { dir[TLS_TABLE].RelativeVirtualAddress = b->getRVA(); dir[TLS_TABLE].Size = config->is64() @@ -1898,10 +1899,10 @@ template void Writer::writeHeader() { dir[DEBUG_DIRECTORY].RelativeVirtualAddress = debugDirectory->getRVA(); dir[DEBUG_DIRECTORY].Size = debugDirectory->getSize(); } - if (ctx.symtab.loadConfigSym) { + if (symtab.loadConfigSym) { dir[LOAD_CONFIG_TABLE].RelativeVirtualAddress = - ctx.symtab.loadConfigSym->getRVA(); - dir[LOAD_CONFIG_TABLE].Size = ctx.symtab.loadConfigSize; + symtab.loadConfigSym->getRVA(); + dir[LOAD_CONFIG_TABLE].Size = symtab.loadConfigSize; } if (!delayIdata.empty()) { dir[DELAY_IMPORT_DESCRIPTOR].RelativeVirtualAddress = @@ -2442,7 +2443,7 @@ void Writer::setECSymbols() { // For the hybrid image, set the alternate entry point to the EC entry // point. In the hybrid view, it is swapped to the native entry point // using ARM64X relocations. - if (auto altEntrySym = cast_or_null(ctx.hybridSymtab->entry)) { + if (auto altEntrySym = cast_or_null(ctx.symtab.entry)) { // If the entry is an EC export thunk, use its target instead. if (auto thunkChunk = dyn_cast(altEntrySym->getChunk())) @@ -2711,7 +2712,7 @@ void Writer::createDynamicRelocs() { if (ctx.symtab.entry != ctx.hybridSymtab->entry || pdata.first != hybridPdata.first) { chpeSym = cast_or_null( - ctx.hybridSymtab->findUnderscore("__chpe_metadata")); + ctx.symtab.findUnderscore("__chpe_metadata")); if (!chpeSym) Warn(ctx) << "'__chpe_metadata' is missing for ARM64X target"; } @@ -2720,14 +2721,14 @@ void Writer::createDynamicRelocs() { ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t), peHeaderOffset + offsetof(pe32plus_header, AddressOfEntryPoint), - cast_or_null(ctx.hybridSymtab->entry)); + cast_or_null(ctx.symtab.entry)); // Swap the alternate entry point in the CHPE metadata. if (chpeSym) ctx.dynamicRelocs->add( IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t), Arm64XRelocVal(chpeSym, offsetof(chpe_metadata, AlternateEntryPoint)), - cast_or_null(ctx.symtab.entry)); + cast_or_null(ctx.hybridSymtab->entry)); } if (ctx.symtab.edataStart != ctx.hybridSymtab->edataStart) { @@ -2735,7 +2736,7 @@ void Writer::createDynamicRelocs() { dataDirOffset64 + EXPORT_TABLE * sizeof(data_directory) + offsetof(data_directory, RelativeVirtualAddress), - ctx.hybridSymtab->edataStart); + ctx.symtab.edataStart); // The Size value is assigned after addresses are finalized. ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t), dataDirOffset64 + @@ -2773,12 +2774,12 @@ void Writer::createDynamicRelocs() { dataDirOffset64 + LOAD_CONFIG_TABLE * sizeof(data_directory) + offsetof(data_directory, RelativeVirtualAddress), - ctx.hybridSymtab->loadConfigSym); + ctx.symtab.loadConfigSym); ctx.dynamicRelocs->add(IMAGE_DVRT_ARM64X_FIXUP_TYPE_VALUE, sizeof(uint32_t), dataDirOffset64 + LOAD_CONFIG_TABLE * sizeof(data_directory) + offsetof(data_directory, Size), - ctx.hybridSymtab->loadConfigSize); + ctx.symtab.loadConfigSize); } PartialSection *Writer::createPartialSection(StringRef name, @@ -2889,15 +2890,14 @@ void Writer::prepareLoadConfig(SymbolTable &symtab, T *loadConfig) { // On ARM64X, only the EC version of the load config contains // CHPEMetadataPointer. Copy its value to the native load config. if (ctx.hybridSymtab && !symtab.isEC() && - ctx.hybridSymtab->loadConfigSize >= + ctx.symtab.loadConfigSize >= offsetof(T, CHPEMetadataPointer) + sizeof(T::CHPEMetadataPointer)) { OutputSection *sec = - ctx.getOutputSection(ctx.hybridSymtab->loadConfigSym->getChunk()); + ctx.getOutputSection(ctx.symtab.loadConfigSym->getChunk()); uint8_t *secBuf = buffer->getBufferStart() + sec->getFileOff(); auto hybridLoadConfig = reinterpret_cast( - secBuf + - (ctx.hybridSymtab->loadConfigSym->getRVA() - sec->getRVA())); + secBuf + (ctx.symtab.loadConfigSym->getRVA() - sec->getRVA())); loadConfig->CHPEMetadataPointer = hybridLoadConfig->CHPEMetadataPointer; } } diff --git a/lld/test/COFF/arm64x-map.s b/lld/test/COFF/arm64x-map.s new file mode 100644 index 000000000000..956c52a93b04 --- /dev/null +++ b/lld/test/COFF/arm64x-map.s @@ -0,0 +1,60 @@ +// REQUIRES: aarch64 +// RUN: split-file %s %t.dir && cd %t.dir + +// RUN: llvm-mc -filetype=obj -triple=aarch64-windows arm64-data-sym.s -o arm64-data-sym.obj +// RUN: llvm-mc -filetype=obj -triple=arm64ec-windows arm64ec-data-sym.s -o arm64ec-data-sym.obj +// RUN: lld-link -machine:arm64x -dll -out:out.dll -map -mapinfo:exports arm64-data-sym.obj arm64ec-data-sym.obj +// RUN: FileCheck %s < out.map + +// CHECK: Start Length Name Class +// CHECK-NEXT: 0001:00000000 00001004H .text CODE +// CHECK-NEXT: 0004:00000000 00000008H .data DATA +// CHECK-NEXT: 0004:00000008 00000000H .bss DATA +// CHECK-EMPTY: +// CHECK-NEXT: Address Publics by Value Rva+Base Lib:Object +// CHECK-EMPTY: +// CHECK-NEXT: 0001:00000000 _DllMainCRTStartup 0000000180001000 arm64-data-sym.obj +// CHECK-NEXT: 0001:00001000 _DllMainCRTStartup 0000000180002000 arm64ec-data-sym.obj +// CHECK-NEXT: 0004:00000000 arm64_data_sym 0000000180005000 arm64-data-sym.obj +// CHECK-NEXT: 0004:00000004 arm64ec_data_sym 0000000180005004 arm64ec-data-sym.obj +// CHECK-EMPTY: +// CHECK-NEXT: entry point at 0002:00000000 +// CHECK-EMPTY: +// CHECK-NEXT: Static symbols +// CHECK-EMPTY: +// CHECK-EMPTY: +// CHECK-NEXT: Exports +// CHECK-EMPTY: +// CHECK-NEXT: ordinal name +// CHECK-EMPTY: +// CHECK-NEXT: 1 arm64ec_data_sym + +#--- arm64ec-data-sym.s + .text + .globl _DllMainCRTStartup +_DllMainCRTStartup: + ret + + .data + .globl arm64ec_data_sym + .p2align 2, 0x0 +arm64ec_data_sym: + .word 0x02020202 + + .section .drectve + .ascii "-export:arm64ec_data_sym,DATA" + +#--- arm64-data-sym.s + .text + .globl _DllMainCRTStartup +_DllMainCRTStartup: + ret + + .data + .globl arm64_data_sym + .p2align 2, 0x0 +arm64_data_sym: + .word 0x01010101 + + .section .drectve + .ascii "-export:arm64_data_sym,DATA"