[Clang] Put offloading globals in the .llvm.rodata.offloading section (#111890)

Summary:
For our offloading entries, we currently store all the string names of
kernels that the runtime will need to load from the target executable.
These are available via pointer in the `__tgt_offload_entry` struct,
however this makes it difficult to obtain from the object itself. This
patch simply puts the strings in a named section so they can be easily
queried.

The motivation behind this is that when the linker wrapper is doing
linking, it wants to know which kernels the host executable is calling.
We *could* get this already via the `.relaomp_offloading_entires`
section and trawling through the string table, but that's quite annoying
and not portable. The follow-up to this should be to make the linker
wrapper get a list of all used symbols the device link job should count
as "needed" so we can handle static linking more directly.
This commit is contained in:
Joseph Huber 2024-10-28 07:17:50 -07:00 committed by GitHub
parent d4c4180417
commit 42eb54b774
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 53 additions and 20 deletions

View File

@ -15,48 +15,48 @@
#include "Inputs/cuda.h"
//.
// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
// CUDA: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00"
// CUDA: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00"
// CUDA: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00"
// CUDA: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries", align 1
// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00"
// CUDA: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
// CUDA: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries", align 1
//.
// HIP: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
// HIP: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
// HIP: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
// HIP: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00"
// HIP: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
// HIP: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries", align 1
// HIP: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00"
// HIP: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
// HIP: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries", align 1
// HIP: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00"
// HIP: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
// HIP: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries", align 1
// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00"
// HIP: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
// HIP: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries", align 1
//.
// CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
// CUDA-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
// CUDA-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z18__device_stub__foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00"
// CUDA-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
// CUDA-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z21__device_stub__kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00"
// CUDA-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
// CUDA-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00"
// CUDA-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
// CUDA-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "cuda_offloading_entries$OE", align 1
// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00"
// CUDA-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
// CUDA-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "cuda_offloading_entries$OE", align 1
//.
// HIP-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00"
// HIP-COFF: @.offloading.entry_name = internal unnamed_addr constant [8 x i8] c"_Z3foov\00", section ".llvm.rodata.offloading", align 1
// HIP-COFF: @.offloading.entry._Z3foov = weak constant %struct.__tgt_offload_entry { ptr @_Z3foov, ptr @.offloading.entry_name, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00"
// HIP-COFF: @.offloading.entry_name.1 = internal unnamed_addr constant [11 x i8] c"_Z6kernelv\00", section ".llvm.rodata.offloading", align 1
// HIP-COFF: @.offloading.entry._Z6kernelv = weak constant %struct.__tgt_offload_entry { ptr @_Z6kernelv, ptr @.offloading.entry_name.1, i64 0, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00"
// HIP-COFF: @.offloading.entry_name.2 = internal unnamed_addr constant [4 x i8] c"var\00", section ".llvm.rodata.offloading", align 1
// HIP-COFF: @.offloading.entry.var = weak constant %struct.__tgt_offload_entry { ptr @var, ptr @.offloading.entry_name.2, i64 4, i32 0, i32 0 }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00"
// HIP-COFF: @.offloading.entry_name.3 = internal unnamed_addr constant [5 x i8] c"surf\00", section ".llvm.rodata.offloading", align 1
// HIP-COFF: @.offloading.entry.surf = weak constant %struct.__tgt_offload_entry { ptr @surf, ptr @.offloading.entry_name.3, i64 4, i32 2, i32 1 }, section "hip_offloading_entries$OE", align 1
// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00"
// HIP-COFF: @.offloading.entry_name.4 = internal unnamed_addr constant [4 x i8] c"tex\00", section ".llvm.rodata.offloading", align 1
// HIP-COFF: @.offloading.entry.tex = weak constant %struct.__tgt_offload_entry { ptr @tex, ptr @.offloading.entry_name.4, i64 4, i32 3, i32 1 }, section "hip_offloading_entries$OE", align 1
//.
// CUDA-LABEL: @_Z18__device_stub__foov(
@ -137,3 +137,28 @@ template <typename T, int dim = 1, int mode = 0>
struct __attribute__((device_builtin_texture_type)) texture : public textureReference {};
texture<void> tex;
//.
// CUDA: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name}
// CUDA: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1}
// CUDA: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2}
// CUDA: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3}
// CUDA: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4}
//.
// HIP: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name}
// HIP: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1}
// HIP: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2}
// HIP: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3}
// HIP: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4}
//.
// CUDA-COFF: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name}
// CUDA-COFF: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1}
// CUDA-COFF: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2}
// CUDA-COFF: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3}
// CUDA-COFF: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4}
//.
// HIP-COFF: [[META0:![0-9]+]] = !{ptr @.offloading.entry_name}
// HIP-COFF: [[META1:![0-9]+]] = !{ptr @.offloading.entry_name.1}
// HIP-COFF: [[META2:![0-9]+]] = !{ptr @.offloading.entry_name.2}
// HIP-COFF: [[META3:![0-9]+]] = !{ptr @.offloading.entry_name.3}
// HIP-COFF: [[META4:![0-9]+]] = !{ptr @.offloading.entry_name.4}
//.

View File

@ -53,7 +53,15 @@ offloading::getOffloadingEntryInitializer(Module &M, Constant *Addr,
auto *Str =
new GlobalVariable(M, AddrName->getType(), /*isConstant=*/true,
GlobalValue::InternalLinkage, AddrName, Prefix);
StringRef SectionName = ".llvm.rodata.offloading";
Str->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
Str->setSection(SectionName);
Str->setAlignment(Align(1));
// Make a metadata node for these constants so it can be queried from IR.
NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.offloading.symbols");
Metadata *MDVals[] = {ConstantAsMetadata::get(Str)};
MD->addOperand(llvm::MDNode::get(M.getContext(), MDVals));
// Construct the offloading entry.
Constant *EntryData[] = {