llvm-project/lld/ELF/Arch/RISCV.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

1316 lines
43 KiB
C++
Raw Normal View History

//===- RISCV.cpp ----------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "InputFiles.h"
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "llvm/Support/ELFAttributes.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/RISCVAttributeParser.h"
#include "llvm/Support/RISCVAttributes.h"
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
#include "llvm/Support/TimeProfiler.h"
#include "llvm/TargetParser/RISCVISAInfo.h"
using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
namespace {
class RISCV final : public TargetInfo {
public:
2024-09-28 21:48:26 -07:00
RISCV(Ctx &);
uint32_t calcEFlags() const override;
int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
void writeGotHeader(uint8_t *buf) const override;
void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
void writePltHeader(uint8_t *buf) const override;
void writePlt(uint8_t *buf, const Symbol &sym,
uint64_t pltEntryAddr) const override;
RelType getDynRel(RelType type) const override;
RelExpr getRelExpr(RelType type, const Symbol &s,
const uint8_t *loc) const override;
void relocate(uint8_t *loc, const Relocation &rel,
uint64_t val) const override;
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
bool relaxOnce(int pass) const override;
void finalizeRelax(int passes) const override;
};
} // end anonymous namespace
// These are internal relocation numbers for GP relaxation. They aren't part
// of the psABI spec.
#define INTERNAL_R_RISCV_GPREL_I 256
#define INTERNAL_R_RISCV_GPREL_S 257
const uint64_t dtpOffset = 0x800;
namespace {
enum Op {
ADDI = 0x13,
AUIPC = 0x17,
JALR = 0x67,
LD = 0x3003,
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
LUI = 0x37,
LW = 0x2003,
SRLI = 0x5013,
SUB = 0x40000033,
};
enum Reg {
X_RA = 1,
X_GP = 3,
X_TP = 4,
X_T0 = 5,
X_T1 = 6,
X_T2 = 7,
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
X_A0 = 10,
X_T3 = 28,
};
} // namespace
static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; }
static uint32_t lo12(uint32_t val) { return val & 4095; }
static uint32_t itype(uint32_t op, uint32_t rd, uint32_t rs1, uint32_t imm) {
return op | (rd << 7) | (rs1 << 15) | (imm << 20);
}
static uint32_t rtype(uint32_t op, uint32_t rd, uint32_t rs1, uint32_t rs2) {
return op | (rd << 7) | (rs1 << 15) | (rs2 << 20);
}
static uint32_t utype(uint32_t op, uint32_t rd, uint32_t imm) {
return op | (rd << 7) | (imm << 12);
}
// Extract bits v[begin:end], where range is inclusive, and begin must be < 63.
static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
return (v & ((1ULL << (begin + 1)) - 1)) >> end;
}
static uint32_t setLO12_I(uint32_t insn, uint32_t imm) {
return (insn & 0xfffff) | (imm << 20);
}
static uint32_t setLO12_S(uint32_t insn, uint32_t imm) {
return (insn & 0x1fff07f) | (extractBits(imm, 11, 5) << 25) |
(extractBits(imm, 4, 0) << 7);
}
2024-09-28 21:48:26 -07:00
RISCV::RISCV(Ctx &ctx) : TargetInfo(ctx) {
copyRel = R_RISCV_COPY;
pltRel = R_RISCV_JUMP_SLOT;
relativeRel = R_RISCV_RELATIVE;
iRelativeRel = R_RISCV_IRELATIVE;
if (ctx.arg.is64) {
symbolicRel = R_RISCV_64;
tlsModuleIndexRel = R_RISCV_TLS_DTPMOD64;
tlsOffsetRel = R_RISCV_TLS_DTPREL64;
tlsGotRel = R_RISCV_TLS_TPREL64;
} else {
symbolicRel = R_RISCV_32;
tlsModuleIndexRel = R_RISCV_TLS_DTPMOD32;
tlsOffsetRel = R_RISCV_TLS_DTPREL32;
tlsGotRel = R_RISCV_TLS_TPREL32;
}
gotRel = symbolicRel;
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
tlsDescRel = R_RISCV_TLSDESC;
// .got[0] = _DYNAMIC
gotHeaderEntriesNum = 1;
// .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map
gotPltHeaderEntriesNum = 2;
pltHeaderSize = 32;
pltEntrySize = 16;
ipltEntrySize = 16;
}
2024-10-06 00:14:12 -07:00
static uint32_t getEFlags(Ctx &ctx, InputFile *f) {
if (ctx.arg.is64)
return cast<ObjFile<ELF64LE>>(f)->getObj().getHeader().e_flags;
return cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags;
}
uint32_t RISCV::calcEFlags() const {
// If there are only binary input files (from -b binary), use a
// value of 0 for the ELF header flags.
if (ctx.objectFiles.empty())
return 0;
2024-10-06 00:14:12 -07:00
uint32_t target = getEFlags(ctx, ctx.objectFiles.front());
for (InputFile *f : ctx.objectFiles) {
2024-10-06 00:14:12 -07:00
uint32_t eflags = getEFlags(ctx, f);
if (eflags & EF_RISCV_RVC)
target |= EF_RISCV_RVC;
if ((eflags & EF_RISCV_FLOAT_ABI) != (target & EF_RISCV_FLOAT_ABI))
2024-11-24 11:43:40 -08:00
Err(ctx) << f
<< ": cannot link object files with different "
"floating-point ABI from "
<< ctx.objectFiles[0];
if ((eflags & EF_RISCV_RVE) != (target & EF_RISCV_RVE))
2024-11-24 11:43:40 -08:00
Err(ctx) << f << ": cannot link object files with different EF_RISCV_RVE";
}
return target;
}
int64_t RISCV::getImplicitAddend(const uint8_t *buf, RelType type) const {
switch (type) {
default:
InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
return 0;
case R_RISCV_32:
case R_RISCV_TLS_DTPMOD32:
case R_RISCV_TLS_DTPREL32:
case R_RISCV_TLS_TPREL32:
return SignExtend64<32>(read32le(buf));
case R_RISCV_64:
case R_RISCV_TLS_DTPMOD64:
case R_RISCV_TLS_DTPREL64:
case R_RISCV_TLS_TPREL64:
return read64le(buf);
case R_RISCV_RELATIVE:
case R_RISCV_IRELATIVE:
return ctx.arg.is64 ? read64le(buf) : read32le(buf);
case R_RISCV_NONE:
case R_RISCV_JUMP_SLOT:
// These relocations are defined as not having an implicit addend.
return 0;
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
case R_RISCV_TLSDESC:
return ctx.arg.is64 ? read64le(buf + 8) : read32le(buf + 4);
}
}
void RISCV::writeGotHeader(uint8_t *buf) const {
if (ctx.arg.is64)
write64le(buf, ctx.mainPart->dynamic->getVA());
else
write32le(buf, ctx.mainPart->dynamic->getVA());
}
void RISCV::writeGotPlt(uint8_t *buf, const Symbol &s) const {
if (ctx.arg.is64)
write64le(buf, ctx.in.plt->getVA());
else
write32le(buf, ctx.in.plt->getVA());
}
void RISCV::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
if (ctx.arg.writeAddends) {
if (ctx.arg.is64)
2024-10-19 20:32:58 -07:00
write64le(buf, s.getVA(ctx));
else
2024-10-19 20:32:58 -07:00
write32le(buf, s.getVA(ctx));
}
}
void RISCV::writePltHeader(uint8_t *buf) const {
// 1: auipc t2, %pcrel_hi(.got.plt)
// sub t1, t1, t3
// l[wd] t3, %pcrel_lo(1b)(t2); t3 = _dl_runtime_resolve
// addi t1, t1, -pltHeaderSize-12; t1 = &.plt[i] - &.plt[0]
// addi t0, t2, %pcrel_lo(1b)
// srli t1, t1, (rv64?1:2); t1 = &.got.plt[i] - &.got.plt[0]
// l[wd] t0, Wordsize(t0); t0 = link_map
// jr t3
uint32_t offset = ctx.in.gotPlt->getVA() - ctx.in.plt->getVA();
uint32_t load = ctx.arg.is64 ? LD : LW;
write32le(buf + 0, utype(AUIPC, X_T2, hi20(offset)));
write32le(buf + 4, rtype(SUB, X_T1, X_T1, X_T3));
write32le(buf + 8, itype(load, X_T3, X_T2, lo12(offset)));
write32le(buf + 12, itype(ADDI, X_T1, X_T1, -ctx.target->pltHeaderSize - 12));
write32le(buf + 16, itype(ADDI, X_T0, X_T2, lo12(offset)));
write32le(buf + 20, itype(SRLI, X_T1, X_T1, ctx.arg.is64 ? 1 : 2));
write32le(buf + 24, itype(load, X_T0, X_T0, ctx.arg.wordsize));
write32le(buf + 28, itype(JALR, 0, X_T3, 0));
}
void RISCV::writePlt(uint8_t *buf, const Symbol &sym,
uint64_t pltEntryAddr) const {
// 1: auipc t3, %pcrel_hi(f@.got.plt)
// l[wd] t3, %pcrel_lo(1b)(t3)
// jalr t1, t3
// nop
2024-10-06 16:59:04 -07:00
uint32_t offset = sym.getGotPltVA(ctx) - pltEntryAddr;
write32le(buf + 0, utype(AUIPC, X_T3, hi20(offset)));
write32le(buf + 4, itype(ctx.arg.is64 ? LD : LW, X_T3, X_T3, lo12(offset)));
write32le(buf + 8, itype(JALR, X_T1, X_T3, 0));
write32le(buf + 12, itype(ADDI, 0, 0, 0));
}
RelType RISCV::getDynRel(RelType type) const {
return type == ctx.target->symbolicRel ? type
: static_cast<RelType>(R_RISCV_NONE);
}
RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
const uint8_t *loc) const {
switch (type) {
case R_RISCV_NONE:
return R_NONE;
case R_RISCV_32:
case R_RISCV_64:
case R_RISCV_HI20:
case R_RISCV_LO12_I:
case R_RISCV_LO12_S:
return R_ABS;
case R_RISCV_ADD8:
case R_RISCV_ADD16:
case R_RISCV_ADD32:
case R_RISCV_ADD64:
case R_RISCV_SET6:
case R_RISCV_SET8:
case R_RISCV_SET16:
case R_RISCV_SET32:
case R_RISCV_SUB6:
case R_RISCV_SUB8:
case R_RISCV_SUB16:
case R_RISCV_SUB32:
case R_RISCV_SUB64:
return RE_RISCV_ADD;
case R_RISCV_JAL:
case R_RISCV_BRANCH:
case R_RISCV_PCREL_HI20:
case R_RISCV_RVC_BRANCH:
case R_RISCV_RVC_JUMP:
case R_RISCV_32_PCREL:
return R_PC;
case R_RISCV_CALL:
case R_RISCV_CALL_PLT:
case R_RISCV_PLT32:
return R_PLT_PC;
case R_RISCV_GOT_HI20:
case R_RISCV_GOT32_PCREL:
return R_GOT_PC;
case R_RISCV_PCREL_LO12_I:
case R_RISCV_PCREL_LO12_S:
return RE_RISCV_PC_INDIRECT;
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
case R_RISCV_TLSDESC_HI20:
case R_RISCV_TLSDESC_LOAD_LO12:
case R_RISCV_TLSDESC_ADD_LO12:
return R_TLSDESC_PC;
case R_RISCV_TLSDESC_CALL:
return R_TLSDESC_CALL;
case R_RISCV_TLS_GD_HI20:
return R_TLSGD_PC;
case R_RISCV_TLS_GOT_HI20:
return R_GOT_PC;
case R_RISCV_TPREL_HI20:
case R_RISCV_TPREL_LO12_I:
case R_RISCV_TPREL_LO12_S:
return R_TPREL;
case R_RISCV_ALIGN:
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
return R_RELAX_HINT;
case R_RISCV_TPREL_ADD:
case R_RISCV_RELAX:
return ctx.arg.relax ? R_RELAX_HINT : R_NONE;
case R_RISCV_SET_ULEB128:
case R_RISCV_SUB_ULEB128:
return RE_RISCV_LEB128;
default:
Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
<< ") against symbol " << &s;
return R_NONE;
}
}
void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
const unsigned bits = ctx.arg.wordsize * 8;
switch (rel.type) {
case R_RISCV_32:
write32le(loc, val);
return;
case R_RISCV_64:
write64le(loc, val);
return;
case R_RISCV_RVC_BRANCH: {
2024-10-13 11:14:40 -07:00
checkInt(ctx, loc, val, 9, rel);
checkAlignment(ctx, loc, val, 2, rel);
uint16_t insn = read16le(loc) & 0xE383;
uint16_t imm8 = extractBits(val, 8, 8) << 12;
uint16_t imm4_3 = extractBits(val, 4, 3) << 10;
uint16_t imm7_6 = extractBits(val, 7, 6) << 5;
uint16_t imm2_1 = extractBits(val, 2, 1) << 3;
uint16_t imm5 = extractBits(val, 5, 5) << 2;
insn |= imm8 | imm4_3 | imm7_6 | imm2_1 | imm5;
[Coding style change] Rename variables so that they start with a lowercase letter This patch is mechanically generated by clang-llvm-rename tool that I wrote using Clang Refactoring Engine just for creating this patch. You can see the source code of the tool at https://reviews.llvm.org/D64123. There's no manual post-processing; you can generate the same patch by re-running the tool against lld's code base. Here is the main discussion thread to change the LLVM coding style: https://lists.llvm.org/pipermail/llvm-dev/2019-February/130083.html In the discussion thread, I proposed we use lld as a testbed for variable naming scheme change, and this patch does that. I chose to rename variables so that they are in camelCase, just because that is a minimal change to make variables to start with a lowercase letter. Note to downstream patch maintainers: if you are maintaining a downstream lld repo, just rebasing ahead of this commit would cause massive merge conflicts because this patch essentially changes every line in the lld subdirectory. But there's a remedy. clang-llvm-rename tool is a batch tool, so you can rename variables in your downstream repo with the tool. Given that, here is how to rebase your repo to a commit after the mass renaming: 1. rebase to the commit just before the mass variable renaming, 2. apply the tool to your downstream repo to mass-rename variables locally, and 3. rebase again to the head. Most changes made by the tool should be identical for a downstream repo and for the head, so at the step 3, almost all changes should be merged and disappear. I'd expect that there would be some lines that you need to merge by hand, but that shouldn't be too many. Differential Revision: https://reviews.llvm.org/D64121 llvm-svn: 365595
2019-07-10 05:00:37 +00:00
write16le(loc, insn);
return;
}
case R_RISCV_RVC_JUMP: {
2024-10-13 11:14:40 -07:00
checkInt(ctx, loc, val, 12, rel);
checkAlignment(ctx, loc, val, 2, rel);
uint16_t insn = read16le(loc) & 0xE003;
uint16_t imm11 = extractBits(val, 11, 11) << 12;
uint16_t imm4 = extractBits(val, 4, 4) << 11;
uint16_t imm9_8 = extractBits(val, 9, 8) << 9;
uint16_t imm10 = extractBits(val, 10, 10) << 8;
uint16_t imm6 = extractBits(val, 6, 6) << 7;
uint16_t imm7 = extractBits(val, 7, 7) << 6;
uint16_t imm3_1 = extractBits(val, 3, 1) << 3;
uint16_t imm5 = extractBits(val, 5, 5) << 2;
insn |= imm11 | imm4 | imm9_8 | imm10 | imm6 | imm7 | imm3_1 | imm5;
[Coding style change] Rename variables so that they start with a lowercase letter This patch is mechanically generated by clang-llvm-rename tool that I wrote using Clang Refactoring Engine just for creating this patch. You can see the source code of the tool at https://reviews.llvm.org/D64123. There's no manual post-processing; you can generate the same patch by re-running the tool against lld's code base. Here is the main discussion thread to change the LLVM coding style: https://lists.llvm.org/pipermail/llvm-dev/2019-February/130083.html In the discussion thread, I proposed we use lld as a testbed for variable naming scheme change, and this patch does that. I chose to rename variables so that they are in camelCase, just because that is a minimal change to make variables to start with a lowercase letter. Note to downstream patch maintainers: if you are maintaining a downstream lld repo, just rebasing ahead of this commit would cause massive merge conflicts because this patch essentially changes every line in the lld subdirectory. But there's a remedy. clang-llvm-rename tool is a batch tool, so you can rename variables in your downstream repo with the tool. Given that, here is how to rebase your repo to a commit after the mass renaming: 1. rebase to the commit just before the mass variable renaming, 2. apply the tool to your downstream repo to mass-rename variables locally, and 3. rebase again to the head. Most changes made by the tool should be identical for a downstream repo and for the head, so at the step 3, almost all changes should be merged and disappear. I'd expect that there would be some lines that you need to merge by hand, but that shouldn't be too many. Differential Revision: https://reviews.llvm.org/D64121 llvm-svn: 365595
2019-07-10 05:00:37 +00:00
write16le(loc, insn);
return;
}
case R_RISCV_JAL: {
2024-10-13 11:14:40 -07:00
checkInt(ctx, loc, val, 21, rel);
checkAlignment(ctx, loc, val, 2, rel);
uint32_t insn = read32le(loc) & 0xFFF;
uint32_t imm20 = extractBits(val, 20, 20) << 31;
uint32_t imm10_1 = extractBits(val, 10, 1) << 21;
uint32_t imm11 = extractBits(val, 11, 11) << 20;
uint32_t imm19_12 = extractBits(val, 19, 12) << 12;
insn |= imm20 | imm10_1 | imm11 | imm19_12;
write32le(loc, insn);
return;
}
case R_RISCV_BRANCH: {
2024-10-13 11:14:40 -07:00
checkInt(ctx, loc, val, 13, rel);
checkAlignment(ctx, loc, val, 2, rel);
uint32_t insn = read32le(loc) & 0x1FFF07F;
uint32_t imm12 = extractBits(val, 12, 12) << 31;
uint32_t imm10_5 = extractBits(val, 10, 5) << 25;
uint32_t imm4_1 = extractBits(val, 4, 1) << 8;
uint32_t imm11 = extractBits(val, 11, 11) << 7;
insn |= imm12 | imm10_5 | imm4_1 | imm11;
write32le(loc, insn);
return;
}
// auipc + jalr pair
case R_RISCV_CALL:
case R_RISCV_CALL_PLT: {
int64_t hi = SignExtend64(val + 0x800, bits) >> 12;
2024-10-13 11:14:40 -07:00
checkInt(ctx, loc, hi, 20, rel);
if (isInt<20>(hi)) {
relocateNoSym(loc, R_RISCV_PCREL_HI20, val);
relocateNoSym(loc + 4, R_RISCV_PCREL_LO12_I, val);
}
return;
}
case R_RISCV_GOT_HI20:
case R_RISCV_PCREL_HI20:
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
case R_RISCV_TLSDESC_HI20:
case R_RISCV_TLS_GD_HI20:
case R_RISCV_TLS_GOT_HI20:
case R_RISCV_TPREL_HI20:
case R_RISCV_HI20: {
uint64_t hi = val + 0x800;
2024-10-13 11:14:40 -07:00
checkInt(ctx, loc, SignExtend64(hi, bits) >> 12, 20, rel);
write32le(loc, (read32le(loc) & 0xFFF) | (hi & 0xFFFFF000));
return;
}
case R_RISCV_PCREL_LO12_I:
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
case R_RISCV_TLSDESC_LOAD_LO12:
case R_RISCV_TLSDESC_ADD_LO12:
case R_RISCV_TPREL_LO12_I:
case R_RISCV_LO12_I: {
uint64_t hi = (val + 0x800) >> 12;
uint64_t lo = val - (hi << 12);
write32le(loc, setLO12_I(read32le(loc), lo & 0xfff));
return;
}
case R_RISCV_PCREL_LO12_S:
case R_RISCV_TPREL_LO12_S:
case R_RISCV_LO12_S: {
uint64_t hi = (val + 0x800) >> 12;
uint64_t lo = val - (hi << 12);
write32le(loc, setLO12_S(read32le(loc), lo));
return;
}
case INTERNAL_R_RISCV_GPREL_I:
case INTERNAL_R_RISCV_GPREL_S: {
Defined *gp = ctx.sym.riscvGlobalPointer;
2024-10-19 20:32:58 -07:00
int64_t displace = SignExtend64(val - gp->getVA(ctx), bits);
2024-10-13 11:14:40 -07:00
checkInt(ctx, loc, displace, 12, rel);
uint32_t insn = (read32le(loc) & ~(31 << 15)) | (X_GP << 15);
if (rel.type == INTERNAL_R_RISCV_GPREL_I)
insn = setLO12_I(insn, displace);
else
insn = setLO12_S(insn, displace);
write32le(loc, insn);
return;
}
case R_RISCV_ADD8:
*loc += val;
return;
case R_RISCV_ADD16:
write16le(loc, read16le(loc) + val);
return;
case R_RISCV_ADD32:
write32le(loc, read32le(loc) + val);
return;
case R_RISCV_ADD64:
write64le(loc, read64le(loc) + val);
return;
case R_RISCV_SUB6:
*loc = (*loc & 0xc0) | (((*loc & 0x3f) - val) & 0x3f);
return;
case R_RISCV_SUB8:
*loc -= val;
return;
case R_RISCV_SUB16:
write16le(loc, read16le(loc) - val);
return;
case R_RISCV_SUB32:
write32le(loc, read32le(loc) - val);
return;
case R_RISCV_SUB64:
write64le(loc, read64le(loc) - val);
return;
case R_RISCV_SET6:
*loc = (*loc & 0xc0) | (val & 0x3f);
return;
case R_RISCV_SET8:
*loc = val;
return;
case R_RISCV_SET16:
write16le(loc, val);
return;
case R_RISCV_SET32:
case R_RISCV_32_PCREL:
case R_RISCV_PLT32:
case R_RISCV_GOT32_PCREL:
2024-10-13 11:14:40 -07:00
checkInt(ctx, loc, val, 32, rel);
write32le(loc, val);
return;
case R_RISCV_TLS_DTPREL32:
write32le(loc, val - dtpOffset);
break;
case R_RISCV_TLS_DTPREL64:
write64le(loc, val - dtpOffset);
break;
case R_RISCV_RELAX:
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
return;
case R_RISCV_TLSDESC:
// The addend is stored in the second word.
if (ctx.arg.is64)
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
write64le(loc + 8, val);
else
write32le(loc + 4, val);
break;
default:
llvm_unreachable("unknown relocation");
}
}
2024-01-25 10:09:43 -08:00
static bool relaxable(ArrayRef<Relocation> relocs, size_t i) {
return i + 1 != relocs.size() && relocs[i + 1].type == R_RISCV_RELAX;
}
2024-10-06 00:14:12 -07:00
static void tlsdescToIe(Ctx &ctx, uint8_t *loc, const Relocation &rel,
uint64_t val) {
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
switch (rel.type) {
case R_RISCV_TLSDESC_HI20:
case R_RISCV_TLSDESC_LOAD_LO12:
write32le(loc, 0x00000013); // nop
break;
case R_RISCV_TLSDESC_ADD_LO12:
write32le(loc, utype(AUIPC, X_A0, hi20(val))); // auipc a0,<hi20>
break;
case R_RISCV_TLSDESC_CALL:
if (ctx.arg.is64)
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
write32le(loc, itype(LD, X_A0, X_A0, lo12(val))); // ld a0,<lo12>(a0)
else
write32le(loc, itype(LW, X_A0, X_A0, lo12(val))); // lw a0,<lo12>(a0)
break;
default:
llvm_unreachable("unsupported relocation for TLSDESC to IE");
}
}
static void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
switch (rel.type) {
case R_RISCV_TLSDESC_HI20:
case R_RISCV_TLSDESC_LOAD_LO12:
write32le(loc, 0x00000013); // nop
return;
case R_RISCV_TLSDESC_ADD_LO12:
if (isInt<12>(val))
write32le(loc, 0x00000013); // nop
else
write32le(loc, utype(LUI, X_A0, hi20(val))); // lui a0,<hi20>
return;
case R_RISCV_TLSDESC_CALL:
if (isInt<12>(val))
write32le(loc, itype(ADDI, X_A0, 0, val)); // addi a0,zero,<lo12>
else
write32le(loc, itype(ADDI, X_A0, X_A0, lo12(val))); // addi a0,a0,<lo12>
return;
default:
llvm_unreachable("unsupported relocation for TLSDESC to LE");
}
}
void RISCV::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
uint64_t secAddr = sec.getOutputSection()->addr;
if (auto *s = dyn_cast<InputSection>(&sec))
secAddr += s->outSecOff;
else if (auto *ehIn = dyn_cast<EhInputSection>(&sec))
secAddr += ehIn->getParent()->outSecOff;
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
uint64_t tlsdescVal = 0;
bool tlsdescRelax = false, isToLe = false;
2024-01-25 10:09:43 -08:00
const ArrayRef<Relocation> relocs = sec.relocs();
for (size_t i = 0, size = relocs.size(); i != size; ++i) {
const Relocation &rel = relocs[i];
uint8_t *loc = buf + rel.offset;
uint64_t val = sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset);
switch (rel.expr) {
case R_RELAX_HINT:
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
continue;
case R_TLSDESC_PC:
// For R_RISCV_TLSDESC_HI20, store &got(sym)-PC to be used by the
// following two instructions L[DW] and ADDI.
if (rel.type == R_RISCV_TLSDESC_HI20)
tlsdescVal = val;
else
val = tlsdescVal;
break;
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
case R_RELAX_TLS_GD_TO_IE:
// Only R_RISCV_TLSDESC_HI20 reaches here. tlsdescVal will be finalized
// after we see R_RISCV_TLSDESC_ADD_LO12 in the R_RELAX_TLS_GD_TO_LE case.
// The net effect is that tlsdescVal will be smaller than `val` to take
// into account of NOP instructions (in the absence of R_RISCV_RELAX)
// before AUIPC.
tlsdescVal = val + rel.offset;
isToLe = false;
tlsdescRelax = relaxable(relocs, i);
if (!tlsdescRelax)
2024-10-06 00:14:12 -07:00
tlsdescToIe(ctx, loc, rel, val);
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
continue;
case R_RELAX_TLS_GD_TO_LE:
// See the comment in handleTlsRelocation. For TLSDESC=>IE,
// R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12,CALL} also reach here. If isToLe is
// false, this is actually TLSDESC=>IE optimization.
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
if (rel.type == R_RISCV_TLSDESC_HI20) {
tlsdescVal = val;
isToLe = true;
tlsdescRelax = relaxable(relocs, i);
} else {
if (!isToLe && rel.type == R_RISCV_TLSDESC_ADD_LO12)
tlsdescVal -= rel.offset;
val = tlsdescVal;
}
// When NOP conversion is eligible and relaxation applies, don't write a
// NOP in case an unrelated instruction follows the current instruction.
if (tlsdescRelax &&
(rel.type == R_RISCV_TLSDESC_HI20 ||
rel.type == R_RISCV_TLSDESC_LOAD_LO12 ||
(rel.type == R_RISCV_TLSDESC_ADD_LO12 && isToLe && !hi20(val))))
continue;
if (isToLe)
tlsdescToLe(loc, rel, val);
else
2024-10-06 00:14:12 -07:00
tlsdescToIe(ctx, loc, rel, val);
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
continue;
case RE_RISCV_LEB128:
if (i + 1 < size) {
2024-01-25 10:09:43 -08:00
const Relocation &rel1 = relocs[i + 1];
if (rel.type == R_RISCV_SET_ULEB128 &&
rel1.type == R_RISCV_SUB_ULEB128 && rel.offset == rel1.offset) {
2024-10-19 20:32:58 -07:00
auto val = rel.sym->getVA(ctx, rel.addend) -
rel1.sym->getVA(ctx, rel1.addend);
if (overwriteULEB128(loc, val) >= 0x80)
Err(ctx) << sec.getLocation(rel.offset) << ": ULEB128 value " << val
<< " exceeds available space; references '" << rel.sym
<< "'";
++i;
continue;
}
}
Err(ctx) << sec.getLocation(rel.offset)
<< ": R_RISCV_SET_ULEB128 not paired with R_RISCV_SUB_SET128";
return;
default:
break;
}
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
relocate(loc, rel, val);
}
}
2024-10-06 00:14:12 -07:00
void elf::initSymbolAnchors(Ctx &ctx) {
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
SmallVector<InputSection *, 0> storage;
for (OutputSection *osec : ctx.outputSections) {
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
if (!(osec->flags & SHF_EXECINSTR))
continue;
for (InputSection *sec : getInputSections(*osec, storage)) {
sec->relaxAux = make<RelaxAux>();
if (sec->relocs().size()) {
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
sec->relaxAux->relocDeltas =
std::make_unique<uint32_t[]>(sec->relocs().size());
sec->relaxAux->relocTypes =
std::make_unique<RelType[]>(sec->relocs().size());
}
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
}
}
// Store anchors (st_value and st_value+st_size) for symbols relative to text
// sections.
//
// For a defined symbol foo, we may have `d->file != file` with --wrap=foo.
// We should process foo, as the defining object file's symbol table may not
// contain foo after redirectSymbols changed the foo entry to __wrap_foo. To
// avoid adding a Defined that is undefined in one object file, use
// `!d->scriptDefined` to exclude symbols that are definitely not wrapped.
//
// `relaxAux->anchors` may contain duplicate symbols, but that is fine.
for (InputFile *file : ctx.objectFiles)
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
for (Symbol *sym : file->getSymbols()) {
auto *d = dyn_cast<Defined>(sym);
if (!d || (d->file != file && !d->scriptDefined))
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
continue;
if (auto *sec = dyn_cast_or_null<InputSection>(d->section))
if (sec->flags & SHF_EXECINSTR && sec->relaxAux) {
// If sec is discarded, relaxAux will be nullptr.
sec->relaxAux->anchors.push_back({d->value, d, false});
sec->relaxAux->anchors.push_back({d->value + d->size, d, true});
}
}
// Sort anchors by offset so that we can find the closest relocation
// efficiently. For a zero size symbol, ensure that its start anchor precedes
// its end anchor. For two symbols with anchors at the same offset, their
// order does not matter.
for (OutputSection *osec : ctx.outputSections) {
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
if (!(osec->flags & SHF_EXECINSTR))
continue;
for (InputSection *sec : getInputSections(*osec, storage)) {
llvm::sort(sec->relaxAux->anchors, [](auto &a, auto &b) {
return std::make_pair(a.offset, a.end) <
std::make_pair(b.offset, b.end);
});
}
}
}
// Relax R_RISCV_CALL/R_RISCV_CALL_PLT auipc+jalr to c.j, c.jal, or jal.
2024-10-06 00:14:12 -07:00
static void relaxCall(Ctx &ctx, const InputSection &sec, size_t i, uint64_t loc,
Relocation &r, uint32_t &remove) {
2024-10-06 00:14:12 -07:00
const bool rvc = getEFlags(ctx, sec.file) & EF_RISCV_RVC;
const Symbol &sym = *r.sym;
const uint64_t insnPair = read64le(sec.content().data() + r.offset);
const uint32_t rd = extractBits(insnPair, 32 + 11, 32 + 7);
const uint64_t dest =
2024-10-19 20:32:58 -07:00
(r.expr == R_PLT_PC ? sym.getPltVA(ctx) : sym.getVA(ctx)) + r.addend;
const int64_t displace = dest - loc;
if (rvc && isInt<12>(displace) && rd == 0) {
sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP;
sec.relaxAux->writes.push_back(0xa001); // c.j
remove = 6;
} else if (rvc && isInt<12>(displace) && rd == X_RA &&
!ctx.arg.is64) { // RV32C only
sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP;
sec.relaxAux->writes.push_back(0x2001); // c.jal
remove = 6;
} else if (isInt<21>(displace)) {
sec.relaxAux->relocTypes[i] = R_RISCV_JAL;
sec.relaxAux->writes.push_back(0x6f | rd << 7); // jal
remove = 4;
}
}
// Relax local-exec TLS when hi20 is zero.
2024-10-19 21:08:50 -07:00
static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i,
uint64_t loc, Relocation &r, uint32_t &remove) {
2024-10-19 20:32:58 -07:00
uint64_t val = r.sym->getVA(ctx, r.addend);
if (hi20(val) != 0)
return;
uint32_t insn = read32le(sec.content().data() + r.offset);
switch (r.type) {
case R_RISCV_TPREL_HI20:
case R_RISCV_TPREL_ADD:
// Remove lui rd, %tprel_hi(x) and add rd, rd, tp, %tprel_add(x).
sec.relaxAux->relocTypes[i] = R_RISCV_RELAX;
remove = 4;
break;
case R_RISCV_TPREL_LO12_I:
// addi rd, rd, %tprel_lo(x) => addi rd, tp, st_value(x)
sec.relaxAux->relocTypes[i] = R_RISCV_32;
insn = (insn & ~(31 << 15)) | (X_TP << 15);
sec.relaxAux->writes.push_back(setLO12_I(insn, val));
break;
case R_RISCV_TPREL_LO12_S:
// sw rs, %tprel_lo(x)(rd) => sw rs, st_value(x)(rd)
sec.relaxAux->relocTypes[i] = R_RISCV_32;
insn = (insn & ~(31 << 15)) | (X_TP << 15);
sec.relaxAux->writes.push_back(setLO12_S(insn, val));
break;
}
}
2024-10-06 00:14:12 -07:00
static void relaxHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
uint64_t loc, Relocation &r, uint32_t &remove) {
const Defined *gp = ctx.sym.riscvGlobalPointer;
if (!gp)
return;
2024-10-19 20:32:58 -07:00
if (!isInt<12>(r.sym->getVA(ctx, r.addend) - gp->getVA(ctx)))
return;
switch (r.type) {
case R_RISCV_HI20:
// Remove lui rd, %hi20(x).
sec.relaxAux->relocTypes[i] = R_RISCV_RELAX;
remove = 4;
break;
case R_RISCV_LO12_I:
sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_GPREL_I;
break;
case R_RISCV_LO12_S:
sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_GPREL_S;
break;
}
}
2024-10-06 00:14:12 -07:00
static bool relax(Ctx &ctx, InputSection &sec) {
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
const uint64_t secAddr = sec.getVA();
2024-01-25 10:09:43 -08:00
const MutableArrayRef<Relocation> relocs = sec.relocs();
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
auto &aux = *sec.relaxAux;
bool changed = false;
ArrayRef<SymbolAnchor> sa = ArrayRef(aux.anchors);
uint64_t delta = 0;
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
bool tlsdescRelax = false, toLeShortForm = false;
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
2024-01-25 10:09:43 -08:00
std::fill_n(aux.relocTypes.get(), relocs.size(), R_RISCV_NONE);
aux.writes.clear();
2024-01-25 10:09:43 -08:00
for (auto [i, r] : llvm::enumerate(relocs)) {
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
const uint64_t loc = secAddr + r.offset - delta;
uint32_t &cur = aux.relocDeltas[i], remove = 0;
switch (r.type) {
case R_RISCV_ALIGN: {
const uint64_t nextLoc = loc + r.addend;
const uint64_t align = PowerOf2Ceil(r.addend + 2);
// All bytes beyond the alignment boundary should be removed.
remove = nextLoc - ((loc + align - 1) & -align);
[LLD][RISCV] Report error for unsatisfiable RISCV_ALIGN (#74121) If we have a RISCV_ALIGN relocation which can't be satisfied with the available space provided, report an error rather than silently continuing with a corrupt state. For context, https://github.com/llvm/llvm-project/pull/73977 fixes an LLD bug which can cause this effect, but that's not the only source of such cases. Another is our hard-to-fix set of LTO problems. We can have a single function which was compiled without C in an otherwise entirely C module. Until we have all of the mapping symbols and related mechanisms implemented, this case can continue to arise. I think it's very important from a user interface perspective to have non-assertion builds report an error in this case. If we don't report an error here, we can crash the linker (due to the fatal error at the bottom of the function), or if we're less lucky silently produce a malformed binary. There's a couple of known defects with this patch. First, there's no test case. I don't know how to write a stable test case for this that doesn't involve hex editing an object file, or abusing the LTO bug that we hope to fix. Second, this will report an error on each relax iteration. I explored trying to report an error only once after relaxation, but ended up deciding I didn't have the context to implement it safely. I would be thrilled if someone more knowledgeable of this code wants to write a better version of this patch, but in the meantime, I believe we should land this to address the user experience problem described above.
2024-01-17 14:32:20 -08:00
// If we can't satisfy this alignment, we've found a bad input.
if (LLVM_UNLIKELY(static_cast<int32_t>(remove) < 0)) {
Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc)
<< "insufficient padding bytes for " << r.type << ": "
<< r.addend
<< " bytes available "
"for requested alignment of "
<< align << " bytes";
[LLD][RISCV] Report error for unsatisfiable RISCV_ALIGN (#74121) If we have a RISCV_ALIGN relocation which can't be satisfied with the available space provided, report an error rather than silently continuing with a corrupt state. For context, https://github.com/llvm/llvm-project/pull/73977 fixes an LLD bug which can cause this effect, but that's not the only source of such cases. Another is our hard-to-fix set of LTO problems. We can have a single function which was compiled without C in an otherwise entirely C module. Until we have all of the mapping symbols and related mechanisms implemented, this case can continue to arise. I think it's very important from a user interface perspective to have non-assertion builds report an error in this case. If we don't report an error here, we can crash the linker (due to the fatal error at the bottom of the function), or if we're less lucky silently produce a malformed binary. There's a couple of known defects with this patch. First, there's no test case. I don't know how to write a stable test case for this that doesn't involve hex editing an object file, or abusing the LTO bug that we hope to fix. Second, this will report an error on each relax iteration. I explored trying to report an error only once after relaxation, but ended up deciding I didn't have the context to implement it safely. I would be thrilled if someone more knowledgeable of this code wants to write a better version of this patch, but in the meantime, I believe we should land this to address the user experience problem described above.
2024-01-17 14:32:20 -08:00
remove = 0;
}
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
break;
}
case R_RISCV_CALL:
case R_RISCV_CALL_PLT:
2024-01-25 10:09:43 -08:00
if (relaxable(relocs, i))
2024-10-06 00:14:12 -07:00
relaxCall(ctx, sec, i, loc, r, remove);
break;
case R_RISCV_TPREL_HI20:
case R_RISCV_TPREL_ADD:
case R_RISCV_TPREL_LO12_I:
case R_RISCV_TPREL_LO12_S:
2024-01-25 10:09:43 -08:00
if (relaxable(relocs, i))
2024-10-19 21:08:50 -07:00
relaxTlsLe(ctx, sec, i, loc, r, remove);
break;
case R_RISCV_HI20:
case R_RISCV_LO12_I:
case R_RISCV_LO12_S:
2024-01-25 10:09:43 -08:00
if (relaxable(relocs, i))
2024-10-06 00:14:12 -07:00
relaxHi20Lo12(ctx, sec, i, loc, r, remove);
break;
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
case R_RISCV_TLSDESC_HI20:
// For TLSDESC=>LE, we can use the short form if hi20 is zero.
tlsdescRelax = relaxable(relocs, i);
toLeShortForm = tlsdescRelax && r.expr == R_RELAX_TLS_GD_TO_LE &&
2024-10-19 20:32:58 -07:00
!hi20(r.sym->getVA(ctx, r.addend));
[ELF] Implement R_RISCV_TLSDESC for RISC-V Support R_RISCV_TLSDESC_HI20/R_RISCV_TLSDESC_LOAD_LO12/R_RISCV_TLSDESC_ADD_LO12/R_RISCV_TLSDESC_CALL. LOAD_LO12/ADD_LO12/CALL relocations reference a label at the HI20 location, which requires special handling. We save the value of HI20 to be reused. Two interleaved TLSDESC code sequences, which compilers do not generate, are unsupported. For -no-pie/-pie links, TLSDESC to initial-exec or local-exec optimizations are eligible. Implement the relevant hooks (R_RELAX_TLS_GD_TO_LE, R_RELAX_TLS_GD_TO_IE): the first two instructions are converted to NOP while the latter two are converted to a GOT load or a lui+addi. The first two instructions, which would be converted to NOP, are removed instead in the presence of relaxation. Relaxation is eligible as long as the R_RISCV_TLSDESC_HI20 relocation has a pairing R_RISCV_RELAX, regardless of whether the following instructions have a R_RISCV_RELAX. In addition, for the TLSDESC to LE optimization (`lui a0,<hi20>; addi a0,a0,<lo12>`), `lui` can be removed (i.e. use the short form) if hi20 is 0. ``` // TLSDESC to LE/IE optimization .Ltlsdesc_hi2: auipc a4, %tlsdesc_hi(c) # if relax: remove; otherwise, NOP load a5, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a4) # if relax: remove; otherwise, NOP addi a0, a4, %tlsdesc_add_lo(.Ltlsdesc_hi2) # if LE && !hi20 {if relax: remove; otherwise, NOP} jalr t0, 0(a5), %tlsdesc_call(.Ltlsdesc_hi2) add a0, a0, tp ``` The implementation carefully ensures that an instruction unrelated to the current TLSDESC code sequence, if immediately follows a removable instruction (HI20 or LOAD_LO12 OR (LE-specific) ADD_LO12), is not converted to NOP. * `riscv64-tlsdesc.s` is inspired by `i386-tlsdesc-gd.s` (https://reviews.llvm.org/D112582). * `riscv64-tlsdesc-relax.s` tests linker relaxation. * `riscv-tlsdesc-gd-mixed.s` is inspired by `x86-64-tlsdesc-gd-mixed.s` (https://reviews.llvm.org/D116900). Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/373 Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/79239
2024-01-25 13:42:31 -08:00
[[fallthrough]];
case R_RISCV_TLSDESC_LOAD_LO12:
// For TLSDESC=>LE/IE, AUIPC and L[DW] are removed if relaxable.
if (tlsdescRelax && r.expr != R_TLSDESC_PC)
remove = 4;
break;
case R_RISCV_TLSDESC_ADD_LO12:
if (toLeShortForm)
remove = 4;
break;
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
}
// For all anchors whose offsets are <= r.offset, they are preceded by
// the previous relocation whose `relocDeltas` value equals `delta`.
// Decrease their st_value and update their st_size.
for (; sa.size() && sa[0].offset <= r.offset; sa = sa.slice(1)) {
if (sa[0].end)
sa[0].d->size = sa[0].offset - delta - sa[0].d->value;
else
sa[0].d->value = sa[0].offset - delta;
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
}
delta += remove;
if (delta != cur) {
cur = delta;
changed = true;
}
}
for (const SymbolAnchor &a : sa) {
if (a.end)
a.d->size = a.offset - delta - a.d->value;
else
a.d->value = a.offset - delta;
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
}
// Inform assignAddresses that the size has changed.
if (!isUInt<32>(delta))
Fatal(ctx) << "section size decrease is too large: " << delta;
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
sec.bytesDropped = delta;
return changed;
}
// When relaxing just R_RISCV_ALIGN, relocDeltas is usually changed only once in
// the absence of a linker script. For call and load/store R_RISCV_RELAX, code
// shrinkage may reduce displacement and make more relocations eligible for
// relaxation. Code shrinkage may increase displacement to a call/load/store
// target at a higher fixed address, invalidating an earlier relaxation. Any
// change in section sizes can have cascading effect and require another
// relaxation pass.
bool RISCV::relaxOnce(int pass) const {
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
llvm::TimeTraceScope timeScope("RISC-V relaxOnce");
if (ctx.arg.relocatable)
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
return false;
if (pass == 0)
2024-10-06 00:14:12 -07:00
initSymbolAnchors(ctx);
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
SmallVector<InputSection *, 0> storage;
bool changed = false;
for (OutputSection *osec : ctx.outputSections) {
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
if (!(osec->flags & SHF_EXECINSTR))
continue;
for (InputSection *sec : getInputSections(*osec, storage))
2024-10-06 00:14:12 -07:00
changed |= relax(ctx, *sec);
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
}
return changed;
}
void RISCV::finalizeRelax(int passes) const {
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
llvm::TimeTraceScope timeScope("Finalize RISC-V relaxation");
Log(ctx) << "relaxation passes: " << passes;
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
SmallVector<InputSection *, 0> storage;
for (OutputSection *osec : ctx.outputSections) {
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
if (!(osec->flags & SHF_EXECINSTR))
continue;
for (InputSection *sec : getInputSections(*osec, storage)) {
RelaxAux &aux = *sec->relaxAux;
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
if (!aux.relocDeltas)
continue;
MutableArrayRef<Relocation> rels = sec->relocs();
ArrayRef<uint8_t> old = sec->content();
size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1];
size_t writesIdx = 0;
uint8_t *p = ctx.bAlloc.Allocate<uint8_t>(newSize);
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
uint64_t offset = 0;
int64_t delta = 0;
sec->content_ = p;
sec->size = newSize;
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
sec->bytesDropped = 0;
// Update section content: remove NOPs for R_RISCV_ALIGN and rewrite
// instructions for relaxed relocations.
for (size_t i = 0, e = rels.size(); i != e; ++i) {
uint32_t remove = aux.relocDeltas[i] - delta;
delta = aux.relocDeltas[i];
if (remove == 0 && aux.relocTypes[i] == R_RISCV_NONE)
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
continue;
// Copy from last location to the current relocated location.
const Relocation &r = rels[i];
uint64_t size = r.offset - offset;
memcpy(p, old.data() + offset, size);
p += size;
// For R_RISCV_ALIGN, we will place `offset` in a location (among NOPs)
// to satisfy the alignment requirement. If both `remove` and r.addend
// are multiples of 4, it is as if we have skipped some NOPs. Otherwise
// we are in the middle of a 4-byte NOP, and we need to rewrite the NOP
// sequence.
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
int64_t skip = 0;
if (r.type == R_RISCV_ALIGN) {
if (remove % 4 || r.addend % 4) {
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
skip = r.addend - remove;
int64_t j = 0;
for (; j + 4 <= skip; j += 4)
write32le(p + j, 0x00000013); // nop
if (j != skip) {
assert(j + 2 == skip);
write16le(p + j, 0x0001); // c.nop
}
}
} else if (RelType newType = aux.relocTypes[i]) {
switch (newType) {
case INTERNAL_R_RISCV_GPREL_I:
case INTERNAL_R_RISCV_GPREL_S:
break;
case R_RISCV_RELAX:
// Used by relaxTlsLe to indicate the relocation is ignored.
break;
case R_RISCV_RVC_JUMP:
skip = 2;
write16le(p, aux.writes[writesIdx++]);
break;
case R_RISCV_JAL:
skip = 4;
write32le(p, aux.writes[writesIdx++]);
break;
case R_RISCV_32:
// Used by relaxTlsLe to write a uint32_t then suppress the handling
// in relocateAlloc.
skip = 4;
write32le(p, aux.writes[writesIdx++]);
aux.relocTypes[i] = R_RISCV_NONE;
break;
default:
llvm_unreachable("unsupported type");
}
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
}
p += skip;
offset = r.offset + skip + remove;
}
memcpy(p, old.data() + offset, old.size() - offset);
2022-07-09 22:41:58 +02:00
// Subtract the previous relocDeltas value from the relocation offset.
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
// For a pair of R_RISCV_CALL/R_RISCV_RELAX with the same offset, decrease
// their r_offset by the same delta.
delta = 0;
for (size_t i = 0, e = rels.size(); i != e;) {
uint64_t cur = rels[i].offset;
do {
rels[i].offset -= delta;
if (aux.relocTypes[i] != R_RISCV_NONE)
rels[i].type = aux.relocTypes[i];
[ELF] Relax R_RISCV_ALIGN Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and creates a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text section. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extrating `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimitered by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
2022-07-07 10:16:09 -07:00
} while (++i != e && rels[i].offset == cur);
delta = aux.relocDeltas[i - 1];
}
}
}
}
namespace {
// Representation of the merged .riscv.attributes input sections. The psABI
// specifies merge policy for attributes. E.g. if we link an object without an
// extension with an object with the extension, the output Tag_RISCV_arch shall
// contain the extension. Some tools like objdump parse .riscv.attributes and
// disabling some instructions if the first Tag_RISCV_arch does not contain an
// extension.
class RISCVAttributesSection final : public SyntheticSection {
public:
2024-10-10 23:07:02 -07:00
RISCVAttributesSection(Ctx &ctx)
: SyntheticSection(ctx, ".riscv.attributes", SHT_RISCV_ATTRIBUTES, 0, 1) {
2024-10-10 23:07:02 -07:00
}
size_t getSize() const override { return size; }
void writeTo(uint8_t *buf) override;
static constexpr StringRef vendor = "riscv";
DenseMap<unsigned, unsigned> intAttr;
DenseMap<unsigned, StringRef> strAttr;
size_t size = 0;
};
} // namespace
2024-11-14 22:30:29 -08:00
static void mergeArch(Ctx &ctx, RISCVISAUtils::OrderedExtensionMap &mergedExts,
unsigned &mergedXlen, const InputSectionBase *sec,
StringRef s) {
[lld][RISCV] Avoid error when encountering unrecognised ISA extensions/versions in RISC-V attributes This patch follows on from this RFC thread <https://discourse.llvm.org/t/rfc-resolving-issues-related-to-extension-versioning-in-risc-v/68472/> and the ensuing discussion in the RISC-V LLVM sync-up call. The consensus agreed that the behaviour change in LLD introduced in D138550 that results in object files including arch attributes with unrecognised extensions or versions of extensions is a regression and should be treated as such. As it stands, this logic means that LLD will error out if trying to link a RISC-V object file from LLVM HEAD (rv32i2p0/rv64i2p0) with one from current GCC (rv32i2p1/rv64i2p1 by default). There's a bigger discussion about exactly when to warn vs error and so on, and how to control that, and this patch doesn't attempt to address all those questions. It simply tries to fix the problem with a minimally invasive change, intended to be cherry-picked for 16.0.x (ideally 16.0.0, but queued for 16.0.1 if we're too late in the release cycle). As you can see from the test changes, although the changed logic is mostly more permissive, it will reject some embedded arch strings that were accepted before. Because the same logic was previously used for parsing human-written -march as for the arch attribute (intended to be stored in normalised form), various short-hand formats were previously accepted. Maintaining support for such ill-formed attributes would substantially complicate the logic, and given the previous implementation was so much stricter in every other way, was surely a bug rather than a design choice. Surprisingly, the precise rules for how numbers can be embedded into extension names isn't fully defined (there is a PR to the ISA manual that is not yet merged <https://github.com/riscv/riscv-isa-manual/pull/718>). In the absence of specific criteria for rejecting extensions names that would be ambiguous in abbreviated form, `RISCVISAInfo::parseArchStringNormalized` just pulls out the version information from the end of each extension description. Differential Revision: https://reviews.llvm.org/D144353
2023-02-25 06:17:40 +00:00
auto maybeInfo = RISCVISAInfo::parseNormalizedArchString(s);
if (!maybeInfo) {
Err(ctx) << sec << ": " << s << ": " << maybeInfo.takeError();
return;
}
// Merge extensions.
RISCVISAInfo &info = **maybeInfo;
if (mergedExts.empty()) {
mergedExts = info.getExtensions();
mergedXlen = info.getXLen();
} else {
for (const auto &ext : info.getExtensions()) {
auto p = mergedExts.insert(ext);
if (!p.second) {
if (std::tie(p.first->second.Major, p.first->second.Minor) <
std::tie(ext.second.Major, ext.second.Minor))
p.first->second = ext.second;
}
}
}
}
2024-11-14 22:30:29 -08:00
static void mergeAtomic(Ctx &ctx, DenseMap<unsigned, unsigned>::iterator it,
const InputSectionBase *oldSection,
const InputSectionBase *newSection,
RISCVAttrs::RISCVAtomicAbiTag oldTag,
RISCVAttrs::RISCVAtomicAbiTag newTag) {
using RISCVAttrs::RISCVAtomicAbiTag;
// Same tags stay the same, and UNKNOWN is compatible with anything
if (oldTag == newTag || newTag == RISCVAtomicAbiTag::UNKNOWN)
return;
auto reportAbiError = [&]() {
Err(ctx) << "atomic abi mismatch for " << oldSection->name << "\n>>> "
<< oldSection << ": atomic_abi=" << static_cast<unsigned>(oldTag)
<< "\n>>> " << newSection
<< ": atomic_abi=" << static_cast<unsigned>(newTag);
};
2024-11-14 22:30:29 -08:00
auto reportUnknownAbiError = [&](const InputSectionBase *section,
RISCVAtomicAbiTag tag) {
switch (tag) {
case RISCVAtomicAbiTag::UNKNOWN:
case RISCVAtomicAbiTag::A6C:
case RISCVAtomicAbiTag::A6S:
case RISCVAtomicAbiTag::A7:
return;
};
Err(ctx) << "unknown atomic abi for " << section->name << "\n>>> "
<< section << ": atomic_abi=" << static_cast<unsigned>(tag);
};
switch (oldTag) {
case RISCVAtomicAbiTag::UNKNOWN:
it->getSecond() = static_cast<unsigned>(newTag);
return;
case RISCVAtomicAbiTag::A6C:
switch (newTag) {
case RISCVAtomicAbiTag::A6S:
it->getSecond() = static_cast<unsigned>(RISCVAtomicAbiTag::A6C);
return;
case RISCVAtomicAbiTag::A7:
reportAbiError();
return;
case RISCVAttrs::RISCVAtomicAbiTag::UNKNOWN:
case RISCVAttrs::RISCVAtomicAbiTag::A6C:
return;
};
break;
case RISCVAtomicAbiTag::A6S:
switch (newTag) {
case RISCVAtomicAbiTag::A6C:
it->getSecond() = static_cast<unsigned>(RISCVAtomicAbiTag::A6C);
return;
case RISCVAtomicAbiTag::A7:
it->getSecond() = static_cast<unsigned>(RISCVAtomicAbiTag::A7);
return;
case RISCVAttrs::RISCVAtomicAbiTag::UNKNOWN:
case RISCVAttrs::RISCVAtomicAbiTag::A6S:
return;
};
break;
case RISCVAtomicAbiTag::A7:
switch (newTag) {
case RISCVAtomicAbiTag::A6S:
it->getSecond() = static_cast<unsigned>(RISCVAtomicAbiTag::A7);
return;
case RISCVAtomicAbiTag::A6C:
reportAbiError();
return;
case RISCVAttrs::RISCVAtomicAbiTag::UNKNOWN:
case RISCVAttrs::RISCVAtomicAbiTag::A7:
return;
};
break;
};
// If we get here, then we have an invalid tag, so report it.
// Putting these checks at the end allows us to only do these checks when we
// need to, since this is expected to be a rare occurrence.
reportUnknownAbiError(oldSection, oldTag);
reportUnknownAbiError(newSection, newTag);
}
static RISCVAttributesSection *
2024-10-06 00:14:12 -07:00
mergeAttributesSection(Ctx &ctx,
const SmallVector<InputSectionBase *, 0> &sections) {
using RISCVAttrs::RISCVAtomicAbiTag;
RISCVISAUtils::OrderedExtensionMap exts;
const InputSectionBase *firstStackAlign = nullptr;
const InputSectionBase *firstAtomicAbi = nullptr;
unsigned firstStackAlignValue = 0, xlen = 0;
bool hasArch = false;
2024-10-10 23:07:02 -07:00
ctx.in.riscvAttributes = std::make_unique<RISCVAttributesSection>(ctx);
auto &merged = static_cast<RISCVAttributesSection &>(*ctx.in.riscvAttributes);
// Collect all tags values from attributes section.
const auto &attributesTags = RISCVAttrs::getRISCVAttributeTags();
for (const InputSectionBase *sec : sections) {
RISCVAttributeParser parser;
if (Error e = parser.parse(sec->content(), llvm::endianness::little))
Warn(ctx) << sec << ": " << std::move(e);
for (const auto &tag : attributesTags) {
switch (RISCVAttrs::AttrType(tag.attr)) {
// Integer attributes.
case RISCVAttrs::STACK_ALIGN:
if (auto i = parser.getAttributeValue(tag.attr)) {
auto r = merged.intAttr.try_emplace(tag.attr, *i);
if (r.second) {
firstStackAlign = sec;
firstStackAlignValue = *i;
} else if (r.first->second != *i) {
2024-11-16 20:32:44 -08:00
Err(ctx) << sec << " has stack_align=" << *i << " but "
<< firstStackAlign
2024-11-16 20:32:44 -08:00
<< " has stack_align=" << firstStackAlignValue;
}
}
continue;
case RISCVAttrs::UNALIGNED_ACCESS:
if (auto i = parser.getAttributeValue(tag.attr))
merged.intAttr[tag.attr] |= *i;
continue;
// String attributes.
case RISCVAttrs::ARCH:
if (auto s = parser.getAttributeString(tag.attr)) {
hasArch = true;
2024-11-14 22:30:29 -08:00
mergeArch(ctx, exts, xlen, sec, *s);
}
continue;
// Attributes which use the default handling.
case RISCVAttrs::PRIV_SPEC:
case RISCVAttrs::PRIV_SPEC_MINOR:
case RISCVAttrs::PRIV_SPEC_REVISION:
break;
case RISCVAttrs::AttrType::ATOMIC_ABI:
if (auto i = parser.getAttributeValue(tag.attr)) {
auto r = merged.intAttr.try_emplace(tag.attr, *i);
if (r.second)
firstAtomicAbi = sec;
else
2024-11-14 22:30:29 -08:00
mergeAtomic(ctx, r.first, firstAtomicAbi, sec,
static_cast<RISCVAtomicAbiTag>(r.first->getSecond()),
static_cast<RISCVAtomicAbiTag>(*i));
}
continue;
}
// Fallback for deprecated priv_spec* and other unknown attributes: retain
// the attribute if all input sections agree on the value. GNU ld uses 0
// and empty strings as default values which are not dumped to the output.
// TODO Adjust after resolution to
// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/issues/352
if (tag.attr % 2 == 0) {
if (auto i = parser.getAttributeValue(tag.attr)) {
auto r = merged.intAttr.try_emplace(tag.attr, *i);
if (!r.second && r.first->second != *i)
r.first->second = 0;
}
} else if (auto s = parser.getAttributeString(tag.attr)) {
auto r = merged.strAttr.try_emplace(tag.attr, *s);
if (!r.second && r.first->second != *s)
r.first->second = {};
}
}
}
if (hasArch && xlen != 0) {
if (auto result = RISCVISAInfo::createFromExtMap(xlen, exts)) {
merged.strAttr.try_emplace(RISCVAttrs::ARCH,
ctx.saver.save((*result)->toString()));
} else {
Err(ctx) << result.takeError();
}
}
// The total size of headers: format-version [ <section-length> "vendor-name"
// [ <file-tag> <size>.
size_t size = 5 + merged.vendor.size() + 1 + 5;
for (auto &attr : merged.intAttr)
if (attr.second != 0)
size += getULEB128Size(attr.first) + getULEB128Size(attr.second);
for (auto &attr : merged.strAttr)
if (!attr.second.empty())
size += getULEB128Size(attr.first) + attr.second.size() + 1;
merged.size = size;
return &merged;
}
void RISCVAttributesSection::writeTo(uint8_t *buf) {
const size_t size = getSize();
uint8_t *const end = buf + size;
*buf = ELFAttrs::Format_Version;
2024-10-13 10:37:47 -07:00
write32(ctx, buf + 1, size - 1);
buf += 5;
memcpy(buf, vendor.data(), vendor.size());
buf += vendor.size() + 1;
*buf = ELFAttrs::File;
2024-10-13 10:37:47 -07:00
write32(ctx, buf + 1, end - buf);
buf += 5;
for (auto &attr : intAttr) {
if (attr.second == 0)
continue;
buf += encodeULEB128(attr.first, buf);
buf += encodeULEB128(attr.second, buf);
}
for (auto &attr : strAttr) {
if (attr.second.empty())
continue;
buf += encodeULEB128(attr.first, buf);
memcpy(buf, attr.second.data(), attr.second.size());
buf += attr.second.size() + 1;
}
}
2024-10-06 00:14:12 -07:00
void elf::mergeRISCVAttributesSections(Ctx &ctx) {
// Find the first input SHT_RISCV_ATTRIBUTES; return if not found.
size_t place =
llvm::find_if(ctx.inputSections,
[](auto *s) { return s->type == SHT_RISCV_ATTRIBUTES; }) -
ctx.inputSections.begin();
if (place == ctx.inputSections.size())
return;
// Extract all SHT_RISCV_ATTRIBUTES sections into `sections`.
SmallVector<InputSectionBase *, 0> sections;
llvm::erase_if(ctx.inputSections, [&](InputSectionBase *s) {
if (s->type != SHT_RISCV_ATTRIBUTES)
return false;
sections.push_back(s);
return true;
});
// Add the merged section.
ctx.inputSections.insert(ctx.inputSections.begin() + place,
2024-10-06 00:14:12 -07:00
mergeAttributesSection(ctx, sections));
}
void elf::setRISCVTargetInfo(Ctx &ctx) { ctx.target.reset(new RISCV(ctx)); }