mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-16 12:56:35 +00:00
[XRay][RISCV] RISCV support for XRay (#117368)
Add RISC-V support for XRay. The RV64 implementation has been tested in both QEMU and in our hardware environment. Currently this requires D and C extensions, but since both RV64GC and RVA22/RVA23 are becoming mainstream, I don't think this requirement will be a big problem. Based on the previous work by @a-poduval : https://reviews.llvm.org/D117929 --------- Co-authored-by: Ashwin Poduval <ashwin.poduval@gmail.com>
This commit is contained in:
parent
7987f478be
commit
ea76b2d8d8
@ -51,6 +51,8 @@ XRayArgs::XRayArgs(const ToolChain &TC, const ArgList &Args) {
|
||||
case llvm::Triple::mips64:
|
||||
case llvm::Triple::mips64el:
|
||||
case llvm::Triple::systemz:
|
||||
case llvm::Triple::riscv32:
|
||||
case llvm::Triple::riscv64:
|
||||
break;
|
||||
default:
|
||||
D.Diag(diag::err_drv_unsupported_opt_for_target)
|
||||
|
@ -102,7 +102,7 @@ if(APPLE)
|
||||
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM64})
|
||||
else()
|
||||
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64}
|
||||
powerpc64le ${HEXAGON} ${LOONGARCH64})
|
||||
powerpc64le ${HEXAGON} ${LOONGARCH64} ${RISCV32} ${RISCV64})
|
||||
endif()
|
||||
set(ALL_XRAY_DSO_SUPPORTED_ARCH ${X86_64} ${ARM64})
|
||||
set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64})
|
||||
|
@ -96,6 +96,16 @@ set(hexagon_SOURCES
|
||||
xray_trampoline_hexagon.S
|
||||
)
|
||||
|
||||
set(riscv32_SOURCES
|
||||
xray_riscv.cpp
|
||||
xray_trampoline_riscv32.S
|
||||
)
|
||||
|
||||
set(riscv64_SOURCES
|
||||
xray_riscv.cpp
|
||||
xray_trampoline_riscv64.S
|
||||
)
|
||||
|
||||
set(XRAY_SOURCE_ARCHS
|
||||
arm
|
||||
armhf
|
||||
@ -156,6 +166,8 @@ set(XRAY_ALL_SOURCE_FILES
|
||||
${mips64_SOURCES}
|
||||
${mips64el_SOURCES}
|
||||
${powerpc64le_SOURCES}
|
||||
${riscv32_SOURCES}
|
||||
${riscv64_SOURCES}
|
||||
${XRAY_IMPL_HEADERS}
|
||||
)
|
||||
list(REMOVE_DUPLICATES XRAY_ALL_SOURCE_FILES)
|
||||
|
@ -57,6 +57,10 @@ static const int16_t cSledLength = 64;
|
||||
static const int16_t cSledLength = 8;
|
||||
#elif defined(__hexagon__)
|
||||
static const int16_t cSledLength = 20;
|
||||
#elif defined(__riscv) && (__riscv_xlen == 64)
|
||||
static const int16_t cSledLength = 68;
|
||||
#elif defined(__riscv) && (__riscv_xlen == 32)
|
||||
static const int16_t cSledLength = 52;
|
||||
#else
|
||||
#error "Unsupported CPU Architecture"
|
||||
#endif /* CPU architecture */
|
||||
|
266
compiler-rt/lib/xray/xray_riscv.cpp
Normal file
266
compiler-rt/lib/xray/xray_riscv.cpp
Normal file
@ -0,0 +1,266 @@
|
||||
//===-- xray_riscv.cpp ----------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is a part of XRay, a dynamic runtime instrumentation system.
|
||||
//
|
||||
// Implementation of RISC-V specific routines (32- and 64-bit).
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "sanitizer_common/sanitizer_common.h"
|
||||
#include "xray_defs.h"
|
||||
#include "xray_interface_internal.h"
|
||||
#include <atomic>
|
||||
|
||||
namespace __xray {
|
||||
|
||||
// Base encodings of the instructions written into a sled at patch time. Each
// value carries only the major opcode (bits 6:0) and, where the instruction
// has one, funct3 (bits 14:12); register and immediate fields are zero and are
// OR-ed in by the encode*Instruction helpers.
enum PatchOpcodes : uint32_t {
  // I-type
  PO_ADDI = 0x00000013, // addi rd, rs1, imm
  PO_SLLI = 0x00001013, // slli rd, rs1, shamt
  PO_LW = 0x00002003,   // lw rd, imm(rs1)
  PO_LD = 0x00003003,   // ld rd, imm(rs1)
  PO_JALR = 0x00000067, // jalr rd, rs1

  // R-type
  PO_ADD = 0x00000033, // add rd, rs1, rs2
  PO_OR = 0x00006033,  // or rd, rs1, rs2

  // S-type
  PO_SW = 0x00002023, // sw rs2, imm(rs1)
  PO_SD = 0x00003023, // sd rs2, imm(rs1)

  // U-type
  PO_LUI = 0x00000037, // lui rd, imm

  // J-type
  PO_J = 0x0000006f, // jal imm

  // Canonical no-op: addi x0, x0, 0
  PO_NOP = PO_ADDI,
};
|
||||
|
||||
// Integer register numbers (x0..x31 index) for the registers referenced by
// the patched sled, named after their ABI mnemonics.
enum RegNum : uint32_t {
  RN_X0 = 0,  // x0
  RN_RA = 1,  // x1
  RN_SP = 2,  // x2
  RN_T1 = 6,  // x6
  RN_A0 = 10, // x10
};
|
||||
|
||||
// Builds an R-type instruction word: rs2 at bit 20, rs1 at bit 15, rd at
// bit 7, OR-ed onto the base encoding in Opcode. Fields are not range-checked.
static inline uint32_t encodeRTypeInstruction(uint32_t Opcode, uint32_t Rs1,
                                              uint32_t Rs2, uint32_t Rd) {
  uint32_t Insn = Opcode;
  Insn |= Rd << 7;
  Insn |= Rs1 << 15;
  Insn |= Rs2 << 20;
  return Insn;
}
|
||||
|
||||
// Builds an I-type instruction word: the immediate lands in bits 31:20 (only
// its low 12 bits survive the shift), rs1 at bit 15, rd at bit 7, OR-ed onto
// the base encoding in Opcode.
static inline uint32_t encodeITypeInstruction(uint32_t Opcode, uint32_t Rs1,
                                              uint32_t Rd, uint32_t Imm) {
  const uint32_t ImmField = Imm << 20;
  const uint32_t Rs1Field = Rs1 << 15;
  const uint32_t RdField = Rd << 7;
  return ImmField | Rs1Field | RdField | Opcode;
}
|
||||
|
||||
// Builds an S-type (store) instruction word. The 12-bit immediate is split:
// imm[11:5] goes to bits 31:25 and imm[4:0] to bits 11:7. rs2 sits at bit 20
// and rs1 at bit 15, all OR-ed onto the base encoding in Opcode.
static inline uint32_t encodeSTypeInstruction(uint32_t Opcode, uint32_t Rs1,
                                              uint32_t Rs2, uint32_t Imm) {
  const uint32_t UpperImm = (Imm & 0xfe0) << 20; // imm[11:5] -> 31:25
  const uint32_t LowerImm = (Imm & 0x01f) << 7;  // imm[4:0]  -> 11:7
  uint32_t Insn = Opcode;
  Insn |= LowerImm;
  Insn |= Rs1 << 15;
  Insn |= Rs2 << 20;
  Insn |= UpperImm;
  return Insn;
}
|
||||
|
||||
// Builds a U-type instruction word: the immediate occupies bits 31:12 (only
// its low 20 bits survive the shift) and rd sits at bit 7, OR-ed onto the
// base encoding in Opcode.
static inline uint32_t encodeUTypeInstruction(uint32_t Opcode, uint32_t Rd,
                                              uint32_t Imm) {
  const uint32_t ImmField = Imm << 12;
  const uint32_t RdField = Rd << 7;
  return ImmField | RdField | Opcode;
}
|
||||
|
||||
// Builds a J-type (jal) instruction word. Imm is the byte offset; its bits are
// scattered as imm[20] -> bit 31, imm[10:1] -> bits 30:21, imm[11] -> bit 20,
// imm[19:12] -> bits 19:12 (already in place). rd sits at bit 7.
static inline uint32_t encodeJTypeInstruction(uint32_t Opcode, uint32_t Rd,
                                              uint32_t Imm) {
  const uint32_t Bit20 = (Imm & 0x100000) << 11;
  const uint32_t Bits10To1 = (Imm & 0x7fe) << 20;
  const uint32_t Bit11 = (Imm & 0x800) << 9;
  const uint32_t Bits19To12 = Imm & 0xff000;
  const uint32_t ImmField = Bit20 | Bits10To1 | Bit11 | Bits19To12;
  return ImmField | Rd << 7 | Opcode;
}
|
||||
|
||||
// Upper 20 bits of val, rounded so that adding the sign-extended lo12(val)
// back to (hi20(val) << 12) reproduces val modulo 2^32.
static uint32_t hi20(uint32_t val) {
  const uint32_t Rounded = val + 0x800;
  return Rounded >> 12;
}

// Low 12 bits of val (unsigned; consumers treat the field as signed).
static uint32_t lo12(uint32_t val) { return val & ((1u << 12) - 1); }
|
||||
|
||||
static inline bool patchSled(const bool Enable, const uint32_t FuncId,
|
||||
const XRaySledEntry &Sled,
|
||||
void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
|
||||
// When |Enable| == true,
|
||||
// We replace the following compile-time stub (sled):
|
||||
//
|
||||
// xray_sled_n:
|
||||
// J .tmpN
|
||||
// 21 or 33 C.NOPs (42 or 66 bytes)
|
||||
// .tmpN
|
||||
//
|
||||
// With one of the following runtime patches:
|
||||
//
|
||||
// xray_sled_n (32-bit):
|
||||
// addi sp, sp, -16 ;create stack frame
|
||||
// sw ra, 12(sp) ;save return address
|
||||
// sw a0, 8(sp) ;save register a0
|
||||
// lui ra, %hi(__xray_FunctionEntry/Exit)
|
||||
// addi ra, ra, %lo(__xray_FunctionEntry/Exit)
|
||||
// lui a0, %hi(function_id)
|
||||
// addi a0, a0, %lo(function_id) ;pass function id
|
||||
// jalr ra ;call Tracing hook
|
||||
// lw a0, 8(sp) ;restore register a0
|
||||
// lw ra, 12(sp) ;restore return address
|
||||
// addi sp, sp, 16 ;delete stack frame
|
||||
//
|
||||
// xray_sled_n (64-bit):
|
||||
// addi sp, sp, -32 ;create stack frame
|
||||
// sd ra, 24(sp) ;save return address
|
||||
// sd a0, 16(sp) ;save register a0
|
||||
// sd t1, 8(sp) ;save register t1
|
||||
// lui t1, %highest(__xray_FunctionEntry/Exit)
|
||||
// addi t1, t1, %higher(__xray_FunctionEntry/Exit)
|
||||
// slli t1, t1, 32
|
||||
// lui ra, ra, %hi(__xray_FunctionEntry/Exit)
|
||||
// addi ra, ra, %lo(__xray_FunctionEntry/Exit)
|
||||
// add ra, t1, ra
|
||||
// lui a0, %hi(function_id)
|
||||
// addi a0, a0, %lo(function_id) ;pass function id
|
||||
// jalr ra ;call Tracing hook
|
||||
// ld t1, 8(sp) ;restore register t1
|
||||
// ld a0, 16(sp) ;restore register a0
|
||||
// ld ra, 24(sp) ;restore return address
|
||||
// addi sp, sp, 32 ;delete stack frame
|
||||
//
|
||||
// Replacement of the first 4-byte instruction should be the last and atomic
|
||||
// operation, so that the user code which reaches the sled concurrently
|
||||
// either jumps over the whole sled, or executes the whole sled when the
|
||||
// latter is ready.
|
||||
//
|
||||
// When |Enable|==false, we set back the first instruction in the sled to be
|
||||
// J 44 bytes (rv32)
|
||||
// J 68 bytes (rv64)
|
||||
|
||||
uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address());
|
||||
if (Enable) {
|
||||
#if __riscv_xlen == 64
|
||||
// If the ISA is RV64, the Tracing Hook needs to be typecast to a 64 bit
|
||||
// value.
|
||||
uint32_t LoTracingHookAddr = lo12(reinterpret_cast<uint64_t>(TracingHook));
|
||||
uint32_t HiTracingHookAddr = hi20(reinterpret_cast<uint64_t>(TracingHook));
|
||||
uint32_t HigherTracingHookAddr =
|
||||
lo12((reinterpret_cast<uint64_t>(TracingHook) + 0x80000000) >> 32);
|
||||
uint32_t HighestTracingHookAddr =
|
||||
hi20((reinterpret_cast<uint64_t>(TracingHook) + 0x80000000) >> 32);
|
||||
#elif __riscv_xlen == 32
|
||||
// We typecast the Tracing Hook to a 32 bit value for RV32
|
||||
uint32_t LoTracingHookAddr = lo12(reinterpret_cast<uint32_t>(TracingHook));
|
||||
uint32_t HiTracingHookAddr = hi20((reinterpret_cast<uint32_t>(TracingHook));
|
||||
#endif
|
||||
uint32_t LoFunctionID = lo12(FuncId);
|
||||
uint32_t HiFunctionID = hi20(FuncId);
|
||||
|
||||
// The sled that is patched in for RISCV64 defined below. We need the entire
|
||||
// sleds corresponding to both ISAs to be protected by defines because the
|
||||
// first few instructions are all different, because we store doubles in
|
||||
// case of RV64 and store words for RV32. Subsequently, we have LUI - and in
|
||||
// case of RV64, we need extra instructions from this point on, so we see
|
||||
// differences in addresses to which instructions are stored.
|
||||
size_t Idx = 1U;
|
||||
const uint32_t XLenBytes = __riscv_xlen / 8;
|
||||
#if __riscv_xlen == 64
|
||||
const uint32_t LoadOp = PatchOpcodes::PO_LD;
|
||||
const uint32_t StoreOp = PatchOpcodes::PO_SD;
|
||||
#elif __riscv_xlen == 32
|
||||
const uint32_t LoadOp = PatchOpcodes::PO_LW;
|
||||
const uint32_t StoreOp = PatchOpcodes::PO_SW;
|
||||
#endif
|
||||
|
||||
Address[Idx++] = encodeSTypeInstruction(StoreOp, RegNum::RN_SP,
|
||||
RegNum::RN_RA, 3 * XLenBytes);
|
||||
Address[Idx++] = encodeSTypeInstruction(StoreOp, RegNum::RN_SP,
|
||||
RegNum::RN_A0, 2 * XLenBytes);
|
||||
|
||||
#if __riscv_xlen == 64
|
||||
Address[Idx++] = encodeSTypeInstruction(StoreOp, RegNum::RN_SP,
|
||||
RegNum::RN_T1, XLenBytes);
|
||||
Address[Idx++] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_T1,
|
||||
HighestTracingHookAddr);
|
||||
Address[Idx++] =
|
||||
encodeITypeInstruction(PatchOpcodes::PO_ADDI, RegNum::RN_T1,
|
||||
RegNum::RN_T1, HigherTracingHookAddr);
|
||||
Address[Idx++] = encodeITypeInstruction(PatchOpcodes::PO_SLLI,
|
||||
RegNum::RN_T1, RegNum::RN_T1, 32);
|
||||
#endif
|
||||
Address[Idx++] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_RA,
|
||||
HiTracingHookAddr);
|
||||
Address[Idx++] = encodeITypeInstruction(
|
||||
PatchOpcodes::PO_ADDI, RegNum::RN_RA, RegNum::RN_RA, LoTracingHookAddr);
|
||||
#if __riscv_xlen == 64
|
||||
Address[Idx++] = encodeRTypeInstruction(PatchOpcodes::PO_ADD, RegNum::RN_RA,
|
||||
RegNum::RN_T1, RegNum::RN_RA);
|
||||
#endif
|
||||
Address[Idx++] = encodeUTypeInstruction(PatchOpcodes::PO_LUI, RegNum::RN_A0,
|
||||
HiFunctionID);
|
||||
Address[Idx++] = encodeITypeInstruction(
|
||||
PatchOpcodes::PO_ADDI, RegNum::RN_A0, RegNum::RN_A0, LoFunctionID);
|
||||
Address[Idx++] = encodeITypeInstruction(PatchOpcodes::PO_JALR,
|
||||
RegNum::RN_RA, RegNum::RN_RA, 0);
|
||||
|
||||
#if __riscv_xlen == 64
|
||||
Address[Idx++] =
|
||||
encodeITypeInstruction(LoadOp, RegNum::RN_SP, RegNum::RN_T1, XLenBytes);
|
||||
#endif
|
||||
Address[Idx++] = encodeITypeInstruction(LoadOp, RegNum::RN_SP,
|
||||
RegNum::RN_A0, 2 * XLenBytes);
|
||||
Address[Idx++] = encodeITypeInstruction(LoadOp, RegNum::RN_SP,
|
||||
RegNum::RN_RA, 3 * XLenBytes);
|
||||
Address[Idx++] = encodeITypeInstruction(
|
||||
PatchOpcodes::PO_ADDI, RegNum::RN_SP, RegNum::RN_SP, 4 * XLenBytes);
|
||||
|
||||
uint32_t CreateStackSpace = encodeITypeInstruction(
|
||||
PatchOpcodes::PO_ADDI, RegNum::RN_SP, RegNum::RN_SP, -4 * XLenBytes);
|
||||
|
||||
std::atomic_store_explicit(
|
||||
reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateStackSpace,
|
||||
std::memory_order_release);
|
||||
} else {
|
||||
uint32_t CreateBranch = encodeJTypeInstruction(
|
||||
// Jump distance is different in both ISAs due to difference in size of
|
||||
// sleds
|
||||
#if __riscv_xlen == 64
|
||||
PatchOpcodes::PO_J, RegNum::RN_X0,
|
||||
68); // jump encodes an offset of 68
|
||||
#elif __riscv_xlen == 32
|
||||
PatchOpcodes::PO_J, RegNum::RN_X0,
|
||||
44); // jump encodes an offset of 44
|
||||
#endif
|
||||
std::atomic_store_explicit(
|
||||
reinterpret_cast<std::atomic<uint32_t> *>(Address), CreateBranch,
|
||||
std::memory_order_release);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
|
||||
const XRaySledEntry &Sled,
|
||||
const XRayTrampolines &Trampolines,
|
||||
bool LogArgs) XRAY_NEVER_INSTRUMENT {
|
||||
// We don't support logging argument at this moment, so we always
|
||||
// use EntryTrampoline.
|
||||
return patchSled(Enable, FuncId, Sled, Trampolines.EntryTrampoline);
|
||||
}
|
||||
|
||||
bool patchFunctionExit(
|
||||
const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
|
||||
const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
|
||||
return patchSled(Enable, FuncId, Sled, Trampolines.ExitTrampoline);
|
||||
}
|
||||
|
||||
bool patchFunctionTailExit(
|
||||
const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
|
||||
const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
|
||||
return patchSled(Enable, FuncId, Sled, Trampolines.TailExitTrampoline);
|
||||
}
|
||||
|
||||
bool patchCustomEvent(const bool Enable, const uint32_t FuncId,
|
||||
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool patchTypedEvent(const bool Enable, const uint32_t FuncId,
|
||||
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
|
||||
return false;
|
||||
}
|
||||
} // namespace __xray
|
||||
|
||||
// Empty C-linkage definition of the argument-logging entry point; the entry
// patcher in this file never installs it.
extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
}
|
89
compiler-rt/lib/xray/xray_trampoline_riscv32.S
Normal file
89
compiler-rt/lib/xray/xray_trampoline_riscv32.S
Normal file
@ -0,0 +1,89 @@
|
||||
//===-- xray_trampoline_riscv32.s ----------------------------------*- ASM -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is a part of XRay, a dynamic runtime instrumentation system.
|
||||
//
|
||||
// This implements the riscv32-specific assembler for the trampolines.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "../sanitizer_common/sanitizer_asm.h"
|
||||
|
||||
.macro SAVE_ARG_REGISTERS
|
||||
// Push argument registers to stack
|
||||
addi sp, sp, -112
|
||||
CFI_DEF_CFA_OFFSET(112)
|
||||
sw ra, 108(sp)
|
||||
sw a7, 104(sp)
|
||||
sw a6, 100(sp)
|
||||
sw a5, 96(sp)
|
||||
sw a4, 92(sp)
|
||||
sw a3, 88(sp)
|
||||
sw a2, 84(sp)
|
||||
sw a1, 80(sp)
|
||||
sw a0, 76(sp)
|
||||
fsd fa7, 64(sp)
|
||||
fsd fa6, 56(sp)
|
||||
fsd fa5, 48(sp)
|
||||
fsd fa4, 40(sp)
|
||||
fsd fa3, 32(sp)
|
||||
fsd fa2, 24(sp)
|
||||
fsd fa1, 16(sp)
|
||||
fsd fa0, 8(sp)
|
||||
.endm
|
||||
|
||||
.macro RESTORE_ARG_REGISTERS
|
||||
// Restore argument registers
|
||||
fld fa0, 8(sp)
|
||||
fld fa1, 16(sp)
|
||||
fld fa2, 24(sp)
|
||||
fld fa3, 32(sp)
|
||||
fld fa4, 40(sp)
|
||||
fld fa5, 48(sp)
|
||||
fld fa6, 56(sp)
|
||||
fld fa7, 64(sp)
|
||||
lw a0, 76(sp)
|
||||
lw a1, 80(sp)
|
||||
lw a2, 84(sp)
|
||||
lw a3, 88(sp)
|
||||
lw a4, 92(sp)
|
||||
lw a5, 96(sp)
|
||||
lw a6, 100(sp)
|
||||
lw a7, 104(sp)
|
||||
lw ra, 108(sp)
|
||||
addi sp, sp, 112
|
||||
CFI_DEF_CFA_OFFSET(0)
|
||||
.endm
|
||||
|
||||
.macro SAVE_RET_REGISTERS
|
||||
// Push return registers to stack
|
||||
addi sp, sp, -32
|
||||
CFI_DEF_CFA_OFFSET(32)
|
||||
sw ra, 28(sp)
|
||||
sw a1, 24(sp)
|
||||
sw a0, 20(sp)
|
||||
fsd fa1, 8(sp)
|
||||
fsd fa0, 0(sp)
|
||||
.endm
|
||||
|
||||
.macro RESTORE_RET_REGISTERS
|
||||
// Restore return registers
|
||||
fld fa0, 0(sp)
|
||||
fld fa1, 8(sp)
|
||||
lw a0, 20(sp)
|
||||
lw a1, 24(sp)
|
||||
lw ra, 28(sp)
|
||||
addi sp, sp, 32
|
||||
CFI_DEF_CFA_OFFSET(0)
|
||||
.endm
|
||||
|
||||
.macro LOAD_XLEN, rd, src
|
||||
lw \rd, \src
|
||||
.endm
|
||||
|
||||
#include "xray_trampoline_riscv_common.S"
|
89
compiler-rt/lib/xray/xray_trampoline_riscv64.S
Normal file
89
compiler-rt/lib/xray/xray_trampoline_riscv64.S
Normal file
@ -0,0 +1,89 @@
|
||||
//===-- xray_trampoline_riscv64.s ----------------------------------*- ASM -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is a part of XRay, a dynamic runtime instrumentation system.
|
||||
//
|
||||
// This implements the riscv64-specific assembler for the trampolines.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "../sanitizer_common/sanitizer_asm.h"
|
||||
|
||||
.macro SAVE_ARG_REGISTERS
|
||||
// Push argument registers to stack
|
||||
addi sp, sp, -144
|
||||
CFI_DEF_CFA_OFFSET(144)
|
||||
sd ra, 136(sp)
|
||||
sd a7, 128(sp)
|
||||
sd a6, 120(sp)
|
||||
sd a5, 112(sp)
|
||||
sd a4, 104(sp)
|
||||
sd a3, 96(sp)
|
||||
sd a2, 88(sp)
|
||||
sd a1, 80(sp)
|
||||
sd a0, 72(sp)
|
||||
fsd fa7, 64(sp)
|
||||
fsd fa6, 56(sp)
|
||||
fsd fa5, 48(sp)
|
||||
fsd fa4, 40(sp)
|
||||
fsd fa3, 32(sp)
|
||||
fsd fa2, 24(sp)
|
||||
fsd fa1, 16(sp)
|
||||
fsd fa0, 8(sp)
|
||||
.endm
|
||||
|
||||
.macro SAVE_RET_REGISTERS
|
||||
// Push return registers to stack
|
||||
addi sp, sp, -48
|
||||
CFI_DEF_CFA_OFFSET(48)
|
||||
sd ra, 40(sp)
|
||||
sd a1, 32(sp)
|
||||
sd a0, 24(sp)
|
||||
fsd fa1, 16(sp)
|
||||
fsd fa0, 8(sp)
|
||||
.endm
|
||||
|
||||
.macro RESTORE_RET_REGISTERS
|
||||
// Restore return registers
|
||||
fld fa0, 8(sp)
|
||||
fld fa1, 16(sp)
|
||||
ld a0, 24(sp)
|
||||
ld a1, 32(sp)
|
||||
ld ra, 40(sp)
|
||||
addi sp, sp, 48
|
||||
CFI_DEF_CFA_OFFSET(0)
|
||||
.endm
|
||||
|
||||
.macro RESTORE_ARG_REGISTERS
|
||||
// Restore argument registers
|
||||
fld fa0, 8(sp)
|
||||
fld fa1, 16(sp)
|
||||
fld fa2, 24(sp)
|
||||
fld fa3, 32(sp)
|
||||
fld fa4, 40(sp)
|
||||
fld fa5, 48(sp)
|
||||
fld fa6, 56(sp)
|
||||
fld fa7, 64(sp)
|
||||
ld a0, 72(sp)
|
||||
ld a1, 80(sp)
|
||||
ld a2, 88(sp)
|
||||
ld a3, 96(sp)
|
||||
ld a4, 104(sp)
|
||||
ld a5, 112(sp)
|
||||
ld a6, 120(sp)
|
||||
ld a7, 128(sp)
|
||||
ld ra, 136(sp)
|
||||
addi sp, sp, 144
|
||||
CFI_DEF_CFA_OFFSET(0)
|
||||
.endm
|
||||
|
||||
.macro LOAD_XLEN, rd, src
|
||||
ld \rd, \src
|
||||
.endm
|
||||
|
||||
#include "xray_trampoline_riscv_common.S"
|
96
compiler-rt/lib/xray/xray_trampoline_riscv_common.S
Normal file
96
compiler-rt/lib/xray/xray_trampoline_riscv_common.S
Normal file
@ -0,0 +1,96 @@
|
||||
//===-- xray_trampoline_riscv_common.s --------------------------*- ASM -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is a part of XRay, a dynamic runtime instrumentation system.
|
||||
//
|
||||
// This implements the trampolines code shared between riscv32 and riscv64.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "../builtins/assembly.h"
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ASM_SYMBOL(__xray_FunctionEntry)
|
||||
ASM_TYPE_FUNCTION(__xray_FunctionEntry)
|
||||
ASM_SYMBOL(__xray_FunctionEntry):
|
||||
CFI_STARTPROC
|
||||
SAVE_ARG_REGISTERS
|
||||
|
||||
// Load the handler function pointer into a2
|
||||
la a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)
|
||||
LOAD_XLEN a2, 0(a2)
|
||||
|
||||
// Handler address will be null if it is not set
|
||||
beq a2, x0, 1f
|
||||
|
||||
// If we reach here, we are tracing an event
|
||||
// a0 already contains function id
|
||||
// a1 = 0 means we are tracing an entry event
|
||||
li a1, 0
|
||||
jalr a2
|
||||
|
||||
1:
|
||||
RESTORE_ARG_REGISTERS
|
||||
jr ra
|
||||
ASM_SIZE(__xray_FunctionEntry)
|
||||
CFI_ENDPROC
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ASM_SYMBOL(__xray_FunctionExit)
|
||||
ASM_TYPE_FUNCTION(__xray_FunctionExit)
|
||||
ASM_SYMBOL(__xray_FunctionExit):
|
||||
CFI_STARTPROC
|
||||
SAVE_RET_REGISTERS
|
||||
|
||||
// Load the handler function pointer into a2
|
||||
la a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)
|
||||
LOAD_XLEN a2, 0(a2)
|
||||
|
||||
// Handler address will be null if it is not set
|
||||
beq a2, x0, 1f
|
||||
|
||||
// If we reach here, we are tracing an event
|
||||
// a0 already contains function id
|
||||
// a1 = 1 means we are tracing an exit event
|
||||
li a1, 1
|
||||
jalr a2
|
||||
|
||||
1:
|
||||
RESTORE_RET_REGISTERS
|
||||
jr ra
|
||||
ASM_SIZE(__xray_FunctionExit)
|
||||
CFI_ENDPROC
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ASM_SYMBOL(__xray_FunctionTailExit)
|
||||
ASM_TYPE_FUNCTION(__xray_FunctionTailExit)
|
||||
ASM_SYMBOL(__xray_FunctionTailExit):
|
||||
CFI_STARTPROC
|
||||
SAVE_ARG_REGISTERS
|
||||
|
||||
// Load the handler function pointer into a2
|
||||
la a2, ASM_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)
|
||||
LOAD_XLEN a2, 0(a2)
|
||||
|
||||
// Handler address will be null if it is not set
|
||||
beq a2, x0, 1f
|
||||
|
||||
// If we reach here, we are tracing an event
|
||||
// a0 already contains function id
|
||||
// a1 = 2 means we are tracing a tail exit event
|
||||
li a1, 2
|
||||
jalr a2
|
||||
|
||||
1:
|
||||
RESTORE_ARG_REGISTERS
|
||||
jr ra
|
||||
ASM_SIZE(__xray_FunctionTailExit)
|
||||
CFI_ENDPROC
|
@ -43,7 +43,7 @@ inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
|
||||
#elif defined(__powerpc64__)
|
||||
#include "xray_powerpc64.inc"
|
||||
#elif defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
|
||||
defined(__hexagon__) || defined(__loongarch_lp64)
|
||||
defined(__hexagon__) || defined(__loongarch_lp64) || defined(__riscv)
|
||||
// Emulated TSC.
|
||||
// There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
|
||||
// not have a constant frequency like TSC on x86(_64), it may go faster
|
||||
|
@ -233,10 +233,13 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
|
||||
case Triple::ArchType::mips:
|
||||
case Triple::ArchType::mipsel:
|
||||
case Triple::ArchType::mips64:
|
||||
case Triple::ArchType::mips64el: {
|
||||
case Triple::ArchType::mips64el:
|
||||
case Triple::ArchType::riscv32:
|
||||
case Triple::ArchType::riscv64: {
|
||||
// For the architectures which don't have a single return instruction
|
||||
InstrumentationOptions op;
|
||||
op.HandleTailcall = false;
|
||||
// RISC-V supports patching tail calls.
|
||||
op.HandleTailcall = MF.getTarget().getTargetTriple().isRISCV();
|
||||
op.HandleAllReturns = true;
|
||||
prependRetWithPatchableExit(MF, TII, op);
|
||||
break;
|
||||
|
@ -113,6 +113,12 @@ private:
|
||||
|
||||
void emitNTLHint(const MachineInstr *MI);
|
||||
|
||||
// XRay Support
|
||||
void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr *MI);
|
||||
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr *MI);
|
||||
void LowerPATCHABLE_TAIL_CALL(const MachineInstr *MI);
|
||||
void emitSled(const MachineInstr *MI, SledKind Kind);
|
||||
|
||||
bool lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);
|
||||
};
|
||||
}
|
||||
@ -316,6 +322,22 @@ void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||
return LowerPATCHPOINT(*OutStreamer, SM, *MI);
|
||||
case TargetOpcode::STATEPOINT:
|
||||
return LowerSTATEPOINT(*OutStreamer, SM, *MI);
|
||||
case TargetOpcode::PATCHABLE_FUNCTION_ENTER: {
|
||||
// patchable-function-entry is handled in lowerToMCInst
|
||||
// Therefore, we break out of the switch statement if we encounter it here.
|
||||
const Function &F = MI->getParent()->getParent()->getFunction();
|
||||
if (F.hasFnAttribute("patchable-function-entry"))
|
||||
break;
|
||||
|
||||
LowerPATCHABLE_FUNCTION_ENTER(MI);
|
||||
return;
|
||||
}
|
||||
case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
|
||||
LowerPATCHABLE_FUNCTION_EXIT(MI);
|
||||
return;
|
||||
case TargetOpcode::PATCHABLE_TAIL_CALL:
|
||||
LowerPATCHABLE_TAIL_CALL(MI);
|
||||
return;
|
||||
}
|
||||
|
||||
MCInst OutInst;
|
||||
@ -453,11 +475,71 @@ bool RISCVAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
|
||||
SetupMachineFunction(MF);
|
||||
emitFunctionBody();
|
||||
|
||||
// Emit the XRay table
|
||||
emitXRayTable();
|
||||
|
||||
if (EmittedOptionArch)
|
||||
RTS.emitDirectiveOptionPop();
|
||||
return false;
|
||||
}
|
||||
|
||||
void RISCVAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr *MI) {
|
||||
emitSled(MI, SledKind::FUNCTION_ENTER);
|
||||
}
|
||||
|
||||
void RISCVAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr *MI) {
|
||||
emitSled(MI, SledKind::FUNCTION_EXIT);
|
||||
}
|
||||
|
||||
void RISCVAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr *MI) {
|
||||
emitSled(MI, SledKind::TAIL_CALL);
|
||||
}
|
||||
|
||||
void RISCVAsmPrinter::emitSled(const MachineInstr *MI, SledKind Kind) {
|
||||
// We want to emit the jump instruction and the nops constituting the sled.
|
||||
// The format is as follows:
|
||||
// .Lxray_sled_N
|
||||
// ALIGN
|
||||
// J .tmpN
|
||||
// 21 or 33 C.NOP instructions
|
||||
// .tmpN
|
||||
|
||||
// The following variable holds the count of the number of NOPs to be patched
|
||||
// in for XRay instrumentation during compilation.
|
||||
// Note that RV64 and RV32 each has a sled of 68 and 44 bytes, respectively.
|
||||
// Assuming we're using JAL to jump to .tmpN, then we only need
|
||||
// (68 - 4)/2 = 32 NOPs for RV64 and (44 - 4)/2 = 20 for RV32. However, there
|
||||
// is a chance that we'll use C.JAL instead, so an additional NOP is needed.
|
||||
const uint8_t NoopsInSledCount =
|
||||
MI->getParent()->getParent()->getSubtarget<RISCVSubtarget>().is64Bit()
|
||||
? 33
|
||||
: 21;
|
||||
|
||||
OutStreamer->emitCodeAlignment(Align(4), &getSubtargetInfo());
|
||||
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
|
||||
OutStreamer->emitLabel(CurSled);
|
||||
auto Target = OutContext.createTempSymbol();
|
||||
|
||||
const MCExpr *TargetExpr = MCSymbolRefExpr::create(
|
||||
Target, MCSymbolRefExpr::VariantKind::VK_None, OutContext);
|
||||
|
||||
// Emit "J bytes" instruction, which jumps over the nop sled to the actual
|
||||
// start of function.
|
||||
EmitToStreamer(
|
||||
*OutStreamer,
|
||||
MCInstBuilder(RISCV::JAL).addReg(RISCV::X0).addExpr(TargetExpr));
|
||||
|
||||
// Emit NOP instructions
|
||||
for (int8_t I = 0; I < NoopsInSledCount; ++I)
|
||||
EmitToStreamer(*OutStreamer, MCInstBuilder(RISCV::ADDI)
|
||||
.addReg(RISCV::X0)
|
||||
.addReg(RISCV::X0)
|
||||
.addImm(0));
|
||||
|
||||
OutStreamer->emitLabel(Target);
|
||||
recordSled(CurSled, *MI, Kind, 2);
|
||||
}
|
||||
|
||||
void RISCVAsmPrinter::emitStartOfAsmFile(Module &M) {
|
||||
RISCVTargetStreamer &RTS =
|
||||
static_cast<RISCVTargetStreamer &>(*OutStreamer->getTargetStreamer());
|
||||
|
@ -1576,6 +1576,26 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
|
||||
// No patch bytes means at most a PseudoCall is emitted
|
||||
return std::max(NumBytes, 8U);
|
||||
}
|
||||
case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
|
||||
case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
|
||||
case TargetOpcode::PATCHABLE_TAIL_CALL: {
|
||||
const MachineFunction &MF = *MI.getParent()->getParent();
|
||||
const Function &F = MF.getFunction();
|
||||
if (Opcode == TargetOpcode::PATCHABLE_FUNCTION_ENTER &&
|
||||
F.hasFnAttribute("patchable-function-entry")) {
|
||||
unsigned Num;
|
||||
if (F.getFnAttribute("patchable-function-entry")
|
||||
.getValueAsString()
|
||||
.getAsInteger(10, Num))
|
||||
return get(Opcode).getSize();
|
||||
|
||||
// Number of C.NOP or NOP
|
||||
return (STI.hasStdExtCOrZca() ? 2 : 4) * Num;
|
||||
}
|
||||
// XRay uses C.JAL + 21 or 33 C.NOP for each sled in RV32 and RV64,
|
||||
// respectively.
|
||||
return STI.is64Bit() ? 68 : 44;
|
||||
}
|
||||
default:
|
||||
return get(Opcode).getSize();
|
||||
}
|
||||
|
@ -236,6 +236,9 @@ public:
|
||||
return UserReservedRegister[i];
|
||||
}
|
||||
|
||||
// XRay support - require D and C extensions.
|
||||
bool isXRaySupported() const override { return hasStdExtD() && hasStdExtC(); }
|
||||
|
||||
// Vector codegen related methods.
|
||||
bool hasVInstructions() const { return HasStdExtZve32x; }
|
||||
bool hasVInstructionsI64() const { return HasStdExtZve64x; }
|
||||
|
@ -63,7 +63,8 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
|
||||
ObjFile.getBinary()->getArch() == Triple::loongarch64 ||
|
||||
ObjFile.getBinary()->getArch() == Triple::ppc64le ||
|
||||
ObjFile.getBinary()->getArch() == Triple::arm ||
|
||||
ObjFile.getBinary()->getArch() == Triple::aarch64))
|
||||
ObjFile.getBinary()->getArch() == Triple::aarch64 ||
|
||||
ObjFile.getBinary()->getArch() == Triple::riscv64))
|
||||
return make_error<StringError>(
|
||||
"File format not supported (only does ELF and Mach-O little endian "
|
||||
"64-bit).",
|
||||
|
24
llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll
Normal file
24
llvm/test/CodeGen/RISCV/xray-attribute-instrumentation.ll
Normal file
@ -0,0 +1,24 @@
|
||||
; RUN: llc -mtriple=riscv32-unknown-linux-gnu -mattr=+d,+c < %s | FileCheck --check-prefix=CHECK %s
|
||||
; RUN: llc -mtriple=riscv64-unknown-linux-gnu -mattr=+d,+c < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-RISCV64 %s
|
||||
|
||||
define i32 @foo() nounwind "function-instrument"="xray-always" {
|
||||
; CHECK: .p2align 2
|
||||
; CHECK-LABEL: .Lxray_sled_0:
|
||||
; CHECK-NEXT: j .Ltmp0
|
||||
; CHECK-COUNT-21: nop
|
||||
; CHECK-RISCV64-COUNT-12: nop
|
||||
; CHECK-LABEL: .Ltmp0:
|
||||
ret i32 0
|
||||
; CHECK: .p2align 2
|
||||
; CHECK-LABEL: .Lxray_sled_1:
|
||||
; CHECK-NEXT: j .Ltmp1
|
||||
; CHECK-COUNT-21: nop
|
||||
; CHECK-RISCV64-COUNT-12: nop
|
||||
; CHECK-LABEL: .Ltmp1:
|
||||
; CHECK-NEXT: ret
|
||||
}
|
||||
; CHECK: .section xray_instr_map,"ao",@progbits,foo
|
||||
; CHECK-LABEL: .Lxray_sleds_start0:
|
||||
; CHECK: .Lxray_sled_0-[[TMP:.Ltmp[0-9]+]]
|
||||
; CHECK: .Lxray_sled_1-[[TMP:.Ltmp[0-9]+]]
|
||||
; CHECK-LABEL: .Lxray_sleds_end0:
|
Loading…
x
Reference in New Issue
Block a user