[SystemZ][XRay] Reland XRay runtime support for SystemZ (#124611)

Adds the runtime support routines for XRay on SystemZ. Only function
entry/exit is implemented.

The original PR 113252 was reverted due to errors caused by adding DSO
support to XRay.
This PR is the original implementation with the changed function
signatures. I'll add an implementation with DSO support later.
This commit is contained in:
Kai Nacke 2025-01-31 12:13:26 -05:00 committed by GitHub
parent b9207aef09
commit 64142391fd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 318 additions and 1 deletions

View File

@ -103,7 +103,7 @@ if(APPLE)
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM64})
else()
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64}
powerpc64le ${HEXAGON} ${LOONGARCH64} ${RISCV32} ${RISCV64})
powerpc64le ${HEXAGON} ${LOONGARCH64} ${RISCV32} ${RISCV64} ${S390X})
endif()
set(ALL_XRAY_DSO_SUPPORTED_ARCH ${X86_64} ${ARM64})
set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${ARM64})

View File

@ -106,6 +106,13 @@ set(riscv64_SOURCES
xray_trampoline_riscv64.S
)
set(s390x_SOURCES
xray_s390x.cpp
xray_trampoline_s390x.S
)
# Enable vector instructions in the assembly file.
set_source_files_properties(xray_trampoline_s390x.S PROPERTIES COMPILE_FLAGS -mvx)
set(XRAY_SOURCE_ARCHS
arm
armhf
@ -116,6 +123,7 @@ set(XRAY_SOURCE_ARCHS
mips64
mips64el
powerpc64le
s390x
x86_64
)
@ -168,6 +176,7 @@ set(XRAY_ALL_SOURCE_FILES
${powerpc64le_SOURCES}
${riscv32_SOURCES}
${riscv64_SOURCES}
${s390x_SOURCES}
${XRAY_IMPL_HEADERS}
)
list(REMOVE_DUPLICATES XRAY_ALL_SOURCE_FILES)

View File

@ -61,6 +61,8 @@ static const int16_t cSledLength = 20;
static const int16_t cSledLength = 68;
#elif defined(__riscv) && (__riscv_xlen == 32)
static const int16_t cSledLength = 52;
#elif defined(__s390x__)
static const int16_t cSledLength = 18;
#else
#error "Unsupported CPU Architecture"
#endif /* CPU architecture */

View File

@ -29,6 +29,10 @@ extern void __xray_FunctionTailExit();
extern void __xray_ArgLoggerEntry();
extern void __xray_CustomEvent();
extern void __xray_TypedEvent();
#if defined(__s390x__)
extern void __xray_FunctionEntryVec();
extern void __xray_FunctionExitVec();
#endif
}
extern "C" {

View File

@ -0,0 +1,104 @@
//===-- xray_s390x.cpp ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// Implementation of s390x routines.
//
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_common.h"
#include "xray_defs.h"
#include "xray_interface_internal.h"
#include <cassert>
#include <cstring>
// Patches (Enable == true) or unpatches (Enable == false) the function entry
// sled located at Sled.address().
//
// When enabling, three stores are made relative to the start of the sled:
//   - bytes 8-11 (Address[2]) receive FuncId,
//   - bytes 4-5 receive 0x0024,
//   - bytes 0-3 (Address[0]) receive 0xeb2ff010.
// When disabling, only bytes 0-3 are rewritten.
//
// Trampolines and LogArgs are not used by this implementation. The function
// always returns true.
bool __xray::patchFunctionEntry(const bool Enable, const uint32_t FuncId,
const XRaySledEntry &Sled,
const XRayTrampolines &Trampolines,
bool LogArgs) XRAY_NEVER_INSTRUMENT {
uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address());
// TODO: Trampoline addresses are currently inserted at compile-time, using
// __xray_FunctionEntry and __xray_FunctionExit only.
// To support DSO instrumentation, trampolines have to be written during
// patching (see implementation on X86_64, e.g.).
if (Enable) {
// The resulting code is:
// stmg %r2, %r15, 16(%r15)
// llilf %r2, FuncID
// brasl %r14, __xray_FunctionEntry@GOT
// The FuncId and the stmg instruction must be written.
// The first word of the sled is stored last; presumably this keeps the
// branch that skips the sled in place until the remaining bytes are final
// (TODO confirm).
// Write FuncId into llilf.
Address[2] = FuncId;
// Write last part of stmg.
reinterpret_cast<uint16_t *>(Address)[2] = 0x24;
// Write first part of stmg.
Address[0] = 0xeb2ff010;
} else {
// Restore the j instruction which branches over the sled.
// NOTE(review): the displacement field of 0xa7f4000b is 0x000b, which
// appears to be 11 halfwords (22 bytes), while cSledLength for s390x is 18.
// Confirm against the sled layout emitted by the compiler.
Address[0] = 0xa7f4000b;
}
return true;
}
// Patches (Enable == true) or unpatches (Enable == false) the function exit
// sled located at Sled.address().
//
// When enabling, the same three stores are made as for the entry sled:
// FuncId into bytes 8-11, 0x0024 into bytes 4-5 and 0xeb2ff010 into bytes 0-3.
// When disabling, only the first halfword of the sled is rewritten.
//
// Trampolines is not used by this implementation. The function always returns
// true.
bool __xray::patchFunctionExit(
const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
uint32_t *Address = reinterpret_cast<uint32_t *>(Sled.address());
// TODO: Trampoline addresses are currently inserted at compile-time, using
// __xray_FunctionEntry and __xray_FunctionExit only.
// To support DSO instrumentation, trampolines have to be written during
// patching (see implementation on X86_64, e.g.).
if (Enable) {
// The resulting code is:
// stmg %r2, %r15, 16(%r15)
// llilf %r2, FuncID
// j __xray_FunctionExit@GOT
// The FuncId and the stmg instruction must be written.
// The first word of the sled is stored last; presumably this keeps the
// original first instruction in place until the remaining bytes are final
// (TODO confirm).
// Write FuncId into llilf.
Address[2] = FuncId;
// Write last part of stmg.
reinterpret_cast<uint16_t *>(Address)[2] = 0x24;
// Write first part of stmg.
Address[0] = 0xeb2ff010;
} else {
// br %r14 instruction.
reinterpret_cast<uint16_t *>(Address)[0] = 0x07fe;
}
return true;
}
// Tail-exit sleds get no dedicated handling: the request is forwarded
// unchanged to patchFunctionExit and its result is returned.
bool __xray::patchFunctionTailExit(
    const bool Enable, const uint32_t FuncId, const XRaySledEntry &Sled,
    const XRayTrampolines &Trampolines) XRAY_NEVER_INSTRUMENT {
  const bool ExitSledPatched =
      patchFunctionExit(Enable, FuncId, Sled, Trampolines);
  return ExitSledPatched;
}
// Custom event sleds are not patched by this implementation. The arguments
// are ignored and the function always reports failure.
bool __xray::patchCustomEvent(const bool Enable, const uint32_t FuncId,
                              const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // TODO: Implement patching of custom event sleds.
  constexpr bool SledWasPatched = false;
  return SledWasPatched;
}
// Typed event sleds are not patched by this implementation. The arguments
// are ignored and the function always reports failure.
bool __xray::patchTypedEvent(const bool Enable, const uint32_t FuncId,
                             const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // TODO: Implement patching of typed event sleds.
  constexpr bool SledWasPatched = false;
  return SledWasPatched;
}
// Placeholder definition of the argument-logging entry trampoline. The body
// is intentionally empty.
extern "C" void __xray_ArgLoggerEntry() XRAY_NEVER_INSTRUMENT {
// TODO this will have to be implemented in the trampoline assembly file.
}
// Placeholder definition of the tail-exit trampoline. The body is
// intentionally empty.
extern "C" void __xray_FunctionTailExit() XRAY_NEVER_INSTRUMENT {
// For s390x, calls to __xray_FunctionEntry and __xray_FunctionExit
// are statically inserted into the sled. Tail exits are handled like normal
// function exits. This trampoline is therefore not implemented.
// This stub is placed here to avoid linking issues.
}

View File

@ -0,0 +1,176 @@
//===-- xray_trampoline_s390x.S ---------------------------------*- ASM -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// This implements the s390x-specific assembler for the trampolines.
// 2 versions of the functions are provided: one which does not store the
// vector registers, and one which does store them. The compiler decides
// which to call based on the availability of the vector extension.
//
//===----------------------------------------------------------------------===//
.text
// Minimal stack frame size
#define STACKSZ 160
// Minimal stack frame size (160) plus space for 8 vector registers of 16 bytes each.
#define STACKSZ_VEC 288
//===----------------------------------------------------------------------===//
# Entry trampoline without vector register handling. Calls the handler
# stored in __xray::XRayPatchedFunction (when it is not null) with the
# function id in r2 and the entry type 0 in r3, then continues in the
# instrumented function.
.globl __xray_FunctionEntry
.p2align 4
.type __xray_FunctionEntry,@function
__xray_FunctionEntry:
# The registers r2-15 of the instrumented function are already saved in the
# stack frame. On entry, r2 contains the function id, and %r14 the address
# of the first instruction of the instrumented function.
# Register r14 will be stored in the slot reserved for compiler use.
stg %r14, 8(%r15)
# Save f0, f2, f4 and f6 at offsets 128-152 of the current frame. They are
# reloaded after the handler call.
std %f0, 128(%r15)
std %f2, 136(%r15)
std %f4, 144(%r15)
std %f6, 152(%r15)
# Allocate a new frame of STACKSZ bytes.
aghi %r15, -STACKSZ
# Load the handler pointer through the GOT and skip the call when the
# pointer is null.
lgrl %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
ltg %r1, 0(%r1)
je .Lrestore0
# Set r3 to XRayEntryType::ENTRY = 0.
# The FuncId is still stored in r2.
lghi %r3, 0
# Call the handler.
basr %r14, %r1
.Lrestore0:
# Reload the floating point registers saved above. The offsets are relative
# to the new frame, hence the added STACKSZ.
ld %f6, STACKSZ+152(%r15)
ld %f4, STACKSZ+144(%r15)
ld %f2, STACKSZ+136(%r15)
ld %f0, STACKSZ+128(%r15)
# Reload r1 from the slot written by the stg above (the value of r14 on
# entry) and r2-r15 from the slots which were already filled on entry.
# Reloading r15 also releases the frame allocated above.
lmg %r1, %r15, STACKSZ+8(%r15)
# Continue in the instrumented function.
br %r1
.Lfunc_end0:
.size __xray_FunctionEntry, .Lfunc_end0-__xray_FunctionEntry
//===----------------------------------------------------------------------===//
# Entry trampoline which additionally preserves the vector registers
# v24-v31 across the handler call. Otherwise it performs the same steps as
# __xray_FunctionEntry, using a frame of STACKSZ_VEC bytes.
.globl __xray_FunctionEntryVec
.p2align 4
.type __xray_FunctionEntryVec,@function
__xray_FunctionEntryVec:
# The registers r2-15 of the instrumented function are already saved in the
# stack frame. On entry, r2 contains the function id, and %r14 the address
# of the first instruction of the instrumented function.
# Register r14 will be stored in the slot reserved for compiler use.
stg %r14, 8(%r15)
# Save f0, f2, f4 and f6 at offsets 128-152 of the current frame. They are
# reloaded after the handler call.
std %f0, 128(%r15)
std %f2, 136(%r15)
std %f4, 144(%r15)
std %f6, 152(%r15)
# Allocate a new frame of STACKSZ_VEC bytes and save v24-v31 in it,
# starting at offset 160.
aghi %r15, -STACKSZ_VEC
vstm %v24, %v31, 160(%r15)
# Load the handler pointer through the GOT and skip the call when the
# pointer is null.
lgrl %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
ltg %r1, 0(%r1)
je .Lrestore1
# Set r3 to XRayEntryType::ENTRY = 0.
# The FuncId is still stored in r2.
lghi %r3, 0
# Call the handler.
basr %r14, %r1
.Lrestore1:
# Reload the vector and floating point registers saved above. The offsets
# of the floating point slots are relative to the new frame, hence the
# added STACKSZ_VEC.
vlm %v24, %v31, 160(%r15)
ld %f6, STACKSZ_VEC+152(%r15)
ld %f4, STACKSZ_VEC+144(%r15)
ld %f2, STACKSZ_VEC+136(%r15)
ld %f0, STACKSZ_VEC+128(%r15)
# Reload r1 from the slot written by the stg above (the value of r14 on
# entry) and r2-r15 from the slots which were already filled on entry.
# Reloading r15 also releases the frame allocated above.
lmg %r1, %r15, STACKSZ_VEC+8(%r15)
# Continue in the instrumented function.
br %r1
.Lfunc_end1:
.size __xray_FunctionEntryVec, .Lfunc_end1-__xray_FunctionEntryVec
//===----------------------------------------------------------------------===//
# Exit trampoline without vector register handling. Calls the handler
# stored in __xray::XRayPatchedFunction (when it is not null) with the
# function id in r2 and the entry type 1 in r3, then returns through the
# reloaded r14.
.globl __xray_FunctionExit
.p2align 4
.type __xray_FunctionExit,@function
__xray_FunctionExit:
# The registers r2-15 of the instrumented function are already saved in the
# stack frame. On entry, the register r2 contains the function id.
# At the end, the function jumps to the address saved in the slot for r14,
# which contains the return address into the caller of the instrumented
# function.
# Save f0, f2, f4 and f6 at offsets 128-152 of the current frame. They are
# reloaded after the handler call.
std %f0, 128(%r15)
std %f2, 136(%r15)
std %f4, 144(%r15)
std %f6, 152(%r15)
# Allocate a new frame of STACKSZ bytes.
aghi %r15, -STACKSZ
# Load the handler pointer through the GOT and skip the call when the
# pointer is null.
lgrl %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
ltg %r1, 0(%r1)
je .Lrestore2
# Set r3 to XRayEntryType::EXIT = 1.
# The FuncId is still stored in r2.
lghi %r3, 1
# Call the handler.
basr %r14, %r1
.Lrestore2:
# Reload the floating point registers saved above. The offsets are relative
# to the new frame, hence the added STACKSZ.
ld %f6, STACKSZ+152(%r15)
ld %f4, STACKSZ+144(%r15)
ld %f2, STACKSZ+136(%r15)
ld %f0, STACKSZ+128(%r15)
# Reload r2-r15 from the slots which were already filled on entry.
# Reloading r15 also releases the frame allocated above.
lmg %r2, %r15, STACKSZ+16(%r15)
# Return to the caller of the instrumented function.
br %r14
.Lfunc_end2:
.size __xray_FunctionExit, .Lfunc_end2-__xray_FunctionExit
//===----------------------------------------------------------------------===//
# Exit trampoline which additionally preserves the vector registers v24-v31
# across the handler call. Otherwise it performs the same steps as
# __xray_FunctionExit, using a frame of STACKSZ_VEC bytes.
.globl __xray_FunctionExitVec
.p2align 4
.type __xray_FunctionExitVec,@function
__xray_FunctionExitVec:
# The registers r2-15 of the instrumented function are already saved in the
# stack frame. On entry, the register r2 contains the function id.
# At the end, the function jumps to the address saved in the slot for r14,
# which contains the return address into the caller of the instrumented
# function.
# Save f0, f2, f4 and f6 at offsets 128-152 of the current frame. They are
# reloaded after the handler call.
std %f0, 128(%r15)
std %f2, 136(%r15)
std %f4, 144(%r15)
std %f6, 152(%r15)
# Allocate a new frame of STACKSZ_VEC bytes and save v24-v31 in it,
# starting at offset 160.
aghi %r15, -STACKSZ_VEC
vstm %v24, %v31, 160(%r15)
# Load the handler pointer through the GOT and skip the call when the
# pointer is null.
lgrl %r1, _ZN6__xray19XRayPatchedFunctionE@GOT
ltg %r1, 0(%r1)
je .Lrestore3
# Set r3 to XRayEntryType::EXIT = 1.
# The FuncId is still stored in r2.
lghi %r3, 1
# Call the handler.
basr %r14, %r1
.Lrestore3:
# Reload the vector and floating point registers saved above. The offsets
# of the floating point slots are relative to the new frame, hence the
# added STACKSZ_VEC.
vlm %v24, %v31, 160(%r15)
ld %f6, STACKSZ_VEC+152(%r15)
ld %f4, STACKSZ_VEC+144(%r15)
ld %f2, STACKSZ_VEC+136(%r15)
ld %f0, STACKSZ_VEC+128(%r15)
# Reload r2-r15 from the slots which were already filled on entry.
# Reloading r15 also releases the frame allocated above.
lmg %r2, %r15, STACKSZ_VEC+16(%r15)
# Return to the caller of the instrumented function.
br %r14
.Lfunc_end3:
# The size must be attached to this symbol, not to __xray_FunctionExit.
.size __xray_FunctionExitVec, .Lfunc_end3-__xray_FunctionExitVec
//===----------------------------------------------------------------------===//
.section ".note.GNU-stack","",@progbits

View File

@ -83,6 +83,28 @@ inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
} // namespace __xray
#elif defined(__s390x__)
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_internal_defs.h"
#include "xray_defs.h"
#include <cerrno>
#include <cstdint>
#include <time.h>
namespace __xray {

// Reports that the timestamp source can be used; no runtime feature probe is
// performed.
inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }

// Returns the value of __builtin_readcyclecounter().
// The id of the executing CPU is not determined here. CPU is set to 0 so that
// callers never read an indeterminate value from the out-parameter.
ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
  CPU = 0;
  return __builtin_readcyclecounter();
}

// Returns the number of timestamp ticks per second, reported here as
// NanosecondsPerSecond.
// NOTE(review): this treats one tick of readTSC() as one nanosecond; confirm
// that this matches the unit of __builtin_readcyclecounter() on s390x.
inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
  return NanosecondsPerSecond;
}

} // namespace __xray
#else
#error Target architecture is not supported.
#endif // CPU architecture