From c02b935a9be888bbdf9f8cb0bf980bd411ae5893 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Tue, 18 Mar 2025 20:33:24 +0000 Subject: [PATCH] [openmp][nfc] Refactor shared/lds smartstack for spirv (#131905) SPIR-V doesn't have implicit conversions between address spaces (at least at present; we might need to change that), and address-space-qualified `this` pointers are not handled well by clang. This commit changes the single instance of the smartstack to be explicitly a singleton, for fractionally simpler IR generation (no `this` pointer) and to sidestep the work-in-progress spirv64-- OpenMP target not being able to compile the original version. --- offload/DeviceRTL/src/State.cpp | 37 +++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/offload/DeviceRTL/src/State.cpp b/offload/DeviceRTL/src/State.cpp index cbe973514534..7995b37cdec6 100644 --- a/offload/DeviceRTL/src/State.cpp +++ b/offload/DeviceRTL/src/State.cpp @@ -75,19 +75,19 @@ extern "C" { /// struct SharedMemorySmartStackTy { /// Initialize the stack. Must be called by all threads. - void init(bool IsSPMD); + static void init(bool IsSPMD); /// Allocate \p Bytes on the stack for the encountering thread. Each thread /// can call this function. - void *push(uint64_t Bytes); + static void *push(uint64_t Bytes); /// Deallocate the last allocation made by the encountering thread and pointed /// to by \p Ptr from the stack. Each thread can call this function. - void pop(void *Ptr, uint64_t Bytes); + static void pop(void *Ptr, uint64_t Bytes); private: /// Compute the size of the storage space reserved for a thread.
- uint32_t computeThreadStorageTotal() { + static uint32_t computeThreadStorageTotal() { uint32_t NumLanesInBlock = mapping::getNumberOfThreadsInBlock(); return utils::alignDown((state::SharedScratchpadSize / NumLanesInBlock), allocator::ALIGNMENT); @@ -95,23 +95,28 @@ private: /// Return the top address of the warp data stack, that is the first address /// this warp will allocate memory at next. - void *getThreadDataTop(uint32_t TId) { - return &Data[computeThreadStorageTotal() * TId + Usage[TId]]; + static void *getThreadDataTop(uint32_t TId) { + return (void *)&Data[computeThreadStorageTotal() * TId + Usage[TId]]; } /// The actual storage, shared among all warps. - [[gnu::aligned( - allocator::ALIGNMENT)]] unsigned char Data[state::SharedScratchpadSize]; - [[gnu::aligned( - allocator::ALIGNMENT)]] unsigned char Usage[mapping::MaxThreadsPerTeam]; + + [[gnu::aligned(allocator::ALIGNMENT)]] [[clang::loader_uninitialized]] + static Local Data[state::SharedScratchpadSize]; + [[gnu::aligned(allocator::ALIGNMENT)]] [[clang::loader_uninitialized]] + static Local Usage[mapping::MaxThreadsPerTeam]; }; +Local + SharedMemorySmartStackTy::Data[state::SharedScratchpadSize]; +Local + SharedMemorySmartStackTy::Usage[mapping::MaxThreadsPerTeam]; + static_assert(state::SharedScratchpadSize / mapping::MaxThreadsPerTeam <= 256, "Shared scratchpad of this size not supported yet."); -/// The allocation of a single shared memory scratchpad. -[[clang::loader_uninitialized]] static Local - SharedMemorySmartStack; +/// The single shared memory scratchpad. 
+using SharedMemorySmartStack = SharedMemorySmartStackTy; void SharedMemorySmartStackTy::init(bool IsSPMD) { Usage[mapping::getThreadIdInBlock()] = 0; @@ -163,11 +168,11 @@ void SharedMemorySmartStackTy::pop(void *Ptr, uint64_t Bytes) { void *memory::getDynamicBuffer() { return DynamicSharedBuffer; } void *memory::allocShared(uint64_t Bytes, const char *Reason) { - return SharedMemorySmartStack.push(Bytes); + return SharedMemorySmartStack::push(Bytes); } void memory::freeShared(void *Ptr, uint64_t Bytes, const char *Reason) { - SharedMemorySmartStack.pop(Ptr, Bytes); + SharedMemorySmartStack::pop(Ptr, Bytes); } void *memory::allocGlobal(uint64_t Bytes, const char *Reason) { @@ -247,7 +252,7 @@ int returnValIfLevelIsActive(int Level, int Val, int DefaultVal, void state::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment, KernelLaunchEnvironmentTy &KernelLaunchEnvironment) { - SharedMemorySmartStack.init(IsSPMD); + SharedMemorySmartStack::init(IsSPMD); if (mapping::isInitialThreadInLevel0(IsSPMD)) { TeamState.init(IsSPMD); ThreadStates = nullptr;