[openmp][nfc] Refactor shared/lds smartstack for spirv (#131905)

SPIR-V doesn't have implicit conversions between address spaces (at least at present; we might need to change that), and address-space-qualified `this` pointers are not handled well by clang. This commit changes the single instance of the smartstack to be an explicit singleton, which gives fractionally simpler IR generation (no `this` pointer) and sidesteps the work-in-progress spirv64-- OpenMP target not being able to compile the original version.
2025-04-16 00:16:30 +00:00 · 2025-03-18 20:33:24 +00:00 · 2025-03-18 20:33:24 +00:00 · c02b935a9b
commit c02b935a9b
parent 1442fe0c89
1 changed files with 21 additions and 16 deletions
--- a/offload/DeviceRTL/src/State.cpp
+++ b/offload/DeviceRTL/src/State.cpp
@ -75,19 +75,19 @@ extern "C" {
 ///
 struct SharedMemorySmartStackTy {
  /// Initialize the stack. Must be called by all threads.
-  void init(bool IsSPMD);
+  static void init(bool IsSPMD);

  /// Allocate \p Bytes on the stack for the encountering thread. Each thread
  /// can call this function.
-  void *push(uint64_t Bytes);
+  static void *push(uint64_t Bytes);

  /// Deallocate the last allocation made by the encountering thread and pointed
  /// to by \p Ptr from the stack. Each thread can call this function.
-  void pop(void *Ptr, uint64_t Bytes);
+  static void pop(void *Ptr, uint64_t Bytes);

 private:
  /// Compute the size of the storage space reserved for a thread.
-  uint32_t computeThreadStorageTotal() {
+  static uint32_t computeThreadStorageTotal() {
    uint32_t NumLanesInBlock = mapping::getNumberOfThreadsInBlock();
    return utils::alignDown((state::SharedScratchpadSize / NumLanesInBlock),
                            allocator::ALIGNMENT);
@ -95,23 +95,28 @@ private:

  /// Return the top address of the warp data stack, that is the first address
  /// this warp will allocate memory at next.
-  void *getThreadDataTop(uint32_t TId) {
-    return &Data[computeThreadStorageTotal() * TId + Usage[TId]];
+  static void *getThreadDataTop(uint32_t TId) {
+    return (void *)&Data[computeThreadStorageTotal() * TId + Usage[TId]];
  }

  /// The actual storage, shared among all warps.
-  [[gnu::aligned(
-      allocator::ALIGNMENT)]] unsigned char Data[state::SharedScratchpadSize];
-  [[gnu::aligned(
-      allocator::ALIGNMENT)]] unsigned char Usage[mapping::MaxThreadsPerTeam];
+
+  [[gnu::aligned(allocator::ALIGNMENT)]] [[clang::loader_uninitialized]]
+  static Local<unsigned char> Data[state::SharedScratchpadSize];
+  [[gnu::aligned(allocator::ALIGNMENT)]] [[clang::loader_uninitialized]]
+  static Local<unsigned char> Usage[mapping::MaxThreadsPerTeam];
 };

+Local<unsigned char>
+    SharedMemorySmartStackTy::Data[state::SharedScratchpadSize];
+Local<unsigned char>
+    SharedMemorySmartStackTy::Usage[mapping::MaxThreadsPerTeam];
+
 static_assert(state::SharedScratchpadSize / mapping::MaxThreadsPerTeam <= 256,
              "Shared scratchpad of this size not supported yet.");

-/// The allocation of a single shared memory scratchpad.
-[[clang::loader_uninitialized]] static Local<SharedMemorySmartStackTy>
-    SharedMemorySmartStack;
+/// The single shared memory scratchpad.
+using SharedMemorySmartStack = SharedMemorySmartStackTy;

 void SharedMemorySmartStackTy::init(bool IsSPMD) {
  Usage[mapping::getThreadIdInBlock()] = 0;
@ -163,11 +168,11 @@ void SharedMemorySmartStackTy::pop(void *Ptr, uint64_t Bytes) {
 void *memory::getDynamicBuffer() { return DynamicSharedBuffer; }

 void *memory::allocShared(uint64_t Bytes, const char *Reason) {
-  return SharedMemorySmartStack.push(Bytes);
+  return SharedMemorySmartStack::push(Bytes);
 }

 void memory::freeShared(void *Ptr, uint64_t Bytes, const char *Reason) {
-  SharedMemorySmartStack.pop(Ptr, Bytes);
+  SharedMemorySmartStack::pop(Ptr, Bytes);
 }

 void *memory::allocGlobal(uint64_t Bytes, const char *Reason) {
@ -247,7 +252,7 @@ int returnValIfLevelIsActive(int Level, int Val, int DefaultVal,

 void state::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
                 KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
-  SharedMemorySmartStack.init(IsSPMD);
+  SharedMemorySmartStack::init(IsSPMD);
  if (mapping::isInitialThreadInLevel0(IsSPMD)) {
    TeamState.init(IsSPMD);
    ThreadStates = nullptr;