mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-27 12:46:08 +00:00

Tracks the registers that explicit and hidden arguments are preloaded to with new code object metadata. IR arguments may be split across multiple parts by isel, and SGPR tuple alignment means that an argument may be spread across multiple registers. To support this, some of the utilities for hidden kernel arguments are moved to `AMDGPUArgumentUsageInfo.h`. Additional bookkeeping is also needed for tracking purposes.
220 lines
8.8 KiB
C++
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "AMDGPUArgumentUsageInfo.h"
|
|
#include "AMDGPU.h"
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
#include "SIRegisterInfo.h"
|
|
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
|
#include "llvm/IR/Function.h"
|
|
#include "llvm/Support/NativeFormatting.h"
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
using namespace llvm;
|
|
using namespace llvm::KernArgPreload;
|
|
|
|
#define DEBUG_TYPE "amdgpu-argument-reg-usage-info"
|
|
|
|
INITIALIZE_PASS(AMDGPUArgumentUsageInfo, DEBUG_TYPE,
|
|
"Argument Register Usage Information Storage", false, true)
|
|
|
|
constexpr HiddenArgInfo HiddenArgUtils::HiddenArgs[END_HIDDEN_ARGS];
|
|
|
|
// Pretty-print this descriptor: the physical register or the stack offset the
// argument lives at, followed by the extraction mask, if any.
void ArgDescriptor::print(raw_ostream &OS,
                          const TargetRegisterInfo *TRI) const {
  if (!isSet()) {
    OS << "<not set>\n";
    return;
  }

  if (isRegister())
    OS << "Reg " << printReg(getRegister(), TRI);
  else
    OS << "Stack offset " << getStackOffset();

  if (isMasked()) {
    // A masked argument occupies only some bits of its register; print the
    // selecting bitmask in 0x... form.
    OS << " & ";
    llvm::write_hex(OS, Mask, llvm::HexPrintStyle::PrefixLower);
  }

  OS << '\n';
}
|
|
|
|
char AMDGPUArgumentUsageInfo::ID = 0;
|
|
|
|
const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::ExternFunctionInfo{};
|
|
|
|
// Hardcoded registers from fixed function ABI
|
|
const AMDGPUFunctionArgInfo AMDGPUArgumentUsageInfo::FixedABIFunctionInfo
|
|
= AMDGPUFunctionArgInfo::fixedABILayout();
|
|
|
|
// No per-module setup is needed; the map is filled lazily by users of the
// analysis. Returns false (the module is not modified).
bool AMDGPUArgumentUsageInfo::doInitialization(Module &M) {
  return false;
}
|
|
|
|
// Drop all recorded per-function argument info once the module is finished.
// Returns false (the module is not modified).
bool AMDGPUArgumentUsageInfo::doFinalization(Module &M) {
  ArgInfoMap.clear();
  return false;
}
|
|
|
|
// TODO: Print preload kernargs?
|
|
void AMDGPUArgumentUsageInfo::print(raw_ostream &OS, const Module *M) const {
|
|
for (const auto &FI : ArgInfoMap) {
|
|
OS << "Arguments for " << FI.first->getName() << '\n'
|
|
<< " PrivateSegmentBuffer: " << FI.second.PrivateSegmentBuffer
|
|
<< " DispatchPtr: " << FI.second.DispatchPtr
|
|
<< " QueuePtr: " << FI.second.QueuePtr
|
|
<< " KernargSegmentPtr: " << FI.second.KernargSegmentPtr
|
|
<< " DispatchID: " << FI.second.DispatchID
|
|
<< " FlatScratchInit: " << FI.second.FlatScratchInit
|
|
<< " PrivateSegmentSize: " << FI.second.PrivateSegmentSize
|
|
<< " WorkGroupIDX: " << FI.second.WorkGroupIDX
|
|
<< " WorkGroupIDY: " << FI.second.WorkGroupIDY
|
|
<< " WorkGroupIDZ: " << FI.second.WorkGroupIDZ
|
|
<< " WorkGroupInfo: " << FI.second.WorkGroupInfo
|
|
<< " LDSKernelId: " << FI.second.LDSKernelId
|
|
<< " PrivateSegmentWaveByteOffset: "
|
|
<< FI.second.PrivateSegmentWaveByteOffset
|
|
<< " ImplicitBufferPtr: " << FI.second.ImplicitBufferPtr
|
|
<< " ImplicitArgPtr: " << FI.second.ImplicitArgPtr
|
|
<< " WorkItemIDX " << FI.second.WorkItemIDX
|
|
<< " WorkItemIDY " << FI.second.WorkItemIDY
|
|
<< " WorkItemIDZ " << FI.second.WorkItemIDZ
|
|
<< '\n';
|
|
}
|
|
}
|
|
|
|
std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
|
|
AMDGPUFunctionArgInfo::getPreloadedValue(
|
|
AMDGPUFunctionArgInfo::PreloadedValue Value) const {
|
|
switch (Value) {
|
|
case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_BUFFER: {
|
|
return std::tuple(PrivateSegmentBuffer ? &PrivateSegmentBuffer : nullptr,
|
|
&AMDGPU::SGPR_128RegClass, LLT::fixed_vector(4, 32));
|
|
}
|
|
case AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR:
|
|
return std::tuple(ImplicitBufferPtr ? &ImplicitBufferPtr : nullptr,
|
|
&AMDGPU::SGPR_64RegClass,
|
|
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
|
|
case AMDGPUFunctionArgInfo::WORKGROUP_ID_X:
|
|
return std::tuple(WorkGroupIDX ? &WorkGroupIDX : nullptr,
|
|
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
|
|
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Y:
|
|
return std::tuple(WorkGroupIDY ? &WorkGroupIDY : nullptr,
|
|
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
|
|
case AMDGPUFunctionArgInfo::WORKGROUP_ID_Z:
|
|
return std::tuple(WorkGroupIDZ ? &WorkGroupIDZ : nullptr,
|
|
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
|
|
case AMDGPUFunctionArgInfo::LDS_KERNEL_ID:
|
|
return std::tuple(LDSKernelId ? &LDSKernelId : nullptr,
|
|
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
|
|
case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
|
|
return std::tuple(
|
|
PrivateSegmentWaveByteOffset ? &PrivateSegmentWaveByteOffset : nullptr,
|
|
&AMDGPU::SGPR_32RegClass, LLT::scalar(32));
|
|
case AMDGPUFunctionArgInfo::PRIVATE_SEGMENT_SIZE:
|
|
return {PrivateSegmentSize ? &PrivateSegmentSize : nullptr,
|
|
&AMDGPU::SGPR_32RegClass, LLT::scalar(32)};
|
|
case AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR:
|
|
return std::tuple(KernargSegmentPtr ? &KernargSegmentPtr : nullptr,
|
|
&AMDGPU::SGPR_64RegClass,
|
|
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
|
|
case AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR:
|
|
return std::tuple(ImplicitArgPtr ? &ImplicitArgPtr : nullptr,
|
|
&AMDGPU::SGPR_64RegClass,
|
|
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
|
|
case AMDGPUFunctionArgInfo::DISPATCH_ID:
|
|
return std::tuple(DispatchID ? &DispatchID : nullptr,
|
|
&AMDGPU::SGPR_64RegClass, LLT::scalar(64));
|
|
case AMDGPUFunctionArgInfo::FLAT_SCRATCH_INIT:
|
|
return std::tuple(FlatScratchInit ? &FlatScratchInit : nullptr,
|
|
&AMDGPU::SGPR_64RegClass, LLT::scalar(64));
|
|
case AMDGPUFunctionArgInfo::DISPATCH_PTR:
|
|
return std::tuple(DispatchPtr ? &DispatchPtr : nullptr,
|
|
&AMDGPU::SGPR_64RegClass,
|
|
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
|
|
case AMDGPUFunctionArgInfo::QUEUE_PTR:
|
|
return std::tuple(QueuePtr ? &QueuePtr : nullptr, &AMDGPU::SGPR_64RegClass,
|
|
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
|
|
case AMDGPUFunctionArgInfo::WORKITEM_ID_X:
|
|
return std::tuple(WorkItemIDX ? &WorkItemIDX : nullptr,
|
|
&AMDGPU::VGPR_32RegClass, LLT::scalar(32));
|
|
case AMDGPUFunctionArgInfo::WORKITEM_ID_Y:
|
|
return std::tuple(WorkItemIDY ? &WorkItemIDY : nullptr,
|
|
&AMDGPU::VGPR_32RegClass, LLT::scalar(32));
|
|
case AMDGPUFunctionArgInfo::WORKITEM_ID_Z:
|
|
return std::tuple(WorkItemIDZ ? &WorkItemIDZ : nullptr,
|
|
&AMDGPU::VGPR_32RegClass, LLT::scalar(32));
|
|
}
|
|
llvm_unreachable("unexpected preloaded value type");
|
|
}
|
|
|
|
// Build the argument layout for the fixed function ABI: every special value
// is assigned a hardcoded register regardless of whether the callee uses it.
AMDGPUFunctionArgInfo AMDGPUFunctionArgInfo::fixedABILayout() {
  AMDGPUFunctionArgInfo AI;
  AI.PrivateSegmentBuffer =
      ArgDescriptor::createRegister(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3);
  AI.DispatchPtr = ArgDescriptor::createRegister(AMDGPU::SGPR4_SGPR5);
  AI.QueuePtr = ArgDescriptor::createRegister(AMDGPU::SGPR6_SGPR7);

  // Do not pass kernarg segment pointer, only pass increment version in its
  // place.
  AI.ImplicitArgPtr = ArgDescriptor::createRegister(AMDGPU::SGPR8_SGPR9);
  AI.DispatchID = ArgDescriptor::createRegister(AMDGPU::SGPR10_SGPR11);

  // Skip FlatScratchInit/PrivateSegmentSize
  AI.WorkGroupIDX = ArgDescriptor::createRegister(AMDGPU::SGPR12);
  AI.WorkGroupIDY = ArgDescriptor::createRegister(AMDGPU::SGPR13);
  AI.WorkGroupIDZ = ArgDescriptor::createRegister(AMDGPU::SGPR14);
  AI.LDSKernelId = ArgDescriptor::createRegister(AMDGPU::SGPR15);

  // All three workitem IDs are packed into VGPR31, 10 bits each, selected by
  // shifted copies of this mask.
  const unsigned Mask = 0x3ff;
  AI.WorkItemIDX = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask);
  AI.WorkItemIDY = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask << 10);
  AI.WorkItemIDZ = ArgDescriptor::createRegister(AMDGPU::VGPR31, Mask << 20);
  return AI;
}
|
|
|
|
// Collect every preload descriptor whose OrigArgIdx matches ArgIdx. An IR
// argument may be split into multiple parts, so the results are sorted by
// PartIdx to restore the original part order.
SmallVector<const KernArgPreloadDescriptor *, 4>
AMDGPUFunctionArgInfo::getPreloadDescriptorsForArgIdx(unsigned ArgIdx) const {
  SmallVector<const KernArgPreloadDescriptor *, 4> Results;
  for (const auto &KV : PreloadKernArgs) {
    if (KV.second.OrigArgIdx == ArgIdx)
      Results.push_back(&KV.second);
  }

  llvm::stable_sort(Results, [](const KernArgPreloadDescriptor *A,
                                const KernArgPreloadDescriptor *B) {
    return A->PartIdx < B->PartIdx;
  });

  return Results;
}
|
|
|
|
std::optional<const KernArgPreloadDescriptor *>
|
|
AMDGPUFunctionArgInfo::getHiddenArgPreloadDescriptor(HiddenArg HA) const {
|
|
assert(HA < END_HIDDEN_ARGS);
|
|
|
|
auto HiddenArgIt = PreloadHiddenArgsIndexMap.find(HA);
|
|
if (HiddenArgIt == PreloadHiddenArgsIndexMap.end())
|
|
return std::nullopt;
|
|
|
|
auto KernArgIt = PreloadKernArgs.find(HiddenArgIt->second);
|
|
if (KernArgIt == PreloadKernArgs.end())
|
|
return std::nullopt;
|
|
|
|
return &KernArgIt->second;
|
|
}
|
|
|
|
// Return the recorded argument info for F, or the fixed-ABI layout when
// nothing was recorded for it.
const AMDGPUFunctionArgInfo &
AMDGPUArgumentUsageInfo::lookupFuncArgInfo(const Function &F) const {
  auto I = ArgInfoMap.find(&F);
  if (I == ArgInfoMap.end())
    return FixedABIFunctionInfo;
  return I->second;
}
|