
Refactor the pass to only support `IntrinsicInst` calls. `ReplaceWithVecLib` used to support instructions, as AArch64 was using this pass to replace a vectorized frem instruction with the fmod vector library call (through TLI). As this replacement is now done by the codegen (#83859), there is no need for this pass to support instructions.

Additionally, removed 'frem' tests from:
- AArch64/replace-with-veclib-armpl.ll
- AArch64/replace-with-veclib-sleef-scalable.ll
- AArch64/replace-with-veclib-sleef.ll

Such testing is done at the codegen level:
- #83859
//=== ReplaceWithVeclib.cpp - Replace vector intrinsics with veclib calls -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Replaces calls to LLVM intrinsics with matching calls to functions from a
// vector library (e.g., libmvec, SVML) using the TargetLibraryInfo interface.
//
//===----------------------------------------------------------------------===//
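//
// A minimal illustration of the rewrite this pass performs. The vector
// function name below is an example taken from the SLEEF GNU ABI mappings for
// AArch64; the actual name depends entirely on the vector library configured
// in TargetLibraryInfo and is not guaranteed here. A call such as
//
//   %r = call <4 x float> @llvm.sin.v4f32(<4 x float> %x)
//
// may be rewritten into a call to the mapped vector library routine, e.g.
//
//   %r = call <4 x float> @_ZGVnN4v_sinf(<4 x float> %x)
//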
#include "llvm/CodeGen/ReplaceWithVeclib.h"
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/ADT/Statistic.h"
|
|
#include "llvm/ADT/StringRef.h"
|
|
#include "llvm/Analysis/DemandedBits.h"
|
|
#include "llvm/Analysis/GlobalsModRef.h"
|
|
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
|
#include "llvm/Analysis/TargetLibraryInfo.h"
|
|
#include "llvm/Analysis/VectorUtils.h"
|
|
#include "llvm/CodeGen/Passes.h"
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
#include "llvm/IR/IRBuilder.h"
|
|
#include "llvm/IR/InstIterator.h"
|
|
#include "llvm/IR/IntrinsicInst.h"
|
|
#include "llvm/IR/VFABIDemangler.h"
|
|
#include "llvm/Support/TypeSize.h"
|
|
#include "llvm/Transforms/Utils/ModuleUtils.h"
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "replace-with-veclib"
|
|
|
|
STATISTIC(NumCallsReplaced,
|
|
"Number of calls to intrinsics that have been replaced.");
|
|
|
|
STATISTIC(NumTLIFuncDeclAdded,
|
|
"Number of vector library function declarations added.");
|
|
|
|
STATISTIC(NumFuncUsedAdded,
|
|
"Number of functions added to `llvm.compiler.used`");
|
|
|
|
/// Returns the vector library Function \p TLIName in the Module \p M, creating
/// a declaration for it if none exists yet. When \p ScalarFunc is not null, its
/// attributes are copied to the newly created Function.
Function *getTLIFunction(Module *M, FunctionType *VectorFTy,
                         const StringRef TLIName,
                         Function *ScalarFunc = nullptr) {
  Function *TLIFunc = M->getFunction(TLIName);
  if (!TLIFunc) {
    TLIFunc =
        Function::Create(VectorFTy, Function::ExternalLinkage, TLIName, *M);
    if (ScalarFunc)
      TLIFunc->copyAttributesFrom(ScalarFunc);

    LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
                      << TLIName << "` of type `" << *(TLIFunc->getType())
                      << "` to module.\n");

    ++NumTLIFuncDeclAdded;
    // Add the freshly created function to llvm.compiler.used, similar to how
    // it is done in InjectTLIMappings.
    appendToCompilerUsed(*M, {TLIFunc});
    LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
                      << "` to `@llvm.compiler.used`.\n");
    ++NumFuncUsedAdded;
  }
  return TLIFunc;
}

/// Replace the intrinsic call \p II with a call to \p TLIVecFunc, the
/// corresponding function from the vector library.
static void replaceWithTLIFunction(IntrinsicInst *II, VFInfo &Info,
                                   Function *TLIVecFunc) {
  IRBuilder<> IRBuilder(II);
  SmallVector<Value *> Args(II->args());
  if (auto OptMaskpos = Info.getParamIndexForOptionalMask()) {
    auto *MaskTy =
        VectorType::get(Type::getInt1Ty(II->getContext()), Info.Shape.VF);
    Args.insert(Args.begin() + OptMaskpos.value(),
                Constant::getAllOnesValue(MaskTy));
  }

  // Preserve the operand bundles.
  SmallVector<OperandBundleDef, 1> OpBundles;
  II->getOperandBundlesAsDefs(OpBundles);

  auto *Replacement = IRBuilder.CreateCall(TLIVecFunc, Args, OpBundles);
  II->replaceAllUsesWith(Replacement);
  // Preserve fast math flags for FP math.
  if (isa<FPMathOperator>(Replacement))
    Replacement->copyFastMathFlags(II);
}

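// When the TLI mapping is a masked variant, replaceWithTLIFunction (above)
// inserts an all-true mask operand at the position reported by the VFABI info.
// As a hedged illustration, with the mangled name below taken as an example
// from the SLEEF GNU ABI SVE mappings (the actual name depends on the
// configured vector library), a call such as
//
//   %r = call <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double> %x)
//
// may become a call to @_ZGVsMxv_cos that takes %x plus an extra all-true
// <vscale x 2 x i1> mask operand.
//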
/// Returns true if the call \p II to a vectorized intrinsic was successfully
/// replaced with a suitable function taking vector arguments, based on the
/// available mappings in \p TLI.
static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
                                    IntrinsicInst *II) {
  assert(II != nullptr && "Intrinsic cannot be null");
  // At the moment VFABI assumes the return type is always widened unless it is
  // a void type.
  auto *VTy = dyn_cast<VectorType>(II->getType());
  ElementCount EC(VTy ? VTy->getElementCount() : ElementCount::getFixed(0));
  // Compute the argument types of the corresponding scalar call and check that
  // all vector operands match the previously found EC.
  SmallVector<Type *, 8> ScalarArgTypes;
  Intrinsic::ID IID = II->getIntrinsicID();
  for (auto Arg : enumerate(II->args())) {
    auto *ArgTy = Arg.value()->getType();
    if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) {
      ScalarArgTypes.push_back(ArgTy);
    } else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
      ScalarArgTypes.push_back(VectorArgTy->getElementType());
      // When the return type is void, set EC to the element count of the first
      // vector argument, and disallow vector arguments with different ECs.
      if (EC.isZero())
        EC = VectorArgTy->getElementCount();
      else if (EC != VectorArgTy->getElementCount())
        return false;
    } else
      // Exit when the argument is supposed to be a vector but it isn't.
      return false;
  }

  // Try to reconstruct the name for the scalar version of the intrinsic,
  // using the scalar argument types.
  std::string ScalarName =
      Intrinsic::isOverloaded(IID)
          ? Intrinsic::getName(IID, ScalarArgTypes, II->getModule())
          : Intrinsic::getName(IID).str();

  // Try to find the mapping for the scalar version of this intrinsic and the
  // exact vector width of the call operands in the TargetLibraryInfo. First,
  // check with a non-masked variant, and if that fails try with a masked one.
  const VecDesc *VD =
      TLI.getVectorMappingInfo(ScalarName, EC, /*Masked*/ false);
  if (!VD && !(VD = TLI.getVectorMappingInfo(ScalarName, EC, /*Masked*/ true)))
    return false;

  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found TLI mapping from: `" << ScalarName
                    << "` and vector width " << EC << " to: `"
                    << VD->getVectorFnName() << "`.\n");

  // Replace the call to the intrinsic with a call to the vector library
  // function.
  Type *ScalarRetTy = II->getType()->getScalarType();
  FunctionType *ScalarFTy =
      FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false);
  const std::string MangledName = VD->getVectorFunctionABIVariantString();
  auto OptInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy);
  if (!OptInfo)
    return false;

  // There is no guarantee that the vectorized instruction followed the VFABI
  // specification when it was created, so an extra check is needed to make
  // sure that the operands of the vector function obtained via VFABI match
  // the operands of the original vector instruction.
  for (auto &VFParam : OptInfo->Shape.Parameters) {
    if (VFParam.ParamKind == VFParamKind::GlobalPredicate)
      continue;

    // tryDemangleForVFABI must return a valid ParamPos; otherwise it would be
    // a bug in the VFABI parser.
    assert(VFParam.ParamPos < II->arg_size() && "ParamPos has invalid range");
    Type *OrigTy = II->getArgOperand(VFParam.ParamPos)->getType();
    if (OrigTy->isVectorTy() != (VFParam.ParamKind == VFParamKind::Vector)) {
      LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Will not replace: " << ScalarName
                        << ". Wrong type at index " << VFParam.ParamPos << ": "
                        << *OrigTy << "\n");
      return false;
    }
  }

  FunctionType *VectorFTy = VFABI::createFunctionType(*OptInfo, ScalarFTy);
  if (!VectorFTy)
    return false;

  Function *TLIFunc =
      getTLIFunction(II->getModule(), VectorFTy, VD->getVectorFnName(),
                     II->getCalledFunction());
  replaceWithTLIFunction(II, *OptInfo, TLIFunc);
  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" << ScalarName
                    << "` with call to `" << TLIFunc->getName() << "`.\n");
  ++NumCallsReplaced;
  return true;
}

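// A worked example of the scalar-name reconstruction in
// replaceWithCallToVeclib (values are illustrative): for a call to
// @llvm.sin.v4f32 with one <4 x float> operand, ScalarArgTypes becomes
// {float}, EC is fixed at 4, and the reconstructed ScalarName is
// "llvm.sin.f32", which is the key used to query the TLI vector mappings.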
static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
  SmallVector<Instruction *> ReplacedCalls;
  for (auto &I : instructions(F)) {
    // Process only intrinsic calls that return void or a vector.
    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (!II->getType()->isVectorTy() && !II->getType()->isVoidTy())
        continue;

      if (replaceWithCallToVeclib(TLI, II))
        ReplacedCalls.push_back(&I);
    }
  }
  // Erase any intrinsic calls that were replaced with vector library calls.
  for (auto *I : ReplacedCalls)
    I->eraseFromParent();
  return !ReplacedCalls.empty();
}

////////////////////////////////////////////////////////////////////////////////
// New pass manager implementation.
////////////////////////////////////////////////////////////////////////////////
PreservedAnalyses ReplaceWithVeclib::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto Changed = runImpl(TLI, F);
  if (Changed) {
    LLVM_DEBUG(dbgs() << "Intrinsic calls replaced with vector libraries: "
                      << NumCallsReplaced << "\n");

    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    PA.preserve<TargetLibraryAnalysis>();
    PA.preserve<ScalarEvolutionAnalysis>();
    PA.preserve<LoopAccessAnalysis>();
    PA.preserve<DemandedBitsAnalysis>();
    PA.preserve<OptimizationRemarkEmitterAnalysis>();
    return PA;
  }

  // The pass did not replace any calls, hence it preserves all analyses.
  return PreservedAnalyses::all();
}

////////////////////////////////////////////////////////////////////////////////
// Legacy PM Implementation.
////////////////////////////////////////////////////////////////////////////////
bool ReplaceWithVeclibLegacy::runOnFunction(Function &F) {
  const TargetLibraryInfo &TLI =
      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  return runImpl(TLI, F);
}

void ReplaceWithVeclibLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  AU.addPreserved<TargetLibraryInfoWrapperPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
}

////////////////////////////////////////////////////////////////////////////////
// Legacy Pass manager initialization
////////////////////////////////////////////////////////////////////////////////
char ReplaceWithVeclibLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(ReplaceWithVeclibLegacy, DEBUG_TYPE,
                      "Replace intrinsics with calls to vector library", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(ReplaceWithVeclibLegacy, DEBUG_TYPE,
                    "Replace intrinsics with calls to vector library", false,
                    false)

FunctionPass *llvm::createReplaceWithVeclibLegacyPass() {
  return new ReplaceWithVeclibLegacy();
}
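// Usage note (illustrative, not authoritative): with the new pass manager the
// pass is registered as "replace-with-veclib", so it can typically be
// exercised with something like
// `opt -passes=replace-with-veclib -vector-library=sleefgnuabi`, where the
// -vector-library selection determines which TLI mappings are available.
// Exact option spellings may differ between LLVM versions.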