[HardwareLoops] NewPM support.

With the NPM, we're now defaulting to preserving LCSSA, so a couple
of tests have changed slightly.

Differential Revision: https://reviews.llvm.org/D140982
This commit is contained in:
Samuel Parker 2023-02-13 09:12:12 +00:00
parent c5e1000b29
commit 2a58be4239
25 changed files with 291 additions and 97 deletions

View File

@ -0,0 +1,76 @@
//===- HardwareLoops.h ------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Defines an IR pass for the creation of hardware loops.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_HARDWARELOOPS_H
#define LLVM_CODEGEN_HARDWARELOOPS_H
#include "llvm/IR/PassManager.h"
namespace llvm {
/// Optional overrides for the hardware-loops pass.
///
/// Every field is a std::optional that stays empty until the matching setter
/// is called. Be careful when reading the boolean fields directly: converting
/// a std::optional<bool> to bool (e.g. `if (Opts.Force)`) tests whether a
/// value is *present*, not whether the stored value is true. Use the get*()
/// accessors below, which treat an unset option as false.
struct HardwareLoopOptions {
  std::optional<unsigned> Decrement;
  std::optional<unsigned> Bitwidth;
  std::optional<bool> Force;
  std::optional<bool> ForcePhi;
  std::optional<bool> ForceNested;
  std::optional<bool> ForceGuard;

  // Chainable setters: each stores the value and returns *this.
  HardwareLoopOptions &setDecrement(unsigned Count) {
    Decrement = Count;
    return *this;
  }
  HardwareLoopOptions &setCounterBitwidth(unsigned Width) {
    Bitwidth = Width;
    return *this;
  }
  HardwareLoopOptions &setForce(bool Force) {
    // The parameter shadows the member, hence the explicit this->.
    this->Force = Force;
    return *this;
  }
  HardwareLoopOptions &setForcePhi(bool Force) {
    ForcePhi = Force;
    return *this;
  }
  HardwareLoopOptions &setForceNested(bool Force) {
    ForceNested = Force;
    return *this;
  }
  HardwareLoopOptions &setForceGuard(bool Force) {
    ForceGuard = Force;
    return *this;
  }

  // Value accessors: true only when the option was set *and* set to true.
  bool getForce() const { return Force.value_or(false); }
  bool getForcePhi() const { return ForcePhi.value_or(false); }
  bool getForceNested() const { return ForceNested.value_or(false); }
  bool getForceGuard() const { return ForceGuard.value_or(false); }
};
/// New pass manager wrapper for the hardware-loops transformation. The pass
/// carries a HardwareLoopOptions value supplied at construction time
/// (default-constructed, i.e. with no overrides, when omitted).
class HardwareLoopsPass : public PassInfoMixin<HardwareLoopsPass> {
public:
  explicit HardwareLoopsPass(HardwareLoopOptions Opts = {}) : Opts(Opts) {}

  /// Run the pass over \p F, obtaining analyses from \p AM.
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);

private:
  /// Option overrides captured at construction.
  HardwareLoopOptions Opts;
};
} // end namespace llvm
#endif // LLVM_CODEGEN_HARDWARELOOPS_H

View File

@ -542,7 +542,7 @@ namespace llvm {
FunctionPass *createEHContGuardCatchretPass();
/// Create Hardware Loop pass. \see HardwareLoops.cpp
FunctionPass *createHardwareLoopsPass();
FunctionPass *createHardwareLoopsLegacyPass();
/// This pass inserts pseudo probe annotation for callsite profiling.
FunctionPass *createPseudoProbeInserter();

View File

@ -153,7 +153,7 @@ void initializeGlobalOptLegacyPassPass(PassRegistry&);
void initializeGlobalSplitPass(PassRegistry&);
void initializeGlobalsAAWrapperPassPass(PassRegistry&);
void initializeGuardWideningLegacyPassPass(PassRegistry&);
void initializeHardwareLoopsPass(PassRegistry&);
void initializeHardwareLoopsLegacyPass(PassRegistry&);
void initializeMIRProfileLoaderPassPass(PassRegistry &);
void initializeIPSCCPLegacyPassPass(PassRegistry&);
void initializeIRCELegacyPassPass(PassRegistry&);

View File

@ -197,7 +197,7 @@ namespace {
(void) llvm::createFloat2IntPass();
(void) llvm::createEliminateAvailableExternallyPass();
(void)llvm::createScalarizeMaskedMemIntrinLegacyPass();
(void) llvm::createHardwareLoopsPass();
(void) llvm::createHardwareLoopsLegacyPass();
(void) llvm::createInjectTLIMappingsLegacyPass();
(void) llvm::createUnifyLoopExitsPass();
(void) llvm::createFixIrreduciblePass();

View File

@ -48,7 +48,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeFuncletLayoutPass(Registry);
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
initializeHardwareLoopsPass(Registry);
initializeHardwareLoopsLegacyPass(Registry);
initializeIfConverterPass(Registry);
initializeImplicitNullChecksPass(Registry);
initializeIndirectBrExpandPassPass(Registry);

View File

@ -15,8 +15,10 @@
///
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/HardwareLoops.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
@ -115,12 +117,12 @@ namespace {
using TTI = TargetTransformInfo;
class HardwareLoops : public FunctionPass {
class HardwareLoopsLegacy : public FunctionPass {
public:
static char ID;
HardwareLoops() : FunctionPass(ID) {
initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
HardwareLoopsLegacy() : FunctionPass(ID) {
initializeHardwareLoopsLegacyPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@ -131,29 +133,44 @@ namespace {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
AU.addPreserved<BranchProbabilityInfoWrapperPass>();
}
};
class HardwareLoopsImpl {
public:
HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,
DominatorTree &DT, const DataLayout &DL,
const TargetTransformInfo &TTI, TargetLibraryInfo *TLI,
AssumptionCache &AC, OptimizationRemarkEmitter *ORE,
HardwareLoopOptions &Opts)
: SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI),
TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { }
bool run(Function &F);
private:
// Try to convert the given Loop into a hardware loop.
bool TryConvertLoop(Loop *L);
bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
// Given that the target believes the loop to be profitable, try to
// convert it.
bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
private:
ScalarEvolution *SE = nullptr;
LoopInfo *LI = nullptr;
const DataLayout *DL = nullptr;
OptimizationRemarkEmitter *ORE = nullptr;
const TargetTransformInfo *TTI = nullptr;
DominatorTree *DT = nullptr;
bool PreserveLCSSA = false;
AssumptionCache *AC = nullptr;
TargetLibraryInfo *LibInfo = nullptr;
Module *M = nullptr;
ScalarEvolution &SE;
LoopInfo &LI;
bool PreserveLCSSA;
DominatorTree &DT;
const DataLayout &DL;
const TargetTransformInfo &TTI;
TargetLibraryInfo *TLI = nullptr;
AssumptionCache &AC;
OptimizationRemarkEmitter *ORE;
HardwareLoopOptions &Opts;
bool MadeChange = false;
};
@ -182,8 +199,9 @@ namespace {
public:
HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
const DataLayout &DL,
OptimizationRemarkEmitter *ORE) :
SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
OptimizationRemarkEmitter *ORE,
HardwareLoopOptions &Opts) :
SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()),
ExitCount(Info.ExitCount),
CountType(Info.CountType),
ExitBranch(Info.ExitBranch),
@ -197,6 +215,7 @@ namespace {
ScalarEvolution &SE;
const DataLayout &DL;
OptimizationRemarkEmitter *ORE = nullptr;
HardwareLoopOptions &Opts;
Loop *L = nullptr;
Module *M = nullptr;
const SCEV *ExitCount = nullptr;
@ -209,40 +228,83 @@ namespace {
};
}
char HardwareLoops::ID = 0;
char HardwareLoopsLegacy::ID = 0;
bool HardwareLoops::runOnFunction(Function &F) {
bool HardwareLoopsLegacy::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DL = &F.getParent()->getDataLayout();
ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &DL = F.getParent()->getDataLayout();
auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
M = F.getParent();
auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
for (Loop *L : *LI)
HardwareLoopOptions Opts;
if (ForceHardwareLoops.getNumOccurrences())
Opts.setForce(ForceHardwareLoops);
if (ForceHardwareLoopPHI.getNumOccurrences())
Opts.setForcePhi(ForceHardwareLoopPHI);
if (ForceNestedLoop.getNumOccurrences())
Opts.setForceNested(ForceNestedLoop);
if (ForceGuardLoopEntry.getNumOccurrences())
Opts.setForceGuard(ForceGuardLoopEntry);
if (LoopDecrement.getNumOccurrences())
Opts.setDecrement(LoopDecrement);
if (CounterBitWidth.getNumOccurrences())
Opts.setCounterBitwidth(CounterBitWidth);
HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE,
Opts);
return Impl.run(F);
}
/// New pass manager entry point. Collects the analyses the implementation
/// needs, runs it on \p F with LCSSA preservation enabled, and reports which
/// analyses remain valid afterwards.
PreservedAnalyses HardwareLoopsPass::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  // Analyses are requested in a fixed order; keep it stable.
  LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
  ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
  TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
  AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
  OptimizationRemarkEmitter *ORE =
      &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  const DataLayout &DL = F.getParent()->getDataLayout();

  HardwareLoopsImpl Impl(SE, LI, /*PreserveLCSSA=*/true, DT, DL, TTI, TLI, AC,
                         ORE, Opts);

  if (Impl.run(F)) {
    // Something changed: only the analyses listed here are marked preserved.
    PreservedAnalyses PA;
    PA.preserve<LoopAnalysis>();
    PA.preserve<ScalarEvolutionAnalysis>();
    PA.preserve<DominatorTreeAnalysis>();
    PA.preserve<BranchProbabilityAnalysis>();
    return PA;
  }

  // Nothing was modified, so every analysis is still valid.
  return PreservedAnalyses::all();
}
bool HardwareLoopsImpl::run(Function &F) {
LLVMContext &Ctx = F.getParent()->getContext();
for (Loop *L : LI)
if (L->isOutermost())
TryConvertLoop(L);
TryConvertLoop(L, Ctx);
return MadeChange;
}
// Return true if the search should stop, which will be when an inner loop is
// converted and the parent loop doesn't support containing a hardware loop.
bool HardwareLoops::TryConvertLoop(Loop *L) {
bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {
// Process nested loops first.
bool AnyChanged = false;
for (Loop *SL : *L)
AnyChanged |= TryConvertLoop(SL);
AnyChanged |= TryConvertLoop(SL, Ctx);
if (AnyChanged) {
reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
ORE, L);
@ -252,39 +314,39 @@ bool HardwareLoops::TryConvertLoop(Loop *L) {
LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
HardwareLoopInfo HWLoopInfo(L);
if (!HWLoopInfo.canAnalyze(*LI)) {
if (!HWLoopInfo.canAnalyze(LI)) {
reportHWLoopFailure("cannot analyze loop, irreducible control flow",
"HWLoopCannotAnalyze", ORE, L);
return false;
}
if (!ForceHardwareLoops &&
!TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
if (!Opts.Force &&
!TTI.isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
reportHWLoopFailure("it's not profitable to create a hardware-loop",
"HWLoopNotProfitable", ORE, L);
return false;
}
// Allow overriding of the counter width and loop decrement value.
if (CounterBitWidth.getNumOccurrences())
HWLoopInfo.CountType =
IntegerType::get(M->getContext(), CounterBitWidth);
if (Opts.Bitwidth.has_value()) {
HWLoopInfo.CountType = IntegerType::get(Ctx, Opts.Bitwidth.value());
}
if (LoopDecrement.getNumOccurrences())
if (Opts.Decrement.has_value())
HWLoopInfo.LoopDecrement =
ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value());
MadeChange |= TryConvertLoop(HWLoopInfo);
return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);
}
bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
Loop *L = HWLoopInfo.L;
LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
ForceHardwareLoopPHI)) {
if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, Opts.getForceNested(),
Opts.getForcePhi())) {
// TODO: there can be many reasons a loop is not considered a
// candidate, so we should let isHardwareLoopCandidate fill in the
// reason and then report a better message here.
@ -300,11 +362,11 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
// If we don't have a preheader, then insert one.
if (!Preheader)
Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
Preheader = InsertPreheaderForLoop(L, &DT, &LI, nullptr, PreserveLCSSA);
if (!Preheader)
return false;
HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts);
HWLoop.Create();
++NumHWLoops;
return true;
@ -322,7 +384,7 @@ void HardwareLoop::Create() {
Value *Setup = InsertIterationSetup(LoopCountInit);
if (UsePHICounter || ForceHardwareLoopPHI) {
if (UsePHICounter || Opts.ForcePhi) {
Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
Value *EltsRem = InsertPHICounter(Setup, LoopDec);
LoopDec->setOperand(0, EltsRem);
@ -397,7 +459,8 @@ Value *HardwareLoop::InitLoopCount() {
if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
SE.getZero(ExitCount->getType()))) {
LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
UseLoopGuard |= ForceGuardLoopEntry;
if (Opts.ForceGuard)
UseLoopGuard = true;
} else
UseLoopGuard = false;
@ -441,7 +504,7 @@ Value *HardwareLoop::InitLoopCount() {
Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
IRBuilder<> Builder(BeginBB->getTerminator());
Type *Ty = LoopCountInit->getType();
bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
bool UsePhi = UsePHICounter || Opts.ForcePhi;
Intrinsic::ID ID = UseLoopGuard
? (UsePhi ? Intrinsic::test_start_loop_iterations
: Intrinsic::test_set_loop_iterations)
@ -533,11 +596,11 @@ void HardwareLoop::UpdateBranch(Value *EltsRem) {
RecursivelyDeleteTriviallyDeadInstructions(OldCond);
}
INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
INITIALIZE_PASS_END(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); }

View File

@ -73,6 +73,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/HardwareLoops.h"
#include "llvm/CodeGen/TypePromotion.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
@ -540,6 +541,48 @@ auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name,
return Result;
}
/// Parser of parameters for HardwareLoops pass.
///
/// \p Params is a ';'-separated list. Recognised entries are the flags
/// "force-hardware-loops", "force-hardware-loop-phi",
/// "force-nested-hardware-loop" and "force-hardware-loop-guard", plus the
/// key/value pairs "hardware-loop-decrement=N" and
/// "hardware-loop-counter-bitwidth=N" where N is an unsigned integer.
///
/// \returns the populated options, or a StringError naming the first
/// parameter that is unknown or whose value is not an unsigned integer.
Expected<HardwareLoopOptions> parseHardwareLoopOptions(StringRef Params) {
  HardwareLoopOptions HardwareLoopOpts;

  // Single place that builds the diagnostic so every rejection reads the same.
  auto InvalidParam = [](StringRef Param) -> Error {
    return make_error<StringError>(
        formatv("invalid HardwareLoopPass parameter '{0}' ", Param).str(),
        inconvertibleErrorCode());
  };

  while (!Params.empty()) {
    StringRef ParamName;
    std::tie(ParamName, Params) = Params.split(';');
    // consume_front() strips the key below; keep the untouched text so the
    // diagnostic shows the whole parameter rather than just its value.
    const StringRef FullParam = ParamName;

    if (ParamName.consume_front("hardware-loop-decrement=")) {
      // Parse as unsigned: the option is stored as unsigned, so a negative
      // value must be rejected instead of wrapping around.
      unsigned Count;
      if (ParamName.getAsInteger(0, Count))
        return InvalidParam(FullParam);
      HardwareLoopOpts.setDecrement(Count);
      continue;
    }
    if (ParamName.consume_front("hardware-loop-counter-bitwidth=")) {
      unsigned Width;
      if (ParamName.getAsInteger(0, Width))
        return InvalidParam(FullParam);
      HardwareLoopOpts.setCounterBitwidth(Width);
      continue;
    }

    if (ParamName == "force-hardware-loops") {
      HardwareLoopOpts.setForce(true);
    } else if (ParamName == "force-hardware-loop-phi") {
      HardwareLoopOpts.setForcePhi(true);
    } else if (ParamName == "force-nested-hardware-loop") {
      HardwareLoopOpts.setForceNested(true);
    } else if (ParamName == "force-hardware-loop-guard") {
      HardwareLoopOpts.setForceGuard(true);
    } else {
      return InvalidParam(FullParam);
    }
  }
  return HardwareLoopOpts;
}
/// Parser of parameters for LoopUnroll pass.
Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {
LoopUnrollOptions UnrollOpts;

View File

@ -424,6 +424,18 @@ FUNCTION_PASS_WITH_PARAMS("ee-instrument",
},
parseEntryExitInstrumenterPassOptions,
"post-inline")
FUNCTION_PASS_WITH_PARAMS("hardware-loops",
"HardwareLoopsPass",
[](HardwareLoopOptions Opts) {
return HardwareLoopsPass(Opts);
},
parseHardwareLoopOptions,
"force-hardware-loops;"
"force-hardware-loop-phi;"
"force-nested-hardware-loop;"
"force-hardware-loop-guard;"
"hardware-loop-decrement=N;"
"hardware-loop-counter-bitwidth=N")
FUNCTION_PASS_WITH_PARAMS("lower-matrix-intrinsics",
"LowerMatrixIntrinsicsPass",
[](bool Minimal) {

View File

@ -481,7 +481,7 @@ bool ARMPassConfig::addPreISel() {
}
if (TM->getOptLevel() != CodeGenOpt::None) {
addPass(createHardwareLoopsPass());
addPass(createHardwareLoopsLegacyPass());
addPass(createMVETailPredicationPass());
// FIXME: IR passes can delete address-taken basic blocks, deleting
// corresponding blockaddresses. ARMConstantPoolConstant holds references to

View File

@ -474,7 +474,7 @@ bool PPCPassConfig::addPreISel() {
addPass(createPPCLoopInstrFormPrepPass(getPPCTargetMachine()));
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
addPass(createHardwareLoopsPass());
addPass(createHardwareLoopsLegacyPass());
return false;
}

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -hardware-loops -S -verify-loop-lcssa %s | FileCheck %s
; RUN: opt < %s -passes=hardware-loops -verify-loop-lcssa -S | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "ppc64-unknown-linux-elf"
@ -20,11 +20,12 @@ define void @test() {
; CHECK-NEXT: [[C_0:%.*]] = call i1 @cond()
; CHECK-NEXT: br i1 [[C_0]], label [[WHILE_COND25_PREHEADER:%.*]], label [[FOR_BODY]]
; CHECK: while.cond25.preheader:
; CHECK-NEXT: [[INDVARS_IV349_PH:%.*]] = phi i64 [ 50, [[FOR_INC]] ]
; CHECK-NEXT: call void @llvm.set.loop.iterations.i64(i64 51)
; CHECK-NEXT: br label [[WHILE_COND25:%.*]]
; CHECK: while.cond25:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[WHILE_COND25_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[LAND_RHS:%.*]] ]
; CHECK-NEXT: [[INDVARS_IV349:%.*]] = phi i64 [ [[INDVARS_IV_NEXT350:%.*]], [[LAND_RHS]] ], [ 50, [[WHILE_COND25_PREHEADER]] ]
; CHECK-NEXT: [[INDVARS_IV349:%.*]] = phi i64 [ [[INDVARS_IV_NEXT350:%.*]], [[LAND_RHS]] ], [ [[INDVARS_IV349_PH]], [[WHILE_COND25_PREHEADER]] ]
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i64(i64 1)
; CHECK-NEXT: br i1 [[TMP0]], label [[LAND_RHS]], label [[WHILE_END187:%.*]]
; CHECK: land.rhs:

View File

@ -1,9 +1,9 @@
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MAIN
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fullfp16 -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fp-armv8,+fullfp16 -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP64
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MAIN
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fullfp16 -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fp-armv8,+fullfp16 -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP64
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
; DISABLED-NOT: call i32 @llvm.loop.decrement

View File

@ -1,4 +1,4 @@
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -o - | FileCheck %s
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -o - | FileCheck %s
@g = common local_unnamed_addr global ptr null, align 4

View File

@ -1,4 +1,4 @@
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | FileCheck %s
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | FileCheck %s
@g = common local_unnamed_addr global ptr null, align 4

View File

@ -1,5 +1,5 @@
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fp-armv8 -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+soft-float -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SOFT
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fp-armv8 -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+soft-float -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SOFT
; CHECK-LABEL: test_fptosi
; CHECK-SOFT-NOT: call i32 @llvm.start.loop.iterations

View File

@ -1,5 +1,5 @@
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | FileCheck %s
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | FileCheck %s
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi %s -o - | FileCheck %s --check-prefix=CHECK-LLC
; DISABLED-NOT: llvm.{{.*}}.loop.iterations

View File

@ -1,8 +1,8 @@
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | \
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | \
; RUN: FileCheck %s
; RUN: opt -mtriple=thumbv8.1m.main -passes=loop-unroll -unroll-remainder=false -S < %s | \
; RUN: llc -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-UNROLL
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops \
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops \
; RUN: -pass-remarks-analysis=hardware-loops %s -S -o - 2>&1 | \
; RUN: FileCheck %s --check-prefix=CHECK-REMARKS
@ -14,7 +14,7 @@
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop

View File

@ -1,6 +1,6 @@
; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EXIT
; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-guard=true -force-hardware-loop-phi=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-guard=false -S %s -o - | FileCheck %s --check-prefix=NO-GUARD
; RUN: opt -passes='hardware-loops<force-hardware-loops;force-hardware-loop-guard;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EXIT
; RUN: opt -passes='hardware-loops<force-hardware-loops;force-hardware-loop-guard;force-hardware-loop-phi;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=NO-GUARD
; NO-GUARD-NOT: @llvm.test.set.loop.iterations

View File

@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S %s -o - | FileCheck %s --check-prefix=CHECK-DEC
; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -S %s -o - | FileCheck %s --check-prefix=CHECK-PHI
; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-nested-hardware-loop=true -S %s -o - | FileCheck %s --check-prefix=CHECK-NESTED
; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK-GUARD
; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK-PHIGUARD
; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=CHECK-DEC
; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-phi>' -S %s -o - | FileCheck %s --check-prefix=CHECK-PHI
; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-nested-hardware-loop>' -S %s -o - | FileCheck %s --check-prefix=CHECK-NESTED
; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-guard>' -S %s -o - | FileCheck %s --check-prefix=CHECK-GUARD
; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-phi;force-hardware-loop-guard>' -S %s -o - | FileCheck %s --check-prefix=CHECK-PHIGUARD
define void @while_lt(i32 %i, i32 %N, ptr nocapture %A) {
; CHECK-DEC-LABEL: @while_lt(

View File

@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S | FileCheck %s
; RUN: opt < %s -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S | FileCheck %s
define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noalias nocapture %data, ptr noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: @test(
@ -25,10 +25,11 @@ define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noali
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1)
; CHECK-NEXT: br i1 [[TMP0]], label [[FOR_BODY4_US]], label [[FOR_BODY15_US_PREHEADER:%.*]]
; CHECK: for.body15.us.preheader:
; CHECK-NEXT: [[J10_055_US_PH:%.*]] = phi i32 [ 0, [[FOR_BODY4_US]] ]
; CHECK-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[N]])
; CHECK-NEXT: br label [[FOR_BODY15_US:%.*]]
; CHECK: for.body15.us:
; CHECK-NEXT: [[J10_055_US:%.*]] = phi i32 [ [[INC26_US:%.*]], [[FOR_BODY15_US]] ], [ 0, [[FOR_BODY15_US_PREHEADER]] ]
; CHECK-NEXT: [[J10_055_US:%.*]] = phi i32 [ [[INC26_US:%.*]], [[FOR_BODY15_US]] ], [ [[J10_055_US_PH]], [[FOR_BODY15_US_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX16_US:%.*]] = getelementptr inbounds i16, ptr [[OFF]], i32 [[J10_055_US]]
; CHECK-NEXT: [[L0:%.*]] = load i16, ptr [[ARRAYIDX16_US]], align 2
; CHECK-NEXT: [[ARRAYIDX18_US:%.*]] = getelementptr inbounds i16, ptr [[DATA]], i32 [[J10_055_US]]

View File

@ -1,6 +1,6 @@
; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-phi>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
; CHECK-LABEL: not_rotated
; CHECK-LATCH-NOT: call void @llvm.set.loop.iterations

View File

@ -1,6 +1,6 @@
; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S %s -o - | FileCheck %s
; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -S %s -o - | FileCheck %s
; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-nested-hardware-loop=true -S %s -o - | FileCheck %s
; RUN: opt -passes=hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S %s -o - | FileCheck %s
; RUN: opt -passes=hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -S %s -o - | FileCheck %s
; RUN: opt -passes=hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-nested-hardware-loop=true -S %s -o - | FileCheck %s
; CHECK-LABEL: float_counter
; CHECK-NOT: set.loop.iterations

View File

@ -366,7 +366,7 @@ int main(int argc, char **argv) {
initializeScalarizeMaskedMemIntrinLegacyPassPass(*Registry);
initializeExpandReductionsPass(*Registry);
initializeExpandVectorPredicationPass(*Registry);
initializeHardwareLoopsPass(*Registry);
initializeHardwareLoopsLegacyPass(*Registry);
initializeTransformUtils(*Registry);
initializeReplaceWithVeclibLegacyPass(*Registry);
initializeTLSVariableHoistLegacyPassPass(*Registry);

View File

@ -368,7 +368,6 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
"verify-safepoint-ir",
"atomic-expand",
"expandvp",
"hardware-loops",
"mve-tail-predication",
"interleaved-access",
"global-merge",
@ -462,7 +461,6 @@ int main(int argc, char **argv) {
initializeExpandVectorPredicationPass(Registry);
initializeWasmEHPreparePass(Registry);
initializeWriteBitcodePassPass(Registry);
initializeHardwareLoopsPass(Registry);
initializeReplaceWithVeclibLegacyPass(Registry);
initializeJMCInstrumenterPass(Registry);

View File

@ -113,7 +113,7 @@ void init(Triple TT) {
initializeExpandVectorPredicationPass(Registry);
initializeWasmEHPreparePass(Registry);
initializeWriteBitcodePassPass(Registry);
initializeHardwareLoopsPass(Registry);
initializeHardwareLoopsLegacyPass(Registry);
initializeTypePromotionLegacyPass(Registry);
initializeReplaceWithVeclibLegacyPass(Registry);
initializeJMCInstrumenterPass(Registry);