Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

841 lines
28 KiB
C++
Raw Normal View History

//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//
#include "VPlan.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>
using namespace llvm;
extern cl::opt<bool> EnableVPlanNativePath;
bool VPRecipeBase::mayWriteToMemory() const {
switch (getVPDefID()) {
case VPWidenMemoryInstructionSC: {
return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
}
case VPReplicateSC:
case VPWidenCallSC:
return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
->mayWriteToMemory();
case VPBranchOnMaskSC:
return false;
case VPWidenIntOrFpInductionSC:
case VPWidenCanonicalIVSC:
case VPWidenPHISC:
case VPBlendSC:
case VPWidenSC:
case VPWidenGEPSC:
case VPReductionSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
assert((!I || !I->mayWriteToMemory()) &&
"underlying instruction may write to memory");
return false;
}
default:
return true;
}
}
bool VPRecipeBase::mayReadFromMemory() const {
switch (getVPDefID()) {
case VPWidenMemoryInstructionSC: {
return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
}
case VPReplicateSC:
case VPWidenCallSC:
return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
->mayReadFromMemory();
case VPBranchOnMaskSC:
return false;
case VPWidenIntOrFpInductionSC:
case VPWidenCanonicalIVSC:
case VPWidenPHISC:
case VPBlendSC:
case VPWidenSC:
case VPWidenGEPSC:
case VPReductionSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
assert((!I || !I->mayReadFromMemory()) &&
"underlying instruction may read from memory");
return false;
}
default:
return true;
}
}
bool VPRecipeBase::mayHaveSideEffects() const {
switch (getVPDefID()) {
case VPWidenIntOrFpInductionSC:
case VPWidenPointerInductionSC:
case VPWidenCanonicalIVSC:
case VPWidenPHISC:
case VPBlendSC:
case VPWidenSC:
case VPWidenGEPSC:
case VPReductionSC:
case VPWidenSelectSC:
case VPScalarIVStepsSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
assert((!I || !I->mayHaveSideEffects()) &&
"underlying instruction has side-effects");
return false;
}
case VPReplicateSC: {
auto *R = cast<VPReplicateRecipe>(this);
return R->getUnderlyingInstr()->mayHaveSideEffects();
}
default:
return true;
}
}
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
auto Lane = VPLane::getLastLaneForVF(State.VF);
VPValue *ExitValue = getOperand(0);
if (Plan.isUniformAfterVectorization(ExitValue))
Lane = VPLane::getFirstLane();
Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
State.Builder.GetInsertBlock());
}
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
assert(!Parent && "Recipe already in some VPBasicBlock");
assert(InsertPos->getParent() &&
"Insertion position not in any VPBasicBlock");
Parent = InsertPos->getParent();
Parent->getRecipeList().insert(InsertPos->getIterator(), this);
}
void VPRecipeBase::insertBefore(VPBasicBlock &BB,
iplist<VPRecipeBase>::iterator I) {
assert(!Parent && "Recipe already in some VPBasicBlock");
assert(I == BB.end() || I->getParent() == &BB);
Parent = &BB;
BB.getRecipeList().insert(I, this);
}
void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
assert(!Parent && "Recipe already in some VPBasicBlock");
assert(InsertPos->getParent() &&
"Insertion position not in any VPBasicBlock");
Parent = InsertPos->getParent();
Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
}
void VPRecipeBase::removeFromParent() {
assert(getParent() && "Recipe not in any VPBasicBlock");
getParent()->getRecipeList().remove(getIterator());
Parent = nullptr;
}
iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
assert(getParent() && "Recipe not in any VPBasicBlock");
return getParent()->getRecipeList().erase(getIterator());
}
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
removeFromParent();
insertAfter(InsertPos);
}
void VPRecipeBase::moveBefore(VPBasicBlock &BB,
iplist<VPRecipeBase>::iterator I) {
removeFromParent();
insertBefore(BB, I);
}
void VPInstruction::generateInstruction(VPTransformState &State,
unsigned Part) {
IRBuilderBase &Builder = State.Builder;
Builder.SetCurrentDebugLocation(DL);
if (Instruction::isBinaryOp(getOpcode())) {
Value *A = State.get(getOperand(0), Part);
Value *B = State.get(getOperand(1), Part);
Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
State.set(this, V, Part);
return;
}
switch (getOpcode()) {
case VPInstruction::Not: {
Value *A = State.get(getOperand(0), Part);
Value *V = Builder.CreateNot(A);
State.set(this, V, Part);
break;
}
case VPInstruction::ICmpULE: {
Value *IV = State.get(getOperand(0), Part);
Value *TC = State.get(getOperand(1), Part);
Value *V = Builder.CreateICmpULE(IV, TC);
State.set(this, V, Part);
break;
}
case Instruction::Select: {
Value *Cond = State.get(getOperand(0), Part);
Value *Op1 = State.get(getOperand(1), Part);
Value *Op2 = State.get(getOperand(2), Part);
Value *V = Builder.CreateSelect(Cond, Op1, Op2);
State.set(this, V, Part);
break;
}
case VPInstruction::ActiveLaneMask: {
// Get first lane of vector induction variable.
Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
// Get the original loop tripcount.
Value *ScalarTC = State.get(getOperand(1), Part);
auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
auto *PredTy = VectorType::get(Int1Ty, State.VF);
Instruction *Call = Builder.CreateIntrinsic(
Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
{VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
State.set(this, Call, Part);
break;
}
case VPInstruction::FirstOrderRecurrenceSplice: {
// Generate code to combine the previous and current values in vector v3.
//
// vector.ph:
// v_init = vector(..., ..., ..., a[-1])
// br vector.body
//
// vector.body
// i = phi [0, vector.ph], [i+4, vector.body]
// v1 = phi [v_init, vector.ph], [v2, vector.body]
// v2 = a[i, i+1, i+2, i+3];
// v3 = vector(v1(3), v2(0, 1, 2))
// For the first part, use the recurrence phi (v1), otherwise v2.
auto *V1 = State.get(getOperand(0), 0);
Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
if (!PartMinus1->getType()->isVectorTy()) {
State.set(this, PartMinus1, Part);
} else {
Value *V2 = State.get(getOperand(1), Part);
State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
}
break;
}
case VPInstruction::CanonicalIVIncrement:
case VPInstruction::CanonicalIVIncrementNUW: {
Value *Next = nullptr;
if (Part == 0) {
bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
auto *Phi = State.get(getOperand(0), 0);
// The loop step is equal to the vectorization factor (num of SIMD
// elements) times the unroll factor (num of SIMD instructions).
Value *Step =
createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
} else {
Next = State.get(this, 0);
}
State.set(this, Next, Part);
break;
}
case VPInstruction::BranchOnCond: {
if (Part != 0)
break;
Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
VPRegionBlock *ParentRegion = getParent()->getParent();
VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
// Replace the temporary unreachable terminator with a new conditional
// branch, hooking it up to backward destination for exiting blocks now and
// to forward destination(s) later when they are created.
BranchInst *CondBr =
Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
if (getParent()->isExiting())
CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
CondBr->setSuccessor(0, nullptr);
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
break;
}
case VPInstruction::BranchOnCount: {
if (Part != 0)
break;
// First create the compare.
Value *IV = State.get(getOperand(0), Part);
Value *TC = State.get(getOperand(1), Part);
Value *Cond = Builder.CreateICmpEQ(IV, TC);
// Now create the branch.
auto *Plan = getParent()->getPlan();
VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
// Replace the temporary unreachable terminator with a new conditional
// branch, hooking it up to backward destination (the header) now and to the
// forward destination (the exit/middle block) later when it is created.
// Note that CreateCondBr expects a valid BB as first argument, so we need
// to set it to nullptr later.
BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
State.CFG.VPBB2IRBB[Header]);
CondBr->setSuccessor(0, nullptr);
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
break;
}
default:
llvm_unreachable("Unsupported opcode for instruction");
}
}
void VPInstruction::execute(VPTransformState &State) {
assert(!State.Instance && "VPInstruction executing an Instance");
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
State.Builder.setFastMathFlags(FMF);
for (unsigned Part = 0; Part < State.UF; ++Part)
generateInstruction(State, Part);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
VPSlotTracker SlotTracker(getParent()->getPlan());
print(dbgs(), "", SlotTracker);
}
void VPInstruction::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "EMIT ";
if (hasResult()) {
printAsOperand(O, SlotTracker);
O << " = ";
}
switch (getOpcode()) {
case VPInstruction::Not:
O << "not";
break;
case VPInstruction::ICmpULE:
O << "icmp ule";
break;
case VPInstruction::SLPLoad:
O << "combined load";
break;
case VPInstruction::SLPStore:
O << "combined store";
break;
case VPInstruction::ActiveLaneMask:
O << "active lane mask";
break;
case VPInstruction::FirstOrderRecurrenceSplice:
O << "first-order splice";
break;
case VPInstruction::CanonicalIVIncrement:
O << "VF * UF + ";
break;
case VPInstruction::CanonicalIVIncrementNUW:
O << "VF * UF +(nuw) ";
break;
case VPInstruction::BranchOnCond:
O << "branch-on-cond";
break;
case VPInstruction::BranchOnCount:
O << "branch-on-count ";
break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
O << FMF;
for (const VPValue *Operand : operands()) {
O << " ";
Operand->printAsOperand(O, SlotTracker);
}
if (DL) {
O << ", !dbg ";
DL.print(O);
}
}
#endif
void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
// Make sure the VPInstruction is a floating-point operation.
assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
Opcode == Instruction::FCmp) &&
"this op can't take fast-math flags");
FMF = FMFNew;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-CALL ";
auto *CI = cast<CallInst>(getUnderlyingInstr());
if (CI->getType()->isVoidTy())
O << "void ";
else {
printAsOperand(O, SlotTracker);
O << " = ";
}
O << "call @" << CI->getCalledFunction()->getName() << "(";
printOperands(O, SlotTracker);
O << ")";
}
void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-SELECT ";
printAsOperand(O, SlotTracker);
O << " = select ";
getOperand(0)->printAsOperand(O, SlotTracker);
O << ", ";
getOperand(1)->printAsOperand(O, SlotTracker);
O << ", ";
getOperand(2)->printAsOperand(O, SlotTracker);
O << (InvariantCond ? " (condition is loop invariant)" : "");
}
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN ";
printAsOperand(O, SlotTracker);
O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
printOperands(O, SlotTracker);
}
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-INDUCTION";
if (getTruncInst()) {
O << "\\l\"";
O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
O << " +\n" << Indent << "\" ";
getVPValue(0)->printAsOperand(O, SlotTracker);
} else
O << " " << VPlanIngredient(IV);
O << ", ";
getStepValue()->printAsOperand(O, SlotTracker);
}
#endif
bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
return StartC && StartC->isZero() && StepC && StepC->isOne();
}
VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
}
bool VPScalarIVStepsRecipe::isCanonical() const {
auto *CanIV = getCanonicalIV();
// The start value of the steps-recipe must match the start value of the
// canonical induction and it must step by 1.
if (CanIV->getStartValue() != getStartValue())
return false;
auto *StepVPV = getStepValue();
if (StepVPV->getDef())
return false;
auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
return StepC && StepC->isOne();
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent;
printAsOperand(O, SlotTracker);
O << Indent << "= SCALAR-STEPS ";
printOperands(O, SlotTracker);
}
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-GEP ";
O << (IsPtrLoopInvariant ? "Inv" : "Var");
size_t IndicesNumber = IsIndexLoopInvariant.size();
for (size_t I = 0; I < IndicesNumber; ++I)
O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
O << " ";
printAsOperand(O, SlotTracker);
O << " = getelementptr ";
printOperands(O, SlotTracker);
}
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "BLEND ";
Phi->printAsOperand(O, false);
O << " =";
if (getNumIncomingValues() == 1) {
// Not a User of any mask: not really blending, this is a
// single-predecessor phi.
O << " ";
getIncomingValue(0)->printAsOperand(O, SlotTracker);
} else {
for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
O << " ";
getIncomingValue(I)->printAsOperand(O, SlotTracker);
O << "/";
getMask(I)->printAsOperand(O, SlotTracker);
}
}
}
void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "REDUCE ";
printAsOperand(O, SlotTracker);
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
if (isa<FPMathOperator>(getUnderlyingInstr()))
O << getUnderlyingInstr()->getFastMathFlags();
O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
if (getCondOp()) {
O << ", ";
getCondOp()->printAsOperand(O, SlotTracker);
}
O << ")";
if (RdxDesc->IntermediateStore)
O << " (with final reduction value stored in invariant address sank "
"outside of loop)";
}
void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
if (!getUnderlyingInstr()->getType()->isVoidTy()) {
printAsOperand(O, SlotTracker);
O << " = ";
}
if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
O << "call @" << CB->getCalledFunction()->getName() << "(";
interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
O, [&O, &SlotTracker](VPValue *Op) {
Op->printAsOperand(O, SlotTracker);
});
O << ")";
} else {
O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
printOperands(O, SlotTracker);
}
if (AlsoPack)
O << " (S->V)";
}
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "PHI-PREDICATED-INSTRUCTION ";
printAsOperand(O, SlotTracker);
O << " = ";
printOperands(O, SlotTracker);
}
void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN ";
if (!isStore()) {
getVPSingleValue()->printAsOperand(O, SlotTracker);
O << " = ";
}
O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
printOperands(O, SlotTracker);
}
#endif
void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
Value *Start = getStartValue()->getLiveInIRValue();
PHINode *EntryPart = PHINode::Create(
Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
EntryPart->addIncoming(Start, VectorPH);
EntryPart->setDebugLoc(DL);
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(this, EntryPart, Part);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "EMIT ";
printAsOperand(O, SlotTracker);
O << " = CANONICAL-INDUCTION";
}
#endif
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
bool IsUniform = vputils::onlyFirstLaneUsed(this);
return all_of(users(),
[&](const VPUser *U) { return U->usesScalars(this); }) &&
(IsUniform || !VF.isScalable());
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "EMIT ";
printAsOperand(O, SlotTracker);
O << " = WIDEN-POINTER-INDUCTION ";
getStartValue()->printAsOperand(O, SlotTracker);
O << ", " << *IndDesc.getStep();
}
#endif
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "cannot be used in per-lane");
const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
SCEVExpander Exp(SE, DL, "induction");
Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
&*State.Builder.GetInsertPoint());
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
State.set(this, Res, Part);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "EMIT ";
getVPSingleValue()->printAsOperand(O, SlotTracker);
O << " = EXPAND SCEV " << *Expr;
}
#endif
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
Value *CanonicalIV = State.get(getOperand(0), 0);
Type *STy = CanonicalIV->getType();
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
ElementCount VF = State.VF;
Value *VStart = VF.isScalar()
? CanonicalIV
: Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
Value *VStep = createStepForVF(Builder, STy, VF, Part);
if (VF.isVector()) {
VStep = Builder.CreateVectorSplat(VF, VStep);
VStep =
Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
}
Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
State.set(this, CanonicalVectorIV, Part);
}
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "EMIT ";
printAsOperand(O, SlotTracker);
O << " = WIDEN-CANONICAL-INDUCTION ";
printOperands(O, SlotTracker);
}
#endif
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
auto &Builder = State.Builder;
// Create a vector from the initial value.
auto *VectorInit = getStartValue()->getLiveInIRValue();
Type *VecTy = State.VF.isScalar()
? VectorInit->getType()
: VectorType::get(VectorInit->getType(), State.VF);
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
if (State.VF.isVector()) {
auto *IdxTy = Builder.getInt32Ty();
auto *One = ConstantInt::get(IdxTy, 1);
IRBuilder<>::InsertPointGuard Guard(Builder);
Builder.SetInsertPoint(VectorPH->getTerminator());
auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
VectorInit = Builder.CreateInsertElement(
PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
}
// Create a phi node for the new recurrence.
PHINode *EntryPart = PHINode::Create(
VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
EntryPart->addIncoming(VectorInit, VectorPH);
State.set(this, EntryPart, 0);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
printAsOperand(O, SlotTracker);
O << " = phi ";
printOperands(O, SlotTracker);
}
#endif
void VPReductionPHIRecipe::execute(VPTransformState &State) {
PHINode *PN = cast<PHINode>(getUnderlyingValue());
auto &Builder = State.Builder;
// In order to support recurrences we need to be able to vectorize Phi nodes.
// Phi nodes have cycles, so we need to vectorize them in two stages. This is
// stage #1: We create a new vector PHI node with no incoming edges. We'll use
// this value when we vectorize all of the instructions that use the PHI.
bool ScalarPHI = State.VF.isScalar() || IsInLoop;
Type *VecTy =
ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
BasicBlock *HeaderBB = State.CFG.PrevBB;
assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
"recipe must be in the vector loop header");
unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
Value *EntryPart =
PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
State.set(this, EntryPart, Part);
}
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
// Reductions do not have to start at zero. They can start with
// any loop invariant values.
VPValue *StartVPV = getStartValue();
Value *StartV = StartVPV->getLiveInIRValue();
Value *Iden = nullptr;
RecurKind RK = RdxDesc.getRecurrenceKind();
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
// MinMax reduction have the start value as their identify.
if (ScalarPHI) {
Iden = StartV;
} else {
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
Builder.SetInsertPoint(VectorPH->getTerminator());
StartV = Iden =
Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
}
} else {
Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
RdxDesc.getFastMathFlags());
if (!ScalarPHI) {
Iden = Builder.CreateVectorSplat(State.VF, Iden);
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
Builder.SetInsertPoint(VectorPH->getTerminator());
Constant *Zero = Builder.getInt32(0);
StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
}
}
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
Value *EntryPart = State.get(this, Part);
// Make sure to add the reduction start value only to the
// first unroll part.
Value *StartVal = (Part == 0) ? StartV : Iden;
cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
}
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-REDUCTION-PHI ";
printAsOperand(O, SlotTracker);
O << " = phi ";
printOperands(O, SlotTracker);
}
#endif
void VPWidenPHIRecipe::execute(VPTransformState &State) {
assert(EnableVPlanNativePath &&
"Non-native vplans are not expected to have VPWidenPHIRecipes.");
// Currently we enter here in the VPlan-native path for non-induction
// PHIs where all control flow is uniform. We simply widen these PHIs.
// Create a vector phi with no operands - the vector phi operands will be
// set at the end of vector code generation.
VPBasicBlock *Parent = getParent();
VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
unsigned StartIdx = 0;
// For phis in header blocks of loop regions, use the index of the value
// coming from the preheader.
if (LoopRegion->getEntryBasicBlock() == Parent) {
for (unsigned I = 0; I < getNumOperands(); ++I) {
if (getIncomingBlock(I) ==
LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
StartIdx = I;
}
}
Value *Op0 = State.get(getOperand(StartIdx), 0);
Type *VecTy = Op0->getType();
Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
State.set(this, VecPhi, 0);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-PHI ";
auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
// Unless all incoming values are modeled in VPlan print the original PHI
// directly.
// TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
// values as VPValues.
if (getNumOperands() != OriginalPhi->getNumOperands()) {
O << VPlanIngredient(OriginalPhi);
return;
}
printAsOperand(O, SlotTracker);
O << " = phi ";
printOperands(O, SlotTracker);
}
#endif