mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-16 11:36:46 +00:00
841 lines
28 KiB
C++
841 lines
28 KiB
C++
![]() |
//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
|
||
|
//
|
||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
///
|
||
|
/// \file
|
||
|
/// This file contains implementations for different VPlan recipes.
|
||
|
///
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
|
||
|
#include "VPlan.h"
|
||
|
#include "llvm/ADT/STLExtras.h"
|
||
|
#include "llvm/ADT/SmallVector.h"
|
||
|
#include "llvm/ADT/Twine.h"
|
||
|
#include "llvm/Analysis/IVDescriptors.h"
|
||
|
#include "llvm/IR/BasicBlock.h"
|
||
|
#include "llvm/IR/IRBuilder.h"
|
||
|
#include "llvm/IR/Instruction.h"
|
||
|
#include "llvm/IR/Instructions.h"
|
||
|
#include "llvm/IR/Type.h"
|
||
|
#include "llvm/IR/Value.h"
|
||
|
#include "llvm/Support/Casting.h"
|
||
|
#include "llvm/Support/CommandLine.h"
|
||
|
#include "llvm/Support/Debug.h"
|
||
|
#include "llvm/Support/raw_ostream.h"
|
||
|
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
|
||
|
#include <cassert>
|
||
|
|
||
|
using namespace llvm;
|
||
|
|
||
|
extern cl::opt<bool> EnableVPlanNativePath;
|
||
|
|
||
|
bool VPRecipeBase::mayWriteToMemory() const {
|
||
|
switch (getVPDefID()) {
|
||
|
case VPWidenMemoryInstructionSC: {
|
||
|
return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
|
||
|
}
|
||
|
case VPReplicateSC:
|
||
|
case VPWidenCallSC:
|
||
|
return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
|
||
|
->mayWriteToMemory();
|
||
|
case VPBranchOnMaskSC:
|
||
|
return false;
|
||
|
case VPWidenIntOrFpInductionSC:
|
||
|
case VPWidenCanonicalIVSC:
|
||
|
case VPWidenPHISC:
|
||
|
case VPBlendSC:
|
||
|
case VPWidenSC:
|
||
|
case VPWidenGEPSC:
|
||
|
case VPReductionSC:
|
||
|
case VPWidenSelectSC: {
|
||
|
const Instruction *I =
|
||
|
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
|
||
|
(void)I;
|
||
|
assert((!I || !I->mayWriteToMemory()) &&
|
||
|
"underlying instruction may write to memory");
|
||
|
return false;
|
||
|
}
|
||
|
default:
|
||
|
return true;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
bool VPRecipeBase::mayReadFromMemory() const {
|
||
|
switch (getVPDefID()) {
|
||
|
case VPWidenMemoryInstructionSC: {
|
||
|
return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
|
||
|
}
|
||
|
case VPReplicateSC:
|
||
|
case VPWidenCallSC:
|
||
|
return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
|
||
|
->mayReadFromMemory();
|
||
|
case VPBranchOnMaskSC:
|
||
|
return false;
|
||
|
case VPWidenIntOrFpInductionSC:
|
||
|
case VPWidenCanonicalIVSC:
|
||
|
case VPWidenPHISC:
|
||
|
case VPBlendSC:
|
||
|
case VPWidenSC:
|
||
|
case VPWidenGEPSC:
|
||
|
case VPReductionSC:
|
||
|
case VPWidenSelectSC: {
|
||
|
const Instruction *I =
|
||
|
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
|
||
|
(void)I;
|
||
|
assert((!I || !I->mayReadFromMemory()) &&
|
||
|
"underlying instruction may read from memory");
|
||
|
return false;
|
||
|
}
|
||
|
default:
|
||
|
return true;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
bool VPRecipeBase::mayHaveSideEffects() const {
|
||
|
switch (getVPDefID()) {
|
||
|
case VPWidenIntOrFpInductionSC:
|
||
|
case VPWidenPointerInductionSC:
|
||
|
case VPWidenCanonicalIVSC:
|
||
|
case VPWidenPHISC:
|
||
|
case VPBlendSC:
|
||
|
case VPWidenSC:
|
||
|
case VPWidenGEPSC:
|
||
|
case VPReductionSC:
|
||
|
case VPWidenSelectSC:
|
||
|
case VPScalarIVStepsSC: {
|
||
|
const Instruction *I =
|
||
|
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
|
||
|
(void)I;
|
||
|
assert((!I || !I->mayHaveSideEffects()) &&
|
||
|
"underlying instruction has side-effects");
|
||
|
return false;
|
||
|
}
|
||
|
case VPReplicateSC: {
|
||
|
auto *R = cast<VPReplicateRecipe>(this);
|
||
|
return R->getUnderlyingInstr()->mayHaveSideEffects();
|
||
|
}
|
||
|
default:
|
||
|
return true;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
|
||
|
auto Lane = VPLane::getLastLaneForVF(State.VF);
|
||
|
VPValue *ExitValue = getOperand(0);
|
||
|
if (Plan.isUniformAfterVectorization(ExitValue))
|
||
|
Lane = VPLane::getFirstLane();
|
||
|
Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
|
||
|
State.Builder.GetInsertBlock());
|
||
|
}
|
||
|
|
||
|
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
|
||
|
assert(!Parent && "Recipe already in some VPBasicBlock");
|
||
|
assert(InsertPos->getParent() &&
|
||
|
"Insertion position not in any VPBasicBlock");
|
||
|
Parent = InsertPos->getParent();
|
||
|
Parent->getRecipeList().insert(InsertPos->getIterator(), this);
|
||
|
}
|
||
|
|
||
|
void VPRecipeBase::insertBefore(VPBasicBlock &BB,
|
||
|
iplist<VPRecipeBase>::iterator I) {
|
||
|
assert(!Parent && "Recipe already in some VPBasicBlock");
|
||
|
assert(I == BB.end() || I->getParent() == &BB);
|
||
|
Parent = &BB;
|
||
|
BB.getRecipeList().insert(I, this);
|
||
|
}
|
||
|
|
||
|
void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
|
||
|
assert(!Parent && "Recipe already in some VPBasicBlock");
|
||
|
assert(InsertPos->getParent() &&
|
||
|
"Insertion position not in any VPBasicBlock");
|
||
|
Parent = InsertPos->getParent();
|
||
|
Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
|
||
|
}
|
||
|
|
||
|
void VPRecipeBase::removeFromParent() {
|
||
|
assert(getParent() && "Recipe not in any VPBasicBlock");
|
||
|
getParent()->getRecipeList().remove(getIterator());
|
||
|
Parent = nullptr;
|
||
|
}
|
||
|
|
||
|
iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
|
||
|
assert(getParent() && "Recipe not in any VPBasicBlock");
|
||
|
return getParent()->getRecipeList().erase(getIterator());
|
||
|
}
|
||
|
|
||
|
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
|
||
|
removeFromParent();
|
||
|
insertAfter(InsertPos);
|
||
|
}
|
||
|
|
||
|
void VPRecipeBase::moveBefore(VPBasicBlock &BB,
|
||
|
iplist<VPRecipeBase>::iterator I) {
|
||
|
removeFromParent();
|
||
|
insertBefore(BB, I);
|
||
|
}
|
||
|
|
||
|
void VPInstruction::generateInstruction(VPTransformState &State,
|
||
|
unsigned Part) {
|
||
|
IRBuilderBase &Builder = State.Builder;
|
||
|
Builder.SetCurrentDebugLocation(DL);
|
||
|
|
||
|
if (Instruction::isBinaryOp(getOpcode())) {
|
||
|
Value *A = State.get(getOperand(0), Part);
|
||
|
Value *B = State.get(getOperand(1), Part);
|
||
|
Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
|
||
|
State.set(this, V, Part);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
switch (getOpcode()) {
|
||
|
case VPInstruction::Not: {
|
||
|
Value *A = State.get(getOperand(0), Part);
|
||
|
Value *V = Builder.CreateNot(A);
|
||
|
State.set(this, V, Part);
|
||
|
break;
|
||
|
}
|
||
|
case VPInstruction::ICmpULE: {
|
||
|
Value *IV = State.get(getOperand(0), Part);
|
||
|
Value *TC = State.get(getOperand(1), Part);
|
||
|
Value *V = Builder.CreateICmpULE(IV, TC);
|
||
|
State.set(this, V, Part);
|
||
|
break;
|
||
|
}
|
||
|
case Instruction::Select: {
|
||
|
Value *Cond = State.get(getOperand(0), Part);
|
||
|
Value *Op1 = State.get(getOperand(1), Part);
|
||
|
Value *Op2 = State.get(getOperand(2), Part);
|
||
|
Value *V = Builder.CreateSelect(Cond, Op1, Op2);
|
||
|
State.set(this, V, Part);
|
||
|
break;
|
||
|
}
|
||
|
case VPInstruction::ActiveLaneMask: {
|
||
|
// Get first lane of vector induction variable.
|
||
|
Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
|
||
|
// Get the original loop tripcount.
|
||
|
Value *ScalarTC = State.get(getOperand(1), Part);
|
||
|
|
||
|
auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
|
||
|
auto *PredTy = VectorType::get(Int1Ty, State.VF);
|
||
|
Instruction *Call = Builder.CreateIntrinsic(
|
||
|
Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
|
||
|
{VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
|
||
|
State.set(this, Call, Part);
|
||
|
break;
|
||
|
}
|
||
|
case VPInstruction::FirstOrderRecurrenceSplice: {
|
||
|
// Generate code to combine the previous and current values in vector v3.
|
||
|
//
|
||
|
// vector.ph:
|
||
|
// v_init = vector(..., ..., ..., a[-1])
|
||
|
// br vector.body
|
||
|
//
|
||
|
// vector.body
|
||
|
// i = phi [0, vector.ph], [i+4, vector.body]
|
||
|
// v1 = phi [v_init, vector.ph], [v2, vector.body]
|
||
|
// v2 = a[i, i+1, i+2, i+3];
|
||
|
// v3 = vector(v1(3), v2(0, 1, 2))
|
||
|
|
||
|
// For the first part, use the recurrence phi (v1), otherwise v2.
|
||
|
auto *V1 = State.get(getOperand(0), 0);
|
||
|
Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
|
||
|
if (!PartMinus1->getType()->isVectorTy()) {
|
||
|
State.set(this, PartMinus1, Part);
|
||
|
} else {
|
||
|
Value *V2 = State.get(getOperand(1), Part);
|
||
|
State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1), Part);
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
case VPInstruction::CanonicalIVIncrement:
|
||
|
case VPInstruction::CanonicalIVIncrementNUW: {
|
||
|
Value *Next = nullptr;
|
||
|
if (Part == 0) {
|
||
|
bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
|
||
|
auto *Phi = State.get(getOperand(0), 0);
|
||
|
// The loop step is equal to the vectorization factor (num of SIMD
|
||
|
// elements) times the unroll factor (num of SIMD instructions).
|
||
|
Value *Step =
|
||
|
createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
|
||
|
Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
|
||
|
} else {
|
||
|
Next = State.get(this, 0);
|
||
|
}
|
||
|
|
||
|
State.set(this, Next, Part);
|
||
|
break;
|
||
|
}
|
||
|
case VPInstruction::BranchOnCond: {
|
||
|
if (Part != 0)
|
||
|
break;
|
||
|
|
||
|
Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
|
||
|
VPRegionBlock *ParentRegion = getParent()->getParent();
|
||
|
VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
|
||
|
|
||
|
// Replace the temporary unreachable terminator with a new conditional
|
||
|
// branch, hooking it up to backward destination for exiting blocks now and
|
||
|
// to forward destination(s) later when they are created.
|
||
|
BranchInst *CondBr =
|
||
|
Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);
|
||
|
|
||
|
if (getParent()->isExiting())
|
||
|
CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
|
||
|
|
||
|
CondBr->setSuccessor(0, nullptr);
|
||
|
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
|
||
|
break;
|
||
|
}
|
||
|
case VPInstruction::BranchOnCount: {
|
||
|
if (Part != 0)
|
||
|
break;
|
||
|
// First create the compare.
|
||
|
Value *IV = State.get(getOperand(0), Part);
|
||
|
Value *TC = State.get(getOperand(1), Part);
|
||
|
Value *Cond = Builder.CreateICmpEQ(IV, TC);
|
||
|
|
||
|
// Now create the branch.
|
||
|
auto *Plan = getParent()->getPlan();
|
||
|
VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
|
||
|
VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
|
||
|
|
||
|
// Replace the temporary unreachable terminator with a new conditional
|
||
|
// branch, hooking it up to backward destination (the header) now and to the
|
||
|
// forward destination (the exit/middle block) later when it is created.
|
||
|
// Note that CreateCondBr expects a valid BB as first argument, so we need
|
||
|
// to set it to nullptr later.
|
||
|
BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
|
||
|
State.CFG.VPBB2IRBB[Header]);
|
||
|
CondBr->setSuccessor(0, nullptr);
|
||
|
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
|
||
|
break;
|
||
|
}
|
||
|
default:
|
||
|
llvm_unreachable("Unsupported opcode for instruction");
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void VPInstruction::execute(VPTransformState &State) {
|
||
|
assert(!State.Instance && "VPInstruction executing an Instance");
|
||
|
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
|
||
|
State.Builder.setFastMathFlags(FMF);
|
||
|
for (unsigned Part = 0; Part < State.UF; ++Part)
|
||
|
generateInstruction(State, Part);
|
||
|
}
|
||
|
|
||
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||
|
void VPInstruction::dump() const {
|
||
|
VPSlotTracker SlotTracker(getParent()->getPlan());
|
||
|
print(dbgs(), "", SlotTracker);
|
||
|
}
|
||
|
|
||
|
void VPInstruction::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "EMIT ";
|
||
|
|
||
|
if (hasResult()) {
|
||
|
printAsOperand(O, SlotTracker);
|
||
|
O << " = ";
|
||
|
}
|
||
|
|
||
|
switch (getOpcode()) {
|
||
|
case VPInstruction::Not:
|
||
|
O << "not";
|
||
|
break;
|
||
|
case VPInstruction::ICmpULE:
|
||
|
O << "icmp ule";
|
||
|
break;
|
||
|
case VPInstruction::SLPLoad:
|
||
|
O << "combined load";
|
||
|
break;
|
||
|
case VPInstruction::SLPStore:
|
||
|
O << "combined store";
|
||
|
break;
|
||
|
case VPInstruction::ActiveLaneMask:
|
||
|
O << "active lane mask";
|
||
|
break;
|
||
|
case VPInstruction::FirstOrderRecurrenceSplice:
|
||
|
O << "first-order splice";
|
||
|
break;
|
||
|
case VPInstruction::CanonicalIVIncrement:
|
||
|
O << "VF * UF + ";
|
||
|
break;
|
||
|
case VPInstruction::CanonicalIVIncrementNUW:
|
||
|
O << "VF * UF +(nuw) ";
|
||
|
break;
|
||
|
case VPInstruction::BranchOnCond:
|
||
|
O << "branch-on-cond";
|
||
|
break;
|
||
|
case VPInstruction::BranchOnCount:
|
||
|
O << "branch-on-count ";
|
||
|
break;
|
||
|
default:
|
||
|
O << Instruction::getOpcodeName(getOpcode());
|
||
|
}
|
||
|
|
||
|
O << FMF;
|
||
|
|
||
|
for (const VPValue *Operand : operands()) {
|
||
|
O << " ";
|
||
|
Operand->printAsOperand(O, SlotTracker);
|
||
|
}
|
||
|
|
||
|
if (DL) {
|
||
|
O << ", !dbg ";
|
||
|
DL.print(O);
|
||
|
}
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
|
||
|
// Make sure the VPInstruction is a floating-point operation.
|
||
|
assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
|
||
|
Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
|
||
|
Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
|
||
|
Opcode == Instruction::FCmp) &&
|
||
|
"this op can't take fast-math flags");
|
||
|
FMF = FMFNew;
|
||
|
}
|
||
|
|
||
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||
|
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "WIDEN-CALL ";
|
||
|
|
||
|
auto *CI = cast<CallInst>(getUnderlyingInstr());
|
||
|
if (CI->getType()->isVoidTy())
|
||
|
O << "void ";
|
||
|
else {
|
||
|
printAsOperand(O, SlotTracker);
|
||
|
O << " = ";
|
||
|
}
|
||
|
|
||
|
O << "call @" << CI->getCalledFunction()->getName() << "(";
|
||
|
printOperands(O, SlotTracker);
|
||
|
O << ")";
|
||
|
}
|
||
|
|
||
|
void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "WIDEN-SELECT ";
|
||
|
printAsOperand(O, SlotTracker);
|
||
|
O << " = select ";
|
||
|
getOperand(0)->printAsOperand(O, SlotTracker);
|
||
|
O << ", ";
|
||
|
getOperand(1)->printAsOperand(O, SlotTracker);
|
||
|
O << ", ";
|
||
|
getOperand(2)->printAsOperand(O, SlotTracker);
|
||
|
O << (InvariantCond ? " (condition is loop invariant)" : "");
|
||
|
}
|
||
|
|
||
|
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "WIDEN ";
|
||
|
printAsOperand(O, SlotTracker);
|
||
|
O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
|
||
|
printOperands(O, SlotTracker);
|
||
|
}
|
||
|
|
||
|
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "WIDEN-INDUCTION";
|
||
|
if (getTruncInst()) {
|
||
|
O << "\\l\"";
|
||
|
O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
|
||
|
O << " +\n" << Indent << "\" ";
|
||
|
getVPValue(0)->printAsOperand(O, SlotTracker);
|
||
|
} else
|
||
|
O << " " << VPlanIngredient(IV);
|
||
|
|
||
|
O << ", ";
|
||
|
getStepValue()->printAsOperand(O, SlotTracker);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
|
||
|
auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
|
||
|
auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
|
||
|
return StartC && StartC->isZero() && StepC && StepC->isOne();
|
||
|
}
|
||
|
|
||
|
VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const {
|
||
|
return cast<VPCanonicalIVPHIRecipe>(getOperand(0));
|
||
|
}
|
||
|
|
||
|
bool VPScalarIVStepsRecipe::isCanonical() const {
|
||
|
auto *CanIV = getCanonicalIV();
|
||
|
// The start value of the steps-recipe must match the start value of the
|
||
|
// canonical induction and it must step by 1.
|
||
|
if (CanIV->getStartValue() != getStartValue())
|
||
|
return false;
|
||
|
auto *StepVPV = getStepValue();
|
||
|
if (StepVPV->getDef())
|
||
|
return false;
|
||
|
auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue());
|
||
|
return StepC && StepC->isOne();
|
||
|
}
|
||
|
|
||
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||
|
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent;
|
||
|
printAsOperand(O, SlotTracker);
|
||
|
O << Indent << "= SCALAR-STEPS ";
|
||
|
printOperands(O, SlotTracker);
|
||
|
}
|
||
|
|
||
|
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "WIDEN-GEP ";
|
||
|
O << (IsPtrLoopInvariant ? "Inv" : "Var");
|
||
|
size_t IndicesNumber = IsIndexLoopInvariant.size();
|
||
|
for (size_t I = 0; I < IndicesNumber; ++I)
|
||
|
O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
|
||
|
|
||
|
O << " ";
|
||
|
printAsOperand(O, SlotTracker);
|
||
|
O << " = getelementptr ";
|
||
|
printOperands(O, SlotTracker);
|
||
|
}
|
||
|
|
||
|
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "BLEND ";
|
||
|
Phi->printAsOperand(O, false);
|
||
|
O << " =";
|
||
|
if (getNumIncomingValues() == 1) {
|
||
|
// Not a User of any mask: not really blending, this is a
|
||
|
// single-predecessor phi.
|
||
|
O << " ";
|
||
|
getIncomingValue(0)->printAsOperand(O, SlotTracker);
|
||
|
} else {
|
||
|
for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
|
||
|
O << " ";
|
||
|
getIncomingValue(I)->printAsOperand(O, SlotTracker);
|
||
|
O << "/";
|
||
|
getMask(I)->printAsOperand(O, SlotTracker);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "REDUCE ";
|
||
|
printAsOperand(O, SlotTracker);
|
||
|
O << " = ";
|
||
|
getChainOp()->printAsOperand(O, SlotTracker);
|
||
|
O << " +";
|
||
|
if (isa<FPMathOperator>(getUnderlyingInstr()))
|
||
|
O << getUnderlyingInstr()->getFastMathFlags();
|
||
|
O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " (";
|
||
|
getVecOp()->printAsOperand(O, SlotTracker);
|
||
|
if (getCondOp()) {
|
||
|
O << ", ";
|
||
|
getCondOp()->printAsOperand(O, SlotTracker);
|
||
|
}
|
||
|
O << ")";
|
||
|
if (RdxDesc->IntermediateStore)
|
||
|
O << " (with final reduction value stored in invariant address sank "
|
||
|
"outside of loop)";
|
||
|
}
|
||
|
|
||
|
void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
|
||
|
|
||
|
if (!getUnderlyingInstr()->getType()->isVoidTy()) {
|
||
|
printAsOperand(O, SlotTracker);
|
||
|
O << " = ";
|
||
|
}
|
||
|
if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
|
||
|
O << "call @" << CB->getCalledFunction()->getName() << "(";
|
||
|
interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
|
||
|
O, [&O, &SlotTracker](VPValue *Op) {
|
||
|
Op->printAsOperand(O, SlotTracker);
|
||
|
});
|
||
|
O << ")";
|
||
|
} else {
|
||
|
O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
|
||
|
printOperands(O, SlotTracker);
|
||
|
}
|
||
|
|
||
|
if (AlsoPack)
|
||
|
O << " (S->V)";
|
||
|
}
|
||
|
|
||
|
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "PHI-PREDICATED-INSTRUCTION ";
|
||
|
printAsOperand(O, SlotTracker);
|
||
|
O << " = ";
|
||
|
printOperands(O, SlotTracker);
|
||
|
}
|
||
|
|
||
|
void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "WIDEN ";
|
||
|
|
||
|
if (!isStore()) {
|
||
|
getVPSingleValue()->printAsOperand(O, SlotTracker);
|
||
|
O << " = ";
|
||
|
}
|
||
|
O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
|
||
|
|
||
|
printOperands(O, SlotTracker);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
|
||
|
Value *Start = getStartValue()->getLiveInIRValue();
|
||
|
PHINode *EntryPart = PHINode::Create(
|
||
|
Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt());
|
||
|
|
||
|
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
|
||
|
EntryPart->addIncoming(Start, VectorPH);
|
||
|
EntryPart->setDebugLoc(DL);
|
||
|
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
|
||
|
State.set(this, EntryPart, Part);
|
||
|
}
|
||
|
|
||
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||
|
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "EMIT ";
|
||
|
printAsOperand(O, SlotTracker);
|
||
|
O << " = CANONICAL-INDUCTION";
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
|
||
|
bool IsUniform = vputils::onlyFirstLaneUsed(this);
|
||
|
return all_of(users(),
|
||
|
[&](const VPUser *U) { return U->usesScalars(this); }) &&
|
||
|
(IsUniform || !VF.isScalable());
|
||
|
}
|
||
|
|
||
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||
|
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "EMIT ";
|
||
|
printAsOperand(O, SlotTracker);
|
||
|
O << " = WIDEN-POINTER-INDUCTION ";
|
||
|
getStartValue()->printAsOperand(O, SlotTracker);
|
||
|
O << ", " << *IndDesc.getStep();
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
void VPExpandSCEVRecipe::execute(VPTransformState &State) {
|
||
|
assert(!State.Instance && "cannot be used in per-lane");
|
||
|
const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout();
|
||
|
SCEVExpander Exp(SE, DL, "induction");
|
||
|
|
||
|
Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
|
||
|
&*State.Builder.GetInsertPoint());
|
||
|
|
||
|
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
|
||
|
State.set(this, Res, Part);
|
||
|
}
|
||
|
|
||
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||
|
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "EMIT ";
|
||
|
getVPSingleValue()->printAsOperand(O, SlotTracker);
|
||
|
O << " = EXPAND SCEV " << *Expr;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
|
||
|
Value *CanonicalIV = State.get(getOperand(0), 0);
|
||
|
Type *STy = CanonicalIV->getType();
|
||
|
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
|
||
|
ElementCount VF = State.VF;
|
||
|
Value *VStart = VF.isScalar()
|
||
|
? CanonicalIV
|
||
|
: Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
|
||
|
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
|
||
|
Value *VStep = createStepForVF(Builder, STy, VF, Part);
|
||
|
if (VF.isVector()) {
|
||
|
VStep = Builder.CreateVectorSplat(VF, VStep);
|
||
|
VStep =
|
||
|
Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
|
||
|
}
|
||
|
Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
|
||
|
State.set(this, CanonicalVectorIV, Part);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||
|
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "EMIT ";
|
||
|
printAsOperand(O, SlotTracker);
|
||
|
O << " = WIDEN-CANONICAL-INDUCTION ";
|
||
|
printOperands(O, SlotTracker);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
|
||
|
auto &Builder = State.Builder;
|
||
|
// Create a vector from the initial value.
|
||
|
auto *VectorInit = getStartValue()->getLiveInIRValue();
|
||
|
|
||
|
Type *VecTy = State.VF.isScalar()
|
||
|
? VectorInit->getType()
|
||
|
: VectorType::get(VectorInit->getType(), State.VF);
|
||
|
|
||
|
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
|
||
|
if (State.VF.isVector()) {
|
||
|
auto *IdxTy = Builder.getInt32Ty();
|
||
|
auto *One = ConstantInt::get(IdxTy, 1);
|
||
|
IRBuilder<>::InsertPointGuard Guard(Builder);
|
||
|
Builder.SetInsertPoint(VectorPH->getTerminator());
|
||
|
auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
|
||
|
auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
|
||
|
VectorInit = Builder.CreateInsertElement(
|
||
|
PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init");
|
||
|
}
|
||
|
|
||
|
// Create a phi node for the new recurrence.
|
||
|
PHINode *EntryPart = PHINode::Create(
|
||
|
VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt());
|
||
|
EntryPart->addIncoming(VectorInit, VectorPH);
|
||
|
State.set(this, EntryPart, 0);
|
||
|
}
|
||
|
|
||
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||
|
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
|
||
|
printAsOperand(O, SlotTracker);
|
||
|
O << " = phi ";
|
||
|
printOperands(O, SlotTracker);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
void VPReductionPHIRecipe::execute(VPTransformState &State) {
|
||
|
PHINode *PN = cast<PHINode>(getUnderlyingValue());
|
||
|
auto &Builder = State.Builder;
|
||
|
|
||
|
// In order to support recurrences we need to be able to vectorize Phi nodes.
|
||
|
// Phi nodes have cycles, so we need to vectorize them in two stages. This is
|
||
|
// stage #1: We create a new vector PHI node with no incoming edges. We'll use
|
||
|
// this value when we vectorize all of the instructions that use the PHI.
|
||
|
bool ScalarPHI = State.VF.isScalar() || IsInLoop;
|
||
|
Type *VecTy =
|
||
|
ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);
|
||
|
|
||
|
BasicBlock *HeaderBB = State.CFG.PrevBB;
|
||
|
assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
|
||
|
"recipe must be in the vector loop header");
|
||
|
unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
|
||
|
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
|
||
|
Value *EntryPart =
|
||
|
PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
|
||
|
State.set(this, EntryPart, Part);
|
||
|
}
|
||
|
|
||
|
BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
|
||
|
|
||
|
// Reductions do not have to start at zero. They can start with
|
||
|
// any loop invariant values.
|
||
|
VPValue *StartVPV = getStartValue();
|
||
|
Value *StartV = StartVPV->getLiveInIRValue();
|
||
|
|
||
|
Value *Iden = nullptr;
|
||
|
RecurKind RK = RdxDesc.getRecurrenceKind();
|
||
|
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
|
||
|
RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
|
||
|
// MinMax reduction have the start value as their identify.
|
||
|
if (ScalarPHI) {
|
||
|
Iden = StartV;
|
||
|
} else {
|
||
|
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
|
||
|
Builder.SetInsertPoint(VectorPH->getTerminator());
|
||
|
StartV = Iden =
|
||
|
Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
|
||
|
}
|
||
|
} else {
|
||
|
Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
|
||
|
RdxDesc.getFastMathFlags());
|
||
|
|
||
|
if (!ScalarPHI) {
|
||
|
Iden = Builder.CreateVectorSplat(State.VF, Iden);
|
||
|
IRBuilderBase::InsertPointGuard IPBuilder(Builder);
|
||
|
Builder.SetInsertPoint(VectorPH->getTerminator());
|
||
|
Constant *Zero = Builder.getInt32(0);
|
||
|
StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
|
||
|
Value *EntryPart = State.get(this, Part);
|
||
|
// Make sure to add the reduction start value only to the
|
||
|
// first unroll part.
|
||
|
Value *StartVal = (Part == 0) ? StartV : Iden;
|
||
|
cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||
|
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "WIDEN-REDUCTION-PHI ";
|
||
|
|
||
|
printAsOperand(O, SlotTracker);
|
||
|
O << " = phi ";
|
||
|
printOperands(O, SlotTracker);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
void VPWidenPHIRecipe::execute(VPTransformState &State) {
|
||
|
assert(EnableVPlanNativePath &&
|
||
|
"Non-native vplans are not expected to have VPWidenPHIRecipes.");
|
||
|
|
||
|
// Currently we enter here in the VPlan-native path for non-induction
|
||
|
// PHIs where all control flow is uniform. We simply widen these PHIs.
|
||
|
// Create a vector phi with no operands - the vector phi operands will be
|
||
|
// set at the end of vector code generation.
|
||
|
VPBasicBlock *Parent = getParent();
|
||
|
VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion();
|
||
|
unsigned StartIdx = 0;
|
||
|
// For phis in header blocks of loop regions, use the index of the value
|
||
|
// coming from the preheader.
|
||
|
if (LoopRegion->getEntryBasicBlock() == Parent) {
|
||
|
for (unsigned I = 0; I < getNumOperands(); ++I) {
|
||
|
if (getIncomingBlock(I) ==
|
||
|
LoopRegion->getSinglePredecessor()->getExitingBasicBlock())
|
||
|
StartIdx = I;
|
||
|
}
|
||
|
}
|
||
|
Value *Op0 = State.get(getOperand(StartIdx), 0);
|
||
|
Type *VecTy = Op0->getType();
|
||
|
Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
|
||
|
State.set(this, VecPhi, 0);
|
||
|
}
|
||
|
|
||
|
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
|
||
|
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
|
||
|
VPSlotTracker &SlotTracker) const {
|
||
|
O << Indent << "WIDEN-PHI ";
|
||
|
|
||
|
auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
|
||
|
// Unless all incoming values are modeled in VPlan print the original PHI
|
||
|
// directly.
|
||
|
// TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
|
||
|
// values as VPValues.
|
||
|
if (getNumOperands() != OriginalPhi->getNumOperands()) {
|
||
|
O << VPlanIngredient(OriginalPhi);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
printAsOperand(O, SlotTracker);
|
||
|
O << " = phi ";
|
||
|
printOperands(O, SlotTracker);
|
||
|
}
|
||
|
#endif
|