[VPlan] Split VPWidenMemoryInstructionRecipe (NFCI). (#87411)

This patch introduces a new VPWidenMemoryRecipe base class and distinct
sub-classes to model loads and stores.

This is a first step in an effort to simplify and modularize code
generation for widened loads and stores, and to enable adding further,
more specialized memory recipes.
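
For orientation, the split looks roughly like the sketch below. This is a
simplified, standalone illustration of the shape of the new hierarchy, not the
actual LLVM definitions; the real classes in the VPlan.h diff further down
additionally derive from VPRecipeBase (and, for loads, VPValue) and carry the
widened scalar instruction as Ingredient.

// Simplified sketch of the hierarchy introduced by this patch (see the
// VPlan.h changes below for the real definitions).
#include <cassert>

struct VPWidenMemoryRecipe {
  bool Consecutive;      // accessed addresses are consecutive
  bool Reverse;          // consecutive addresses are accessed in reverse order
  bool IsMasked = false; // an optional mask is carried as the last operand

  VPWidenMemoryRecipe(bool Consecutive, bool Reverse)
      : Consecutive(Consecutive), Reverse(Reverse) {
    assert((Consecutive || !Reverse) && "Reverse implies consecutive");
  }
  virtual ~VPWidenMemoryRecipe() = default;
  virtual void execute() = 0; // each sub-class emits its own wide IR
};

// Loads define a value (the real recipe is therefore also a VPValue).
struct VPWidenLoadRecipe final : VPWidenMemoryRecipe {
  using VPWidenMemoryRecipe::VPWidenMemoryRecipe;
  void execute() override { /* wide load, masked load, or gather */ }
};

// Stores define no value; the stored value is carried as an extra operand.
struct VPWidenStoreRecipe final : VPWidenMemoryRecipe {
  using VPWidenMemoryRecipe::VPWidenMemoryRecipe;
  void execute() override { /* wide store, masked store, or scatter */ }
};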

PR: https://github.com/llvm/llvm-project/pull/87411
Florian Hahn, 2024-04-17 11:00:58 +01:00, committed by GitHub
parent cbe148b730
commit a9bafe91dd
13 changed files with 251 additions and 225 deletions

View File

@ -545,11 +545,6 @@ public:
// Return true if any runtime check is added.
bool areSafetyChecksAdded() { return AddedSafetyChecks; }
/// A type for vectorized values in the new loop. Each value from the
/// original loop, when vectorized, is represented by UF vector values in the
/// new unrolled loop, where UF is the unroll factor.
using VectorParts = SmallVector<Value *, 2>;
/// A helper function to scalarize a single Instruction in the innermost loop.
/// Generates a sequence of scalar instances for each lane between \p MinLane
/// and \p MaxLane, times each part between \p MinPart and \p MaxPart,
@ -8086,7 +8081,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB) {
BlockMaskCache[BB] = BlockMask;
}
VPWidenMemoryInstructionRecipe *
VPWidenMemoryRecipe *
VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
VFRange &Range) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
@ -8131,12 +8126,12 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
Ptr = VectorPtr;
}
if (LoadInst *Load = dyn_cast<LoadInst>(I))
return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive,
Reverse, I->getDebugLoc());
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
I->getDebugLoc());
StoreInst *Store = cast<StoreInst>(I);
return new VPWidenMemoryInstructionRecipe(
*Store, Ptr, Operands[0], Mask, Consecutive, Reverse, I->getDebugLoc());
return new VPWidenStoreRecipe(*Store, Ptr, Operands[0], Mask, Consecutive,
Reverse, I->getDebugLoc());
}
/// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also
@ -8775,13 +8770,12 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// for this VPlan, replace the Recipes widening its memory instructions with a
// single VPInterleaveRecipe at its insertion point.
for (const auto *IG : InterleaveGroups) {
auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
RecipeBuilder.getRecipe(IG->getInsertPos()));
auto *Recipe =
cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IG->getInsertPos()));
SmallVector<VPValue *, 4> StoredValues;
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (auto *SI = dyn_cast_or_null<StoreInst>(IG->getMember(i))) {
auto *StoreR =
cast<VPWidenMemoryInstructionRecipe>(RecipeBuilder.getRecipe(SI));
auto *StoreR = cast<VPWidenStoreRecipe>(RecipeBuilder.getRecipe(SI));
StoredValues.push_back(StoreR->getStoredValue());
}
@ -9368,92 +9362,27 @@ static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder,
return Call;
}
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
VPValue *StoredValue = isStore() ? getStoredValue() : nullptr;
// Attempt to issue a wide load.
LoadInst *LI = dyn_cast<LoadInst>(&Ingredient);
StoreInst *SI = dyn_cast<StoreInst>(&Ingredient);
assert((LI || SI) && "Invalid Load/Store instruction");
assert((!SI || StoredValue) && "No stored value provided for widened store");
assert((!LI || !StoredValue) && "Stored value provided for widened load");
void VPWidenLoadRecipe::execute(VPTransformState &State) {
auto *LI = cast<LoadInst>(&Ingredient);
Type *ScalarDataTy = getLoadStoreType(&Ingredient);
auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
const Align Alignment = getLoadStoreAlignment(&Ingredient);
bool CreateGatherScatter = !isConsecutive();
bool CreateGather = !isConsecutive();
auto &Builder = State.Builder;
InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
bool isMaskRequired = getMask();
if (isMaskRequired) {
// Mask reversal is only needed for non-all-one (null) masks, as reverse of
// a null all-one mask is a null mask.
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *Mask = State.get(getMask(), Part);
if (isReverse())
Mask = Builder.CreateVectorReverse(Mask, "reverse");
BlockInMaskParts[Part] = Mask;
}
}
// Handle Stores:
if (SI) {
State.setDebugLocFrom(getDebugLoc());
for (unsigned Part = 0; Part < State.UF; ++Part) {
Instruction *NewSI = nullptr;
Value *StoredVal = State.get(StoredValue, Part);
// TODO: split this into several classes for better design.
if (State.EVL) {
assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
"explicit vector length.");
assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
VPInstruction::ExplicitVectorLength &&
"EVL must be VPInstruction::ExplicitVectorLength.");
Value *EVL = State.get(State.EVL, VPIteration(0, 0));
// If EVL is not nullptr, then EVL must be a valid value set during plan
// creation, possibly default value = whole vector register length. EVL
// is created only if TTI prefers predicated vectorization, thus if EVL
// is not nullptr it also implies preference for predicated
// vectorization.
// FIXME: Support reverse store after vp_reverse is added.
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
NewSI = lowerStoreUsingVectorIntrinsics(
Builder, State.get(getAddr(), Part, !CreateGatherScatter),
StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment);
} else if (CreateGatherScatter) {
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Value *VectorGep = State.get(getAddr(), Part);
NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
MaskPart);
} else {
if (isReverse()) {
// If we store to reverse consecutive memory locations, then we need
// to reverse the order of elements in the stored value.
StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
// We don't want to update the value in the map as it might be used in
// another expression. So don't call resetVectorValue(StoredVal).
}
auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
if (isMaskRequired)
NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
BlockInMaskParts[Part]);
else
NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
}
State.addMetadata(NewSI, SI);
}
return;
}
// Handle loads.
assert(LI && "Must have a load instruction");
State.setDebugLocFrom(getDebugLoc());
for (unsigned Part = 0; Part < State.UF; ++Part) {
Value *NewLI;
Value *Mask = nullptr;
if (auto *VPMask = getMask()) {
// Mask reversal is only needed for non-all-one (null) masks, as reverse
// of a null all-one mask is a null mask.
Mask = State.get(VPMask, Part);
if (isReverse())
Mask = Builder.CreateVectorReverse(Mask, "reverse");
}
// TODO: split this into several classes for better design.
if (State.EVL) {
assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
@ -9468,22 +9397,20 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
// is not nullptr it also implies preference for predicated
// vectorization.
// FIXME: Support reverse loading after vp_reverse is added.
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
NewLI = lowerLoadUsingVectorIntrinsics(
Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter),
CreateGatherScatter, MaskPart, EVL, Alignment);
} else if (CreateGatherScatter) {
Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr;
Builder, DataTy, State.get(getAddr(), Part, !CreateGather),
CreateGather, Mask, EVL, Alignment);
} else if (CreateGather) {
Value *VectorGep = State.get(getAddr(), Part);
NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart,
NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, Mask,
nullptr, "wide.masked.gather");
State.addMetadata(NewLI, LI);
} else {
auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
if (isMaskRequired)
NewLI = Builder.CreateMaskedLoad(
DataTy, VecPtr, Alignment, BlockInMaskParts[Part],
PoisonValue::get(DataTy), "wide.masked.load");
if (Mask)
NewLI = Builder.CreateMaskedLoad(DataTy, VecPtr, Alignment, Mask,
PoisonValue::get(DataTy),
"wide.masked.load");
else
NewLI =
Builder.CreateAlignedLoad(DataTy, VecPtr, Alignment, "wide.load");
@ -9494,7 +9421,69 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
NewLI = Builder.CreateVectorReverse(NewLI, "reverse");
}
State.set(getVPSingleValue(), NewLI, Part);
State.set(this, NewLI, Part);
}
}
void VPWidenStoreRecipe::execute(VPTransformState &State) {
auto *SI = cast<StoreInst>(&Ingredient);
VPValue *StoredVPValue = getStoredValue();
bool CreateScatter = !isConsecutive();
const Align Alignment = getLoadStoreAlignment(&Ingredient);
auto &Builder = State.Builder;
State.setDebugLocFrom(getDebugLoc());
for (unsigned Part = 0; Part < State.UF; ++Part) {
Instruction *NewSI = nullptr;
Value *Mask = nullptr;
if (auto *VPMask = getMask()) {
// Mask reversal is only needed for non-all-one (null) masks, as reverse
// of a null all-one mask is a null mask.
Mask = State.get(VPMask, Part);
if (isReverse())
Mask = Builder.CreateVectorReverse(Mask, "reverse");
}
Value *StoredVal = State.get(StoredVPValue, Part);
if (isReverse()) {
assert(!State.EVL && "reversing not yet implemented with EVL");
// If we store to reverse consecutive memory locations, then we need
// to reverse the order of elements in the stored value.
StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
// We don't want to update the value in the map as it might be used in
// another expression. So don't call resetVectorValue(StoredVal).
}
// TODO: split this into several classes for better design.
if (State.EVL) {
assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
"explicit vector length.");
assert(cast<VPInstruction>(State.EVL)->getOpcode() ==
VPInstruction::ExplicitVectorLength &&
"EVL must be VPInstruction::ExplicitVectorLength.");
Value *EVL = State.get(State.EVL, VPIteration(0, 0));
// If EVL is not nullptr, then EVL must be a valid value set during plan
// creation, possibly default value = whole vector register length. EVL
// is created only if TTI prefers predicated vectorization, thus if EVL
// is not nullptr it also implies preference for predicated
// vectorization.
// FIXME: Support reverse store after vp_reverse is added.
NewSI = lowerStoreUsingVectorIntrinsics(
Builder, State.get(getAddr(), Part, !CreateScatter), StoredVal,
CreateScatter, Mask, EVL, Alignment);
} else if (CreateScatter) {
Value *VectorGep = State.get(getAddr(), Part);
NewSI =
Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, Mask);
} else {
auto *VecPtr = State.get(getAddr(), Part, /*IsScalar*/ true);
if (Mask)
NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment, Mask);
else
NewSI = Builder.CreateAlignedStore(StoredVal, VecPtr, Alignment);
}
State.addMetadata(NewSI, SI);
}
}
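
Condensed, the two execute() implementations above make the same three-way
choice for loads and stores. The sketch below restates that selection with
plain booleans instead of VPTransformState; the enum and function names are
illustrative only, not LLVM API.

// Illustrative restatement of how VPWidenLoadRecipe::execute and
// VPWidenStoreRecipe::execute choose the IR they emit.
enum class WideMemKind {
  VPIntrinsic,   // EVL tail folding: vp-style load/store, currently UF == 1
  GatherScatter, // non-consecutive access: masked.gather / masked.scatter
  Masked,        // consecutive and masked: masked.load / masked.store
  Plain          // consecutive and unmasked: plain wide load / store
};

WideMemKind classify(bool Consecutive, bool Masked, bool HasEVL) {
  if (HasEVL)
    return WideMemKind::VPIntrinsic; // reverse accesses not yet supported here
  if (!Consecutive)
    return WideMemKind::GatherScatter;
  return Masked ? WideMemKind::Masked : WideMemKind::Plain;
}

// For reverse consecutive accesses, the mask and the loaded/stored vector are
// additionally flipped with CreateVectorReverse, as in the code above.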

View File

@ -69,9 +69,9 @@ class VPRecipeBuilder {
/// Check if the load or store instruction \p I should widened for \p
/// Range.Start and potentially masked. Such instructions are handled by a
/// recipe that takes an additional VPInstruction for the mask.
VPWidenMemoryInstructionRecipe *tryToWidenMemory(Instruction *I,
ArrayRef<VPValue *> Operands,
VFRange &Range);
VPWidenMemoryRecipe *tryToWidenMemory(Instruction *I,
ArrayRef<VPValue *> Operands,
VFRange &Range);
/// Check if an induction recipe should be constructed for \p Phi. If so build
/// and return it. If not, return null.

View File

@ -875,7 +875,8 @@ public:
return true;
case VPRecipeBase::VPInterleaveSC:
case VPRecipeBase::VPBranchOnMaskSC:
case VPRecipeBase::VPWidenMemoryInstructionSC:
case VPRecipeBase::VPWidenLoadSC:
case VPRecipeBase::VPWidenStoreSC:
// TODO: Widened stores don't define a value, but widened loads do. Split
// the recipes to be able to make widened loads VPSingleDefRecipes.
return false;
@ -2280,68 +2281,62 @@ public:
}
};
/// A Recipe for widening load/store operations.
/// The recipe uses the following VPValues:
/// - For load: Address, optional mask
/// - For store: Address, stored value, optional mask
/// TODO: We currently execute only per-part unless a specific instance is
/// provided.
class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
/// A common base class for widening memory operations. An optional mask can be
/// provided as the last operand.
class VPWidenMemoryRecipe : public VPRecipeBase {
protected:
Instruction &Ingredient;
// Whether the loaded-from / stored-to addresses are consecutive.
/// Whether the accessed addresses are consecutive.
bool Consecutive;
// Whether the consecutive loaded/stored addresses are in reverse order.
/// Whether the consecutive accessed addresses are in reverse order.
bool Reverse;
/// Whether the memory access is masked.
bool IsMasked = false;
void setMask(VPValue *Mask) {
assert(!IsMasked && "cannot re-set mask");
if (!Mask)
return;
addOperand(Mask);
IsMasked = true;
}
bool isMasked() const {
return isStore() ? getNumOperands() == 3 : getNumOperands() == 2;
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
std::initializer_list<VPValue *> Operands,
bool Consecutive, bool Reverse, DebugLoc DL)
: VPRecipeBase(SC, Operands, DL), Ingredient(I), Consecutive(Consecutive),
Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
}
public:
VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
bool Consecutive, bool Reverse, DebugLoc DL)
: VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}, DL),
Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
new VPValue(this, &Load);
setMask(Mask);
VPWidenMemoryRecipe *clone() override = 0;
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPDef::VPWidenLoadSC ||
R->getVPDefID() == VPDef::VPWidenStoreSC;
}
VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr,
VPValue *StoredValue, VPValue *Mask,
bool Consecutive, bool Reverse, DebugLoc DL)
: VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue},
DL),
Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) {
assert((Consecutive || !Reverse) && "Reverse implies consecutive");
setMask(Mask);
static inline bool classof(const VPUser *U) {
auto *R = dyn_cast<VPRecipeBase>(U);
return R && classof(R);
}
VPWidenMemoryInstructionRecipe *clone() override {
if (isStore())
return new VPWidenMemoryInstructionRecipe(
cast<StoreInst>(Ingredient), getAddr(), getStoredValue(), getMask(),
Consecutive, Reverse, getDebugLoc());
/// Return whether the loaded-from / stored-to addresses are consecutive.
bool isConsecutive() const { return Consecutive; }
return new VPWidenMemoryInstructionRecipe(cast<LoadInst>(Ingredient),
getAddr(), getMask(), Consecutive,
Reverse, getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC)
/// Return whether the consecutive loaded/stored addresses are in reverse
/// order.
bool isReverse() const { return Reverse; }
/// Return the address accessed by this recipe.
VPValue *getAddr() const {
return getOperand(0); // Address is the 1st, mandatory operand.
}
VPValue *getAddr() const { return getOperand(0); }
/// Returns true if the recipe is masked.
bool isMasked() const { return IsMasked; }
/// Return the mask used by this recipe. Note that a full mask is represented
/// by a nullptr.
@ -2350,23 +2345,34 @@ public:
return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
}
/// Returns true if this recipe is a store.
bool isStore() const { return isa<StoreInst>(Ingredient); }
/// Return the address accessed by this recipe.
VPValue *getStoredValue() const {
assert(isStore() && "Stored value only available for store instructions");
return getOperand(1); // Stored value is the 2nd, mandatory operand.
/// Generate the wide load/store.
void execute(VPTransformState &State) override {
llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
}
// Return whether the loaded-from / stored-to addresses are consecutive.
bool isConsecutive() const { return Consecutive; }
Instruction &getIngredient() const { return Ingredient; }
};
// Return whether the consecutive loaded/stored addresses are in reverse
// order.
bool isReverse() const { return Reverse; }
/// A recipe for widening load operations, using the address to load from and an
/// optional mask.
struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask,
bool Consecutive, bool Reverse, DebugLoc DL)
: VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
Reverse, DL),
VPValue(this, &Load) {
setMask(Mask);
}
/// Generate the wide load/store.
VPWidenLoadRecipe *clone() override {
return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
getMask(), Consecutive, Reverse,
getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
/// Generate a wide load or gather.
void execute(VPTransformState &State) override;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@ -2380,16 +2386,51 @@ public:
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// Widened, consecutive memory operations only demand the first lane of
// their address, unless the same operand is also stored. That latter can
// happen with opaque pointers.
return Op == getAddr() && isConsecutive() &&
(!isStore() || Op != getStoredValue());
// Widened, consecutive loads operations only demand the first lane of
// their address.
return Op == getAddr() && isConsecutive();
}
Instruction &getIngredient() const { return Ingredient; }
};
/// A recipe for widening store operations, using the stored value, the address
/// to store to and an optional mask.
struct VPWidenStoreRecipe final : public VPWidenMemoryRecipe {
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
: VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
Consecutive, Reverse, DL) {
setMask(Mask);
}
VPWidenStoreRecipe *clone() override {
return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
getStoredValue(), getMask(), Consecutive,
Reverse, getDebugLoc());
}
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
/// Return the value stored by this recipe.
VPValue *getStoredValue() const { return getOperand(1); }
/// Generate a wide store or scatter.
void execute(VPTransformState &State) override;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
/// Returns true if the recipe only uses the first lane of operand \p Op.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// Widened, consecutive stores only demand the first lane of their address,
// unless the same operand is also stored.
return Op == getAddr() && isConsecutive() && Op != getStoredValue();
}
};
/// Recipe to expand a SCEV expression.
class VPExpandSCEVRecipe : public VPSingleDefRecipe {
const SCEV *Expr;

View File

@ -108,9 +108,9 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
return CI.getType();
}
Type *VPTypeAnalysis::inferScalarTypeForRecipe(
const VPWidenMemoryInstructionRecipe *R) {
assert(!R->isStore() && "Store recipes should not define any values");
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
assert(isa<VPWidenLoadRecipe>(R) &&
"Store recipes should not define any values");
return cast<LoadInst>(&R->getIngredient())->getType();
}
@ -231,8 +231,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
return inferScalarType(R->getOperand(0));
})
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
VPWidenCallRecipe, VPWidenMemoryInstructionRecipe,
VPWidenSelectRecipe>(
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
// TODO: Use info from interleave group.

View File

@ -20,7 +20,7 @@ class VPInstruction;
class VPWidenRecipe;
class VPWidenCallRecipe;
class VPWidenIntOrFpInductionRecipe;
class VPWidenMemoryInstructionRecipe;
class VPWidenMemoryRecipe;
struct VPWidenSelectRecipe;
class VPReplicateRecipe;
class Type;
@ -46,7 +46,7 @@ class VPTypeAnalysis {
Type *inferScalarTypeForRecipe(const VPWidenCallRecipe *R);
Type *inferScalarTypeForRecipe(const VPWidenRecipe *R);
Type *inferScalarTypeForRecipe(const VPWidenIntOrFpInductionRecipe *R);
Type *inferScalarTypeForRecipe(const VPWidenMemoryInstructionRecipe *R);
Type *inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R);
Type *inferScalarTypeForRecipe(const VPWidenSelectRecipe *R);
Type *inferScalarTypeForRecipe(const VPReplicateRecipe *R);

View File

@ -47,9 +47,8 @@ bool VPRecipeBase::mayWriteToMemory() const {
switch (getVPDefID()) {
case VPInterleaveSC:
return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
case VPWidenMemoryInstructionSC: {
return cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
}
case VPWidenStoreSC:
return true;
case VPReplicateSC:
case VPWidenCallSC:
return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
@ -64,6 +63,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPWidenCastSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenLoadSC:
case VPWidenPHISC:
case VPWidenSC:
case VPWidenSelectSC: {
@ -81,16 +81,16 @@ bool VPRecipeBase::mayWriteToMemory() const {
bool VPRecipeBase::mayReadFromMemory() const {
switch (getVPDefID()) {
case VPWidenMemoryInstructionSC: {
return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore();
}
case VPWidenLoadSC:
return true;
case VPReplicateSC:
case VPWidenCallSC:
return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
->mayReadFromMemory();
case VPBranchOnMaskSC:
case VPScalarIVStepsSC:
case VPPredInstPHISC:
case VPScalarIVStepsSC:
case VPWidenStoreSC:
return false;
case VPBlendSC:
case VPReductionSC:
@ -155,12 +155,13 @@ bool VPRecipeBase::mayHaveSideEffects() const {
}
case VPInterleaveSC:
return mayWriteToMemory();
case VPWidenMemoryInstructionSC:
assert(cast<VPWidenMemoryInstructionRecipe>(this)
->getIngredient()
.mayHaveSideEffects() == mayWriteToMemory() &&
"mayHaveSideffects result for ingredient differs from this "
"implementation");
case VPWidenLoadSC:
case VPWidenStoreSC:
assert(
cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
mayWriteToMemory() &&
"mayHaveSideffects result for ingredient differs from this "
"implementation");
return mayWriteToMemory();
case VPReplicateSC: {
auto *R = cast<VPReplicateRecipe>(this);
@ -1769,16 +1770,17 @@ void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
printOperands(O, SlotTracker);
}
void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN ";
printAsOperand(O, SlotTracker);
O << " = load ";
printOperands(O, SlotTracker);
}
if (!isStore()) {
getVPSingleValue()->printAsOperand(O, SlotTracker);
O << " = ";
}
O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN store ";
printOperands(O, SlotTracker);
}
#endif

View File

@ -60,14 +60,14 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
assert(isa<VPInstruction>(&Ingredient) &&
"only VPInstructions expected here");
assert(!isa<PHINode>(Inst) && "phis should be handled above");
// Create VPWidenMemoryInstructionRecipe for loads and stores.
// Create VPWidenMemoryRecipe for loads and stores.
if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
NewRecipe = new VPWidenMemoryInstructionRecipe(
NewRecipe = new VPWidenLoadRecipe(
*Load, Ingredient.getOperand(0), nullptr /*Mask*/,
false /*Consecutive*/, false /*Reverse*/,
Ingredient.getDebugLoc());
} else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
NewRecipe = new VPWidenMemoryInstructionRecipe(
NewRecipe = new VPWidenStoreRecipe(
*Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
Ingredient.getDebugLoc());
@ -977,10 +977,9 @@ void VPlanTransforms::truncateToMinimalBitwidths(
vp_depth_first_deep(Plan.getVectorLoopRegion()))) {
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe,
VPWidenSelectRecipe, VPWidenMemoryInstructionRecipe>(&R))
VPWidenSelectRecipe, VPWidenMemoryRecipe>(&R))
continue;
if (isa<VPWidenMemoryInstructionRecipe>(&R) &&
cast<VPWidenMemoryInstructionRecipe>(&R)->isStore())
if (isa<VPWidenStoreRecipe>(&R))
continue;
VPValue *ResultVPV = R.getVPSingleValue();
@ -1048,10 +1047,9 @@ void VPlanTransforms::truncateToMinimalBitwidths(
assert(cast<VPWidenRecipe>(&R)->getOpcode() == Instruction::ICmp &&
"Only ICmps should not need extending the result.");
if (isa<VPWidenMemoryInstructionRecipe>(&R)) {
assert(!cast<VPWidenMemoryInstructionRecipe>(&R)->isStore() && "stores cannot be narrowed");
assert(!isa<VPWidenStoreRecipe>(&R) && "stores cannot be narrowed");
if (isa<VPWidenLoadRecipe>(&R))
continue;
}
// Shrink operands by introducing truncates as needed.
unsigned StartIdx = isa<VPWidenSelectRecipe>(&R) ? 1 : 0;
@ -1315,7 +1313,7 @@ void VPlanTransforms::addExplicitVectorLength(VPlan &Plan) {
ConstantInt::getTrue(CanonicalIVPHI->getScalarType()->getContext());
VPValue *VPTrueMask = Plan.getOrAddLiveIn(TrueMask);
replaceHeaderPredicateWith(Plan, *VPTrueMask, [](VPUser &U, unsigned) {
return isa<VPWidenMemoryInstructionRecipe>(U);
return isa<VPWidenMemoryRecipe>(U);
});
// Now create the ExplicitVectorLengthPhi recipe in the main loop.
auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc());
@ -1371,8 +1369,7 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
// instruction. Widen memory instructions involved in address computation
// will lead to gather/scatter instructions, which don't need to be
// handled.
if (isa<VPWidenMemoryInstructionRecipe>(CurRec) ||
isa<VPInterleaveRecipe>(CurRec) ||
if (isa<VPWidenMemoryRecipe>(CurRec) || isa<VPInterleaveRecipe>(CurRec) ||
isa<VPScalarIVStepsRecipe>(CurRec) || isa<VPHeaderPHIRecipe>(CurRec))
continue;
@ -1420,7 +1417,7 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
auto Iter = vp_depth_first_deep(Plan.getEntry());
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
for (VPRecipeBase &Recipe : *VPBB) {
if (auto *WidenRec = dyn_cast<VPWidenMemoryInstructionRecipe>(&Recipe)) {
if (auto *WidenRec = dyn_cast<VPWidenMemoryRecipe>(&Recipe)) {
Instruction &UnderlyingInstr = WidenRec->getIngredient();
VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
if (AddrDef && WidenRec->isConsecutive() &&

View File

@ -36,7 +36,6 @@ class VPDef;
class VPSlotTracker;
class VPUser;
class VPRecipeBase;
class VPWidenMemoryInstructionRecipe;
// This is the base class of the VPlan Def/Use graph, used for modeling the data
// flow into, within and out of the VPlan. VPValues can stand for live-ins
@ -51,7 +50,6 @@ class VPValue {
friend class VPInterleavedAccessInfo;
friend class VPSlotTracker;
friend class VPRecipeBase;
friend class VPWidenMemoryInstructionRecipe;
const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
@ -358,7 +356,8 @@ public:
VPWidenCanonicalIVSC,
VPWidenCastSC,
VPWidenGEPSC,
VPWidenMemoryInstructionSC,
VPWidenLoadSC,
VPWidenStoreSC,
VPWidenSC,
VPWidenSelectSC,
VPBlendSC,

View File

@ -128,7 +128,7 @@ static bool verifyVPBasicBlock(const VPBasicBlock *VPBB,
}
return true;
}
if (isa<VPWidenMemoryInstructionRecipe>(R))
if (isa<VPWidenMemoryRecipe>(R))
VPWidenMemRecipe = R;
return true;
};

View File

@ -46,8 +46,8 @@ define void @vector_reverse_mask_v4i1(ptr noalias %a, ptr noalias %cond, i64 %N)
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 -24
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 -56
; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i1> [[TMP5]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison)
; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP9]], i32 8, <4 x i1> [[REVERSE4]], <4 x double> poison)
; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>

View File

@ -1400,15 +1400,15 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr
; AVX2-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP20]], i32 -12
; AVX2-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -3
; AVX2-NEXT: [[REVERSE12:%.*]] = shufflevector <4 x i1> [[TMP16]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i1> [[TMP17]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i1> [[TMP18]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP19]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP25]], i32 8, <4 x i1> [[REVERSE12]], <4 x double> poison), !alias.scope !21
; AVX2-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i1> [[TMP17]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP27]], i32 8, <4 x i1> [[REVERSE14]], <4 x double> poison), !alias.scope !21
; AVX2-NEXT: [[REVERSE16:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD15]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i1> [[TMP18]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP29]], i32 8, <4 x i1> [[REVERSE17]], <4 x double> poison), !alias.scope !21
; AVX2-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD18]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP19]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP31]], i32 8, <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope !21
; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; AVX2-NEXT: [[TMP32:%.*]] = fadd <4 x double> [[REVERSE13]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>
@ -1524,15 +1524,15 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr
; AVX512-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP20]], i32 -24
; AVX512-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -7
; AVX512-NEXT: [[REVERSE12:%.*]] = shufflevector <8 x i1> [[TMP16]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE14:%.*]] = shufflevector <8 x i1> [[TMP17]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i1> [[TMP18]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP19]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP25]], i32 8, <8 x i1> [[REVERSE12]], <8 x double> poison), !alias.scope !34
; AVX512-NEXT: [[REVERSE13:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE14:%.*]] = shufflevector <8 x i1> [[TMP17]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP27]], i32 8, <8 x i1> [[REVERSE14]], <8 x double> poison), !alias.scope !34
; AVX512-NEXT: [[REVERSE16:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD15]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i1> [[TMP18]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP29]], i32 8, <8 x i1> [[REVERSE17]], <8 x double> poison), !alias.scope !34
; AVX512-NEXT: [[REVERSE19:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD18]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP19]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP31]], i32 8, <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope !34
; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; AVX512-NEXT: [[TMP32:%.*]] = fadd <8 x double> [[REVERSE13]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01>

View File

@ -192,9 +192,9 @@ TEST_F(VPlanHCFGTest, testVPInstructionToVPRecipesInner) {
auto Iter = VecBB->begin();
EXPECT_NE(nullptr, dyn_cast<VPWidenPHIRecipe>(&*Iter++));
EXPECT_NE(nullptr, dyn_cast<VPWidenGEPRecipe>(&*Iter++));
EXPECT_NE(nullptr, dyn_cast<VPWidenMemoryInstructionRecipe>(&*Iter++));
EXPECT_NE(nullptr, dyn_cast<VPWidenMemoryRecipe>(&*Iter++));
EXPECT_NE(nullptr, dyn_cast<VPWidenRecipe>(&*Iter++));
EXPECT_NE(nullptr, dyn_cast<VPWidenMemoryInstructionRecipe>(&*Iter++));
EXPECT_NE(nullptr, dyn_cast<VPWidenMemoryRecipe>(&*Iter++));
EXPECT_NE(nullptr, dyn_cast<VPWidenRecipe>(&*Iter++));
EXPECT_NE(nullptr, dyn_cast<VPWidenRecipe>(&*Iter++));
EXPECT_NE(nullptr, dyn_cast<VPInstruction>(&*Iter++));

View File

@ -1029,7 +1029,7 @@ TEST(VPRecipeTest, CastVPBranchOnMaskRecipeToVPUser) {
EXPECT_EQ(&Recipe, BaseR);
}
TEST(VPRecipeTest, CastVPWidenMemoryInstructionRecipeToVPUserAndVPDef) {
TEST(VPRecipeTest, CastVPWidenMemoryRecipeToVPUserAndVPDef) {
LLVMContext C;
IntegerType *Int32 = IntegerType::get(C, 32);
@ -1038,7 +1038,7 @@ TEST(VPRecipeTest, CastVPWidenMemoryInstructionRecipeToVPUserAndVPDef) {
new LoadInst(Int32, UndefValue::get(Int32Ptr), "", false, Align(1));
VPValue Addr;
VPValue Mask;
VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, true, false, {});
VPWidenLoadRecipe Recipe(*Load, &Addr, &Mask, true, false, {});
EXPECT_TRUE(isa<VPUser>(&Recipe));
VPRecipeBase *BaseR = &Recipe;
EXPECT_TRUE(isa<VPUser>(BaseR));
@ -1133,7 +1133,7 @@ TEST(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
new LoadInst(Int32, UndefValue::get(Int32Ptr), "", false, Align(1));
VPValue Addr;
VPValue Mask;
VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, true, false, {});
VPWidenLoadRecipe Recipe(*Load, &Addr, &Mask, true, false, {});
EXPECT_FALSE(Recipe.mayHaveSideEffects());
EXPECT_TRUE(Recipe.mayReadFromMemory());
EXPECT_FALSE(Recipe.mayWriteToMemory());
@ -1147,8 +1147,7 @@ TEST(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
VPValue Addr;
VPValue Mask;
VPValue StoredV;
VPWidenMemoryInstructionRecipe Recipe(*Store, &Addr, &StoredV, &Mask, false,
false, {});
VPWidenStoreRecipe Recipe(*Store, &Addr, &StoredV, &Mask, false, false, {});
EXPECT_TRUE(Recipe.mayHaveSideEffects());
EXPECT_FALSE(Recipe.mayReadFromMemory());
EXPECT_TRUE(Recipe.mayWriteToMemory());
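
For reference, here is a minimal usage sketch in the spirit of the unit tests
above. It assumes an LLVM build environment (VPlan.h is a private header under
llvm/lib/Transforms/Vectorize, so the include path below is an assumption) and
uses the constructor signatures from the VPlan.h diff in this commit.

// Hypothetical standalone snippet; mirrors the unit tests above.
#include "VPlan.h" // private header under llvm/lib/Transforms/Vectorize (assumed path)
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Alignment.h"
#include <cassert>
using namespace llvm;

static void buildWidenMemoryRecipes() {
  LLVMContext C;
  IntegerType *Int32 = IntegerType::get(C, 32);
  PointerType *PtrTy = PointerType::get(C, 0);

  // A widened load: the address is operand 0 and the optional mask, when
  // present, is the last operand.
  auto *Load = new LoadInst(Int32, UndefValue::get(PtrTy), "", false, Align(1));
  VPValue Addr, Mask;
  VPWidenLoadRecipe LoadR(*Load, &Addr, &Mask, /*Consecutive=*/true,
                          /*Reverse=*/false, {});
  assert(LoadR.getAddr() == &Addr && LoadR.getMask() == &Mask);
  assert(LoadR.isConsecutive() && !LoadR.isReverse());

  // A widened store: the address is operand 0, the stored value is operand 1,
  // and again the mask (if any) comes last.
  auto *Store = new StoreInst(UndefValue::get(Int32), UndefValue::get(PtrTy),
                              false, Align(1));
  VPValue StoredV;
  VPWidenStoreRecipe StoreR(*Store, &Addr, &StoredV, &Mask,
                            /*Consecutive=*/false, /*Reverse=*/false, {});
  assert(StoreR.getStoredValue() == &StoredV && StoreR.isMasked());

  delete Load;
  delete Store;
}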