Revert "[IR] Initial introduction of llvm.experimental.memset_pattern (#97583)"

This reverts commit 7ff3a9acd84654c9ec2939f45ba27f162ae7fbc3.

Recent scheduling changes mean tests need to be regenerated. Reverting
to green while I do that.
Alex Bradbury 2024-11-15 14:48:32 +00:00
parent dad9e4a165
commit 0fb8fac5d6
15 changed files with 1 addition and 600 deletions

@@ -15430,63 +15430,6 @@ The behavior of '``llvm.memset.inline.*``' is equivalent to the behavior of
'``llvm.memset.*``', but the generated code is guaranteed not to call any
external functions.
.. _int_experimental_memset_pattern:
'``llvm.experimental.memset.pattern``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
This is an overloaded intrinsic. You can use
``llvm.experimental.memset.pattern`` on any integer bit width and for
different address spaces. Not all targets support all bit widths however.
::
declare void @llvm.experimental.memset.pattern.p0.i128.i64(ptr <dest>, i128 <val>,
i64 <count>, i1 <isvolatile>)
Overview:
"""""""""
The '``llvm.experimental.memset.pattern.*``' intrinsics fill a block of memory
with a particular value. This may be expanded to an inline loop, a sequence of
stores, or a libcall depending on what is available for the target and the
expected performance and code size impact.
Arguments:
""""""""""
The first argument is a pointer to the destination to fill, the second
is the value with which to fill it, the third is an integer specifying the
number of times to store the value, and the fourth is a boolean indicating
whether this is a volatile access.
The :ref:`align <attr_align>` parameter attribute can be provided
for the first argument.
If the ``isvolatile`` parameter is ``true``, the
``llvm.experimental.memset.pattern`` call is a :ref:`volatile operation
<volatile>`. The detailed access behavior is not very cleanly specified and it
is unwise to depend on it.
Semantics:
""""""""""
The '``llvm.experimental.memset.pattern.*``' intrinsics fill memory starting at
the destination location with the given pattern ``<count>`` times,
incrementing by the allocation size of the type each time. The stores follow
the usual semantics of store instructions, including with respect to endianness
and padding. If the destination is known to be aligned to some boundary, this
can be specified as an attribute on the argument.
If ``<count>`` is 0, the call is a no-op modulo the behavior of attributes
attached to the arguments.
If ``<count>`` is not a well-defined value, the behavior is undefined.
If ``<count>`` is not zero, ``<dest>`` must be well-defined; otherwise the
behavior is undefined.
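As a usage illustration (a hypothetical function; ``%dst`` and ``%pattern`` are
placeholder names), the following stores four copies of a 128-bit pattern to an
8-byte aligned destination, filling 64 bytes in total::

    define void @fill_four(ptr %dst, i128 %pattern) {
      call void @llvm.experimental.memset.pattern(ptr align 8 %dst, i128 %pattern, i64 4, i1 false)
      ret void
    }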
.. _int_sqrt:
'``llvm.sqrt.*``' Intrinsic

@@ -208,9 +208,6 @@ public:
RetTy visitDbgInfoIntrinsic(DbgInfoIntrinsic &I){ DELEGATE(IntrinsicInst); }
RetTy visitMemSetInst(MemSetInst &I) { DELEGATE(MemIntrinsic); }
RetTy visitMemSetInlineInst(MemSetInlineInst &I){ DELEGATE(MemSetInst); }
RetTy visitMemSetPatternInst(MemSetPatternInst &I) {
DELEGATE(IntrinsicInst);
}
RetTy visitMemCpyInst(MemCpyInst &I) { DELEGATE(MemTransferInst); }
RetTy visitMemCpyInlineInst(MemCpyInlineInst &I){ DELEGATE(MemCpyInst); }
RetTy visitMemMoveInst(MemMoveInst &I) { DELEGATE(MemTransferInst); }
@@ -298,8 +295,6 @@ private:
case Intrinsic::memset: DELEGATE(MemSetInst);
case Intrinsic::memset_inline:
DELEGATE(MemSetInlineInst);
case Intrinsic::experimental_memset_pattern:
DELEGATE(MemSetPatternInst);
case Intrinsic::vastart: DELEGATE(VAStartInst);
case Intrinsic::vaend: DELEGATE(VAEndInst);
case Intrinsic::vacopy: DELEGATE(VACopyInst);

@@ -1263,41 +1263,6 @@ public:
}
};
/// This is the base class for llvm.experimental.memset.pattern
class MemSetPatternIntrinsic : public MemIntrinsicBase<MemIntrinsic> {
private:
enum { ARG_VOLATILE = 3 };
public:
ConstantInt *getVolatileCst() const {
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(ARG_VOLATILE)));
}
bool isVolatile() const { return !getVolatileCst()->isZero(); }
void setVolatile(Constant *V) { setArgOperand(ARG_VOLATILE, V); }
// Methods for support of type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::experimental_memset_pattern;
}
static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
/// This class wraps the llvm.experimental.memset.pattern intrinsic.
class MemSetPatternInst : public MemSetBase<MemSetPatternIntrinsic> {
public:
// Methods for support of type inquiry through isa, cast, and dyn_cast:
static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::experimental_memset_pattern;
}
static bool classof(const Value *V) {
return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
}
};
/// This class wraps the llvm.memcpy/memmove intrinsics.
class MemTransferInst : public MemTransferBase<MemIntrinsic> {
public:

@@ -1006,17 +1006,6 @@ def int_memset_inline
NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
ImmArg<ArgIndex<3>>]>;
// Memset variant that writes a given pattern.
def int_experimental_memset_pattern
: Intrinsic<[],
[llvm_anyptr_ty, // Destination.
llvm_anyint_ty, // Pattern value.
llvm_anyint_ty, // Count (number of times to fill value).
llvm_i1_ty], // IsVolatile.
[IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback,
NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
ImmArg<ArgIndex<3>>]>;
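Since the destination pointer and both integer operands are overloaded
(llvm_anyptr_ty and llvm_anyint_ty above), the intrinsic name is mangled on
those three types. As a purely illustrative instance, a variant taking a
pointer in address space 1, a 64-bit pattern, and a 32-bit count would be
declared in IR as:

    declare void @llvm.experimental.memset.pattern.p1.i64.i32(ptr addrspace(1), i64, i32, i1)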
// FIXME: Add version of these floating point intrinsics which allow non-default
// rounding modes and FP exception handling.

@@ -25,7 +25,6 @@ class Instruction;
class MemCpyInst;
class MemMoveInst;
class MemSetInst;
class MemSetPatternInst;
class ScalarEvolution;
class TargetTransformInfo;
class Value;
@@ -58,9 +57,6 @@ bool expandMemMoveAsLoop(MemMoveInst *MemMove, const TargetTransformInfo &TTI);
/// Expand \p MemSet as a loop. \p MemSet is not deleted.
void expandMemSetAsLoop(MemSetInst *MemSet);
/// Expand \p MemSet as a loop. \p MemSet is not deleted.
void expandMemSetPatternAsLoop(MemSetPatternInst *MemSet);
/// Expand \p AtomicMemCpy as a loop. \p AtomicMemCpy is not deleted.
void expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemCpy,
const TargetTransformInfo &TTI,

@@ -320,13 +320,6 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
Memset->eraseFromParent();
break;
}
case Intrinsic::experimental_memset_pattern: {
auto *Memset = cast<MemSetPatternInst>(Inst);
expandMemSetPatternAsLoop(Memset);
Changed = true;
Memset->eraseFromParent();
break;
}
default:
llvm_unreachable("unhandled intrinsic");
}
@@ -346,7 +339,6 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
case Intrinsic::memmove:
case Intrinsic::memset:
case Intrinsic::memset_inline:
case Intrinsic::experimental_memset_pattern:
Changed |= expandMemIntrinsicUses(F);
break;
case Intrinsic::load_relative:

@@ -5519,8 +5519,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::memcpy_inline:
case Intrinsic::memmove:
case Intrinsic::memset:
case Intrinsic::memset_inline:
case Intrinsic::experimental_memset_pattern: {
case Intrinsic::memset_inline: {
break;
}
case Intrinsic::memcpy_element_unordered_atomic:

@@ -970,15 +970,6 @@ void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
Memset->isVolatile());
}
void llvm::expandMemSetPatternAsLoop(MemSetPatternInst *Memset) {
createMemSetLoop(/* InsertBefore=*/Memset,
/* DstAddr=*/Memset->getRawDest(),
/* CopyLen=*/Memset->getLength(),
/* SetValue=*/Memset->getValue(),
/* Alignment=*/Memset->getDestAlign().valueOrOne(),
Memset->isVolatile());
}
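For reference, the expansion produced here is the plain store loop checked by
the expansion tests further below. A rough sketch of the IR generated for a
call like llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 %x, i1 0),
with value names invented for readability, is:

    entry:
      %empty = icmp eq i64 0, %x
      br i1 %empty, label %split, label %loadstoreloop
    loadstoreloop:
      %i = phi i64 [ 0, %entry ], [ %i.next, %loadstoreloop ]
      ; step by the pattern type's allocation size each iteration
      %p = getelementptr inbounds i128, ptr %a, i64 %i
      ; the store alignment is derived from the destination and pattern
      ; alignment (1 here, with no align attribute on the destination)
      store i128 %value, ptr %p, align 1
      %i.next = add i64 %i, 1
      %again = icmp ult i64 %i.next, %x
      br i1 %again, label %loadstoreloop, label %split
    split:
      ; execution continues after the expanded call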
void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
const TargetTransformInfo &TTI,
ScalarEvolution *SE) {

@@ -1,297 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv32 -mattr=+m \
; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32
; RUN: llc < %s -mtriple=riscv64 -mattr=+m \
; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+unaligned-scalar-mem \
; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+unaligned-scalar-mem \
; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
; TODO: Due to the initial naive lowering implementation of memset.pattern in
; PreISelIntrinsicLowering, the generated code is not good.
define void @memset_1(ptr %a, i128 %value) nounwind {
; RV32-BOTH-LABEL: memset_1:
; RV32-BOTH: # %bb.0: # %loadstoreloop.preheader
; RV32-BOTH-NEXT: lw a2, 0(a1)
; RV32-BOTH-NEXT: lw a3, 4(a1)
; RV32-BOTH-NEXT: lw a4, 8(a1)
; RV32-BOTH-NEXT: lw a1, 12(a1)
; RV32-BOTH-NEXT: li a5, 0
; RV32-BOTH-NEXT: li a6, 0
; RV32-BOTH-NEXT: .LBB0_1: # %loadstoreloop
; RV32-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-BOTH-NEXT: slli a7, a5, 4
; RV32-BOTH-NEXT: add a7, a0, a7
; RV32-BOTH-NEXT: addi a5, a5, 1
; RV32-BOTH-NEXT: seqz t0, a5
; RV32-BOTH-NEXT: add a6, a6, t0
; RV32-BOTH-NEXT: or t0, a5, a6
; RV32-BOTH-NEXT: sw a2, 0(a7)
; RV32-BOTH-NEXT: sw a3, 4(a7)
; RV32-BOTH-NEXT: sw a4, 8(a7)
; RV32-BOTH-NEXT: sw a1, 12(a7)
; RV32-BOTH-NEXT: beqz t0, .LBB0_1
; RV32-BOTH-NEXT: # %bb.2: # %split
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: memset_1:
; RV64-BOTH: # %bb.0: # %loadstoreloop.preheader
; RV64-BOTH-NEXT: addi a3, a0, 16
; RV64-BOTH-NEXT: .LBB0_1: # %loadstoreloop
; RV64-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-BOTH-NEXT: sd a1, 0(a0)
; RV64-BOTH-NEXT: sd a2, 8(a0)
; RV64-BOTH-NEXT: addi a0, a0, 16
; RV64-BOTH-NEXT: bne a0, a3, .LBB0_1
; RV64-BOTH-NEXT: # %bb.2: # %split
; RV64-BOTH-NEXT: ret
tail call void @llvm.experimental.memset.pattern(ptr align 8 %a, i128 %value, i64 1, i1 0)
ret void
}
define void @memset_1_noalign(ptr %a, i128 %value) nounwind {
; RV32-LABEL: memset_1_noalign:
; RV32: # %bb.0: # %loadstoreloop.preheader
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: sw s0, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s4, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s5, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: li a2, 0
; RV32-NEXT: li a3, 0
; RV32-NEXT: lw a4, 4(a1)
; RV32-NEXT: lw a5, 0(a1)
; RV32-NEXT: lw a6, 8(a1)
; RV32-NEXT: lw a1, 12(a1)
; RV32-NEXT: srli a7, a4, 24
; RV32-NEXT: srli t0, a4, 16
; RV32-NEXT: srli t1, a4, 8
; RV32-NEXT: srli t2, a5, 24
; RV32-NEXT: srli t3, a5, 16
; RV32-NEXT: srli t4, a5, 8
; RV32-NEXT: srli t5, a6, 24
; RV32-NEXT: srli t6, a6, 16
; RV32-NEXT: srli s0, a6, 8
; RV32-NEXT: srli s1, a1, 24
; RV32-NEXT: srli s2, a1, 16
; RV32-NEXT: srli s3, a1, 8
; RV32-NEXT: .LBB1_1: # %loadstoreloop
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: slli s4, a2, 4
; RV32-NEXT: add s4, a0, s4
; RV32-NEXT: sb a4, 4(s4)
; RV32-NEXT: sb t1, 5(s4)
; RV32-NEXT: sb t0, 6(s4)
; RV32-NEXT: sb a7, 7(s4)
; RV32-NEXT: sb a5, 0(s4)
; RV32-NEXT: sb t4, 1(s4)
; RV32-NEXT: sb t3, 2(s4)
; RV32-NEXT: sb t2, 3(s4)
; RV32-NEXT: sb a6, 8(s4)
; RV32-NEXT: sb s0, 9(s4)
; RV32-NEXT: sb t6, 10(s4)
; RV32-NEXT: sb t5, 11(s4)
; RV32-NEXT: addi a2, a2, 1
; RV32-NEXT: seqz s5, a2
; RV32-NEXT: add a3, a3, s5
; RV32-NEXT: or s5, a2, a3
; RV32-NEXT: sb a1, 12(s4)
; RV32-NEXT: sb s3, 13(s4)
; RV32-NEXT: sb s2, 14(s4)
; RV32-NEXT: sb s1, 15(s4)
; RV32-NEXT: beqz s5, .LBB1_1
; RV32-NEXT: # %bb.2: # %split
; RV32-NEXT: lw s0, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 20(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s4, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s5, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: memset_1_noalign:
; RV64: # %bb.0: # %loadstoreloop.preheader
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: addi a3, a0, 16
; RV64-NEXT: srli a4, a1, 56
; RV64-NEXT: srli a5, a1, 48
; RV64-NEXT: srli a6, a1, 40
; RV64-NEXT: srli a7, a1, 32
; RV64-NEXT: srli t0, a1, 24
; RV64-NEXT: srli t1, a1, 16
; RV64-NEXT: srli t2, a1, 8
; RV64-NEXT: srli t3, a2, 56
; RV64-NEXT: srli t4, a2, 48
; RV64-NEXT: srli t5, a2, 40
; RV64-NEXT: srli t6, a2, 32
; RV64-NEXT: srli s0, a2, 24
; RV64-NEXT: srli s1, a2, 16
; RV64-NEXT: srli s2, a2, 8
; RV64-NEXT: .LBB1_1: # %loadstoreloop
; RV64-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-NEXT: sb a7, 4(a0)
; RV64-NEXT: sb a6, 5(a0)
; RV64-NEXT: sb a5, 6(a0)
; RV64-NEXT: sb a4, 7(a0)
; RV64-NEXT: sb a1, 0(a0)
; RV64-NEXT: sb t2, 1(a0)
; RV64-NEXT: sb t1, 2(a0)
; RV64-NEXT: sb t0, 3(a0)
; RV64-NEXT: sb t6, 12(a0)
; RV64-NEXT: sb t5, 13(a0)
; RV64-NEXT: sb t4, 14(a0)
; RV64-NEXT: sb t3, 15(a0)
; RV64-NEXT: sb a2, 8(a0)
; RV64-NEXT: sb s2, 9(a0)
; RV64-NEXT: sb s1, 10(a0)
; RV64-NEXT: sb s0, 11(a0)
; RV64-NEXT: addi a0, a0, 16
; RV64-NEXT: bne a0, a3, .LBB1_1
; RV64-NEXT: # %bb.2: # %split
; RV64-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: memset_1_noalign:
; RV32-FAST: # %bb.0: # %loadstoreloop.preheader
; RV32-FAST-NEXT: lw a2, 0(a1)
; RV32-FAST-NEXT: lw a3, 4(a1)
; RV32-FAST-NEXT: lw a4, 8(a1)
; RV32-FAST-NEXT: lw a1, 12(a1)
; RV32-FAST-NEXT: li a5, 0
; RV32-FAST-NEXT: li a6, 0
; RV32-FAST-NEXT: .LBB1_1: # %loadstoreloop
; RV32-FAST-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-FAST-NEXT: slli a7, a5, 4
; RV32-FAST-NEXT: add a7, a0, a7
; RV32-FAST-NEXT: addi a5, a5, 1
; RV32-FAST-NEXT: seqz t0, a5
; RV32-FAST-NEXT: add a6, a6, t0
; RV32-FAST-NEXT: or t0, a5, a6
; RV32-FAST-NEXT: sw a2, 0(a7)
; RV32-FAST-NEXT: sw a3, 4(a7)
; RV32-FAST-NEXT: sw a4, 8(a7)
; RV32-FAST-NEXT: sw a1, 12(a7)
; RV32-FAST-NEXT: beqz t0, .LBB1_1
; RV32-FAST-NEXT: # %bb.2: # %split
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: memset_1_noalign:
; RV64-FAST: # %bb.0: # %loadstoreloop.preheader
; RV64-FAST-NEXT: addi a3, a0, 16
; RV64-FAST-NEXT: .LBB1_1: # %loadstoreloop
; RV64-FAST-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-FAST-NEXT: sd a1, 0(a0)
; RV64-FAST-NEXT: sd a2, 8(a0)
; RV64-FAST-NEXT: addi a0, a0, 16
; RV64-FAST-NEXT: bne a0, a3, .LBB1_1
; RV64-FAST-NEXT: # %bb.2: # %split
; RV64-FAST-NEXT: ret
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 1, i1 0)
ret void
}
define void @memset_4(ptr %a, i128 %value) nounwind {
; RV32-BOTH-LABEL: memset_4:
; RV32-BOTH: # %bb.0: # %loadstoreloop.preheader
; RV32-BOTH-NEXT: lw a2, 0(a1)
; RV32-BOTH-NEXT: lw a3, 4(a1)
; RV32-BOTH-NEXT: lw a4, 8(a1)
; RV32-BOTH-NEXT: lw a1, 12(a1)
; RV32-BOTH-NEXT: li a5, 0
; RV32-BOTH-NEXT: li a6, 0
; RV32-BOTH-NEXT: .LBB2_1: # %loadstoreloop
; RV32-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-BOTH-NEXT: slli a7, a5, 4
; RV32-BOTH-NEXT: add a7, a0, a7
; RV32-BOTH-NEXT: addi a5, a5, 1
; RV32-BOTH-NEXT: seqz t0, a5
; RV32-BOTH-NEXT: add a6, a6, t0
; RV32-BOTH-NEXT: seqz t0, a6
; RV32-BOTH-NEXT: sltiu t1, a5, 4
; RV32-BOTH-NEXT: and t0, t0, t1
; RV32-BOTH-NEXT: sw a2, 0(a7)
; RV32-BOTH-NEXT: sw a3, 4(a7)
; RV32-BOTH-NEXT: sw a4, 8(a7)
; RV32-BOTH-NEXT: sw a1, 12(a7)
; RV32-BOTH-NEXT: bnez t0, .LBB2_1
; RV32-BOTH-NEXT: # %bb.2: # %split
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: memset_4:
; RV64-BOTH: # %bb.0: # %loadstoreloop.preheader
; RV64-BOTH-NEXT: addi a3, a0, 64
; RV64-BOTH-NEXT: .LBB2_1: # %loadstoreloop
; RV64-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-BOTH-NEXT: sd a1, 0(a0)
; RV64-BOTH-NEXT: sd a2, 8(a0)
; RV64-BOTH-NEXT: addi a0, a0, 16
; RV64-BOTH-NEXT: bne a0, a3, .LBB2_1
; RV64-BOTH-NEXT: # %bb.2: # %split
; RV64-BOTH-NEXT: ret
tail call void @llvm.experimental.memset.pattern(ptr align 8 %a, i128 %value, i64 4, i1 0)
ret void
}
define void @memset_x(ptr %a, i128 %value, i64 %x) nounwind {
; RV32-BOTH-LABEL: memset_x:
; RV32-BOTH: # %bb.0:
; RV32-BOTH-NEXT: or a4, a2, a3
; RV32-BOTH-NEXT: beqz a4, .LBB3_5
; RV32-BOTH-NEXT: # %bb.1: # %loadstoreloop.preheader
; RV32-BOTH-NEXT: lw a4, 0(a1)
; RV32-BOTH-NEXT: lw a5, 4(a1)
; RV32-BOTH-NEXT: lw a6, 8(a1)
; RV32-BOTH-NEXT: lw a1, 12(a1)
; RV32-BOTH-NEXT: li a7, 0
; RV32-BOTH-NEXT: li t0, 0
; RV32-BOTH-NEXT: j .LBB3_3
; RV32-BOTH-NEXT: .LBB3_2: # %loadstoreloop
; RV32-BOTH-NEXT: # in Loop: Header=BB3_3 Depth=1
; RV32-BOTH-NEXT: sltu t1, t0, a3
; RV32-BOTH-NEXT: beqz t1, .LBB3_5
; RV32-BOTH-NEXT: .LBB3_3: # %loadstoreloop
; RV32-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-BOTH-NEXT: slli t1, a7, 4
; RV32-BOTH-NEXT: add t1, a0, t1
; RV32-BOTH-NEXT: addi a7, a7, 1
; RV32-BOTH-NEXT: seqz t2, a7
; RV32-BOTH-NEXT: add t0, t0, t2
; RV32-BOTH-NEXT: sw a4, 0(t1)
; RV32-BOTH-NEXT: sw a5, 4(t1)
; RV32-BOTH-NEXT: sw a6, 8(t1)
; RV32-BOTH-NEXT: sw a1, 12(t1)
; RV32-BOTH-NEXT: bne t0, a3, .LBB3_2
; RV32-BOTH-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1
; RV32-BOTH-NEXT: sltu t1, a7, a2
; RV32-BOTH-NEXT: bnez t1, .LBB3_3
; RV32-BOTH-NEXT: .LBB3_5: # %split
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: memset_x:
; RV64-BOTH: # %bb.0:
; RV64-BOTH-NEXT: beqz a3, .LBB3_3
; RV64-BOTH-NEXT: # %bb.1: # %loadstoreloop.preheader
; RV64-BOTH-NEXT: li a4, 0
; RV64-BOTH-NEXT: .LBB3_2: # %loadstoreloop
; RV64-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-BOTH-NEXT: sd a1, 0(a0)
; RV64-BOTH-NEXT: sd a2, 8(a0)
; RV64-BOTH-NEXT: addi a4, a4, 1
; RV64-BOTH-NEXT: addi a0, a0, 16
; RV64-BOTH-NEXT: bltu a4, a3, .LBB3_2
; RV64-BOTH-NEXT: .LBB3_3: # %split
; RV64-BOTH-NEXT: ret
tail call void @llvm.experimental.memset.pattern(ptr align 8 %a, i128 %value, i64 %x, i1 0)
ret void
}

@@ -1,2 +0,0 @@
if not "PowerPC" in config.root.targets:
config.unsupported = True

@@ -1,24 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=powerpc64 -passes=pre-isel-intrinsic-lowering -S -o - %s 2>&1 | FileCheck %s
; Simple smoke test that memset.pattern is still expanded on big endian
; targets.
define void @memset.pattern(ptr %a, i128 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset.pattern(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP4]] = add i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 %x, i1 0)
ret void
}

@@ -1,2 +0,0 @@
if not "RISCV" in config.root.targets:
config.unsupported = True

@@ -1,127 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=riscv64 -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_1(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP3]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1
; CHECK-NEXT: br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 1, i1 0)
ret void
}
define void @memset_pattern_i128_16(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_16(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP3]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 16
; CHECK-NEXT: br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 16, i1 0)
ret void
}
define void @memset_pattern_i127_x(ptr %a, i127 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i127_x(
; CHECK-SAME: ptr [[A:%.*]], i127 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i127, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: store i127 [[VALUE]], ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP4]] = add i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i127 %value, i64 %x, i1 0)
ret void
}
define void @memset_pattern_i128_x(ptr %a, i128 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_x(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP6]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP6]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 %x, i1 0)
ret void
}
define void @memset_pattern_i256_x(ptr %a, i256 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i256_x(
; CHECK-SAME: ptr [[A:%.*]], i256 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i256, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i256 [[VALUE]], ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP6]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP6]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i256 %value, i64 %x, i1 0)
ret void
}
; The alignment used for each store should be the common alignment of the
; pattern's stride (its allocation size) and the destination pointer.
define void @memset_pattern_i15_x_alignment(ptr %a, i15 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i15_x_alignment(
; CHECK-SAME: ptr [[A:%.*]], i15 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i15, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: store i15 [[VALUE]], ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP4]] = add i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP7]], label %[[SPLIT1:.*]], label %[[LOADSTORELOOP2:.*]]
; CHECK: [[LOADSTORELOOP2]]:
; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ 0, %[[SPLIT]] ], [ [[TMP9:%.*]], %[[LOADSTORELOOP2]] ]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i15, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: store i15 [[VALUE]], ptr [[TMP8]], align 2
; CHECK-NEXT: [[TMP9]] = add i64 [[TMP11]], 1
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP9]], [[X]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[LOADSTORELOOP2]], label %[[SPLIT1]]
; CHECK: [[SPLIT1]]:
; CHECK-NEXT: ret void
;
call void @llvm.experimental.memset.pattern(ptr align 1 %a, i15 %value, i64 %x, i1 0)
call void @llvm.experimental.memset.pattern(ptr align 2 %a, i15 %value, i64 %x, i1 0)
ret void
}

@@ -63,14 +63,6 @@ define void @memset_inline_is_volatile(ptr %dest, i8 %value, i1 %is.volatile) {
ret void
}
declare void @llvm.experimental.memset.pattern.p0.i32.i32(ptr nocapture, i32, i32, i1)
define void @memset_pattern_is_volatile(ptr %dest, i32 %value, i1 %is.volatile) {
; CHECK: immarg operand has non-immediate parameter
; CHECK-NEXT: i1 %is.volatile
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i32(ptr %dest, i32 %value, i32 8, i1 %is.volatile)
call void @llvm.experimental.memset.pattern.p0.i32.i32(ptr %dest, i32 %value, i32 8, i1 %is.volatile)
ret void
}
declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
define void @objectsize(ptr %ptr, i1 %a, i1 %b, i1 %c) {

@@ -1,9 +0,0 @@
; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s
; CHECK: alignment is not a power of two
define void @foo(ptr %P, i32 %value) {
call void @llvm.experimental.memset.pattern.p0.i32.i32(ptr align 3 %P, i32 %value, i32 4, i1 false)
ret void
}
declare void @llvm.experimental.memset.pattern.p0.i32.i32(ptr nocapture, i32, i32, i1) nounwind