Mirror of https://github.com/llvm/llvm-project.git (synced 2025-04-26 19:46:06 +00:00)
Revert "[IR] Initial introduction of llvm.experimental.memset_pattern (#97583)"
This reverts commit 7ff3a9acd84654c9ec2939f45ba27f162ae7fbc3. Recent scheduling changes mean the tests need to be regenerated. Reverting to green while I do that.
This commit is contained in:
parent dad9e4a165
commit 0fb8fac5d6
@@ -15430,63 +15430,6 @@ The behavior of '``llvm.memset.inline.*``' is equivalent to the behavior of
'``llvm.memset.*``', but the generated code is guaranteed not to call any
external functions.

.. _int_experimental_memset_pattern:

'``llvm.experimental.memset.pattern``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""

This is an overloaded intrinsic. You can use
``llvm.experimental.memset.pattern`` on any integer bit width and for
different address spaces. Not all targets support all bit widths, however.

::

      declare void @llvm.experimental.memset.pattern.p0.i128.i64(ptr <dest>, i128 <val>,
                                                                 i64 <count>, i1 <isvolatile>)

Overview:
"""""""""

The '``llvm.experimental.memset.pattern.*``' intrinsics fill a block of memory
with a particular value. This may be expanded to an inline loop, a sequence of
stores, or a libcall depending on what is available for the target and the
expected performance and code size impact.

Arguments:
""""""""""

The first argument is a pointer to the destination to fill, the second
is the value with which to fill it, the third argument is an integer
argument specifying the number of times to fill the value, and the fourth is a
boolean indicating a volatile access.

The :ref:`align <attr_align>` parameter attribute can be provided
for the first argument.

If the ``isvolatile`` parameter is ``true``, the
``llvm.experimental.memset.pattern`` call is a :ref:`volatile operation
<volatile>`. The detailed access behavior is not very cleanly specified and it
is unwise to depend on it.

Semantics:
""""""""""

The '``llvm.experimental.memset.pattern*``' intrinsic fills memory starting at
the destination location with the given pattern ``<count>`` times,
incrementing by the allocation size of the type each time. The stores follow
the usual semantics of store instructions, including regarding endianness and
padding. If the argument is known to be aligned to some boundary, this can be
specified as an attribute on the argument.

If ``<count>`` is 0, it is a no-op modulo the behavior of attributes attached to
the arguments.
If ``<count>`` is not a well-defined value, the behavior is undefined.
If ``<count>`` is not zero, ``<dest>`` should be well-defined, otherwise the
behavior is undefined.

.. _int_sqrt:

'``llvm.sqrt.*``' Intrinsic
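For readers unfamiliar with the reverted ``llvm.experimental.memset.pattern`` intrinsic documented above, a minimal sketch of how a pass could emit it through IRBuilder while the patch is in tree. The helper name emitPatternFill and the choice of an i128 pattern with an i64 count of 4 are illustrative assumptions, not part of the patch.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// Hypothetical sketch (only compiles with the reverted intrinsic present):
// emit llvm.experimental.memset.pattern.p0.i128.i64 to store the i128
// pattern %Pattern four times starting at %Dest, non-volatile.
static void emitPatternFill(IRBuilder<> &B, Value *Dest, Value *Pattern) {
  B.CreateIntrinsic(Intrinsic::experimental_memset_pattern,
                    {B.getPtrTy(), B.getInt128Ty(), B.getInt64Ty()},
                    {Dest, Pattern, B.getInt64(4), B.getFalse()});
}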
@@ -208,9 +208,6 @@ public:
  RetTy visitDbgInfoIntrinsic(DbgInfoIntrinsic &I){ DELEGATE(IntrinsicInst); }
  RetTy visitMemSetInst(MemSetInst &I) { DELEGATE(MemIntrinsic); }
  RetTy visitMemSetInlineInst(MemSetInlineInst &I){ DELEGATE(MemSetInst); }
  RetTy visitMemSetPatternInst(MemSetPatternInst &I) {
    DELEGATE(IntrinsicInst);
  }
  RetTy visitMemCpyInst(MemCpyInst &I) { DELEGATE(MemTransferInst); }
  RetTy visitMemCpyInlineInst(MemCpyInlineInst &I){ DELEGATE(MemCpyInst); }
  RetTy visitMemMoveInst(MemMoveInst &I) { DELEGATE(MemTransferInst); }
@@ -298,8 +295,6 @@ private:
    case Intrinsic::memset: DELEGATE(MemSetInst);
    case Intrinsic::memset_inline:
      DELEGATE(MemSetInlineInst);
    case Intrinsic::experimental_memset_pattern:
      DELEGATE(MemSetPatternInst);
    case Intrinsic::vastart: DELEGATE(VAStartInst);
    case Intrinsic::vaend: DELEGATE(VAEndInst);
    case Intrinsic::vacopy: DELEGATE(VACopyInst);
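As a usage sketch of the InstVisitor hook removed above (it only compiles while the reverted change is applied), a subclass can intercept pattern memsets directly; PatternCounter is a hypothetical name, not LLVM API.

#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

// Hypothetical visitor: counts llvm.experimental.memset.pattern calls by
// overriding the visitMemSetPatternInst hook the reverted change provided.
struct PatternCounter : InstVisitor<PatternCounter> {
  unsigned NumPatternSets = 0;
  void visitMemSetPatternInst(MemSetPatternInst &I) { ++NumPatternSets; }
};

// Usage sketch:
//   PatternCounter PC;
//   PC.visit(F);   // F is a Function (visit also accepts Module/BasicBlock)
//   // PC.NumPatternSets now holds the number of pattern memsets seen.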
@@ -1263,41 +1263,6 @@ public:
  }
};

/// This is the base class for llvm.experimental.memset.pattern
class MemSetPatternIntrinsic : public MemIntrinsicBase<MemIntrinsic> {
private:
  enum { ARG_VOLATILE = 3 };

public:
  ConstantInt *getVolatileCst() const {
    return cast<ConstantInt>(const_cast<Value *>(getArgOperand(ARG_VOLATILE)));
  }

  bool isVolatile() const { return !getVolatileCst()->isZero(); }

  void setVolatile(Constant *V) { setArgOperand(ARG_VOLATILE, V); }

  // Methods for support of type inquiry through isa, cast, and dyn_cast:
  static bool classof(const IntrinsicInst *I) {
    return I->getIntrinsicID() == Intrinsic::experimental_memset_pattern;
  }
  static bool classof(const Value *V) {
    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
  }
};

/// This class wraps the llvm.experimental.memset.pattern intrinsic.
class MemSetPatternInst : public MemSetBase<MemSetPatternIntrinsic> {
public:
  // Methods for support type inquiry through isa, cast, and dyn_cast:
  static bool classof(const IntrinsicInst *I) {
    return I->getIntrinsicID() == Intrinsic::experimental_memset_pattern;
  }
  static bool classof(const Value *V) {
    return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
  }
};

/// This class wraps the llvm.memcpy/memmove intrinsics.
class MemTransferInst : public MemTransferBase<MemIntrinsic> {
public:
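A small sketch of how the wrapper classes removed above would be used at a call site, again only valid while the patch is in tree; isNonVolatilePatternSet is an illustrative helper, not part of the reverted change.

#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

// Hypothetical helper: use the MemSetPatternInst wrapper to inspect a
// pattern-memset call through the usual isa/dyn_cast machinery.
static bool isNonVolatilePatternSet(const Instruction &I) {
  if (const auto *MSP = dyn_cast<MemSetPatternInst>(&I))
    return !MSP->isVolatile(); // reads the i1 <isvolatile> operand
  return false;
}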
@@ -1006,17 +1006,6 @@ def int_memset_inline
       NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
       ImmArg<ArgIndex<3>>]>;

// Memset variant that writes a given pattern.
def int_experimental_memset_pattern
    : Intrinsic<[],
      [llvm_anyptr_ty, // Destination.
       llvm_anyint_ty, // Pattern value.
       llvm_anyint_ty, // Count (number of times to fill value).
       llvm_i1_ty],    // IsVolatile.
      [IntrWriteMem, IntrArgMemOnly, IntrWillReturn, IntrNoFree, IntrNoCallback,
       NoCapture<ArgIndex<0>>, WriteOnly<ArgIndex<0>>,
       ImmArg<ArgIndex<3>>]>;

// FIXME: Add version of these floating point intrinsics which allow non-default
// rounding modes and FP exception handling.
@@ -25,7 +25,6 @@ class Instruction;
class MemCpyInst;
class MemMoveInst;
class MemSetInst;
class MemSetPatternInst;
class ScalarEvolution;
class TargetTransformInfo;
class Value;
@@ -58,9 +57,6 @@ bool expandMemMoveAsLoop(MemMoveInst *MemMove, const TargetTransformInfo &TTI);
/// Expand \p MemSet as a loop. \p MemSet is not deleted.
void expandMemSetAsLoop(MemSetInst *MemSet);

/// Expand \p MemSetPattern as a loop. \p MemSet is not deleted.
void expandMemSetPatternAsLoop(MemSetPatternInst *MemSet);

/// Expand \p AtomicMemCpy as a loop. \p AtomicMemCpy is not deleted.
void expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemCpy,
                              const TargetTransformInfo &TTI,
@@ -320,13 +320,6 @@ bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
      Memset->eraseFromParent();
      break;
    }
    case Intrinsic::experimental_memset_pattern: {
      auto *Memset = cast<MemSetPatternInst>(Inst);
      expandMemSetPatternAsLoop(Memset);
      Changed = true;
      Memset->eraseFromParent();
      break;
    }
    default:
      llvm_unreachable("unhandled intrinsic");
    }
@@ -346,7 +339,6 @@ bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
    case Intrinsic::memmove:
    case Intrinsic::memset:
    case Intrinsic::memset_inline:
    case Intrinsic::experimental_memset_pattern:
      Changed |= expandMemIntrinsicUses(F);
      break;
    case Intrinsic::load_relative:
@@ -5519,8 +5519,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
  case Intrinsic::memcpy_inline:
  case Intrinsic::memmove:
  case Intrinsic::memset:
  case Intrinsic::memset_inline:
  case Intrinsic::experimental_memset_pattern: {
  case Intrinsic::memset_inline: {
    break;
  }
  case Intrinsic::memcpy_element_unordered_atomic:
@@ -970,15 +970,6 @@ void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
                   Memset->isVolatile());
}

void llvm::expandMemSetPatternAsLoop(MemSetPatternInst *Memset) {
  createMemSetLoop(/* InsertBefore=*/Memset,
                   /* DstAddr=*/Memset->getRawDest(),
                   /* CopyLen=*/Memset->getLength(),
                   /* SetValue=*/Memset->getValue(),
                   /* Alignment=*/Memset->getDestAlign().valueOrOne(),
                   Memset->isVolatile());
}

void llvm::expandAtomicMemCpyAsLoop(AtomicMemCpyInst *AtomicMemcpy,
                                    const TargetTransformInfo &TTI,
                                    ScalarEvolution *SE) {
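To tie the lowering pieces together, a sketch of how PreISelIntrinsicLowering drives the expansion helper deleted above; lowerOnePatternMemset is a made-up wrapper for illustration and assumes the reverted declarations are available.

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"

using namespace llvm;

// Hypothetical wrapper mirroring the expandMemIntrinsicUses case above:
// expand the pattern memset into an explicit store loop, then erase the
// original intrinsic call (expandMemSetPatternAsLoop does not delete it).
static void lowerOnePatternMemset(MemSetPatternInst *Memset) {
  expandMemSetPatternAsLoop(Memset);
  Memset->eraseFromParent();
}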
@@ -1,297 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv32 -mattr=+m \
; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32
; RUN: llc < %s -mtriple=riscv64 -mattr=+m \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+unaligned-scalar-mem \
; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+unaligned-scalar-mem \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST

; TODO: Due to the initial naive lowering implementation of memset.pattern in
; PreISelIntrinsicLowering, the generated code is not good.

define void @memset_1(ptr %a, i128 %value) nounwind {
; RV32-BOTH-LABEL: memset_1:
; RV32-BOTH: # %bb.0: # %loadstoreloop.preheader
; RV32-BOTH-NEXT: lw a2, 0(a1)
; RV32-BOTH-NEXT: lw a3, 4(a1)
; RV32-BOTH-NEXT: lw a4, 8(a1)
; RV32-BOTH-NEXT: lw a1, 12(a1)
; RV32-BOTH-NEXT: li a5, 0
; RV32-BOTH-NEXT: li a6, 0
; RV32-BOTH-NEXT: .LBB0_1: # %loadstoreloop
; RV32-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-BOTH-NEXT: slli a7, a5, 4
; RV32-BOTH-NEXT: add a7, a0, a7
; RV32-BOTH-NEXT: addi a5, a5, 1
; RV32-BOTH-NEXT: seqz t0, a5
; RV32-BOTH-NEXT: add a6, a6, t0
; RV32-BOTH-NEXT: or t0, a5, a6
; RV32-BOTH-NEXT: sw a2, 0(a7)
; RV32-BOTH-NEXT: sw a3, 4(a7)
; RV32-BOTH-NEXT: sw a4, 8(a7)
; RV32-BOTH-NEXT: sw a1, 12(a7)
; RV32-BOTH-NEXT: beqz t0, .LBB0_1
; RV32-BOTH-NEXT: # %bb.2: # %split
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: memset_1:
; RV64-BOTH: # %bb.0: # %loadstoreloop.preheader
; RV64-BOTH-NEXT: addi a3, a0, 16
; RV64-BOTH-NEXT: .LBB0_1: # %loadstoreloop
; RV64-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-BOTH-NEXT: sd a1, 0(a0)
; RV64-BOTH-NEXT: sd a2, 8(a0)
; RV64-BOTH-NEXT: addi a0, a0, 16
; RV64-BOTH-NEXT: bne a0, a3, .LBB0_1
; RV64-BOTH-NEXT: # %bb.2: # %split
; RV64-BOTH-NEXT: ret
  tail call void @llvm.experimental.memset.pattern(ptr align 8 %a, i128 %value, i64 1, i1 0)
  ret void
}

define void @memset_1_noalign(ptr %a, i128 %value) nounwind {
; RV32-LABEL: memset_1_noalign:
; RV32: # %bb.0: # %loadstoreloop.preheader
; RV32-NEXT: addi sp, sp, -32
; RV32-NEXT: sw s0, 28(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s1, 24(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s2, 20(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s3, 16(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s4, 12(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s5, 8(sp) # 4-byte Folded Spill
; RV32-NEXT: li a2, 0
; RV32-NEXT: li a3, 0
; RV32-NEXT: lw a4, 4(a1)
; RV32-NEXT: lw a5, 0(a1)
; RV32-NEXT: lw a6, 8(a1)
; RV32-NEXT: lw a1, 12(a1)
; RV32-NEXT: srli a7, a4, 24
; RV32-NEXT: srli t0, a4, 16
; RV32-NEXT: srli t1, a4, 8
; RV32-NEXT: srli t2, a5, 24
; RV32-NEXT: srli t3, a5, 16
; RV32-NEXT: srli t4, a5, 8
; RV32-NEXT: srli t5, a6, 24
; RV32-NEXT: srli t6, a6, 16
; RV32-NEXT: srli s0, a6, 8
; RV32-NEXT: srli s1, a1, 24
; RV32-NEXT: srli s2, a1, 16
; RV32-NEXT: srli s3, a1, 8
; RV32-NEXT: .LBB1_1: # %loadstoreloop
; RV32-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-NEXT: slli s4, a2, 4
; RV32-NEXT: add s4, a0, s4
; RV32-NEXT: sb a4, 4(s4)
; RV32-NEXT: sb t1, 5(s4)
; RV32-NEXT: sb t0, 6(s4)
; RV32-NEXT: sb a7, 7(s4)
; RV32-NEXT: sb a5, 0(s4)
; RV32-NEXT: sb t4, 1(s4)
; RV32-NEXT: sb t3, 2(s4)
; RV32-NEXT: sb t2, 3(s4)
; RV32-NEXT: sb a6, 8(s4)
; RV32-NEXT: sb s0, 9(s4)
; RV32-NEXT: sb t6, 10(s4)
; RV32-NEXT: sb t5, 11(s4)
; RV32-NEXT: addi a2, a2, 1
; RV32-NEXT: seqz s5, a2
; RV32-NEXT: add a3, a3, s5
; RV32-NEXT: or s5, a2, a3
; RV32-NEXT: sb a1, 12(s4)
; RV32-NEXT: sb s3, 13(s4)
; RV32-NEXT: sb s2, 14(s4)
; RV32-NEXT: sb s1, 15(s4)
; RV32-NEXT: beqz s5, .LBB1_1
; RV32-NEXT: # %bb.2: # %split
; RV32-NEXT: lw s0, 28(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s1, 24(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s2, 20(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s3, 16(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s4, 12(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s5, 8(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 32
; RV32-NEXT: ret
;
; RV64-LABEL: memset_1_noalign:
; RV64: # %bb.0: # %loadstoreloop.preheader
; RV64-NEXT: addi sp, sp, -32
; RV64-NEXT: sd s0, 24(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s1, 16(sp) # 8-byte Folded Spill
; RV64-NEXT: sd s2, 8(sp) # 8-byte Folded Spill
; RV64-NEXT: addi a3, a0, 16
; RV64-NEXT: srli a4, a1, 56
; RV64-NEXT: srli a5, a1, 48
; RV64-NEXT: srli a6, a1, 40
; RV64-NEXT: srli a7, a1, 32
; RV64-NEXT: srli t0, a1, 24
; RV64-NEXT: srli t1, a1, 16
; RV64-NEXT: srli t2, a1, 8
; RV64-NEXT: srli t3, a2, 56
; RV64-NEXT: srli t4, a2, 48
; RV64-NEXT: srli t5, a2, 40
; RV64-NEXT: srli t6, a2, 32
; RV64-NEXT: srli s0, a2, 24
; RV64-NEXT: srli s1, a2, 16
; RV64-NEXT: srli s2, a2, 8
; RV64-NEXT: .LBB1_1: # %loadstoreloop
; RV64-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-NEXT: sb a7, 4(a0)
; RV64-NEXT: sb a6, 5(a0)
; RV64-NEXT: sb a5, 6(a0)
; RV64-NEXT: sb a4, 7(a0)
; RV64-NEXT: sb a1, 0(a0)
; RV64-NEXT: sb t2, 1(a0)
; RV64-NEXT: sb t1, 2(a0)
; RV64-NEXT: sb t0, 3(a0)
; RV64-NEXT: sb t6, 12(a0)
; RV64-NEXT: sb t5, 13(a0)
; RV64-NEXT: sb t4, 14(a0)
; RV64-NEXT: sb t3, 15(a0)
; RV64-NEXT: sb a2, 8(a0)
; RV64-NEXT: sb s2, 9(a0)
; RV64-NEXT: sb s1, 10(a0)
; RV64-NEXT: sb s0, 11(a0)
; RV64-NEXT: addi a0, a0, 16
; RV64-NEXT: bne a0, a3, .LBB1_1
; RV64-NEXT: # %bb.2: # %split
; RV64-NEXT: ld s0, 24(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s1, 16(sp) # 8-byte Folded Reload
; RV64-NEXT: ld s2, 8(sp) # 8-byte Folded Reload
; RV64-NEXT: addi sp, sp, 32
; RV64-NEXT: ret
;
; RV32-FAST-LABEL: memset_1_noalign:
; RV32-FAST: # %bb.0: # %loadstoreloop.preheader
; RV32-FAST-NEXT: lw a2, 0(a1)
; RV32-FAST-NEXT: lw a3, 4(a1)
; RV32-FAST-NEXT: lw a4, 8(a1)
; RV32-FAST-NEXT: lw a1, 12(a1)
; RV32-FAST-NEXT: li a5, 0
; RV32-FAST-NEXT: li a6, 0
; RV32-FAST-NEXT: .LBB1_1: # %loadstoreloop
; RV32-FAST-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-FAST-NEXT: slli a7, a5, 4
; RV32-FAST-NEXT: add a7, a0, a7
; RV32-FAST-NEXT: addi a5, a5, 1
; RV32-FAST-NEXT: seqz t0, a5
; RV32-FAST-NEXT: add a6, a6, t0
; RV32-FAST-NEXT: or t0, a5, a6
; RV32-FAST-NEXT: sw a2, 0(a7)
; RV32-FAST-NEXT: sw a3, 4(a7)
; RV32-FAST-NEXT: sw a4, 8(a7)
; RV32-FAST-NEXT: sw a1, 12(a7)
; RV32-FAST-NEXT: beqz t0, .LBB1_1
; RV32-FAST-NEXT: # %bb.2: # %split
; RV32-FAST-NEXT: ret
;
; RV64-FAST-LABEL: memset_1_noalign:
; RV64-FAST: # %bb.0: # %loadstoreloop.preheader
; RV64-FAST-NEXT: addi a3, a0, 16
; RV64-FAST-NEXT: .LBB1_1: # %loadstoreloop
; RV64-FAST-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-FAST-NEXT: sd a1, 0(a0)
; RV64-FAST-NEXT: sd a2, 8(a0)
; RV64-FAST-NEXT: addi a0, a0, 16
; RV64-FAST-NEXT: bne a0, a3, .LBB1_1
; RV64-FAST-NEXT: # %bb.2: # %split
; RV64-FAST-NEXT: ret
  tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 1, i1 0)
  ret void
}

define void @memset_4(ptr %a, i128 %value) nounwind {
; RV32-BOTH-LABEL: memset_4:
; RV32-BOTH: # %bb.0: # %loadstoreloop.preheader
; RV32-BOTH-NEXT: lw a2, 0(a1)
; RV32-BOTH-NEXT: lw a3, 4(a1)
; RV32-BOTH-NEXT: lw a4, 8(a1)
; RV32-BOTH-NEXT: lw a1, 12(a1)
; RV32-BOTH-NEXT: li a5, 0
; RV32-BOTH-NEXT: li a6, 0
; RV32-BOTH-NEXT: .LBB2_1: # %loadstoreloop
; RV32-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-BOTH-NEXT: slli a7, a5, 4
; RV32-BOTH-NEXT: add a7, a0, a7
; RV32-BOTH-NEXT: addi a5, a5, 1
; RV32-BOTH-NEXT: seqz t0, a5
; RV32-BOTH-NEXT: add a6, a6, t0
; RV32-BOTH-NEXT: seqz t0, a6
; RV32-BOTH-NEXT: sltiu t1, a5, 4
; RV32-BOTH-NEXT: and t0, t0, t1
; RV32-BOTH-NEXT: sw a2, 0(a7)
; RV32-BOTH-NEXT: sw a3, 4(a7)
; RV32-BOTH-NEXT: sw a4, 8(a7)
; RV32-BOTH-NEXT: sw a1, 12(a7)
; RV32-BOTH-NEXT: bnez t0, .LBB2_1
; RV32-BOTH-NEXT: # %bb.2: # %split
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: memset_4:
; RV64-BOTH: # %bb.0: # %loadstoreloop.preheader
; RV64-BOTH-NEXT: addi a3, a0, 64
; RV64-BOTH-NEXT: .LBB2_1: # %loadstoreloop
; RV64-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-BOTH-NEXT: sd a1, 0(a0)
; RV64-BOTH-NEXT: sd a2, 8(a0)
; RV64-BOTH-NEXT: addi a0, a0, 16
; RV64-BOTH-NEXT: bne a0, a3, .LBB2_1
; RV64-BOTH-NEXT: # %bb.2: # %split
; RV64-BOTH-NEXT: ret
  tail call void @llvm.experimental.memset.pattern(ptr align 8 %a, i128 %value, i64 4, i1 0)
  ret void
}

define void @memset_x(ptr %a, i128 %value, i64 %x) nounwind {
; RV32-BOTH-LABEL: memset_x:
; RV32-BOTH: # %bb.0:
; RV32-BOTH-NEXT: or a4, a2, a3
; RV32-BOTH-NEXT: beqz a4, .LBB3_5
; RV32-BOTH-NEXT: # %bb.1: # %loadstoreloop.preheader
; RV32-BOTH-NEXT: lw a4, 0(a1)
; RV32-BOTH-NEXT: lw a5, 4(a1)
; RV32-BOTH-NEXT: lw a6, 8(a1)
; RV32-BOTH-NEXT: lw a1, 12(a1)
; RV32-BOTH-NEXT: li a7, 0
; RV32-BOTH-NEXT: li t0, 0
; RV32-BOTH-NEXT: j .LBB3_3
; RV32-BOTH-NEXT: .LBB3_2: # %loadstoreloop
; RV32-BOTH-NEXT: # in Loop: Header=BB3_3 Depth=1
; RV32-BOTH-NEXT: sltu t1, t0, a3
; RV32-BOTH-NEXT: beqz t1, .LBB3_5
; RV32-BOTH-NEXT: .LBB3_3: # %loadstoreloop
; RV32-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
; RV32-BOTH-NEXT: slli t1, a7, 4
; RV32-BOTH-NEXT: add t1, a0, t1
; RV32-BOTH-NEXT: addi a7, a7, 1
; RV32-BOTH-NEXT: seqz t2, a7
; RV32-BOTH-NEXT: add t0, t0, t2
; RV32-BOTH-NEXT: sw a4, 0(t1)
; RV32-BOTH-NEXT: sw a5, 4(t1)
; RV32-BOTH-NEXT: sw a6, 8(t1)
; RV32-BOTH-NEXT: sw a1, 12(t1)
; RV32-BOTH-NEXT: bne t0, a3, .LBB3_2
; RV32-BOTH-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1
; RV32-BOTH-NEXT: sltu t1, a7, a2
; RV32-BOTH-NEXT: bnez t1, .LBB3_3
; RV32-BOTH-NEXT: .LBB3_5: # %split
; RV32-BOTH-NEXT: ret
;
; RV64-BOTH-LABEL: memset_x:
; RV64-BOTH: # %bb.0:
; RV64-BOTH-NEXT: beqz a3, .LBB3_3
; RV64-BOTH-NEXT: # %bb.1: # %loadstoreloop.preheader
; RV64-BOTH-NEXT: li a4, 0
; RV64-BOTH-NEXT: .LBB3_2: # %loadstoreloop
; RV64-BOTH-NEXT: # =>This Inner Loop Header: Depth=1
; RV64-BOTH-NEXT: sd a1, 0(a0)
; RV64-BOTH-NEXT: sd a2, 8(a0)
; RV64-BOTH-NEXT: addi a4, a4, 1
; RV64-BOTH-NEXT: addi a0, a0, 16
; RV64-BOTH-NEXT: bltu a4, a3, .LBB3_2
; RV64-BOTH-NEXT: .LBB3_3: # %split
; RV64-BOTH-NEXT: ret
  tail call void @llvm.experimental.memset.pattern(ptr align 8 %a, i128 %value, i64 %x, i1 0)
  ret void
}
@@ -1,2 +0,0 @@
if not "PowerPC" in config.root.targets:
    config.unsupported = True
@@ -1,24 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=powerpc64 -passes=pre-isel-intrinsic-lowering -S -o - %s 2>&1 | FileCheck %s

; Simple smoke test that memset.pattern is still expanded on big endian
; targets.

define void @memset.pattern(ptr %a, i128 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset.pattern(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP4]] = add i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
  tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 %x, i1 0)
  ret void
}
@@ -1,2 +0,0 @@
if not "RISCV" in config.root.targets:
    config.unsupported = True
@@ -1,127 +0,0 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=riscv64 -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s

define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_1(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP3]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1
; CHECK-NEXT: br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
  tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 1, i1 0)
  ret void
}

define void @memset_pattern_i128_16(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_16(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP3]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 16
; CHECK-NEXT: br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
  tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 16, i1 0)
  ret void
}

define void @memset_pattern_i127_x(ptr %a, i127 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i127_x(
; CHECK-SAME: ptr [[A:%.*]], i127 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i127, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: store i127 [[VALUE]], ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP4]] = add i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
  tail call void @llvm.experimental.memset.pattern(ptr %a, i127 %value, i64 %x, i1 0)
  ret void
}

define void @memset_pattern_i128_x(ptr %a, i128 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_x(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP6]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP6]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
  tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 %x, i1 0)
  ret void
}

define void @memset_pattern_i256_x(ptr %a, i256 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i256_x(
; CHECK-SAME: ptr [[A:%.*]], i256 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i256, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i256 [[VALUE]], ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP6]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP6]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
  tail call void @llvm.experimental.memset.pattern(ptr %a, i256 %value, i64 %x, i1 0)
  ret void
}

; The common alignment of the allocation of the pattern stride (its allocation
; size) and the destination pointer should be used.
define void @memset_pattern_i15_x_alignment(ptr %a, i15 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i15_x_alignment(
; CHECK-SAME: ptr [[A:%.*]], i15 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i15, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: store i15 [[VALUE]], ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP4]] = add i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP7]], label %[[SPLIT1:.*]], label %[[LOADSTORELOOP2:.*]]
; CHECK: [[LOADSTORELOOP2]]:
; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ 0, %[[SPLIT]] ], [ [[TMP9:%.*]], %[[LOADSTORELOOP2]] ]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i15, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: store i15 [[VALUE]], ptr [[TMP8]], align 2
; CHECK-NEXT: [[TMP9]] = add i64 [[TMP11]], 1
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP9]], [[X]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[LOADSTORELOOP2]], label %[[SPLIT1]]
; CHECK: [[SPLIT1]]:
; CHECK-NEXT: ret void
;
  call void @llvm.experimental.memset.pattern(ptr align 1 %a, i15 %value, i64 %x, i1 0)
  call void @llvm.experimental.memset.pattern(ptr align 2 %a, i15 %value, i64 %x, i1 0)
  ret void
}
@@ -63,14 +63,6 @@ define void @memset_inline_is_volatile(ptr %dest, i8 %value, i1 %is.volatile) {
  ret void
}

declare void @llvm.experimental.memset.pattern.p0.i32.i32(ptr nocapture, i32, i32, i1)
define void @memset_pattern_is_volatile(ptr %dest, i32 %value, i1 %is.volatile) {
; CHECK: immarg operand has non-immediate parameter
; CHECK-NEXT: i1 %is.volatile
; CHECK-NEXT: call void @llvm.experimental.memset.pattern.p0.i32.i32(ptr %dest, i32 %value, i32 8, i1 %is.volatile)
  call void @llvm.experimental.memset.pattern.p0.i32.i32(ptr %dest, i32 %value, i32 8, i1 %is.volatile)
  ret void
}

declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
define void @objectsize(ptr %ptr, i1 %a, i1 %b, i1 %c) {
@@ -1,9 +0,0 @@
; RUN: not opt -passes=verify < %s 2>&1 | FileCheck %s

; CHECK: alignment is not a power of two

define void @foo(ptr %P, i32 %value) {
  call void @llvm.experimental.memset.pattern.p0.i32.i32(ptr align 3 %P, i32 %value, i32 4, i1 false)
  ret void
}
declare void @llvm.experimental.memset.pattern.p0.i32.i32(ptr nocapture, i32, i32, i1) nounwind