Alex Bradbury 7ff3a9acd8
[IR] Initial introduction of llvm.experimental.memset.pattern (#97583)
Supersedes the draft PR #94992, taking a different approach following
feedback:
* Lower in PreISelIntrinsicLowering
* Don't require that the number of bytes to set is a compile-time
constant
* Define llvm.memset_pattern rather than llvm.memset_pattern.inline

As discussed in the [RFC
thread](https://discourse.llvm.org/t/rfc-introducing-an-llvm-memset-pattern-inline-intrinsic/79496),
the intent is that the intrinsic will be lowered to loops, a sequence of
stores, or libcalls depending on the expected cost and availability of
libcalls on the target. Right now, there's just a single lowering path
that aims to handle all cases. My intent is to follow up with
additional PRs that add further optimisations where possible (e.g. when
libcalls are available, when arguments are known to be constant, etc.).
2024-11-15 14:07:46 +00:00
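
For orientation, here is a minimal sketch (not part of the patch; the function and value names are made up) of how the intrinsic is called and the rough shape of the expansion that the test below checks. The arguments are the destination pointer, the pattern value, the number of times to store the pattern (as the tests show), and an isvolatile flag.

; Illustrative sketch only: store 8 copies of a 32-bit pattern.
define void @example_fill(ptr %dst, i32 %pattern) {
  call void @llvm.experimental.memset.pattern(ptr %dst, i32 %pattern, i64 8, i1 0)
  ret void
}
; After pre-isel-intrinsic-lowering, the call becomes a store loop of
; roughly this shape (paraphrasing the CHECK lines below, not verbatim
; pass output):
;   loop:
;     %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
;     %p = getelementptr inbounds i32, ptr %dst, i64 %i
;     store i32 %pattern, ptr %p, align 1
;     %i.next = add i64 %i, 1
;     %cont = icmp ult i64 %i.next, 8
;     br i1 %cont, label %loop, label %exit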

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=riscv64 -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_1(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP3]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1
; CHECK-NEXT: br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 1, i1 0)
ret void
}
define void @memset_pattern_i128_16(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_16(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP3]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 16
; CHECK-NEXT: br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 16, i1 0)
ret void
}
define void @memset_pattern_i127_x(ptr %a, i127 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i127_x(
; CHECK-SAME: ptr [[A:%.*]], i127 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i127, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: store i127 [[VALUE]], ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP4]] = add i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i127 %value, i64 %x, i1 0)
ret void
}
define void @memset_pattern_i128_x(ptr %a, i128 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_x(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP6]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP6]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 %x, i1 0)
ret void
}
define void @memset_pattern_i256_x(ptr %a, i256 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i256_x(
; CHECK-SAME: ptr [[A:%.*]], i256 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i256, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i256 [[VALUE]], ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP6]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP6]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
tail call void @llvm.experimental.memset.pattern(ptr %a, i256 %value, i64 %x, i1 0)
ret void
}
; The stores should use the common alignment of the pattern's allocation size
; (i.e. its stride in memory) and the destination pointer's alignment.
define void @memset_pattern_i15_x_alignment(ptr %a, i15 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i15_x_alignment(
; CHECK-SAME: ptr [[A:%.*]], i15 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i15, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: store i15 [[VALUE]], ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP4]] = add i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP7]], label %[[SPLIT1:.*]], label %[[LOADSTORELOOP2:.*]]
; CHECK: [[LOADSTORELOOP2]]:
; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ 0, %[[SPLIT]] ], [ [[TMP9:%.*]], %[[LOADSTORELOOP2]] ]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i15, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: store i15 [[VALUE]], ptr [[TMP8]], align 2
; CHECK-NEXT: [[TMP9]] = add i64 [[TMP11]], 1
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP9]], [[X]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[LOADSTORELOOP2]], label %[[SPLIT1]]
; CHECK: [[SPLIT1]]:
; CHECK-NEXT: ret void
;
call void @llvm.experimental.memset.pattern(ptr align 1 %a, i15 %value, i64 %x, i1 0)
call void @llvm.experimental.memset.pattern(ptr align 2 %a, i15 %value, i64 %x, i1 0)
ret void
}
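
By the alignment rule noted before the last test, a destination whose alignment exceeds the i15 pattern's 2-byte allocation size should still only receive 2-byte-aligned stores. A hypothetical extension of that test, not verified here, would look like:

; Hypothetical case, not part of the test file: align 8 destination with an
; i15 pattern (allocation size 2 bytes), so by the stated rule the store
; alignment should be capped at 2 rather than 8.
call void @llvm.experimental.memset.pattern(ptr align 8 %a, i15 %value, i64 %x, i1 0)
; expected in the loop body:
;   store i15 %value, ptr %p, align 2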