; Mirrored from https://github.com/llvm/llvm-project.git (synced 2025-04-27).
;
; Supersedes the draft PR #94992, taking a different approach following
; feedback:
;  * Lower in PreISelIntrinsicLowering.
;  * Don't require that the number of bytes to set is a compile-time constant.
;  * Define llvm.memset_pattern rather than llvm.memset_pattern.inline.
; As discussed in the RFC thread
; <https://discourse.llvm.org/t/rfc-introducing-an-llvm-memset-pattern-inline-intrinsic/79496>,
; the intent is that the intrinsic will be lowered to loops, a sequence of
; stores, or libcalls depending on the expected cost and availability of
; libcalls on the target. Right now, there's just a single lowering path that
; aims to handle all cases.
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=riscv64 -passes=pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
; Constant length of 1: the pass still emits a guarded load/store loop, with
; the zero-trip guard folded to `br i1 false` and the loop latch comparing
; against the constant trip count 1.
define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_1(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP3]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 1
; CHECK-NEXT: br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
  tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 1, i1 0)
  ret void
}
; Constant length of 16: same loop shape as the length-1 case, but the latch
; compares the induction variable against 16.
define void @memset_pattern_i128_16(ptr %a, i128 %value) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_16(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP3:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP3]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], 16
; CHECK-NEXT: br i1 [[TMP4]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
  tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 16, i1 0)
  ret void
}
; Runtime length %x with a non-byte-sized i127 pattern: a real zero-trip
; guard (icmp eq i64 0, %x) precedes the loop, and the GEP strides by the
; i127 allocation size.
define void @memset_pattern_i127_x(ptr %a, i127 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i127_x(
; CHECK-SAME: ptr [[A:%.*]], i127 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i127, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: store i127 [[VALUE]], ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP4]] = add i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
  tail call void @llvm.experimental.memset.pattern(ptr %a, i127 %value, i64 %x, i1 0)
  ret void
}
; Runtime length %x with an i128 pattern: zero-trip guard plus load/store
; loop, latch condition `ult %x`.
define void @memset_pattern_i128_x(ptr %a, i128 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i128_x(
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i128, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i128 [[VALUE]], ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP6]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP6]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
  tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 %x, i1 0)
  ret void
}
; Runtime length %x with a wide i256 pattern: same guarded-loop lowering,
; exercising a pattern type wider than any legal RISC-V integer register.
define void @memset_pattern_i256_x(ptr %a, i256 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i256_x(
; CHECK-SAME: ptr [[A:%.*]], i256 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP6:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i256, ptr [[A]], i64 [[TMP2]]
; CHECK-NEXT: store i256 [[VALUE]], ptr [[TMP4]], align 1
; CHECK-NEXT: [[TMP6]] = add i64 [[TMP2]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP6]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: ret void
;
  tail call void @llvm.experimental.memset.pattern(ptr %a, i256 %value, i64 %x, i1 0)
  ret void
}
; The common alignment of the allocation of the pattern stride (its allocation
; size) and the destination pointer should be used.
; Two calls identical except for the destination pointer's stated alignment
; (align 1 vs align 2): the emitted stores carry `align 1` and `align 2`
; respectively, showing the destination alignment is propagated to the loop.
define void @memset_pattern_i15_x_alignment(ptr %a, i15 %value, i64 %x) nounwind {
; CHECK-LABEL: define void @memset_pattern_i15_x_alignment(
; CHECK-SAME: ptr [[A:%.*]], i15 [[VALUE:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP1]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
; CHECK: [[LOADSTORELOOP]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i15, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: store i15 [[VALUE]], ptr [[TMP2]], align 1
; CHECK-NEXT: [[TMP4]] = add i64 [[TMP3]], 1
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], [[X]]
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
; CHECK: [[SPLIT]]:
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 0, [[X]]
; CHECK-NEXT: br i1 [[TMP7]], label %[[SPLIT1:.*]], label %[[LOADSTORELOOP2:.*]]
; CHECK: [[LOADSTORELOOP2]]:
; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ 0, %[[SPLIT]] ], [ [[TMP9:%.*]], %[[LOADSTORELOOP2]] ]
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i15, ptr [[A]], i64 [[TMP11]]
; CHECK-NEXT: store i15 [[VALUE]], ptr [[TMP8]], align 2
; CHECK-NEXT: [[TMP9]] = add i64 [[TMP11]], 1
; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i64 [[TMP9]], [[X]]
; CHECK-NEXT: br i1 [[TMP10]], label %[[LOADSTORELOOP2]], label %[[SPLIT1]]
; CHECK: [[SPLIT1]]:
; CHECK-NEXT: ret void
;
  call void @llvm.experimental.memset.pattern(ptr align 1 %a, i15 %value, i64 %x, i1 0)
  call void @llvm.experimental.memset.pattern(ptr align 2 %a, i15 %value, i64 %x, i1 0)
  ret void
}