llvm-project/llvm/test/CodeGen/LoongArch/calling-conv-common.ll
Weining Lu 47601815ec [LoongArch] Define ual feature and override allowsMisalignedMemoryAccesses
Some CPUs do not allow unaligned memory accesses, e.g. the 2k1000la,
which uses the la264 core, on which a misaligned access triggers an
exception.

In this patch, a backend feature called `ual` is defined to describe
whether the CPU supports unaligned memory accesses. The feature can be
toggled by the clang options `-m[no-]unaligned-access` or the aliases
`-m[no-]strict-align`. When this feature is on,
`allowsMisalignedMemoryAccesses` sets the speed number to 1 and returns
true, which allows codegen to emit unaligned memory access instructions.
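
For illustration, the override looks roughly like the sketch below. The
exact hook signature and the `hasUAL()` subtarget accessor name are
assumptions made for this sketch, not quoted from the patch:

  // Sketch: allow misaligned accesses only when the `ual` feature is set.
  bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace, Align Alignment,
      MachineMemOperand::Flags Flags, unsigned *Fast) const {
    if (!Subtarget.hasUAL())
      return false;  // misaligned accesses are expanded during legalization
    if (Fast)
      *Fast = 1;     // the "speed number" reported to callers
    return true;     // codegen may emit unaligned loads/stores
  }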

The clang options `-m[no-]unaligned-access` are moved from `m_arm_Features_Group`
to `m_Group` because more than one target now uses them, and a test
is added to show that they remain unused on a target that does not
support them. In addition, for compatibility with gcc, a new alias
`-mno-strict-align` is added that is equivalent to `-munaligned-access`.
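
As a usage example (illustrative invocations, not taken from this test):

  clang --target=loongarch64-unknown-linux-gnu -mno-unaligned-access -S foo.c
  clang --target=loongarch64-unknown-linux-gnu -mstrict-align -S foo.c

Both forms turn the `ual` feature off, while `-munaligned-access` and the
gcc-compatible `-mno-strict-align` turn it back on.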

The feature name `ual` is consistent with linux kernel [1] and the
output of `lscpu` or `/proc/cpuinfo` [2].

There is an `LLT` variant of `allowsMisalignedMemoryAccesses`, but it
seems that currently it is only used by GlobalISel, which LoongArch
does not support yet. So this variant is not implemented in this patch.

[1]: https://github.com/torvalds/linux/blob/master/arch/loongarch/include/asm/cpu.h#L77
[2]: https://github.com/torvalds/linux/blob/master/arch/loongarch/kernel/proc.c#L75

Reviewed By: xen0n

Differential Revision: https://reviews.llvm.org/D149946
2023-06-07 13:40:58 +08:00

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc --mtriple=loongarch64 --target-abi=lp64s < %s | FileCheck %s
; RUN: llc --mtriple=loongarch64 --mattr=+d --target-abi=lp64d < %s | FileCheck %s
;; This file contains tests that should have identical output for all ABIs, i.e.
;; where no arguments are passed via floating point registers.
;; Check that on LA64, i128 is passed in a pair of GPRs.
define i64 @callee_i128_in_regs(i64 %a, i128 %b) nounwind {
; CHECK-LABEL: callee_i128_in_regs:
; CHECK: # %bb.0:
; CHECK-NEXT: add.d $a0, $a0, $a1
; CHECK-NEXT: ret
%b_trunc = trunc i128 %b to i64
%1 = add i64 %a, %b_trunc
ret i64 %1
}
define i64 @caller_i128_in_regs() nounwind {
; CHECK-LABEL: caller_i128_in_regs:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; CHECK-NEXT: ori $a0, $zero, 1
; CHECK-NEXT: ori $a1, $zero, 2
; CHECK-NEXT: move $a2, $zero
; CHECK-NEXT: bl %plt(callee_i128_in_regs)
; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
%1 = call i64 @callee_i128_in_regs(i64 1, i128 2)
ret i64 %1
}
;; Check that the stack is used once the GPRs are exhausted.
define i64 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i128 %e, i64 %f, i128 %g, i64 %h) nounwind {
; CHECK-LABEL: callee_many_scalars:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $t0, $sp, 0
; CHECK-NEXT: xor $a5, $a5, $t0
; CHECK-NEXT: xor $a4, $a4, $a7
; CHECK-NEXT: or $a4, $a4, $a5
; CHECK-NEXT: bstrpick.d $a1, $a1, 15, 0
; CHECK-NEXT: andi $a0, $a0, 255
; CHECK-NEXT: add.d $a0, $a0, $a1
; CHECK-NEXT: bstrpick.d $a1, $a2, 31, 0
; CHECK-NEXT: add.d $a0, $a0, $a1
; CHECK-NEXT: add.d $a0, $a0, $a3
; CHECK-NEXT: sltui $a1, $a4, 1
; CHECK-NEXT: add.d $a0, $a1, $a0
; CHECK-NEXT: add.d $a0, $a0, $a6
; CHECK-NEXT: ld.d $a1, $sp, 8
; CHECK-NEXT: add.d $a0, $a0, $a1
; CHECK-NEXT: ret
%a_ext = zext i8 %a to i64
%b_ext = zext i16 %b to i64
%c_ext = zext i32 %c to i64
%1 = add i64 %a_ext, %b_ext
%2 = add i64 %1, %c_ext
%3 = add i64 %2, %d
%4 = icmp eq i128 %e, %g
%5 = zext i1 %4 to i64
%6 = add i64 %5, %3
%7 = add i64 %6, %f
%8 = add i64 %7, %h
ret i64 %8
}
define i64 @caller_many_scalars() nounwind {
; CHECK-LABEL: caller_many_scalars:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -32
; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
; CHECK-NEXT: ori $a0, $zero, 8
; CHECK-NEXT: st.d $a0, $sp, 8
; CHECK-NEXT: st.d $zero, $sp, 0
; CHECK-NEXT: ori $a0, $zero, 1
; CHECK-NEXT: ori $a1, $zero, 2
; CHECK-NEXT: ori $a2, $zero, 3
; CHECK-NEXT: ori $a3, $zero, 4
; CHECK-NEXT: ori $a4, $zero, 5
; CHECK-NEXT: ori $a6, $zero, 6
; CHECK-NEXT: ori $a7, $zero, 7
; CHECK-NEXT: move $a5, $zero
; CHECK-NEXT: bl %plt(callee_many_scalars)
; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 32
; CHECK-NEXT: ret
%1 = call i64 @callee_many_scalars(i8 1, i16 2, i32 3, i64 4, i128 5, i64 6, i128 7, i64 8)
ret i64 %1
}
;; Check that i256 is passed indirectly.
define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind {
; CHECK-LABEL: callee_large_scalars:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a2, $a1, 24
; CHECK-NEXT: ld.d $a3, $a0, 24
; CHECK-NEXT: xor $a2, $a3, $a2
; CHECK-NEXT: ld.d $a3, $a1, 8
; CHECK-NEXT: ld.d $a4, $a0, 8
; CHECK-NEXT: xor $a3, $a4, $a3
; CHECK-NEXT: or $a2, $a3, $a2
; CHECK-NEXT: ld.d $a3, $a1, 16
; CHECK-NEXT: ld.d $a4, $a0, 16
; CHECK-NEXT: xor $a3, $a4, $a3
; CHECK-NEXT: ld.d $a1, $a1, 0
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: xor $a0, $a0, $a1
; CHECK-NEXT: or $a0, $a0, $a3
; CHECK-NEXT: or $a0, $a0, $a2
; CHECK-NEXT: sltui $a0, $a0, 1
; CHECK-NEXT: ret
%1 = icmp eq i256 %a, %b
%2 = zext i1 %1 to i64
ret i64 %2
}
define i64 @caller_large_scalars() nounwind {
; CHECK-LABEL: caller_large_scalars:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -80
; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
; CHECK-NEXT: ori $a0, $zero, 2
; CHECK-NEXT: st.d $a0, $sp, 0
; CHECK-NEXT: st.d $zero, $sp, 24
; CHECK-NEXT: st.d $zero, $sp, 16
; CHECK-NEXT: st.d $zero, $sp, 8
; CHECK-NEXT: st.d $zero, $sp, 56
; CHECK-NEXT: st.d $zero, $sp, 48
; CHECK-NEXT: st.d $zero, $sp, 40
; CHECK-NEXT: ori $a0, $zero, 1
; CHECK-NEXT: st.d $a0, $sp, 32
; CHECK-NEXT: addi.d $a0, $sp, 32
; CHECK-NEXT: addi.d $a1, $sp, 0
; CHECK-NEXT: bl %plt(callee_large_scalars)
; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 80
; CHECK-NEXT: ret
%1 = call i64 @callee_large_scalars(i256 1, i256 2)
ret i64 %1
}
;; Check that arguments larger than 2*GRLen are handled correctly when their
;; address is passed on the stack rather than in a register.
;; Must keep define on a single line due to an update_llc_test_checks.py limitation
define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i256 %h, i64 %i, i256 %j) nounwind {
; CHECK-LABEL: callee_large_scalars_exhausted_regs:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a0, $sp, 8
; CHECK-NEXT: ld.d $a1, $a0, 24
; CHECK-NEXT: ld.d $a2, $a7, 24
; CHECK-NEXT: xor $a1, $a2, $a1
; CHECK-NEXT: ld.d $a2, $a0, 8
; CHECK-NEXT: ld.d $a3, $a7, 8
; CHECK-NEXT: xor $a2, $a3, $a2
; CHECK-NEXT: or $a1, $a2, $a1
; CHECK-NEXT: ld.d $a2, $a0, 16
; CHECK-NEXT: ld.d $a3, $a7, 16
; CHECK-NEXT: xor $a2, $a3, $a2
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: ld.d $a3, $a7, 0
; CHECK-NEXT: xor $a0, $a3, $a0
; CHECK-NEXT: or $a0, $a0, $a2
; CHECK-NEXT: or $a0, $a0, $a1
; CHECK-NEXT: sltui $a0, $a0, 1
; CHECK-NEXT: ret
%1 = icmp eq i256 %h, %j
%2 = zext i1 %1 to i64
ret i64 %2
}
define i64 @caller_large_scalars_exhausted_regs() nounwind {
; CHECK-LABEL: caller_large_scalars_exhausted_regs:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -96
; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $a0, $sp, 16
; CHECK-NEXT: st.d $a0, $sp, 8
; CHECK-NEXT: ori $a0, $zero, 9
; CHECK-NEXT: st.d $a0, $sp, 0
; CHECK-NEXT: ori $a0, $zero, 10
; CHECK-NEXT: st.d $a0, $sp, 16
; CHECK-NEXT: st.d $zero, $sp, 40
; CHECK-NEXT: st.d $zero, $sp, 32
; CHECK-NEXT: st.d $zero, $sp, 24
; CHECK-NEXT: st.d $zero, $sp, 72
; CHECK-NEXT: st.d $zero, $sp, 64
; CHECK-NEXT: st.d $zero, $sp, 56
; CHECK-NEXT: ori $a0, $zero, 8
; CHECK-NEXT: st.d $a0, $sp, 48
; CHECK-NEXT: ori $a0, $zero, 1
; CHECK-NEXT: ori $a1, $zero, 2
; CHECK-NEXT: ori $a2, $zero, 3
; CHECK-NEXT: ori $a3, $zero, 4
; CHECK-NEXT: ori $a4, $zero, 5
; CHECK-NEXT: ori $a5, $zero, 6
; CHECK-NEXT: ori $a6, $zero, 7
; CHECK-NEXT: addi.d $a7, $sp, 48
; CHECK-NEXT: bl %plt(callee_large_scalars_exhausted_regs)
; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
%1 = call i64 @callee_large_scalars_exhausted_regs(
i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i256 8, i64 9,
i256 10)
ret i64 %1
}
;; Check large struct arguments, which are passed byval.
%struct.large = type { i64, i64, i64, i64 }
define i64 @callee_large_struct(ptr byval(%struct.large) align 8 %a) nounwind {
; CHECK-LABEL: callee_large_struct:
; CHECK: # %bb.0:
; CHECK-NEXT: ld.d $a1, $a0, 24
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: add.d $a0, $a0, $a1
; CHECK-NEXT: ret
%1 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 0
%2 = getelementptr inbounds %struct.large, ptr %a, i64 0, i32 3
%3 = load i64, ptr %1
%4 = load i64, ptr %2
%5 = add i64 %3, %4
ret i64 %5
}
define i64 @caller_large_struct() nounwind {
; CHECK-LABEL: caller_large_struct:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -80
; CHECK-NEXT: st.d $ra, $sp, 72 # 8-byte Folded Spill
; CHECK-NEXT: ori $a0, $zero, 1
; CHECK-NEXT: st.d $a0, $sp, 40
; CHECK-NEXT: st.d $a0, $sp, 8
; CHECK-NEXT: ori $a0, $zero, 2
; CHECK-NEXT: st.d $a0, $sp, 48
; CHECK-NEXT: st.d $a0, $sp, 16
; CHECK-NEXT: ori $a0, $zero, 3
; CHECK-NEXT: st.d $a0, $sp, 56
; CHECK-NEXT: st.d $a0, $sp, 24
; CHECK-NEXT: ori $a0, $zero, 4
; CHECK-NEXT: st.d $a0, $sp, 64
; CHECK-NEXT: st.d $a0, $sp, 32
; CHECK-NEXT: addi.d $a0, $sp, 8
; CHECK-NEXT: bl %plt(callee_large_struct)
; CHECK-NEXT: ld.d $ra, $sp, 72 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 80
; CHECK-NEXT: ret
%ls = alloca %struct.large, align 8
%a = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 0
store i64 1, ptr %a
%b = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 1
store i64 2, ptr %b
%c = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 2
store i64 3, ptr %c
%d = getelementptr inbounds %struct.large, ptr %ls, i64 0, i32 3
store i64 4, ptr %d
%1 = call i64 @callee_large_struct(ptr byval(%struct.large) align 8 %ls)
ret i64 %1
}
;; Check returning a scalar whose size is 2*GRLen.
define i128 @callee_small_scalar_ret() nounwind {
; CHECK-LABEL: callee_small_scalar_ret:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.w $a0, $zero, -1
; CHECK-NEXT: move $a1, $a0
; CHECK-NEXT: ret
ret i128 -1
}
define i64 @caller_small_scalar_ret() nounwind {
; CHECK-LABEL: caller_small_scalar_ret:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; CHECK-NEXT: bl %plt(callee_small_scalar_ret)
; CHECK-NEXT: addi.w $a2, $zero, -2
; CHECK-NEXT: xor $a0, $a0, $a2
; CHECK-NEXT: orn $a0, $a0, $a1
; CHECK-NEXT: sltui $a0, $a0, 1
; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
%1 = call i128 @callee_small_scalar_ret()
%2 = icmp eq i128 -2, %1
%3 = zext i1 %2 to i64
ret i64 %3
}
;; Check returning a struct whose size is 2*GRLen.
%struct.small = type { i64, ptr }
define %struct.small @callee_small_struct_ret() nounwind {
; CHECK-LABEL: callee_small_struct_ret:
; CHECK: # %bb.0:
; CHECK-NEXT: ori $a0, $zero, 1
; CHECK-NEXT: move $a1, $zero
; CHECK-NEXT: ret
ret %struct.small { i64 1, ptr null }
}
define i64 @caller_small_struct_ret() nounwind {
; CHECK-LABEL: caller_small_struct_ret:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -16
; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
; CHECK-NEXT: bl %plt(callee_small_struct_ret)
; CHECK-NEXT: add.d $a0, $a0, $a1
; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
%1 = call %struct.small @callee_small_struct_ret()
%2 = extractvalue %struct.small %1, 0
%3 = extractvalue %struct.small %1, 1
%4 = ptrtoint ptr %3 to i64
%5 = add i64 %2, %4
ret i64 %5
}
;; Check returning a scalar whose size is more than 2*GRLen.
define i256 @callee_large_scalar_ret() nounwind {
; CHECK-LABEL: callee_large_scalar_ret:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.w $a1, $zero, -1
; CHECK-NEXT: st.d $a1, $a0, 24
; CHECK-NEXT: st.d $a1, $a0, 16
; CHECK-NEXT: st.d $a1, $a0, 8
; CHECK-NEXT: lu12i.w $a1, -30141
; CHECK-NEXT: ori $a1, $a1, 747
; CHECK-NEXT: st.d $a1, $a0, 0
; CHECK-NEXT: ret
ret i256 -123456789
}
define void @caller_large_scalar_ret() nounwind {
; CHECK-LABEL: caller_large_scalar_ret:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -48
; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $a0, $sp, 0
; CHECK-NEXT: bl %plt(callee_large_scalar_ret)
; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 48
; CHECK-NEXT: ret
%1 = call i256 @callee_large_scalar_ret()
ret void
}
;; Check returning a struct whose size is more than 2*GRLen.
define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result) nounwind {
; CHECK-LABEL: callee_large_struct_ret:
; CHECK: # %bb.0:
; CHECK-NEXT: ori $a1, $zero, 4
; CHECK-NEXT: st.d $a1, $a0, 24
; CHECK-NEXT: ori $a1, $zero, 3
; CHECK-NEXT: st.d $a1, $a0, 16
; CHECK-NEXT: ori $a1, $zero, 2
; CHECK-NEXT: st.d $a1, $a0, 8
; CHECK-NEXT: ori $a1, $zero, 1
; CHECK-NEXT: st.d $a1, $a0, 0
; CHECK-NEXT: ret
%a = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 0
store i64 1, ptr %a, align 4
%b = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 1
store i64 2, ptr %b, align 4
%c = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 2
store i64 3, ptr %c, align 4
%d = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 3
store i64 4, ptr %d, align 4
ret void
}
define i64 @caller_large_struct_ret() nounwind {
; CHECK-LABEL: caller_large_struct_ret:
; CHECK: # %bb.0:
; CHECK-NEXT: addi.d $sp, $sp, -48
; CHECK-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $a0, $sp, 8
; CHECK-NEXT: bl %plt(callee_large_struct_ret)
; CHECK-NEXT: ld.d $a0, $sp, 32
; CHECK-NEXT: ld.d $a1, $sp, 8
; CHECK-NEXT: add.d $a0, $a1, $a0
; CHECK-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload
; CHECK-NEXT: addi.d $sp, $sp, 48
; CHECK-NEXT: ret
%1 = alloca %struct.large
call void @callee_large_struct_ret(ptr sret(%struct.large) %1)
%2 = getelementptr inbounds %struct.large, ptr %1, i64 0, i32 0
%3 = load i64, ptr %2
%4 = getelementptr inbounds %struct.large, ptr %1, i64 0, i32 3
%5 = load i64, ptr %4
%6 = add i64 %3, %5
ret i64 %6
}