mirror of
https://github.com/llvm/llvm-project.git
synced 2025-05-03 10:46:08 +00:00

The default legalization of unsupported vector types is to promote the integers in each lane, which leads to extra sign or zero extending and masking when moving data into and out of vectors. Switch our preferred type legalization from the default to vector widening, which keeps the data in the low lanes of the vector rather than in the low bits of each lane. The unused high lanes can be ignored. Half-wide vectors are now loaded from memory into the low 64 bits of the v128 rather than spread out among the lanes. As a result, v128.load64_splat is a much more common operation, so add new patterns to support it. Differential Revision: https://reviews.llvm.org/D107502
202 lines
7.2 KiB
LLVM
202 lines
7.2 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mattr=+simd128 | FileCheck %s

;; Test that SIMD extending operations can be successfully selected.
;; Each function below extracts half of a 128-bit vector with a
;; shufflevector and then sign- or zero-extends the extracted lanes; the
;; expected output for the exact low/high halves is a single
;; extend_low / extend_high instruction.

target triple = "wasm32-unknown-unknown"
|
|
|
|
;; Sign-extending lanes 0-7 (the low half) of a <16 x i8> to <8 x i16>
;; is expected to select a single i16x8.extend_low_i8x16_s.
define <8 x i16> @extend_low_i8x16_s(<16 x i8> %v) {
; CHECK-LABEL: extend_low_i8x16_s:
; CHECK: .functype extend_low_i8x16_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extend_low_i8x16_s
; CHECK-NEXT: # fallthrough-return
  %low = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %extended = sext <8 x i8> %low to <8 x i16>
  ret <8 x i16> %extended
}
|
|
|
|
;; Zero-extending lanes 0-7 (the low half) of a <16 x i8> to <8 x i16>
;; is expected to select a single i16x8.extend_low_i8x16_u.
define <8 x i16> @extend_low_i8x16_u(<16 x i8> %v) {
; CHECK-LABEL: extend_low_i8x16_u:
; CHECK: .functype extend_low_i8x16_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extend_low_i8x16_u
; CHECK-NEXT: # fallthrough-return
  %low = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %extended = zext <8 x i8> %low to <8 x i16>
  ret <8 x i16> %extended
}
|
|
|
|
;; Sign-extending lanes 8-15 (the high half) of a <16 x i8> to <8 x i16>
;; is expected to select a single i16x8.extend_high_i8x16_s.
define <8 x i16> @extend_high_i8x16_s(<16 x i8> %v) {
; CHECK-LABEL: extend_high_i8x16_s:
; CHECK: .functype extend_high_i8x16_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extend_high_i8x16_s
; CHECK-NEXT: # fallthrough-return
  %high = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %extended = sext <8 x i8> %high to <8 x i16>
  ret <8 x i16> %extended
}
|
|
|
|
;; Zero-extending lanes 8-15 (the high half) of a <16 x i8> to <8 x i16>
;; is expected to select a single i16x8.extend_high_i8x16_u.
define <8 x i16> @extend_high_i8x16_u(<16 x i8> %v) {
; CHECK-LABEL: extend_high_i8x16_u:
; CHECK: .functype extend_high_i8x16_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i16x8.extend_high_i8x16_u
; CHECK-NEXT: # fallthrough-return
  %high = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %extended = zext <8 x i8> %high to <8 x i16>
  ret <8 x i16> %extended
}
|
|
|
|
;; Sign-extending lanes 0-3 (the low half) of an <8 x i16> to <4 x i32>
;; is expected to select a single i32x4.extend_low_i16x8_s.
define <4 x i32> @extend_low_i16x8_s(<8 x i16> %v) {
; CHECK-LABEL: extend_low_i16x8_s:
; CHECK: .functype extend_low_i16x8_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.extend_low_i16x8_s
; CHECK-NEXT: # fallthrough-return
  %low = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extended = sext <4 x i16> %low to <4 x i32>
  ret <4 x i32> %extended
}
|
|
|
|
;; Zero-extending lanes 0-3 (the low half) of an <8 x i16> to <4 x i32>
;; is expected to select a single i32x4.extend_low_i16x8_u.
define <4 x i32> @extend_low_i16x8_u(<8 x i16> %v) {
; CHECK-LABEL: extend_low_i16x8_u:
; CHECK: .functype extend_low_i16x8_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.extend_low_i16x8_u
; CHECK-NEXT: # fallthrough-return
  %low = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %extended = zext <4 x i16> %low to <4 x i32>
  ret <4 x i32> %extended
}
|
|
|
|
;; Sign-extending lanes 4-7 (the high half) of an <8 x i16> to <4 x i32>
;; is expected to select a single i32x4.extend_high_i16x8_s.
define <4 x i32> @extend_high_i16x8_s(<8 x i16> %v) {
; CHECK-LABEL: extend_high_i16x8_s:
; CHECK: .functype extend_high_i16x8_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.extend_high_i16x8_s
; CHECK-NEXT: # fallthrough-return
  %high = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %extended = sext <4 x i16> %high to <4 x i32>
  ret <4 x i32> %extended
}
|
|
|
|
;; Zero-extending lanes 4-7 (the high half) of an <8 x i16> to <4 x i32>
;; is expected to select a single i32x4.extend_high_i16x8_u.
define <4 x i32> @extend_high_i16x8_u(<8 x i16> %v) {
; CHECK-LABEL: extend_high_i16x8_u:
; CHECK: .functype extend_high_i16x8_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32x4.extend_high_i16x8_u
; CHECK-NEXT: # fallthrough-return
  %high = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %extended = zext <4 x i16> %high to <4 x i32>
  ret <4 x i32> %extended
}
|
|
|
|
;; Sign-extending lanes 0-1 (the low half) of a <4 x i32> to <2 x i64>
;; is expected to select a single i64x2.extend_low_i32x4_s.
define <2 x i64> @extend_low_i32x4_s(<4 x i32> %v) {
; CHECK-LABEL: extend_low_i32x4_s:
; CHECK: .functype extend_low_i32x4_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.extend_low_i32x4_s
; CHECK-NEXT: # fallthrough-return
  %low = shufflevector <4 x i32> %v, <4 x i32> undef,
           <2 x i32> <i32 0, i32 1>
  %extended = sext <2 x i32> %low to <2 x i64>
  ret <2 x i64> %extended
}
|
|
|
|
;; Zero-extending lanes 0-1 (the low half) of a <4 x i32> to <2 x i64>
;; is expected to select a single i64x2.extend_low_i32x4_u.
define <2 x i64> @extend_low_i32x4_u(<4 x i32> %v) {
; CHECK-LABEL: extend_low_i32x4_u:
; CHECK: .functype extend_low_i32x4_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.extend_low_i32x4_u
; CHECK-NEXT: # fallthrough-return
  %low = shufflevector <4 x i32> %v, <4 x i32> undef,
           <2 x i32> <i32 0, i32 1>
  %extended = zext <2 x i32> %low to <2 x i64>
  ret <2 x i64> %extended
}
|
|
|
|
;; Sign-extending lanes 2-3 (the high half) of a <4 x i32> to <2 x i64>
;; is expected to select a single i64x2.extend_high_i32x4_s.
define <2 x i64> @extend_high_i32x4_s(<4 x i32> %v) {
; CHECK-LABEL: extend_high_i32x4_s:
; CHECK: .functype extend_high_i32x4_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.extend_high_i32x4_s
; CHECK-NEXT: # fallthrough-return
  %high = shufflevector <4 x i32> %v, <4 x i32> undef,
           <2 x i32> <i32 2, i32 3>
  %extended = sext <2 x i32> %high to <2 x i64>
  ret <2 x i64> %extended
}
|
|
|
|
;; Zero-extending lanes 2-3 (the high half) of a <4 x i32> to <2 x i64>
;; is expected to select a single i64x2.extend_high_i32x4_u.
define <2 x i64> @extend_high_i32x4_u(<4 x i32> %v) {
; CHECK-LABEL: extend_high_i32x4_u:
; CHECK: .functype extend_high_i32x4_u (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i64x2.extend_high_i32x4_u
; CHECK-NEXT: # fallthrough-return
  %high = shufflevector <4 x i32> %v, <4 x i32> undef,
           <2 x i32> <i32 2, i32 3>
  %extended = zext <2 x i32> %high to <2 x i64>
  ret <2 x i64> %extended
}
|
|
|
|
;; Also test that similar patterns with offsets not corresponding to
;; the low or high half are correctly expanded.
|
|
|
|
;; The shuffle selects lanes 1-8, which is neither the low nor the high
;; half, so no extend_low/extend_high instruction is expected. The
;; expected expansion is an i8x16.shuffle followed by an i16x8.shl and an
;; i16x8.shr_s, both by 8.
define <8 x i16> @extend_lowish_i8x16_s(<16 x i8> %v) {
; CHECK-LABEL: extend_lowish_i8x16_s:
; CHECK: .functype extend_lowish_i8x16_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.shuffle 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 0
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i16x8.shl
; CHECK-NEXT: i32.const 8
; CHECK-NEXT: i16x8.shr_s
; CHECK-NEXT: # fallthrough-return
  %lowish = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %extended = sext <8 x i8> %lowish to <8 x i16>
  ret <8 x i16> %extended
}
|
|
|
|
;; The shuffle selects lanes 1-4, which is neither the low nor the high
;; half, so no extend_low/extend_high instruction is expected. The
;; expected expansion is an i8x16.shuffle followed by an i32x4.shl and an
;; i32x4.shr_s, both by 16.
define <4 x i32> @extend_lowish_i16x8_s(<8 x i16> %v) {
; CHECK-LABEL: extend_lowish_i16x8_s:
; CHECK: .functype extend_lowish_i16x8_s (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: local.get 0
; CHECK-NEXT: local.get 0
; CHECK-NEXT: i8x16.shuffle 2, 3, 0, 0, 4, 5, 0, 0, 6, 7, 0, 0, 8, 9, 0, 0
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32x4.shl
; CHECK-NEXT: i32.const 16
; CHECK-NEXT: i32x4.shr_s
; CHECK-NEXT: # fallthrough-return
  %lowish = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  %extended = sext <4 x i16> %lowish to <4 x i32>
  ret <4 x i32> %extended
}
|