; llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32,RV32V
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64V
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+zve32f,+zvl128b -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32,RV32ZVE32F
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+zve32f,+zvl128b -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64ZVE32F
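
; Masked-gather (llvm.masked.gather) lowering for fixed-length vectors.
; The +v configurations select indexed loads (vluxei32/vluxei64). RV32ZVE32F
; can still index through 32-bit pointers, but RV64ZVE32F has no legal 64-bit
; element type for the pointer vector, so each gather is expanded into a
; scalar branch sequence keyed off the individual mask bits.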
declare <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i8>)
define <1 x i8> @mgather_v1i8(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i8> %passthru) {
; RV32V-LABEL: mgather_v1i8:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1i8:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB0_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vle8.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB0_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x i8> @llvm.masked.gather.v1i8.v1p0(<1 x ptr> %ptrs, i32 1, <1 x i1> %m, <1 x i8> %passthru)
ret <1 x i8> %v
}
declare <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i8>)
define <2 x i8> @mgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB1_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB1_4
; RV64ZVE32F-NEXT: .LBB1_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB1_3: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB1_2
; RV64ZVE32F-NEXT: .LBB1_4: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
%v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
ret <2 x i8> %v
}
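
; Gathers whose only use is a sign- or zero-extension: the extend is emitted as
; a single vsext/vzext of the gathered vector rather than being folded into the
; per-element loads.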
define <2 x i16> @mgather_v2i8_sextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_sextload_v2i16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV32V-NEXT: vsext.vf2 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_sextload_v2i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV64V-NEXT: vsext.vf2 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB2_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB2_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB2_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB2_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vsext.vf2 v9, v8
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
%ev = sext <2 x i8> %v to <2 x i16>
ret <2 x i16> %ev
}
define <2 x i16> @mgather_v2i8_zextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_zextload_v2i16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV32V-NEXT: vzext.vf2 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_zextload_v2i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
; RV64V-NEXT: vzext.vf2 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf2 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB3_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB3_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB3_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB3_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vzext.vf2 v9, v8
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
%ev = zext <2 x i8> %v to <2 x i16>
ret <2 x i16> %ev
}
define <2 x i32> @mgather_v2i8_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_sextload_v2i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT: vsext.vf4 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_sextload_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT: vsext.vf4 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB4_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB4_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB4_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB4_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vsext.vf4 v9, v8
; RV64ZVE32F-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-NEXT: ret
%v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
%ev = sext <2 x i8> %v to <2 x i32>
ret <2 x i32> %ev
}
define <2 x i32> @mgather_v2i8_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_zextload_v2i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT: vzext.vf4 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_zextload_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT: vzext.vf4 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vzext.vf4 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB5_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB5_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB5_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB5_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vzext.vf4 v9, v8
; RV64ZVE32F-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-NEXT: ret
%v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
%ev = zext <2 x i8> %v to <2 x i32>
ret <2 x i32> %ev
}
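
; Extending the gathered bytes to i64 exceeds ELEN for the zve32f
; configurations, so those results are moved to scalar registers: RV32ZVE32F
; returns the <2 x i64> indirectly through a0, RV64ZVE32F in a0/a1.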
define <2 x i64> @mgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_sextload_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vsext.vf8 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_sextload_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vsext.vf8 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_sextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: srai a2, a1, 31
; RV32ZVE32F-NEXT: vmv.x.s a3, v9
; RV32ZVE32F-NEXT: srai a4, a3, 31
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_sextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB6_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB6_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB6_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB6_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: ret
%v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
%ev = sext <2 x i8> %v to <2 x i64>
ret <2 x i64> %ev
}
define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) {
; RV32V-LABEL: mgather_v2i8_zextload_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vzext.vf8 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i8_zextload_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e8, mf8, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vzext.vf8 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i8_zextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: andi a1, a1, 255
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: andi a2, a2, 255
; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: sw zero, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB7_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB7_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB7_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lbu a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB7_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: andi a0, a0, 255
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: ret
%v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru)
%ev = zext <2 x i8> %v to <2 x i64>
ret <2 x i64> %ev
}
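
; Four- and eight-element gathers: in the RV64ZVE32F expansion each lane is an
; andi/bnez test of its mask bit followed by a scalar load and a vslideup
; insertion into the result vector.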
declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i8>)
define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru) {
; RV32-LABEL: mgather_v4i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v4i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v4i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB8_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB8_6
; RV64ZVE32F-NEXT: .LBB8_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB8_7
; RV64ZVE32F-NEXT: .LBB8_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB8_8
; RV64ZVE32F-NEXT: .LBB8_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB8_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB8_2
; RV64ZVE32F-NEXT: .LBB8_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB8_3
; RV64ZVE32F-NEXT: .LBB8_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB8_4
; RV64ZVE32F-NEXT: .LBB8_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %m, <4 x i8> %passthru)
ret <4 x i8> %v
}
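
; An all-ones mask folds to the unmasked indexed load (no v0.t) on the +v
; configurations.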
define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) {
; RV32-LABEL: mgather_truemask_v4i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64V-NEXT: vluxei64.v v10, (zero), v8
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v9
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: beqz zero, .LBB9_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB9_6
; RV64ZVE32F-NEXT: .LBB9_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB9_7
; RV64ZVE32F-NEXT: .LBB9_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB9_8
; RV64ZVE32F-NEXT: .LBB9_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB9_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB9_2
; RV64ZVE32F-NEXT: .LBB9_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB9_3
; RV64ZVE32F-NEXT: .LBB9_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf4, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB9_4
; RV64ZVE32F-NEXT: .LBB9_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%mhead = insertelement <4 x i1> poison, i1 1, i32 0
%mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> %mtrue, <4 x i8> %passthru)
ret <4 x i8> %v
}
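
; An all-zeroes mask makes the gather a no-op; the passthru operand is returned
; unchanged.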
define <4 x i8> @mgather_falsemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) {
; RV32-LABEL: mgather_falsemask_v4i8:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_falsemask_v4i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ret
%v = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> zeroinitializer, <4 x i8> %passthru)
ret <4 x i8> %v
}
declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>)
define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) {
; RV32-LABEL: mgather_v8i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v8i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v8i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB11_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB11_10
; RV64ZVE32F-NEXT: .LBB11_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB11_11
; RV64ZVE32F-NEXT: .LBB11_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB11_12
; RV64ZVE32F-NEXT: .LBB11_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB11_13
; RV64ZVE32F-NEXT: .LBB11_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB11_14
; RV64ZVE32F-NEXT: .LBB11_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: bnez a2, .LBB11_15
; RV64ZVE32F-NEXT: .LBB11_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB11_16
; RV64ZVE32F-NEXT: .LBB11_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB11_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB11_2
; RV64ZVE32F-NEXT: .LBB11_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB11_3
; RV64ZVE32F-NEXT: .LBB11_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB11_4
; RV64ZVE32F-NEXT: .LBB11_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB11_5
; RV64ZVE32F-NEXT: .LBB11_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB11_6
; RV64ZVE32F-NEXT: .LBB11_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB11_7
; RV64ZVE32F-NEXT: .LBB11_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB11_8
; RV64ZVE32F-NEXT: .LBB11_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
; RV64ZVE32F-NEXT: ret
%v = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %m, <8 x i8> %passthru)
ret <8 x i8> %v
}
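
; Gathers formed from a getelementptr of a shared base and a vector of i8
; offsets. The +v configurations sign-extend the indices to pointer width and
; issue a single indexed load off the base; RV64ZVE32F extracts each index with
; vslidedown and adds it to the base address in scalar code.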
define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i8> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vsetvli zero, zero, e8, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB12_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB12_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB12_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB12_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB12_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB12_13
; RV64ZVE32F-NEXT: .LBB12_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB12_14
; RV64ZVE32F-NEXT: .LBB12_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB12_9
; RV64ZVE32F-NEXT: .LBB12_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB12_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB12_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB12_16
; RV64ZVE32F-NEXT: .LBB12_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB12_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB12_6
; RV64ZVE32F-NEXT: .LBB12_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB12_7
; RV64ZVE32F-NEXT: .LBB12_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB12_8
; RV64ZVE32F-NEXT: j .LBB12_9
; RV64ZVE32F-NEXT: .LBB12_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB12_11
; RV64ZVE32F-NEXT: .LBB12_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
%v = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %ptrs, i32 1, <8 x i1> %m, <8 x i8> %passthru)
ret <8 x i8> %v
}
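
; The same coverage for i16 element types (alignment 2).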
declare <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i16>)
define <1 x i16> @mgather_v1i16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i16> %passthru) {
; RV32V-LABEL: mgather_v1i16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB13_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vle16.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB13_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x i16> @llvm.masked.gather.v1i16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x i16> %passthru)
ret <1 x i16> %v
}
declare <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i16>)
define <2 x i16> @mgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB14_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB14_4
; RV64ZVE32F-NEXT: .LBB14_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB14_3: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB14_2
; RV64ZVE32F-NEXT: .LBB14_4: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
%v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
ret <2 x i16> %v
}
define <2 x i32> @mgather_v2i16_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_sextload_v2i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT: vsext.vf2 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i16_sextload_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT: vsext.vf2 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i16_sextload_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_sextload_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB15_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB15_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB15_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB15_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vsext.vf2 v9, v8
; RV64ZVE32F-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-NEXT: ret
%v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
%ev = sext <2 x i16> %v to <2 x i32>
ret <2 x i32> %ev
}
define <2 x i32> @mgather_v2i16_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_zextload_v2i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV32V-NEXT: vzext.vf2 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i16_zextload_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; RV64V-NEXT: vzext.vf2 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i16_zextload_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vzext.vf2 v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_zextload_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB16_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB16_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB16_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB16_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vzext.vf2 v9, v8
; RV64ZVE32F-NEXT: vmv.v.v v8, v9
; RV64ZVE32F-NEXT: ret
%v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
%ev = zext <2 x i16> %v to <2 x i32>
ret <2 x i32> %ev
}
define <2 x i64> @mgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_sextload_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vsext.vf4 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i16_sextload_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vsext.vf4 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i16_sextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: srai a2, a1, 31
; RV32ZVE32F-NEXT: vmv.x.s a3, v9
; RV32ZVE32F-NEXT: srai a4, a3, 31
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_sextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB17_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB17_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB17_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB17_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: ret
%v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
%ev = sext <2 x i16> %v to <2 x i64>
ret <2 x i64> %ev
}
define <2 x i64> @mgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthru) {
; RV32V-LABEL: mgather_v2i16_zextload_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vzext.vf4 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i16_zextload_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vzext.vf4 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i16_zextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lui a2, 16
; RV32ZVE32F-NEXT: addi a2, a2, -1
; RV32ZVE32F-NEXT: and a1, a1, a2
; RV32ZVE32F-NEXT: vmv.x.s a3, v9
; RV32ZVE32F-NEXT: and a2, a3, a2
; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: sw zero, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i16_zextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB18_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB18_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB18_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lh a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB18_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: lui a1, 16
; RV64ZVE32F-NEXT: addiw a1, a1, -1
; RV64ZVE32F-NEXT: and a0, a0, a1
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: and a1, a2, a1
; RV64ZVE32F-NEXT: ret
%v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru)
%ev = zext <2 x i16> %v to <2 x i64>
ret <2 x i64> %ev
}
declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i16> %passthru) {
; RV32-LABEL: mgather_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v4i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v4i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB19_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB19_6
; RV64ZVE32F-NEXT: .LBB19_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB19_7
; RV64ZVE32F-NEXT: .LBB19_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB19_8
; RV64ZVE32F-NEXT: .LBB19_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB19_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB19_2
; RV64ZVE32F-NEXT: .LBB19_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB19_3
; RV64ZVE32F-NEXT: .LBB19_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB19_4
; RV64ZVE32F-NEXT: .LBB19_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x i16> %passthru)
ret <4 x i16> %v
}
define <4 x i16> @mgather_truemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) {
; RV32-LABEL: mgather_truemask_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT: vluxei64.v v10, (zero), v8
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v9
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: beqz zero, .LBB20_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB20_6
; RV64ZVE32F-NEXT: .LBB20_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB20_7
; RV64ZVE32F-NEXT: .LBB20_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB20_8
; RV64ZVE32F-NEXT: .LBB20_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB20_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB20_2
; RV64ZVE32F-NEXT: .LBB20_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB20_3
; RV64ZVE32F-NEXT: .LBB20_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB20_4
; RV64ZVE32F-NEXT: .LBB20_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%mhead = insertelement <4 x i1> poison, i1 1, i32 0
%mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %mtrue, <4 x i16> %passthru)
ret <4 x i16> %v
}
define <4 x i16> @mgather_falsemask_v4i16(<4 x ptr> %ptrs, <4 x i16> %passthru) {
; RV32-LABEL: mgather_falsemask_v4i16:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_falsemask_v4i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ret
%v = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x i16> %passthru)
ret <4 x i16> %v
}
declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>)
define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthru) {
; RV32-LABEL: mgather_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB22_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB22_10
; RV64ZVE32F-NEXT: .LBB22_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB22_11
; RV64ZVE32F-NEXT: .LBB22_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB22_12
; RV64ZVE32F-NEXT: .LBB22_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB22_13
; RV64ZVE32F-NEXT: .LBB22_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB22_14
; RV64ZVE32F-NEXT: .LBB22_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: bnez a2, .LBB22_15
; RV64ZVE32F-NEXT: .LBB22_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB22_16
; RV64ZVE32F-NEXT: .LBB22_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB22_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB22_2
; RV64ZVE32F-NEXT: .LBB22_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB22_3
; RV64ZVE32F-NEXT: .LBB22_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB22_4
; RV64ZVE32F-NEXT: .LBB22_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB22_5
; RV64ZVE32F-NEXT: .LBB22_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB22_6
; RV64ZVE32F-NEXT: .LBB22_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB22_7
; RV64ZVE32F-NEXT: .LBB22_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB22_8
; RV64ZVE32F-NEXT: .LBB22_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
; RV64ZVE32F-NEXT: ret
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
ret <8 x i16> %v
}
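
; i16 gathers indexed by i8 offsets: the widened indices are scaled by the
; element size (the vadd.vv doubles them) before the indexed load, and the
; RV64ZVE32F expansion shifts each scalar index left by one instead.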
define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i8_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB23_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB23_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB23_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB23_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB23_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB23_13
; RV64ZVE32F-NEXT: .LBB23_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB23_14
; RV64ZVE32F-NEXT: .LBB23_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB23_9
; RV64ZVE32F-NEXT: .LBB23_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB23_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB23_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB23_16
; RV64ZVE32F-NEXT: .LBB23_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB23_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB23_6
; RV64ZVE32F-NEXT: .LBB23_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB23_7
; RV64ZVE32F-NEXT: .LBB23_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB23_8
; RV64ZVE32F-NEXT: j .LBB23_9
; RV64ZVE32F-NEXT: .LBB23_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB23_11
; RV64ZVE32F-NEXT: .LBB23_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
ret <8 x i16> %v
}
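
; Same base+index i16 gather as above, but with the i8 indices explicitly
; sign-extended to i16 in the IR before the getelementptr.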
define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_v8i8_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB24_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB24_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB24_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB24_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB24_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB24_13
; RV64ZVE32F-NEXT: .LBB24_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB24_14
; RV64ZVE32F-NEXT: .LBB24_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB24_9
; RV64ZVE32F-NEXT: .LBB24_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB24_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB24_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB24_16
; RV64ZVE32F-NEXT: .LBB24_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB24_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB24_6
; RV64ZVE32F-NEXT: .LBB24_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB24_7
; RV64ZVE32F-NEXT: .LBB24_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB24_8
; RV64ZVE32F-NEXT: j .LBB24_9
; RV64ZVE32F-NEXT: .LBB24_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB24_11
; RV64ZVE32F-NEXT: .LBB24_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
ret <8 x i16> %v
}
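
; Same gather with the i8 indices zero-extended to i16; the scalar
; RV64ZVE32F path masks each extracted index with 255.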
define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vzext.vf4 v10, v8
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vzext.vf8 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB25_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB25_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB25_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB25_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB25_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB25_13
; RV64ZVE32F-NEXT: .LBB25_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB25_14
; RV64ZVE32F-NEXT: .LBB25_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB25_9
; RV64ZVE32F-NEXT: .LBB25_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB25_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB25_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB25_16
; RV64ZVE32F-NEXT: .LBB25_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB25_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB25_6
; RV64ZVE32F-NEXT: .LBB25_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB25_7
; RV64ZVE32F-NEXT: .LBB25_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB25_8
; RV64ZVE32F-NEXT: j .LBB25_9
; RV64ZVE32F-NEXT: .LBB25_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB25_11
; RV64ZVE32F-NEXT: .LBB25_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
ret <8 x i16> %v
}
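
; Base+index gather of <8 x i16> using native i16 indices.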
define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vwadd.vv v10, v8, v8
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB26_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB26_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB26_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB26_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB26_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB26_13
; RV64ZVE32F-NEXT: .LBB26_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB26_14
; RV64ZVE32F-NEXT: .LBB26_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB26_9
; RV64ZVE32F-NEXT: .LBB26_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB26_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB26_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB26_16
; RV64ZVE32F-NEXT: .LBB26_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB26_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB26_6
; RV64ZVE32F-NEXT: .LBB26_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB26_7
; RV64ZVE32F-NEXT: .LBB26_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB26_8
; RV64ZVE32F-NEXT: j .LBB26_9
; RV64ZVE32F-NEXT: .LBB26_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB26_11
; RV64ZVE32F-NEXT: .LBB26_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
ret <8 x i16> %v
}
declare <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i32>)
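
; Single-element i32 gather; RV64ZVE32F only has to test the one mask bit
; with vfirst.m.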
define <1 x i32> @mgather_v1i32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i32> %passthru) {
; RV32V-LABEL: mgather_v1i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv.v.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB27_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vle32.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB27_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x i32> @llvm.masked.gather.v1i32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x i32> %passthru)
ret <1 x i32> %v
}
declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
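
; Two-element i32 gather; RV64ZVE32F branches per mask bit and loads with
; scalar lw.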
define <2 x i32> @mgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) {
; RV32V-LABEL: mgather_v2i32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv.v.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB28_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB28_4
; RV64ZVE32F-NEXT: .LBB28_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB28_3: # %cond.load
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB28_2
; RV64ZVE32F-NEXT: .LBB28_4: # %cond.load1
; RV64ZVE32F-NEXT: lw a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
%v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru)
ret <2 x i32> %v
}
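
; <2 x i32> gather whose result is sign-extended to <2 x i64>.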
define <2 x i64> @mgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) {
; RV32V-LABEL: mgather_v2i32_sextload_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vsext.vf2 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i32_sextload_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vsext.vf2 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i32_sextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: srai a1, a1, 31
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: srai a2, a2, 31
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vse32.v v9, (a0)
; RV32ZVE32F-NEXT: addi a3, a0, 8
; RV32ZVE32F-NEXT: vse32.v v8, (a3)
; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: sw a1, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i32_sextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB29_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB29_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB29_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lw a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB29_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: ret
%v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru)
%ev = sext <2 x i32> %v to <2 x i64>
ret <2 x i64> %ev
}
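
; <2 x i32> gather whose result is zero-extended to <2 x i64>.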
define <2 x i64> @mgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthru) {
; RV32V-LABEL: mgather_v2i32_zextload_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV32V-NEXT: vzext.vf2 v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i32_zextload_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; RV64V-NEXT: vzext.vf2 v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i32_zextload_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV32ZVE32F-NEXT: sw zero, 12(a0)
; RV32ZVE32F-NEXT: sw zero, 4(a0)
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vse32.v v9, (a0)
; RV32ZVE32F-NEXT: addi a0, a0, 8
; RV32ZVE32F-NEXT: vse32.v v8, (a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i32_zextload_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB30_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: .LBB30_2: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB30_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: lw a0, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: .LBB30_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a0, v8
; RV64ZVE32F-NEXT: slli a0, a0, 32
; RV64ZVE32F-NEXT: srli a0, a0, 32
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 32
; RV64ZVE32F-NEXT: srli a1, a1, 32
; RV64ZVE32F-NEXT: ret
%v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru)
%ev = zext <2 x i32> %v to <2 x i64>
ret <2 x i64> %ev
}
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
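
; Four-element i32 gather with a variable mask.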
define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i32> %passthru) {
; RV32-LABEL: mgather_v4i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v4i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v4i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB31_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB31_6
; RV64ZVE32F-NEXT: .LBB31_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB31_7
; RV64ZVE32F-NEXT: .LBB31_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB31_8
; RV64ZVE32F-NEXT: .LBB31_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB31_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB31_2
; RV64ZVE32F-NEXT: .LBB31_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB31_3
; RV64ZVE32F-NEXT: .LBB31_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB31_4
; RV64ZVE32F-NEXT: .LBB31_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x i32> %passthru)
ret <4 x i32> %v
}
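
; All-ones mask: the RV32/RV64V lowerings use an unmasked vluxei.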
define <4 x i32> @mgather_truemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) {
; RV32-LABEL: mgather_truemask_v4i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vluxei64.v v10, (zero), v8
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v9
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: beqz zero, .LBB32_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB32_6
; RV64ZVE32F-NEXT: .LBB32_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB32_7
; RV64ZVE32F-NEXT: .LBB32_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB32_8
; RV64ZVE32F-NEXT: .LBB32_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB32_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB32_2
; RV64ZVE32F-NEXT: .LBB32_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB32_3
; RV64ZVE32F-NEXT: .LBB32_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB32_4
; RV64ZVE32F-NEXT: .LBB32_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%mhead = insertelement <4 x i1> poison, i1 1, i32 0
%mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mtrue, <4 x i32> %passthru)
ret <4 x i32> %v
}
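
; All-zeros mask: the gather folds away and the passthru operand is returned.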
define <4 x i32> @mgather_falsemask_v4i32(<4 x ptr> %ptrs, <4 x i32> %passthru) {
; RV32-LABEL: mgather_falsemask_v4i32:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_falsemask_v4i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ret
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer, <4 x i32> %passthru)
ret <4 x i32> %v
}
declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>)
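
; Eight-element i32 gather from a vector of pointers.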
define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB34_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB34_10
; RV64ZVE32F-NEXT: .LBB34_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB34_11
; RV64ZVE32F-NEXT: .LBB34_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB34_12
; RV64ZVE32F-NEXT: .LBB34_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB34_13
; RV64ZVE32F-NEXT: .LBB34_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB34_14
; RV64ZVE32F-NEXT: .LBB34_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: bnez a2, .LBB34_15
; RV64ZVE32F-NEXT: .LBB34_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB34_16
; RV64ZVE32F-NEXT: .LBB34_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB34_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB34_2
; RV64ZVE32F-NEXT: .LBB34_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB34_3
; RV64ZVE32F-NEXT: .LBB34_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB34_4
; RV64ZVE32F-NEXT: .LBB34_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB34_5
; RV64ZVE32F-NEXT: .LBB34_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB34_6
; RV64ZVE32F-NEXT: .LBB34_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB34_7
; RV64ZVE32F-NEXT: .LBB34_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB34_8
; RV64ZVE32F-NEXT: .LBB34_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 7
; RV64ZVE32F-NEXT: ret
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
}
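
; Base+index gather of <8 x i32> with i8 indices, scaled by 4 into the i32 array.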
define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i8_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB35_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB35_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB35_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB35_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB35_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB35_13
; RV64ZVE32F-NEXT: .LBB35_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB35_14
; RV64ZVE32F-NEXT: .LBB35_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB35_9
; RV64ZVE32F-NEXT: .LBB35_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB35_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB35_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB35_16
; RV64ZVE32F-NEXT: .LBB35_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB35_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB35_6
; RV64ZVE32F-NEXT: .LBB35_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB35_7
; RV64ZVE32F-NEXT: .LBB35_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB35_8
; RV64ZVE32F-NEXT: j .LBB35_9
; RV64ZVE32F-NEXT: .LBB35_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB35_11
; RV64ZVE32F-NEXT: .LBB35_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
}
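
; Same as above with the i8 indices explicitly sign-extended to i32.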
define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_v8i8_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB36_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB36_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB36_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB36_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB36_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB36_13
; RV64ZVE32F-NEXT: .LBB36_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB36_14
; RV64ZVE32F-NEXT: .LBB36_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB36_9
; RV64ZVE32F-NEXT: .LBB36_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB36_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB36_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB36_16
; RV64ZVE32F-NEXT: .LBB36_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB36_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB36_6
; RV64ZVE32F-NEXT: .LBB36_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB36_7
; RV64ZVE32F-NEXT: .LBB36_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB36_8
; RV64ZVE32F-NEXT: j .LBB36_9
; RV64ZVE32F-NEXT: .LBB36_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB36_11
; RV64ZVE32F-NEXT: .LBB36_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
}
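
; Same as above with the i8 indices zero-extended to i32; the scalar path
; masks each extracted index with 255.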
define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vzext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vzext.vf8 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB37_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB37_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB37_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB37_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB37_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB37_13
; RV64ZVE32F-NEXT: .LBB37_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB37_14
; RV64ZVE32F-NEXT: .LBB37_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB37_9
; RV64ZVE32F-NEXT: .LBB37_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB37_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB37_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB37_16
; RV64ZVE32F-NEXT: .LBB37_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB37_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB37_6
; RV64ZVE32F-NEXT: .LBB37_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB37_7
; RV64ZVE32F-NEXT: .LBB37_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB37_8
; RV64ZVE32F-NEXT: j .LBB37_9
; RV64ZVE32F-NEXT: .LBB37_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB37_11
; RV64ZVE32F-NEXT: .LBB37_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
}
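
; Base+index gather of <8 x i32> using i16 indices.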
define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i16_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsext.vf2 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i16_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB38_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB38_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB38_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB38_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB38_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB38_13
; RV64ZVE32F-NEXT: .LBB38_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB38_14
; RV64ZVE32F-NEXT: .LBB38_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB38_9
; RV64ZVE32F-NEXT: .LBB38_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB38_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB38_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB38_16
; RV64ZVE32F-NEXT: .LBB38_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB38_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB38_6
; RV64ZVE32F-NEXT: .LBB38_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB38_7
; RV64ZVE32F-NEXT: .LBB38_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB38_8
; RV64ZVE32F-NEXT: j .LBB38_9
; RV64ZVE32F-NEXT: .LBB38_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB38_11
; RV64ZVE32F-NEXT: .LBB38_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
}
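
; Same gather as above, but with the i16 indices explicitly sign-extended to i32 in the IR before the GEP.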
define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_v8i16_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsext.vf2 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB39_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB39_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB39_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB39_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB39_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB39_13
; RV64ZVE32F-NEXT: .LBB39_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB39_14
; RV64ZVE32F-NEXT: .LBB39_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB39_9
; RV64ZVE32F-NEXT: .LBB39_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB39_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB39_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB39_16
; RV64ZVE32F-NEXT: .LBB39_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB39_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB39_6
; RV64ZVE32F-NEXT: .LBB39_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB39_7
; RV64ZVE32F-NEXT: .LBB39_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB39_8
; RV64ZVE32F-NEXT: j .LBB39_9
; RV64ZVE32F-NEXT: .LBB39_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB39_11
; RV64ZVE32F-NEXT: .LBB39_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
}
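
; As above with zero-extended i16 indices; the RV64ZVE32F path masks each index with 0xffff before scaling it.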
define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_v8i16_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vzext.vf2 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vzext.vf4 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lui a1, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: addiw a1, a1, -1
; RV64ZVE32F-NEXT: beqz a3, .LBB40_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a3
; RV64ZVE32F-NEXT: .LBB40_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB40_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a3
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB40_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB40_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB40_13
; RV64ZVE32F-NEXT: .LBB40_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB40_14
; RV64ZVE32F-NEXT: .LBB40_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB40_9
; RV64ZVE32F-NEXT: .LBB40_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a3
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB40_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB40_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: bnez a2, .LBB40_16
; RV64ZVE32F-NEXT: .LBB40_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB40_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a3
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB40_6
; RV64ZVE32F-NEXT: .LBB40_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a3
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB40_7
; RV64ZVE32F-NEXT: .LBB40_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a3
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB40_8
; RV64ZVE32F-NEXT: j .LBB40_9
; RV64ZVE32F-NEXT: .LBB40_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: lw a3, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a3
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB40_11
; RV64ZVE32F-NEXT: .LBB40_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: and a1, a2, a1
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
}
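
; Gather of <8 x i32> using i32 indices directly.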
define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf2 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB41_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB41_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB41_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB41_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB41_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB41_13
; RV64ZVE32F-NEXT: .LBB41_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB41_14
; RV64ZVE32F-NEXT: .LBB41_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB41_9
; RV64ZVE32F-NEXT: .LBB41_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB41_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB41_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB41_16
; RV64ZVE32F-NEXT: .LBB41_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB41_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB41_6
; RV64ZVE32F-NEXT: .LBB41_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB41_7
; RV64ZVE32F-NEXT: .LBB41_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB41_8
; RV64ZVE32F-NEXT: j .LBB41_9
; RV64ZVE32F-NEXT: .LBB41_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB41_11
; RV64ZVE32F-NEXT: .LBB41_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
%v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
ret <8 x i32> %v
}

declare <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i64>)
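
; i64 element gathers: the ZVE32F configurations have no 64-bit vector elements, so every element is loaded with scalar loads (a pair of lw on RV32, a single ld on RV64).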
define <1 x i64> @mgather_v1i64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x i64> %passthru) {
; RV32V-LABEL: mgather_v1i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vfirst.m a2, v0
; RV32ZVE32F-NEXT: bnez a2, .LBB42_2
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: lw a1, 4(a0)
; RV32ZVE32F-NEXT: lw a0, 0(a0)
; RV32ZVE32F-NEXT: .LBB42_2: # %else
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a2, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a2, v0
; RV64ZVE32F-NEXT: bnez a2, .LBB42_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a1, 0(a0)
; RV64ZVE32F-NEXT: .LBB42_2: # %else
; RV64ZVE32F-NEXT: mv a0, a1
; RV64ZVE32F-NEXT: ret
%v = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> %ptrs, i32 8, <1 x i1> %m, <1 x i64> %passthru)
ret <1 x i64> %v
}

declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
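
; Two-element case: only the mask is read from a vector register on RV64ZVE32F; the pointers and the passthru already arrive in scalar registers.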
define <2 x i64> @mgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> %passthru) {
; RV32V-LABEL: mgather_v2i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a4, v0
; RV32ZVE32F-NEXT: andi a2, a4, 1
; RV32ZVE32F-NEXT: beqz a2, .LBB43_3
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a2, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, a4, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB43_4
; RV32ZVE32F-NEXT: .LBB43_2:
; RV32ZVE32F-NEXT: lw a4, 12(a1)
; RV32ZVE32F-NEXT: lw a1, 8(a1)
; RV32ZVE32F-NEXT: j .LBB43_5
; RV32ZVE32F-NEXT: .LBB43_3:
; RV32ZVE32F-NEXT: lw a2, 4(a1)
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: andi a4, a4, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB43_2
; RV32ZVE32F-NEXT: .LBB43_4: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lw a4, 4(a1)
; RV32ZVE32F-NEXT: lw a1, 0(a1)
; RV32ZVE32F-NEXT: .LBB43_5: # %else2
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: sw a1, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v0
; RV64ZVE32F-NEXT: andi a5, a4, 1
; RV64ZVE32F-NEXT: beqz a5, .LBB43_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: .LBB43_2: # %else
; RV64ZVE32F-NEXT: andi a4, a4, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB43_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: .LBB43_4: # %else2
; RV64ZVE32F-NEXT: mv a0, a2
; RV64ZVE32F-NEXT: mv a1, a3
; RV64ZVE32F-NEXT: ret
%v = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> %ptrs, i32 8, <2 x i1> %m, <2 x i64> %passthru)
ret <2 x i64> %v
}

declare <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>)
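
; From four elements up, RV64ZVE32F receives the pointer vector and the passthru indirectly and writes the result through the sret pointer in a0.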
define <4 x i64> @mgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i64> %passthru) {
; RV32V-LABEL: mgather_v4i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV32V-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v10
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v4i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v4i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a6, v0
; RV32ZVE32F-NEXT: andi a2, a6, 1
; RV32ZVE32F-NEXT: beqz a2, .LBB44_5
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a2, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, a6, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB44_6
; RV32ZVE32F-NEXT: .LBB44_2:
; RV32ZVE32F-NEXT: lw a4, 12(a1)
; RV32ZVE32F-NEXT: lw a5, 8(a1)
; RV32ZVE32F-NEXT: andi a7, a6, 4
; RV32ZVE32F-NEXT: bnez a7, .LBB44_7
; RV32ZVE32F-NEXT: .LBB44_3:
; RV32ZVE32F-NEXT: lw a7, 20(a1)
; RV32ZVE32F-NEXT: lw t0, 16(a1)
; RV32ZVE32F-NEXT: andi a6, a6, 8
; RV32ZVE32F-NEXT: bnez a6, .LBB44_8
; RV32ZVE32F-NEXT: .LBB44_4:
; RV32ZVE32F-NEXT: lw a6, 28(a1)
; RV32ZVE32F-NEXT: lw a1, 24(a1)
; RV32ZVE32F-NEXT: j .LBB44_9
; RV32ZVE32F-NEXT: .LBB44_5:
; RV32ZVE32F-NEXT: lw a2, 4(a1)
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: andi a4, a6, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB44_2
; RV32ZVE32F-NEXT: .LBB44_6: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v9
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a7, a6, 4
; RV32ZVE32F-NEXT: beqz a7, .LBB44_3
; RV32ZVE32F-NEXT: .LBB44_7: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s t0, v9
; RV32ZVE32F-NEXT: lw a7, 4(t0)
; RV32ZVE32F-NEXT: lw t0, 0(t0)
; RV32ZVE32F-NEXT: andi a6, a6, 8
; RV32ZVE32F-NEXT: beqz a6, .LBB44_4
; RV32ZVE32F-NEXT: .LBB44_8: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lw a6, 4(a1)
; RV32ZVE32F-NEXT: lw a1, 0(a1)
; RV32ZVE32F-NEXT: .LBB44_9: # %else8
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw t0, 16(a0)
; RV32ZVE32F-NEXT: sw a7, 20(a0)
; RV32ZVE32F-NEXT: sw a1, 24(a0)
; RV32ZVE32F-NEXT: sw a6, 28(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v4i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB44_5
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB44_6
; RV64ZVE32F-NEXT: .LBB44_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: bnez a6, .LBB44_7
; RV64ZVE32F-NEXT: .LBB44_3:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a5, a5, 8
; RV64ZVE32F-NEXT: bnez a5, .LBB44_8
; RV64ZVE32F-NEXT: .LBB44_4:
; RV64ZVE32F-NEXT: ld a1, 24(a2)
; RV64ZVE32F-NEXT: j .LBB44_9
; RV64ZVE32F-NEXT: .LBB44_5:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB44_2
; RV64ZVE32F-NEXT: .LBB44_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a4, 8(a1)
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: beqz a6, .LBB44_3
; RV64ZVE32F-NEXT: .LBB44_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a6, 16(a1)
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a5, a5, 8
; RV64ZVE32F-NEXT: beqz a5, .LBB44_4
; RV64ZVE32F-NEXT: .LBB44_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a1, 24(a1)
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB44_9: # %else8
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a1, 24(a0)
; RV64ZVE32F-NEXT: ret
%v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %m, <4 x i64> %passthru)
ret <4 x i64> %v
}
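
; All-ones mask: the V configurations emit an unmasked vluxei, while the ZVE32F paths still materialize the mask with vmset.m and run the scalarized sequence.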
define <4 x i64> @mgather_truemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) {
; RV32V-LABEL: mgather_truemask_v4i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32V-NEXT: vluxei32.v v10, (zero), v8
; RV32V-NEXT: vmv.v.v v8, v10
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vluxei64.v v8, (zero), v8
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_truemask_v4i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmset.m v9
; RV32ZVE32F-NEXT: vmv.x.s a6, v9
; RV32ZVE32F-NEXT: bnez zero, .LBB45_5
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a2, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, a6, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB45_6
; RV32ZVE32F-NEXT: .LBB45_2:
; RV32ZVE32F-NEXT: lw a4, 12(a1)
; RV32ZVE32F-NEXT: lw a5, 8(a1)
; RV32ZVE32F-NEXT: andi a7, a6, 4
; RV32ZVE32F-NEXT: bnez a7, .LBB45_7
; RV32ZVE32F-NEXT: .LBB45_3:
; RV32ZVE32F-NEXT: lw a7, 20(a1)
; RV32ZVE32F-NEXT: lw t0, 16(a1)
; RV32ZVE32F-NEXT: andi a6, a6, 8
; RV32ZVE32F-NEXT: bnez a6, .LBB45_8
; RV32ZVE32F-NEXT: .LBB45_4:
; RV32ZVE32F-NEXT: lw a6, 28(a1)
; RV32ZVE32F-NEXT: lw a1, 24(a1)
; RV32ZVE32F-NEXT: j .LBB45_9
; RV32ZVE32F-NEXT: .LBB45_5:
; RV32ZVE32F-NEXT: lw a2, 4(a1)
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: andi a4, a6, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB45_2
; RV32ZVE32F-NEXT: .LBB45_6: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v9
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a7, a6, 4
; RV32ZVE32F-NEXT: beqz a7, .LBB45_3
; RV32ZVE32F-NEXT: .LBB45_7: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s t0, v9
; RV32ZVE32F-NEXT: lw a7, 4(t0)
; RV32ZVE32F-NEXT: lw t0, 0(t0)
; RV32ZVE32F-NEXT: andi a6, a6, 8
; RV32ZVE32F-NEXT: beqz a6, .LBB45_4
; RV32ZVE32F-NEXT: .LBB45_8: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lw a6, 4(a1)
; RV32ZVE32F-NEXT: lw a1, 0(a1)
; RV32ZVE32F-NEXT: .LBB45_9: # %else8
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw t0, 16(a0)
; RV32ZVE32F-NEXT: sw a7, 20(a0)
; RV32ZVE32F-NEXT: sw a1, 24(a0)
; RV32ZVE32F-NEXT: sw a6, 28(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v8
; RV64ZVE32F-NEXT: vmv.x.s a5, v8
; RV64ZVE32F-NEXT: bnez zero, .LBB45_5
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB45_6
; RV64ZVE32F-NEXT: .LBB45_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: bnez a6, .LBB45_7
; RV64ZVE32F-NEXT: .LBB45_3:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a5, a5, 8
; RV64ZVE32F-NEXT: bnez a5, .LBB45_8
; RV64ZVE32F-NEXT: .LBB45_4:
; RV64ZVE32F-NEXT: ld a1, 24(a2)
; RV64ZVE32F-NEXT: j .LBB45_9
; RV64ZVE32F-NEXT: .LBB45_5:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB45_2
; RV64ZVE32F-NEXT: .LBB45_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a4, 8(a1)
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: beqz a6, .LBB45_3
; RV64ZVE32F-NEXT: .LBB45_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a6, 16(a1)
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a5, a5, 8
; RV64ZVE32F-NEXT: beqz a5, .LBB45_4
; RV64ZVE32F-NEXT: .LBB45_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a1, 24(a1)
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB45_9: # %else8
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a1, 24(a0)
; RV64ZVE32F-NEXT: ret
%mhead = insertelement <4 x i1> poison, i1 1, i32 0
%mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %mtrue, <4 x i64> %passthru)
ret <4 x i64> %v
}
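
; All-zeros mask: the gather folds away and only the passthru is returned (or copied out through the result buffer).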
define <4 x i64> @mgather_falsemask_v4i64(<4 x ptr> %ptrs, <4 x i64> %passthru) {
; RV32V-LABEL: mgather_falsemask_v4i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vmv2r.v v8, v10
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_falsemask_v4i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vmv2r.v v8, v10
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_falsemask_v4i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lw a2, 0(a1)
; RV32ZVE32F-NEXT: lw a3, 4(a1)
; RV32ZVE32F-NEXT: lw a4, 8(a1)
; RV32ZVE32F-NEXT: lw a5, 12(a1)
; RV32ZVE32F-NEXT: lw a6, 28(a1)
; RV32ZVE32F-NEXT: lw a7, 24(a1)
; RV32ZVE32F-NEXT: lw t0, 20(a1)
; RV32ZVE32F-NEXT: lw a1, 16(a1)
; RV32ZVE32F-NEXT: sw a6, 28(a0)
; RV32ZVE32F-NEXT: sw a7, 24(a0)
; RV32ZVE32F-NEXT: sw t0, 20(a0)
; RV32ZVE32F-NEXT: sw a1, 16(a0)
; RV32ZVE32F-NEXT: sw a5, 12(a0)
; RV32ZVE32F-NEXT: sw a4, 8(a0)
; RV32ZVE32F-NEXT: sw a3, 4(a0)
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ld a1, 24(a2)
; RV64ZVE32F-NEXT: ld a3, 16(a2)
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: ld a2, 0(a2)
; RV64ZVE32F-NEXT: sd a1, 24(a0)
; RV64ZVE32F-NEXT: sd a3, 16(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a2, 0(a0)
; RV64ZVE32F-NEXT: ret
%v = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer, <4 x i64> %passthru)
ret <4 x i64> %v
}

declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i64>)
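
; Eight-element i64 gather; the RV32ZVE32F lowering runs out of temporaries and spills s0/s1 to handle the last two elements.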
define <8 x i64> @mgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (zero), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a2, t0, 1
; RV32ZVE32F-NEXT: beqz a2, .LBB47_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a2, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB47_8
; RV32ZVE32F-NEXT: .LBB47_2:
; RV32ZVE32F-NEXT: lw a4, 12(a1)
; RV32ZVE32F-NEXT: lw a5, 8(a1)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB47_9
; RV32ZVE32F-NEXT: .LBB47_3:
; RV32ZVE32F-NEXT: lw a6, 20(a1)
; RV32ZVE32F-NEXT: lw a7, 16(a1)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB47_10
; RV32ZVE32F-NEXT: .LBB47_4:
; RV32ZVE32F-NEXT: lw t1, 28(a1)
; RV32ZVE32F-NEXT: lw t2, 24(a1)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB47_11
; RV32ZVE32F-NEXT: .LBB47_5:
; RV32ZVE32F-NEXT: lw t3, 36(a1)
; RV32ZVE32F-NEXT: lw t4, 32(a1)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB47_12
; RV32ZVE32F-NEXT: .LBB47_6:
; RV32ZVE32F-NEXT: lw t5, 44(a1)
; RV32ZVE32F-NEXT: lw t6, 40(a1)
; RV32ZVE32F-NEXT: j .LBB47_13
; RV32ZVE32F-NEXT: .LBB47_7:
; RV32ZVE32F-NEXT: lw a2, 4(a1)
; RV32ZVE32F-NEXT: lw a3, 0(a1)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB47_2
; RV32ZVE32F-NEXT: .LBB47_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB47_3
; RV32ZVE32F-NEXT: .LBB47_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB47_4
; RV32ZVE32F-NEXT: .LBB47_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB47_5
; RV32ZVE32F-NEXT: .LBB47_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB47_6
; RV32ZVE32F-NEXT: .LBB47_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB47_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB47_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB47_17
; RV32ZVE32F-NEXT: .LBB47_15:
; RV32ZVE32F-NEXT: lw t0, 60(a1)
; RV32ZVE32F-NEXT: lw a1, 56(a1)
; RV32ZVE32F-NEXT: j .LBB47_18
; RV32ZVE32F-NEXT: .LBB47_16:
; RV32ZVE32F-NEXT: lw s0, 52(a1)
; RV32ZVE32F-NEXT: lw s1, 48(a1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB47_15
; RV32ZVE32F-NEXT: .LBB47_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: lw t0, 4(a1)
; RV32ZVE32F-NEXT: lw a1, 0(a1)
; RV32ZVE32F-NEXT: .LBB47_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a2, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a1, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a6, v0
; RV64ZVE32F-NEXT: andi a3, a6, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB47_9
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a6, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB47_10
; RV64ZVE32F-NEXT: .LBB47_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: andi a5, a6, 4
; RV64ZVE32F-NEXT: bnez a5, .LBB47_11
; RV64ZVE32F-NEXT: .LBB47_3:
; RV64ZVE32F-NEXT: ld a5, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a6, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB47_12
; RV64ZVE32F-NEXT: .LBB47_4:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a6, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB47_13
; RV64ZVE32F-NEXT: .LBB47_5:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a6, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB47_14
; RV64ZVE32F-NEXT: .LBB47_6:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: andi t2, a6, 64
; RV64ZVE32F-NEXT: bnez t2, .LBB47_15
; RV64ZVE32F-NEXT: .LBB47_7:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a6, a6, -128
; RV64ZVE32F-NEXT: bnez a6, .LBB47_16
; RV64ZVE32F-NEXT: .LBB47_8:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB47_17
; RV64ZVE32F-NEXT: .LBB47_9:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a6, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB47_2
; RV64ZVE32F-NEXT: .LBB47_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a4, 8(a1)
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: andi a5, a6, 4
; RV64ZVE32F-NEXT: beqz a5, .LBB47_3
; RV64ZVE32F-NEXT: .LBB47_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a5, 16(a1)
; RV64ZVE32F-NEXT: ld a5, 0(a5)
; RV64ZVE32F-NEXT: andi a7, a6, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB47_4
; RV64ZVE32F-NEXT: .LBB47_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a7, 24(a1)
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a6, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB47_5
; RV64ZVE32F-NEXT: .LBB47_13: # %cond.load10
; RV64ZVE32F-NEXT: ld t0, 32(a1)
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a6, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB47_6
; RV64ZVE32F-NEXT: .LBB47_14: # %cond.load13
; RV64ZVE32F-NEXT: ld t1, 40(a1)
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: andi t2, a6, 64
; RV64ZVE32F-NEXT: beqz t2, .LBB47_7
; RV64ZVE32F-NEXT: .LBB47_15: # %cond.load16
; RV64ZVE32F-NEXT: ld t2, 48(a1)
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a6, a6, -128
; RV64ZVE32F-NEXT: beqz a6, .LBB47_8
; RV64ZVE32F-NEXT: .LBB47_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a1, 56(a1)
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB47_17: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a5, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
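
; i8 indices into an i64 gather: the indices are sign-extended and scaled by 8 to form byte offsets.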
define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_v8i8_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf4 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf8 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i8_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB48_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB48_8
; RV32ZVE32F-NEXT: .LBB48_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB48_9
; RV32ZVE32F-NEXT: .LBB48_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB48_10
; RV32ZVE32F-NEXT: .LBB48_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB48_11
; RV32ZVE32F-NEXT: .LBB48_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB48_12
; RV32ZVE32F-NEXT: .LBB48_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB48_13
; RV32ZVE32F-NEXT: .LBB48_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB48_2
; RV32ZVE32F-NEXT: .LBB48_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB48_3
; RV32ZVE32F-NEXT: .LBB48_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB48_4
; RV32ZVE32F-NEXT: .LBB48_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB48_5
; RV32ZVE32F-NEXT: .LBB48_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB48_6
; RV32ZVE32F-NEXT: .LBB48_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB48_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB48_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB48_17
; RV32ZVE32F-NEXT: .LBB48_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB48_18
; RV32ZVE32F-NEXT: .LBB48_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB48_15
; RV32ZVE32F-NEXT: .LBB48_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB48_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB48_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB48_4
; RV64ZVE32F-NEXT: .LBB48_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB48_5
; RV64ZVE32F-NEXT: .LBB48_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB48_2
; RV64ZVE32F-NEXT: .LBB48_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB48_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a6, .LBB48_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB48_11
; RV64ZVE32F-NEXT: .LBB48_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB48_12
; RV64ZVE32F-NEXT: .LBB48_8:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB48_13
; RV64ZVE32F-NEXT: .LBB48_9:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB48_14
; RV64ZVE32F-NEXT: .LBB48_10:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB48_7
; RV64ZVE32F-NEXT: .LBB48_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB48_8
; RV64ZVE32F-NEXT: .LBB48_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB48_9
; RV64ZVE32F-NEXT: .LBB48_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB48_14: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB48_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: bnez a5, .LBB48_18
; RV64ZVE32F-NEXT: .LBB48_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB48_19
; RV64ZVE32F-NEXT: .LBB48_17:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB48_16
; RV64ZVE32F-NEXT: .LBB48_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB48_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
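
; As above, but with the i8 indices explicitly sign-extended to i64 in the IR.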
define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_sext_v8i8_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf4 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf8 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB49_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB49_8
; RV32ZVE32F-NEXT: .LBB49_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB49_9
; RV32ZVE32F-NEXT: .LBB49_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB49_10
; RV32ZVE32F-NEXT: .LBB49_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB49_11
; RV32ZVE32F-NEXT: .LBB49_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB49_12
; RV32ZVE32F-NEXT: .LBB49_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB49_13
; RV32ZVE32F-NEXT: .LBB49_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB49_2
; RV32ZVE32F-NEXT: .LBB49_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB49_3
; RV32ZVE32F-NEXT: .LBB49_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB49_4
; RV32ZVE32F-NEXT: .LBB49_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB49_5
; RV32ZVE32F-NEXT: .LBB49_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB49_6
; RV32ZVE32F-NEXT: .LBB49_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB49_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB49_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB49_17
; RV32ZVE32F-NEXT: .LBB49_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB49_18
; RV32ZVE32F-NEXT: .LBB49_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB49_15
; RV32ZVE32F-NEXT: .LBB49_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB49_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB49_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB49_4
; RV64ZVE32F-NEXT: .LBB49_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB49_5
; RV64ZVE32F-NEXT: .LBB49_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB49_2
; RV64ZVE32F-NEXT: .LBB49_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB49_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a6, .LBB49_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB49_11
; RV64ZVE32F-NEXT: .LBB49_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB49_12
; RV64ZVE32F-NEXT: .LBB49_8:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB49_13
; RV64ZVE32F-NEXT: .LBB49_9:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB49_14
; RV64ZVE32F-NEXT: .LBB49_10:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB49_7
; RV64ZVE32F-NEXT: .LBB49_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB49_8
; RV64ZVE32F-NEXT: .LBB49_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB49_9
; RV64ZVE32F-NEXT: .LBB49_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB49_14: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB49_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: bnez a5, .LBB49_18
; RV64ZVE32F-NEXT: .LBB49_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB49_19
; RV64ZVE32F-NEXT: .LBB49_17:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB49_16
; RV64ZVE32F-NEXT: .LBB49_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB49_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
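
; Gather of <8 x i64> using zero-extended <8 x i8> indices into an i64 array at %base.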
define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vzext.vf4 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vzext.vf8 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB50_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB50_8
; RV32ZVE32F-NEXT: .LBB50_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB50_9
; RV32ZVE32F-NEXT: .LBB50_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB50_10
; RV32ZVE32F-NEXT: .LBB50_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB50_11
; RV32ZVE32F-NEXT: .LBB50_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB50_12
; RV32ZVE32F-NEXT: .LBB50_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB50_13
; RV32ZVE32F-NEXT: .LBB50_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB50_2
; RV32ZVE32F-NEXT: .LBB50_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB50_3
; RV32ZVE32F-NEXT: .LBB50_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB50_4
; RV32ZVE32F-NEXT: .LBB50_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB50_5
; RV32ZVE32F-NEXT: .LBB50_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB50_6
; RV32ZVE32F-NEXT: .LBB50_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB50_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB50_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB50_17
; RV32ZVE32F-NEXT: .LBB50_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB50_18
; RV32ZVE32F-NEXT: .LBB50_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB50_15
; RV32ZVE32F-NEXT: .LBB50_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB50_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB50_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB50_4
; RV64ZVE32F-NEXT: .LBB50_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB50_5
; RV64ZVE32F-NEXT: .LBB50_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB50_2
; RV64ZVE32F-NEXT: .LBB50_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: andi a4, a4, 255
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB50_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a6, .LBB50_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: andi a6, a6, 255
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB50_11
; RV64ZVE32F-NEXT: .LBB50_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB50_12
; RV64ZVE32F-NEXT: .LBB50_8:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB50_13
; RV64ZVE32F-NEXT: .LBB50_9:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB50_14
; RV64ZVE32F-NEXT: .LBB50_10:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB50_7
; RV64ZVE32F-NEXT: .LBB50_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: andi a7, a7, 255
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB50_8
; RV64ZVE32F-NEXT: .LBB50_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: andi t0, t0, 255
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB50_9
; RV64ZVE32F-NEXT: .LBB50_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: andi t1, t1, 255
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB50_14: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB50_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: andi t2, t2, 255
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: bnez a5, .LBB50_18
; RV64ZVE32F-NEXT: .LBB50_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB50_19
; RV64ZVE32F-NEXT: .LBB50_17:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB50_16
; RV64ZVE32F-NEXT: .LBB50_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB50_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
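
; Gather of <8 x i64> using <8 x i16> indices into an i64 array at %base.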
define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_v8i16_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf2 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i16_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf4 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB51_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB51_8
; RV32ZVE32F-NEXT: .LBB51_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB51_9
; RV32ZVE32F-NEXT: .LBB51_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB51_10
; RV32ZVE32F-NEXT: .LBB51_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB51_11
; RV32ZVE32F-NEXT: .LBB51_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB51_12
; RV32ZVE32F-NEXT: .LBB51_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB51_13
; RV32ZVE32F-NEXT: .LBB51_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB51_2
; RV32ZVE32F-NEXT: .LBB51_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB51_3
; RV32ZVE32F-NEXT: .LBB51_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB51_4
; RV32ZVE32F-NEXT: .LBB51_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB51_5
; RV32ZVE32F-NEXT: .LBB51_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB51_6
; RV32ZVE32F-NEXT: .LBB51_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB51_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB51_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB51_17
; RV32ZVE32F-NEXT: .LBB51_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB51_18
; RV32ZVE32F-NEXT: .LBB51_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB51_15
; RV32ZVE32F-NEXT: .LBB51_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB51_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB51_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB51_4
; RV64ZVE32F-NEXT: .LBB51_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB51_5
; RV64ZVE32F-NEXT: .LBB51_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB51_2
; RV64ZVE32F-NEXT: .LBB51_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB51_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a6, .LBB51_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB51_11
; RV64ZVE32F-NEXT: .LBB51_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB51_12
; RV64ZVE32F-NEXT: .LBB51_8:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB51_13
; RV64ZVE32F-NEXT: .LBB51_9:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB51_14
; RV64ZVE32F-NEXT: .LBB51_10:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB51_7
; RV64ZVE32F-NEXT: .LBB51_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB51_8
; RV64ZVE32F-NEXT: .LBB51_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB51_9
; RV64ZVE32F-NEXT: .LBB51_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB51_14: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB51_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: bnez a5, .LBB51_18
; RV64ZVE32F-NEXT: .LBB51_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB51_19
; RV64ZVE32F-NEXT: .LBB51_17:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB51_16
; RV64ZVE32F-NEXT: .LBB51_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB51_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
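
; Gather of <8 x i64> using sign-extended <8 x i16> indices into an i64 array at %base.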
define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_sext_v8i16_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf2 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf4 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB52_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB52_8
; RV32ZVE32F-NEXT: .LBB52_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB52_9
; RV32ZVE32F-NEXT: .LBB52_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB52_10
; RV32ZVE32F-NEXT: .LBB52_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB52_11
; RV32ZVE32F-NEXT: .LBB52_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB52_12
; RV32ZVE32F-NEXT: .LBB52_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB52_13
; RV32ZVE32F-NEXT: .LBB52_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB52_2
; RV32ZVE32F-NEXT: .LBB52_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB52_3
; RV32ZVE32F-NEXT: .LBB52_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB52_4
; RV32ZVE32F-NEXT: .LBB52_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB52_5
; RV32ZVE32F-NEXT: .LBB52_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB52_6
; RV32ZVE32F-NEXT: .LBB52_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB52_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB52_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB52_17
; RV32ZVE32F-NEXT: .LBB52_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB52_18
; RV32ZVE32F-NEXT: .LBB52_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB52_15
; RV32ZVE32F-NEXT: .LBB52_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB52_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB52_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB52_4
; RV64ZVE32F-NEXT: .LBB52_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB52_5
; RV64ZVE32F-NEXT: .LBB52_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB52_2
; RV64ZVE32F-NEXT: .LBB52_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB52_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a6, .LBB52_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB52_11
; RV64ZVE32F-NEXT: .LBB52_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB52_12
; RV64ZVE32F-NEXT: .LBB52_8:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB52_13
; RV64ZVE32F-NEXT: .LBB52_9:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB52_14
; RV64ZVE32F-NEXT: .LBB52_10:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB52_7
; RV64ZVE32F-NEXT: .LBB52_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB52_8
; RV64ZVE32F-NEXT: .LBB52_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v9
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB52_9
; RV64ZVE32F-NEXT: .LBB52_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB52_14: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB52_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: bnez a5, .LBB52_18
; RV64ZVE32F-NEXT: .LBB52_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB52_19
; RV64ZVE32F-NEXT: .LBB52_17:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB52_16
; RV64ZVE32F-NEXT: .LBB52_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB52_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
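
; Gather of <8 x i64> using zero-extended <8 x i16> indices into an i64 array at %base.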
define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_zext_v8i16_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vzext.vf2 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vzext.vf4 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB53_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB53_8
; RV32ZVE32F-NEXT: .LBB53_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB53_9
; RV32ZVE32F-NEXT: .LBB53_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB53_10
; RV32ZVE32F-NEXT: .LBB53_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB53_11
; RV32ZVE32F-NEXT: .LBB53_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB53_12
; RV32ZVE32F-NEXT: .LBB53_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB53_13
; RV32ZVE32F-NEXT: .LBB53_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB53_2
; RV32ZVE32F-NEXT: .LBB53_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB53_3
; RV32ZVE32F-NEXT: .LBB53_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB53_4
; RV32ZVE32F-NEXT: .LBB53_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB53_5
; RV32ZVE32F-NEXT: .LBB53_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB53_6
; RV32ZVE32F-NEXT: .LBB53_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB53_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB53_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB53_17
; RV32ZVE32F-NEXT: .LBB53_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB53_18
; RV32ZVE32F-NEXT: .LBB53_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB53_15
; RV32ZVE32F-NEXT: .LBB53_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB53_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lui a5, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a6, v0
; RV64ZVE32F-NEXT: andi a3, a6, 1
; RV64ZVE32F-NEXT: addiw a5, a5, -1
; RV64ZVE32F-NEXT: beqz a3, .LBB53_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a5
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a6, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB53_4
; RV64ZVE32F-NEXT: .LBB53_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB53_5
; RV64ZVE32F-NEXT: .LBB53_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a6, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB53_2
; RV64ZVE32F-NEXT: .LBB53_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: and a4, a4, a5
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB53_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a7, a6, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a7, .LBB53_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: and a7, a7, a5
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a6, 8
; RV64ZVE32F-NEXT: bnez t0, .LBB53_11
; RV64ZVE32F-NEXT: .LBB53_7:
; RV64ZVE32F-NEXT: ld t0, 24(a2)
; RV64ZVE32F-NEXT: andi t1, a6, 16
; RV64ZVE32F-NEXT: bnez t1, .LBB53_12
; RV64ZVE32F-NEXT: .LBB53_8:
; RV64ZVE32F-NEXT: ld t1, 32(a2)
; RV64ZVE32F-NEXT: andi t2, a6, 32
; RV64ZVE32F-NEXT: bnez t2, .LBB53_13
; RV64ZVE32F-NEXT: .LBB53_9:
; RV64ZVE32F-NEXT: ld t2, 40(a2)
; RV64ZVE32F-NEXT: j .LBB53_14
; RV64ZVE32F-NEXT: .LBB53_10:
; RV64ZVE32F-NEXT: ld a7, 16(a2)
; RV64ZVE32F-NEXT: andi t0, a6, 8
; RV64ZVE32F-NEXT: beqz t0, .LBB53_7
; RV64ZVE32F-NEXT: .LBB53_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s t0, v8
; RV64ZVE32F-NEXT: and t0, t0, a5
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a6, 16
; RV64ZVE32F-NEXT: beqz t1, .LBB53_8
; RV64ZVE32F-NEXT: .LBB53_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t1, v9
; RV64ZVE32F-NEXT: and t1, t1, a5
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: andi t2, a6, 32
; RV64ZVE32F-NEXT: beqz t2, .LBB53_9
; RV64ZVE32F-NEXT: .LBB53_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: and t2, t2, a5
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: .LBB53_14: # %else14
; RV64ZVE32F-NEXT: andi t3, a6, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz t3, .LBB53_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t3, v8
; RV64ZVE32F-NEXT: and t3, t3, a5
; RV64ZVE32F-NEXT: slli t3, t3, 3
; RV64ZVE32F-NEXT: add t3, a1, t3
; RV64ZVE32F-NEXT: ld t3, 0(t3)
; RV64ZVE32F-NEXT: andi a6, a6, -128
; RV64ZVE32F-NEXT: bnez a6, .LBB53_18
; RV64ZVE32F-NEXT: .LBB53_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB53_19
; RV64ZVE32F-NEXT: .LBB53_17:
; RV64ZVE32F-NEXT: ld t3, 48(a2)
; RV64ZVE32F-NEXT: andi a6, a6, -128
; RV64ZVE32F-NEXT: beqz a6, .LBB53_16
; RV64ZVE32F-NEXT: .LBB53_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: and a2, a2, a5
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB53_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a7, 16(a0)
; RV64ZVE32F-NEXT: sd t0, 24(a0)
; RV64ZVE32F-NEXT: sd t1, 32(a0)
; RV64ZVE32F-NEXT: sd t2, 40(a0)
; RV64ZVE32F-NEXT: sd t3, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
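
; Gather of <8 x i64> using <8 x i32> indices into an i64 array at %base.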
define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_v8i32_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v8, v8, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i32_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf2 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i32_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB54_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB54_8
; RV32ZVE32F-NEXT: .LBB54_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB54_9
; RV32ZVE32F-NEXT: .LBB54_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB54_10
; RV32ZVE32F-NEXT: .LBB54_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB54_11
; RV32ZVE32F-NEXT: .LBB54_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB54_12
; RV32ZVE32F-NEXT: .LBB54_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB54_13
; RV32ZVE32F-NEXT: .LBB54_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB54_2
; RV32ZVE32F-NEXT: .LBB54_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB54_3
; RV32ZVE32F-NEXT: .LBB54_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB54_4
; RV32ZVE32F-NEXT: .LBB54_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB54_5
; RV32ZVE32F-NEXT: .LBB54_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB54_6
; RV32ZVE32F-NEXT: .LBB54_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB54_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB54_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB54_17
; RV32ZVE32F-NEXT: .LBB54_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB54_18
; RV32ZVE32F-NEXT: .LBB54_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB54_15
; RV32ZVE32F-NEXT: .LBB54_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB54_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB54_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB54_4
; RV64ZVE32F-NEXT: .LBB54_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB54_5
; RV64ZVE32F-NEXT: .LBB54_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB54_2
; RV64ZVE32F-NEXT: .LBB54_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v10
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB54_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a6, .LBB54_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB54_11
; RV64ZVE32F-NEXT: .LBB54_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB54_12
; RV64ZVE32F-NEXT: .LBB54_8:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB54_13
; RV64ZVE32F-NEXT: .LBB54_9:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB54_14
; RV64ZVE32F-NEXT: .LBB54_10:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB54_7
; RV64ZVE32F-NEXT: .LBB54_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB54_8
; RV64ZVE32F-NEXT: .LBB54_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v10
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB54_9
; RV64ZVE32F-NEXT: .LBB54_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB54_14: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB54_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: bnez a5, .LBB54_18
; RV64ZVE32F-NEXT: .LBB54_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB54_19
; RV64ZVE32F-NEXT: .LBB54_17:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB54_16
; RV64ZVE32F-NEXT: .LBB54_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB54_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}
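
; Gather of <8 x i64> using sign-extended <8 x i32> indices into an i64 array at %base.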
define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_sext_v8i32_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v8, v8, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i32_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf2 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB55_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB55_8
; RV32ZVE32F-NEXT: .LBB55_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB55_9
; RV32ZVE32F-NEXT: .LBB55_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB55_10
; RV32ZVE32F-NEXT: .LBB55_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB55_11
; RV32ZVE32F-NEXT: .LBB55_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB55_12
; RV32ZVE32F-NEXT: .LBB55_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB55_13
; RV32ZVE32F-NEXT: .LBB55_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB55_2
; RV32ZVE32F-NEXT: .LBB55_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB55_3
; RV32ZVE32F-NEXT: .LBB55_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB55_4
; RV32ZVE32F-NEXT: .LBB55_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB55_5
; RV32ZVE32F-NEXT: .LBB55_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB55_6
; RV32ZVE32F-NEXT: .LBB55_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB55_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB55_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB55_17
; RV32ZVE32F-NEXT: .LBB55_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB55_18
; RV32ZVE32F-NEXT: .LBB55_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB55_15
; RV32ZVE32F-NEXT: .LBB55_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB55_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB55_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB55_4
; RV64ZVE32F-NEXT: .LBB55_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB55_5
; RV64ZVE32F-NEXT: .LBB55_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB55_2
; RV64ZVE32F-NEXT: .LBB55_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v10
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB55_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a6, .LBB55_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB55_11
; RV64ZVE32F-NEXT: .LBB55_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB55_12
; RV64ZVE32F-NEXT: .LBB55_8:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB55_13
; RV64ZVE32F-NEXT: .LBB55_9:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB55_14
; RV64ZVE32F-NEXT: .LBB55_10:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB55_7
; RV64ZVE32F-NEXT: .LBB55_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 3
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB55_8
; RV64ZVE32F-NEXT: .LBB55_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v10
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB55_9
; RV64ZVE32F-NEXT: .LBB55_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB55_14: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB55_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: bnez a5, .LBB55_18
; RV64ZVE32F-NEXT: .LBB55_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB55_19
; RV64ZVE32F-NEXT: .LBB55_17:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB55_16
; RV64ZVE32F-NEXT: .LBB55_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB55_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}

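; Same gather with the <8 x i32> indices zero-extended to i64; RV64ZVE32F folds the zero-extension into the index scaling (slli 32 / srli 29).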
define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_zext_v8i32_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v8, v8, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i32_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vzext.vf2 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB56_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB56_8
; RV32ZVE32F-NEXT: .LBB56_2:
; RV32ZVE32F-NEXT: lw a4, 12(a2)
; RV32ZVE32F-NEXT: lw a5, 8(a2)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB56_9
; RV32ZVE32F-NEXT: .LBB56_3:
; RV32ZVE32F-NEXT: lw a6, 20(a2)
; RV32ZVE32F-NEXT: lw a7, 16(a2)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB56_10
; RV32ZVE32F-NEXT: .LBB56_4:
; RV32ZVE32F-NEXT: lw t1, 28(a2)
; RV32ZVE32F-NEXT: lw t2, 24(a2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB56_11
; RV32ZVE32F-NEXT: .LBB56_5:
; RV32ZVE32F-NEXT: lw t3, 36(a2)
; RV32ZVE32F-NEXT: lw t4, 32(a2)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB56_12
; RV32ZVE32F-NEXT: .LBB56_6:
; RV32ZVE32F-NEXT: lw t5, 44(a2)
; RV32ZVE32F-NEXT: lw t6, 40(a2)
; RV32ZVE32F-NEXT: j .LBB56_13
; RV32ZVE32F-NEXT: .LBB56_7:
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a3, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB56_2
; RV32ZVE32F-NEXT: .LBB56_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB56_3
; RV32ZVE32F-NEXT: .LBB56_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB56_4
; RV32ZVE32F-NEXT: .LBB56_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB56_5
; RV32ZVE32F-NEXT: .LBB56_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB56_6
; RV32ZVE32F-NEXT: .LBB56_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB56_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB56_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB56_17
; RV32ZVE32F-NEXT: .LBB56_15:
; RV32ZVE32F-NEXT: lw t0, 60(a2)
; RV32ZVE32F-NEXT: lw a2, 56(a2)
; RV32ZVE32F-NEXT: j .LBB56_18
; RV32ZVE32F-NEXT: .LBB56_16:
; RV32ZVE32F-NEXT: lw s0, 52(a2)
; RV32ZVE32F-NEXT: lw s1, 48(a2)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB56_15
; RV32ZVE32F-NEXT: .LBB56_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw t0, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: .LBB56_18: # %else20
; RV32ZVE32F-NEXT: sw a3, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a2, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a5, v0
; RV64ZVE32F-NEXT: andi a3, a5, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB56_3
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: ld a3, 0(a3)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB56_4
; RV64ZVE32F-NEXT: .LBB56_2:
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: j .LBB56_5
; RV64ZVE32F-NEXT: .LBB56_3:
; RV64ZVE32F-NEXT: ld a3, 0(a2)
; RV64ZVE32F-NEXT: andi a4, a5, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB56_2
; RV64ZVE32F-NEXT: .LBB56_4: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v10
; RV64ZVE32F-NEXT: slli a4, a4, 32
; RV64ZVE32F-NEXT: srli a4, a4, 29
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: .LBB56_5: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a6, a5, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: beqz a6, .LBB56_10
; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a6, v8
; RV64ZVE32F-NEXT: slli a6, a6, 32
; RV64ZVE32F-NEXT: srli a6, a6, 29
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: bnez a7, .LBB56_11
; RV64ZVE32F-NEXT: .LBB56_7:
; RV64ZVE32F-NEXT: ld a7, 24(a2)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: bnez t0, .LBB56_12
; RV64ZVE32F-NEXT: .LBB56_8:
; RV64ZVE32F-NEXT: ld t0, 32(a2)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: bnez t1, .LBB56_13
; RV64ZVE32F-NEXT: .LBB56_9:
; RV64ZVE32F-NEXT: ld t1, 40(a2)
; RV64ZVE32F-NEXT: j .LBB56_14
; RV64ZVE32F-NEXT: .LBB56_10:
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: andi a7, a5, 8
; RV64ZVE32F-NEXT: beqz a7, .LBB56_7
; RV64ZVE32F-NEXT: .LBB56_11: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a7, v8
; RV64ZVE32F-NEXT: slli a7, a7, 32
; RV64ZVE32F-NEXT: srli a7, a7, 29
; RV64ZVE32F-NEXT: add a7, a1, a7
; RV64ZVE32F-NEXT: ld a7, 0(a7)
; RV64ZVE32F-NEXT: andi t0, a5, 16
; RV64ZVE32F-NEXT: beqz t0, .LBB56_8
; RV64ZVE32F-NEXT: .LBB56_12: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s t0, v10
; RV64ZVE32F-NEXT: slli t0, t0, 32
; RV64ZVE32F-NEXT: srli t0, t0, 29
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a5, 32
; RV64ZVE32F-NEXT: beqz t1, .LBB56_9
; RV64ZVE32F-NEXT: .LBB56_13: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s t1, v8
; RV64ZVE32F-NEXT: slli t1, t1, 32
; RV64ZVE32F-NEXT: srli t1, t1, 29
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: .LBB56_14: # %else14
; RV64ZVE32F-NEXT: andi t2, a5, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz t2, .LBB56_17
; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s t2, v8
; RV64ZVE32F-NEXT: slli t2, t2, 32
; RV64ZVE32F-NEXT: srli t2, t2, 29
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: bnez a5, .LBB56_18
; RV64ZVE32F-NEXT: .LBB56_16:
; RV64ZVE32F-NEXT: ld a1, 56(a2)
; RV64ZVE32F-NEXT: j .LBB56_19
; RV64ZVE32F-NEXT: .LBB56_17:
; RV64ZVE32F-NEXT: ld t2, 48(a2)
; RV64ZVE32F-NEXT: andi a5, a5, -128
; RV64ZVE32F-NEXT: beqz a5, .LBB56_16
; RV64ZVE32F-NEXT: .LBB56_18: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB56_19: # %else20
; RV64ZVE32F-NEXT: sd a3, 0(a0)
; RV64ZVE32F-NEXT: sd a4, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd a7, 24(a0)
; RV64ZVE32F-NEXT: sd t0, 32(a0)
; RV64ZVE32F-NEXT: sd t1, 40(a0)
; RV64ZVE32F-NEXT: sd t2, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}

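; Gather of <8 x i64> using native i64 indices; the ZVE32F configurations cannot hold 64-bit elements in vector registers, so the indices are read as scalars (RV64) or only their low 32 bits are used (RV32).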
define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_v8i64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vnsrl.wi v16, v8, 0
; RV32V-NEXT: vsll.vi v8, v16, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsll.vi v8, v8, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lw a4, 56(a2)
; RV32ZVE32F-NEXT: lw a5, 48(a2)
; RV32ZVE32F-NEXT: lw a6, 40(a2)
; RV32ZVE32F-NEXT: lw a7, 32(a2)
; RV32ZVE32F-NEXT: lw t0, 24(a2)
; RV32ZVE32F-NEXT: lw t1, 0(a2)
; RV32ZVE32F-NEXT: lw t2, 8(a2)
; RV32ZVE32F-NEXT: lw a2, 16(a2)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t2
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s t0, v0
; RV32ZVE32F-NEXT: andi a1, t0, 1
; RV32ZVE32F-NEXT: beqz a1, .LBB57_7
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: lw a1, 4(a2)
; RV32ZVE32F-NEXT: lw a2, 0(a2)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: bnez a4, .LBB57_8
; RV32ZVE32F-NEXT: .LBB57_2:
; RV32ZVE32F-NEXT: lw a4, 12(a3)
; RV32ZVE32F-NEXT: lw a5, 8(a3)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: bnez a6, .LBB57_9
; RV32ZVE32F-NEXT: .LBB57_3:
; RV32ZVE32F-NEXT: lw a6, 20(a3)
; RV32ZVE32F-NEXT: lw a7, 16(a3)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: bnez t1, .LBB57_10
; RV32ZVE32F-NEXT: .LBB57_4:
; RV32ZVE32F-NEXT: lw t1, 28(a3)
; RV32ZVE32F-NEXT: lw t2, 24(a3)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: bnez t3, .LBB57_11
; RV32ZVE32F-NEXT: .LBB57_5:
; RV32ZVE32F-NEXT: lw t3, 36(a3)
; RV32ZVE32F-NEXT: lw t4, 32(a3)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: bnez t5, .LBB57_12
; RV32ZVE32F-NEXT: .LBB57_6:
; RV32ZVE32F-NEXT: lw t5, 44(a3)
; RV32ZVE32F-NEXT: lw t6, 40(a3)
; RV32ZVE32F-NEXT: j .LBB57_13
; RV32ZVE32F-NEXT: .LBB57_7:
; RV32ZVE32F-NEXT: lw a1, 4(a3)
; RV32ZVE32F-NEXT: lw a2, 0(a3)
; RV32ZVE32F-NEXT: andi a4, t0, 2
; RV32ZVE32F-NEXT: beqz a4, .LBB57_2
; RV32ZVE32F-NEXT: .LBB57_8: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a5, v10
; RV32ZVE32F-NEXT: lw a4, 4(a5)
; RV32ZVE32F-NEXT: lw a5, 0(a5)
; RV32ZVE32F-NEXT: andi a6, t0, 4
; RV32ZVE32F-NEXT: beqz a6, .LBB57_3
; RV32ZVE32F-NEXT: .LBB57_9: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a7, v10
; RV32ZVE32F-NEXT: lw a6, 4(a7)
; RV32ZVE32F-NEXT: lw a7, 0(a7)
; RV32ZVE32F-NEXT: andi t1, t0, 8
; RV32ZVE32F-NEXT: beqz t1, .LBB57_4
; RV32ZVE32F-NEXT: .LBB57_10: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s t2, v10
; RV32ZVE32F-NEXT: lw t1, 4(t2)
; RV32ZVE32F-NEXT: lw t2, 0(t2)
; RV32ZVE32F-NEXT: andi t3, t0, 16
; RV32ZVE32F-NEXT: beqz t3, .LBB57_5
; RV32ZVE32F-NEXT: .LBB57_11: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s t4, v10
; RV32ZVE32F-NEXT: lw t3, 4(t4)
; RV32ZVE32F-NEXT: lw t4, 0(t4)
; RV32ZVE32F-NEXT: andi t5, t0, 32
; RV32ZVE32F-NEXT: beqz t5, .LBB57_6
; RV32ZVE32F-NEXT: .LBB57_12: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s t6, v10
; RV32ZVE32F-NEXT: lw t5, 4(t6)
; RV32ZVE32F-NEXT: lw t6, 0(t6)
; RV32ZVE32F-NEXT: .LBB57_13: # %else14
; RV32ZVE32F-NEXT: addi sp, sp, -16
; RV32ZVE32F-NEXT: .cfi_def_cfa_offset 16
; RV32ZVE32F-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; RV32ZVE32F-NEXT: .cfi_offset s0, -4
; RV32ZVE32F-NEXT: .cfi_offset s1, -8
; RV32ZVE32F-NEXT: andi s0, t0, 64
; RV32ZVE32F-NEXT: beqz s0, .LBB57_16
; RV32ZVE32F-NEXT: # %bb.14: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s s1, v10
; RV32ZVE32F-NEXT: lw s0, 4(s1)
; RV32ZVE32F-NEXT: lw s1, 0(s1)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: bnez t0, .LBB57_17
; RV32ZVE32F-NEXT: .LBB57_15:
; RV32ZVE32F-NEXT: lw t0, 60(a3)
; RV32ZVE32F-NEXT: lw a3, 56(a3)
; RV32ZVE32F-NEXT: j .LBB57_18
; RV32ZVE32F-NEXT: .LBB57_16:
; RV32ZVE32F-NEXT: lw s0, 52(a3)
; RV32ZVE32F-NEXT: lw s1, 48(a3)
; RV32ZVE32F-NEXT: andi t0, t0, -128
; RV32ZVE32F-NEXT: beqz t0, .LBB57_15
; RV32ZVE32F-NEXT: .LBB57_17: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a3, v8
; RV32ZVE32F-NEXT: lw t0, 4(a3)
; RV32ZVE32F-NEXT: lw a3, 0(a3)
; RV32ZVE32F-NEXT: .LBB57_18: # %else20
; RV32ZVE32F-NEXT: sw a2, 0(a0)
; RV32ZVE32F-NEXT: sw a1, 4(a0)
; RV32ZVE32F-NEXT: sw a5, 8(a0)
; RV32ZVE32F-NEXT: sw a4, 12(a0)
; RV32ZVE32F-NEXT: sw a7, 16(a0)
; RV32ZVE32F-NEXT: sw a6, 20(a0)
; RV32ZVE32F-NEXT: sw t2, 24(a0)
; RV32ZVE32F-NEXT: sw t1, 28(a0)
; RV32ZVE32F-NEXT: sw t4, 32(a0)
; RV32ZVE32F-NEXT: sw t3, 36(a0)
; RV32ZVE32F-NEXT: sw t6, 40(a0)
; RV32ZVE32F-NEXT: sw t5, 44(a0)
; RV32ZVE32F-NEXT: sw s1, 48(a0)
; RV32ZVE32F-NEXT: sw s0, 52(a0)
; RV32ZVE32F-NEXT: sw a3, 56(a0)
; RV32ZVE32F-NEXT: sw t0, 60(a0)
; RV32ZVE32F-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
; RV32ZVE32F-NEXT: addi sp, sp, 16
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a7, v0
; RV64ZVE32F-NEXT: andi a4, a7, 1
; RV64ZVE32F-NEXT: beqz a4, .LBB57_9
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: ld a4, 0(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: ld a4, 0(a4)
; RV64ZVE32F-NEXT: andi a5, a7, 2
; RV64ZVE32F-NEXT: bnez a5, .LBB57_10
; RV64ZVE32F-NEXT: .LBB57_2:
; RV64ZVE32F-NEXT: ld a5, 8(a3)
; RV64ZVE32F-NEXT: andi a6, a7, 4
; RV64ZVE32F-NEXT: bnez a6, .LBB57_11
; RV64ZVE32F-NEXT: .LBB57_3:
; RV64ZVE32F-NEXT: ld a6, 16(a3)
; RV64ZVE32F-NEXT: andi t0, a7, 8
; RV64ZVE32F-NEXT: bnez t0, .LBB57_12
; RV64ZVE32F-NEXT: .LBB57_4:
; RV64ZVE32F-NEXT: ld t0, 24(a3)
; RV64ZVE32F-NEXT: andi t1, a7, 16
; RV64ZVE32F-NEXT: bnez t1, .LBB57_13
; RV64ZVE32F-NEXT: .LBB57_5:
; RV64ZVE32F-NEXT: ld t1, 32(a3)
; RV64ZVE32F-NEXT: andi t2, a7, 32
; RV64ZVE32F-NEXT: bnez t2, .LBB57_14
; RV64ZVE32F-NEXT: .LBB57_6:
; RV64ZVE32F-NEXT: ld t2, 40(a3)
; RV64ZVE32F-NEXT: andi t3, a7, 64
; RV64ZVE32F-NEXT: bnez t3, .LBB57_15
; RV64ZVE32F-NEXT: .LBB57_7:
; RV64ZVE32F-NEXT: ld t3, 48(a3)
; RV64ZVE32F-NEXT: andi a7, a7, -128
; RV64ZVE32F-NEXT: bnez a7, .LBB57_16
; RV64ZVE32F-NEXT: .LBB57_8:
; RV64ZVE32F-NEXT: ld a1, 56(a3)
; RV64ZVE32F-NEXT: j .LBB57_17
; RV64ZVE32F-NEXT: .LBB57_9:
; RV64ZVE32F-NEXT: ld a4, 0(a3)
; RV64ZVE32F-NEXT: andi a5, a7, 2
; RV64ZVE32F-NEXT: beqz a5, .LBB57_2
; RV64ZVE32F-NEXT: .LBB57_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a5, 8(a2)
; RV64ZVE32F-NEXT: slli a5, a5, 3
; RV64ZVE32F-NEXT: add a5, a1, a5
; RV64ZVE32F-NEXT: ld a5, 0(a5)
; RV64ZVE32F-NEXT: andi a6, a7, 4
; RV64ZVE32F-NEXT: beqz a6, .LBB57_3
; RV64ZVE32F-NEXT: .LBB57_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a6, 16(a2)
; RV64ZVE32F-NEXT: slli a6, a6, 3
; RV64ZVE32F-NEXT: add a6, a1, a6
; RV64ZVE32F-NEXT: ld a6, 0(a6)
; RV64ZVE32F-NEXT: andi t0, a7, 8
; RV64ZVE32F-NEXT: beqz t0, .LBB57_4
; RV64ZVE32F-NEXT: .LBB57_12: # %cond.load7
; RV64ZVE32F-NEXT: ld t0, 24(a2)
; RV64ZVE32F-NEXT: slli t0, t0, 3
; RV64ZVE32F-NEXT: add t0, a1, t0
; RV64ZVE32F-NEXT: ld t0, 0(t0)
; RV64ZVE32F-NEXT: andi t1, a7, 16
; RV64ZVE32F-NEXT: beqz t1, .LBB57_5
; RV64ZVE32F-NEXT: .LBB57_13: # %cond.load10
; RV64ZVE32F-NEXT: ld t1, 32(a2)
; RV64ZVE32F-NEXT: slli t1, t1, 3
; RV64ZVE32F-NEXT: add t1, a1, t1
; RV64ZVE32F-NEXT: ld t1, 0(t1)
; RV64ZVE32F-NEXT: andi t2, a7, 32
; RV64ZVE32F-NEXT: beqz t2, .LBB57_6
; RV64ZVE32F-NEXT: .LBB57_14: # %cond.load13
; RV64ZVE32F-NEXT: ld t2, 40(a2)
; RV64ZVE32F-NEXT: slli t2, t2, 3
; RV64ZVE32F-NEXT: add t2, a1, t2
; RV64ZVE32F-NEXT: ld t2, 0(t2)
; RV64ZVE32F-NEXT: andi t3, a7, 64
; RV64ZVE32F-NEXT: beqz t3, .LBB57_7
; RV64ZVE32F-NEXT: .LBB57_15: # %cond.load16
; RV64ZVE32F-NEXT: ld t3, 48(a2)
; RV64ZVE32F-NEXT: slli t3, t3, 3
; RV64ZVE32F-NEXT: add t3, a1, t3
; RV64ZVE32F-NEXT: ld t3, 0(t3)
; RV64ZVE32F-NEXT: andi a7, a7, -128
; RV64ZVE32F-NEXT: beqz a7, .LBB57_8
; RV64ZVE32F-NEXT: .LBB57_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a2, 56(a2)
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: ld a1, 0(a1)
; RV64ZVE32F-NEXT: .LBB57_17: # %else20
; RV64ZVE32F-NEXT: sd a4, 0(a0)
; RV64ZVE32F-NEXT: sd a5, 8(a0)
; RV64ZVE32F-NEXT: sd a6, 16(a0)
; RV64ZVE32F-NEXT: sd t0, 24(a0)
; RV64ZVE32F-NEXT: sd t1, 32(a0)
; RV64ZVE32F-NEXT: sd t2, 40(a0)
; RV64ZVE32F-NEXT: sd t3, 48(a0)
; RV64ZVE32F-NEXT: sd a1, 56(a0)
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
%v = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x i64> %passthru)
ret <8 x i64> %v
}

declare <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x half>)
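; Masked gather of a single half element.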
define <1 x half> @mgather_v1f16(<1 x ptr> %ptrs, <1 x i1> %m, <1 x half> %passthru) {
; RV32V-LABEL: mgather_v1f16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1f16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB58_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vle16.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB58_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x half> @llvm.masked.gather.v1f16.v1p0(<1 x ptr> %ptrs, i32 2, <1 x i1> %m, <1 x half> %passthru)
ret <1 x half> %v
}

declare <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x half>)
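; Masked gather of <2 x half>; RV64ZVE32F lowers each lane to a scalar flh guarded by its mask bit.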
define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passthru) {
; RV32V-LABEL: mgather_v2f16:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e16, mf4, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2f16:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv1r.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB59_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB59_4
; RV64ZVE32F-NEXT: .LBB59_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB59_3: # %cond.load
; RV64ZVE32F-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB59_2
; RV64ZVE32F-NEXT: .LBB59_4: # %cond.load1
; RV64ZVE32F-NEXT: flh fa5, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
%v = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x half> %passthru)
ret <2 x half> %v
}

declare <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x half>)
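; Masked gather of <4 x half>; RV64ZVE32F loads each pointer with ld and the element with flh.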
define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passthru) {
; RV32-LABEL: mgather_v4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v4f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v4f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB60_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB60_6
; RV64ZVE32F-NEXT: .LBB60_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB60_7
; RV64ZVE32F-NEXT: .LBB60_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB60_8
; RV64ZVE32F-NEXT: .LBB60_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB60_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB60_2
; RV64ZVE32F-NEXT: .LBB60_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB60_3
; RV64ZVE32F-NEXT: .LBB60_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB60_4
; RV64ZVE32F-NEXT: .LBB60_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %m, <4 x half> %passthru)
ret <4 x half> %v
}

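; All-ones mask: RV32 and RV64V emit an unmasked indexed load.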
define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
; RV32-LABEL: mgather_truemask_v4f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32-NEXT: vluxei32.v v9, (zero), v8
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64V-NEXT: vluxei64.v v10, (zero), v8
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v9
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: beqz zero, .LBB61_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB61_6
; RV64ZVE32F-NEXT: .LBB61_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB61_7
; RV64ZVE32F-NEXT: .LBB61_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB61_8
; RV64ZVE32F-NEXT: .LBB61_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB61_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB61_2
; RV64ZVE32F-NEXT: .LBB61_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB61_3
; RV64ZVE32F-NEXT: .LBB61_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB61_4
; RV64ZVE32F-NEXT: .LBB61_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%mhead = insertelement <4 x i1> poison, i1 1, i32 0
%mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> %mtrue, <4 x half> %passthru)
ret <4 x half> %v
}

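; All-zeros mask: the gather folds away and the passthru operand is returned.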
define <4 x half> @mgather_falsemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) {
; RV32-LABEL: mgather_falsemask_v4f16:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_falsemask_v4f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ret
%v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> zeroinitializer, <4 x half> %passthru)
ret <4 x half> %v
}

declare <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x half>)
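; Masked gather of <8 x half>.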
define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passthru) {
; RV32-LABEL: mgather_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v8f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB63_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB63_10
; RV64ZVE32F-NEXT: .LBB63_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB63_11
; RV64ZVE32F-NEXT: .LBB63_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB63_12
; RV64ZVE32F-NEXT: .LBB63_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB63_13
; RV64ZVE32F-NEXT: .LBB63_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB63_14
; RV64ZVE32F-NEXT: .LBB63_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: bnez a2, .LBB63_15
; RV64ZVE32F-NEXT: .LBB63_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB63_16
; RV64ZVE32F-NEXT: .LBB63_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB63_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB63_2
; RV64ZVE32F-NEXT: .LBB63_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB63_3
; RV64ZVE32F-NEXT: .LBB63_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB63_4
; RV64ZVE32F-NEXT: .LBB63_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB63_5
; RV64ZVE32F-NEXT: .LBB63_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB63_6
; RV64ZVE32F-NEXT: .LBB63_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB63_7
; RV64ZVE32F-NEXT: .LBB63_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB63_8
; RV64ZVE32F-NEXT: .LBB63_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
; RV64ZVE32F-NEXT: ret
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
ret <8 x half> %v
}

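; <8 x half> gather addressed as %base plus <8 x i8> indices scaled by the half element size.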
define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i8_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB64_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: .LBB64_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB64_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB64_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB64_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB64_13
; RV64ZVE32F-NEXT: .LBB64_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB64_14
; RV64ZVE32F-NEXT: .LBB64_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB64_9
; RV64ZVE32F-NEXT: .LBB64_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB64_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB64_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB64_16
; RV64ZVE32F-NEXT: .LBB64_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB64_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB64_6
; RV64ZVE32F-NEXT: .LBB64_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB64_7
; RV64ZVE32F-NEXT: .LBB64_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB64_8
; RV64ZVE32F-NEXT: j .LBB64_9
; RV64ZVE32F-NEXT: .LBB64_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB64_11
; RV64ZVE32F-NEXT: .LBB64_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
ret <8 x half> %v
}

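; As above, with the i8 indices sign-extended to i16 before the GEP.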
define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vsext.vf4 v10, v8
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB65_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: .LBB65_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB65_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB65_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB65_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB65_13
; RV64ZVE32F-NEXT: .LBB65_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB65_14
; RV64ZVE32F-NEXT: .LBB65_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB65_9
; RV64ZVE32F-NEXT: .LBB65_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB65_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB65_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB65_16
; RV64ZVE32F-NEXT: .LBB65_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB65_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB65_6
; RV64ZVE32F-NEXT: .LBB65_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB65_7
; RV64ZVE32F-NEXT: .LBB65_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB65_8
; RV64ZVE32F-NEXT: j .LBB65_9
; RV64ZVE32F-NEXT: .LBB65_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB65_11
; RV64ZVE32F-NEXT: .LBB65_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
ret <8 x half> %v
}

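; As above, with the i8 indices zero-extended to i16; RV64ZVE32F masks each index with andi 255.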
define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT: vzext.vf4 v10, v8
; RV32-NEXT: vadd.vv v10, v10, v10
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vzext.vf8 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB66_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: .LBB66_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB66_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB66_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB66_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB66_13
; RV64ZVE32F-NEXT: .LBB66_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB66_14
; RV64ZVE32F-NEXT: .LBB66_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB66_9
; RV64ZVE32F-NEXT: .LBB66_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB66_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB66_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB66_16
; RV64ZVE32F-NEXT: .LBB66_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB66_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB66_6
; RV64ZVE32F-NEXT: .LBB66_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB66_7
; RV64ZVE32F-NEXT: .LBB66_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB66_8
; RV64ZVE32F-NEXT: j .LBB66_9
; RV64ZVE32F-NEXT: .LBB66_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB66_11
; RV64ZVE32F-NEXT: .LBB66_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i16>
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
ret <8 x half> %v
}
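; Raw i16 indices into an f16 array: the index vector only needs to be scaled by
; the 2-byte element size (vwadd.vv doubles and widens on RV32; RV64V sign-extends
; to i64 and adds the vector to itself; RV64ZVE32F uses a scalar slli by 1).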
define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x half> %passthru) {
; RV32-LABEL: mgather_baseidx_v8f16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT: vwadd.vv v10, v8, v8
; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8f16:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v8
; RV64V-NEXT: vadd.vv v12, v12, v12
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8f16:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB67_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: .LBB67_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB67_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB67_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB67_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB67_13
; RV64ZVE32F-NEXT: .LBB67_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB67_14
; RV64ZVE32F-NEXT: .LBB67_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB67_9
; RV64ZVE32F-NEXT: .LBB67_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5
; RV64ZVE32F-NEXT: .LBB67_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB67_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB67_16
; RV64ZVE32F-NEXT: .LBB67_11: # %else20
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB67_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v11, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB67_6
; RV64ZVE32F-NEXT: .LBB67_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB67_7
; RV64ZVE32F-NEXT: .LBB67_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB67_8
; RV64ZVE32F-NEXT: j .LBB67_9
; RV64ZVE32F-NEXT: .LBB67_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 1
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flh fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB67_11
; RV64ZVE32F-NEXT: .LBB67_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 1
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flh fa5, 0(a0)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
%v = call <8 x half> @llvm.masked.gather.v8f16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x half> %passthru)
ret <8 x half> %v
}
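; f32 gathers. RV64ZVE32F has no 64-bit vector elements, so the pointer operands
; are handled as scalars: each active lane loads its address (or computes
; base+offset) in a GPR and gathers the element with a scalar flw.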
declare <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x float>)
define <1 x float> @mgather_v1f32(<1 x ptr> %ptrs, <1 x i1> %m, <1 x float> %passthru) {
; RV32V-LABEL: mgather_v1f32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e32, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1f32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv.v.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB68_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vle32.v v8, (a0)
; RV64ZVE32F-NEXT: .LBB68_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x float> @llvm.masked.gather.v1f32.v1p0(<1 x ptr> %ptrs, i32 4, <1 x i1> %m, <1 x float> %passthru)
ret <1 x float> %v
}
declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>)
define <2 x float> @mgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x float> %passthru) {
; RV32V-LABEL: mgather_v2f32:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv1r.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e32, mf2, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv1r.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2f32:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu
; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32ZVE32F-NEXT: vmv.v.v v8, v9
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB69_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB69_4
; RV64ZVE32F-NEXT: .LBB69_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB69_3: # %cond.load
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB69_2
; RV64ZVE32F-NEXT: .LBB69_4: # %cond.load1
; RV64ZVE32F-NEXT: flw fa5, 0(a1)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: ret
%v = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x float> %passthru)
ret <2 x float> %v
}
declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>)
define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %passthru) {
; RV32-LABEL: mgather_v4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v4f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v4f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB70_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB70_6
; RV64ZVE32F-NEXT: .LBB70_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB70_7
; RV64ZVE32F-NEXT: .LBB70_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB70_8
; RV64ZVE32F-NEXT: .LBB70_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB70_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB70_2
; RV64ZVE32F-NEXT: .LBB70_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB70_3
; RV64ZVE32F-NEXT: .LBB70_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB70_4
; RV64ZVE32F-NEXT: .LBB70_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x float> %passthru)
ret <4 x float> %v
}
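; An all-true mask: the RVV configurations emit an unmasked vluxei, while
; RV64ZVE32F still materializes the mask with vmset.m and tests each bit.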
define <4 x float> @mgather_truemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthru) {
; RV32-LABEL: mgather_truemask_v4f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vluxei32.v v8, (zero), v8
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vluxei64.v v10, (zero), v8
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_truemask_v4f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v9
; RV64ZVE32F-NEXT: vmv.x.s a1, v9
; RV64ZVE32F-NEXT: beqz zero, .LBB71_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB71_6
; RV64ZVE32F-NEXT: .LBB71_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB71_7
; RV64ZVE32F-NEXT: .LBB71_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB71_8
; RV64ZVE32F-NEXT: .LBB71_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB71_5: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB71_2
; RV64ZVE32F-NEXT: .LBB71_6: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB71_3
; RV64ZVE32F-NEXT: .LBB71_7: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB71_4
; RV64ZVE32F-NEXT: .LBB71_8: # %cond.load7
; RV64ZVE32F-NEXT: ld a0, 24(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%mhead = insertelement <4 x i1> poison, i1 1, i32 0
%mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mtrue, <4 x float> %passthru)
ret <4 x float> %v
}
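; An all-false mask folds the gather away entirely: only the passthru operand is
; returned.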
define <4 x float> @mgather_falsemask_v4f32(<4 x ptr> %ptrs, <4 x float> %passthru) {
; RV32-LABEL: mgather_falsemask_v4f32:
; RV32: # %bb.0:
; RV32-NEXT: vmv1r.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_falsemask_v4f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vmv1r.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: ret
%v = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> zeroinitializer, <4 x float> %passthru)
ret <4 x float> %v
}
declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>)
define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %passthru) {
; RV32-LABEL: mgather_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: bnez a2, .LBB73_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB73_10
; RV64ZVE32F-NEXT: .LBB73_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB73_11
; RV64ZVE32F-NEXT: .LBB73_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB73_12
; RV64ZVE32F-NEXT: .LBB73_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB73_13
; RV64ZVE32F-NEXT: .LBB73_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB73_14
; RV64ZVE32F-NEXT: .LBB73_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: bnez a2, .LBB73_15
; RV64ZVE32F-NEXT: .LBB73_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB73_16
; RV64ZVE32F-NEXT: .LBB73_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB73_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB73_2
; RV64ZVE32F-NEXT: .LBB73_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB73_3
; RV64ZVE32F-NEXT: .LBB73_11: # %cond.load4
; RV64ZVE32F-NEXT: ld a2, 16(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB73_4
; RV64ZVE32F-NEXT: .LBB73_12: # %cond.load7
; RV64ZVE32F-NEXT: ld a2, 24(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB73_5
; RV64ZVE32F-NEXT: .LBB73_13: # %cond.load10
; RV64ZVE32F-NEXT: ld a2, 32(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB73_6
; RV64ZVE32F-NEXT: .LBB73_14: # %cond.load13
; RV64ZVE32F-NEXT: ld a2, 40(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB73_7
; RV64ZVE32F-NEXT: .LBB73_15: # %cond.load16
; RV64ZVE32F-NEXT: ld a2, 48(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB73_8
; RV64ZVE32F-NEXT: .LBB73_16: # %cond.load19
; RV64ZVE32F-NEXT: ld a0, 56(a0)
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 7
; RV64ZVE32F-NEXT: ret
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
}
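; Raw i8 indices into an f32 array: indices are sign-extended and scaled by 4
; (vsll.vi 2 on the RVV configurations, scalar slli 2 on RV64ZVE32F).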
define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i8_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB74_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB74_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB74_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB74_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB74_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB74_13
; RV64ZVE32F-NEXT: .LBB74_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB74_14
; RV64ZVE32F-NEXT: .LBB74_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB74_9
; RV64ZVE32F-NEXT: .LBB74_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB74_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB74_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB74_16
; RV64ZVE32F-NEXT: .LBB74_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB74_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB74_6
; RV64ZVE32F-NEXT: .LBB74_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB74_7
; RV64ZVE32F-NEXT: .LBB74_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB74_8
; RV64ZVE32F-NEXT: j .LBB74_9
; RV64ZVE32F-NEXT: .LBB74_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB74_11
; RV64ZVE32F-NEXT: .LBB74_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
}
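; Explicit sext of the i8 indices matches the raw-index case above; on
; RV64ZVE32F the scalar vmv.x.s extract already sign-extends, so no extra
; masking is emitted.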
define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_v8i8_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf8 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB75_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB75_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB75_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB75_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB75_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB75_13
; RV64ZVE32F-NEXT: .LBB75_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB75_14
; RV64ZVE32F-NEXT: .LBB75_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB75_9
; RV64ZVE32F-NEXT: .LBB75_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB75_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB75_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB75_16
; RV64ZVE32F-NEXT: .LBB75_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB75_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB75_6
; RV64ZVE32F-NEXT: .LBB75_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB75_7
; RV64ZVE32F-NEXT: .LBB75_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB75_8
; RV64ZVE32F-NEXT: j .LBB75_9
; RV64ZVE32F-NEXT: .LBB75_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB75_11
; RV64ZVE32F-NEXT: .LBB75_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
}
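; Zero-extended i8 indices: RV64ZVE32F masks each extracted index with 255
; (andi) before scaling; the RVV configurations use vzext instead of vsext.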
define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vzext.vf4 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vzext.vf8 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB76_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB76_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB76_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB76_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB76_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB76_13
; RV64ZVE32F-NEXT: .LBB76_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB76_14
; RV64ZVE32F-NEXT: .LBB76_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB76_9
; RV64ZVE32F-NEXT: .LBB76_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB76_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB76_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB76_16
; RV64ZVE32F-NEXT: .LBB76_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB76_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB76_6
; RV64ZVE32F-NEXT: .LBB76_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB76_7
; RV64ZVE32F-NEXT: .LBB76_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB76_8
; RV64ZVE32F-NEXT: j .LBB76_9
; RV64ZVE32F-NEXT: .LBB76_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB76_11
; RV64ZVE32F-NEXT: .LBB76_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: andi a1, a1, 255
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i8> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
}
define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_v8i16_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsext.vf2 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i16_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB77_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB77_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB77_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB77_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB77_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB77_13
; RV64ZVE32F-NEXT: .LBB77_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB77_14
; RV64ZVE32F-NEXT: .LBB77_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB77_9
; RV64ZVE32F-NEXT: .LBB77_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB77_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB77_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB77_16
; RV64ZVE32F-NEXT: .LBB77_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB77_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB77_6
; RV64ZVE32F-NEXT: .LBB77_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB77_7
; RV64ZVE32F-NEXT: .LBB77_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB77_8
; RV64ZVE32F-NEXT: j .LBB77_9
; RV64ZVE32F-NEXT: .LBB77_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB77_11
; RV64ZVE32F-NEXT: .LBB77_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
}
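; sext i16 -> i32 indices generate the same code as the plain i16-index case
; above, since GEP indices are sign-extended to pointer width anyway.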
define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_v8i16_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsext.vf2 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf4 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB78_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB78_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB78_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB78_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB78_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB78_13
; RV64ZVE32F-NEXT: .LBB78_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB78_14
; RV64ZVE32F-NEXT: .LBB78_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB78_9
; RV64ZVE32F-NEXT: .LBB78_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB78_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB78_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB78_16
; RV64ZVE32F-NEXT: .LBB78_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB78_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB78_6
; RV64ZVE32F-NEXT: .LBB78_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB78_7
; RV64ZVE32F-NEXT: .LBB78_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB78_8
; RV64ZVE32F-NEXT: j .LBB78_9
; RV64ZVE32F-NEXT: .LBB78_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB78_11
; RV64ZVE32F-NEXT: .LBB78_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%eidxs = sext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
}
define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_v8i16_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vzext.vf2 v12, v8
; RV32-NEXT: vsll.vi v8, v12, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vzext.vf4 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lui a1, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: addiw a1, a1, -1
; RV64ZVE32F-NEXT: beqz a3, .LBB79_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB79_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB79_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB79_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB79_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB79_13
; RV64ZVE32F-NEXT: .LBB79_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB79_14
; RV64ZVE32F-NEXT: .LBB79_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB79_9
; RV64ZVE32F-NEXT: .LBB79_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB79_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB79_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: bnez a2, .LBB79_16
; RV64ZVE32F-NEXT: .LBB79_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB79_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB79_6
; RV64ZVE32F-NEXT: .LBB79_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB79_7
; RV64ZVE32F-NEXT: .LBB79_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB79_8
; RV64ZVE32F-NEXT: j .LBB79_9
; RV64ZVE32F-NEXT: .LBB79_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a3, a3, a1
; RV64ZVE32F-NEXT: slli a3, a3, 2
; RV64ZVE32F-NEXT: add a3, a0, a3
; RV64ZVE32F-NEXT: flw fa5, 0(a3)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB79_11
; RV64ZVE32F-NEXT: .LBB79_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: and a1, a2, a1
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%eidxs = zext <8 x i16> %idxs to <8 x i32>
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
}
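; Native i32 indices: RV32 shifts the index vector in place, while RV64V first
; sign-extends to i64 (vsext.vf2) to form 64-bit offsets.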
define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x float> %passthru) {
; RV32-LABEL: mgather_baseidx_v8f32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8f32:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; RV64V-NEXT: vsext.vf2 v12, v8
; RV64V-NEXT: vsll.vi v12, v12, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8f32:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB80_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5
; RV64ZVE32F-NEXT: .LBB80_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB80_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB80_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB80_12
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB80_13
; RV64ZVE32F-NEXT: .LBB80_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB80_14
; RV64ZVE32F-NEXT: .LBB80_7: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB80_9
; RV64ZVE32F-NEXT: .LBB80_8: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5
; RV64ZVE32F-NEXT: .LBB80_9: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB80_15
; RV64ZVE32F-NEXT: # %bb.10: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB80_16
; RV64ZVE32F-NEXT: .LBB80_11: # %else20
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB80_12: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v14, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB80_6
; RV64ZVE32F-NEXT: .LBB80_13: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB80_7
; RV64ZVE32F-NEXT: .LBB80_14: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB80_8
; RV64ZVE32F-NEXT: j .LBB80_9
; RV64ZVE32F-NEXT: .LBB80_15: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 2
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: flw fa5, 0(a2)
; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB80_11
; RV64ZVE32F-NEXT: .LBB80_16: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: slli a1, a1, 2
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: flw fa5, 0(a0)
; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5
; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
%ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
%v = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru)
ret <8 x float> %v
}
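
; f64 gathers. Zve32f limits vector elements to 32 bits, so the ZVE32F
; configurations expand these gathers into scalar fld's guarded by branches on
; the individual mask bits.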
declare <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x double>)
define <1 x double> @mgather_v1f64(<1 x ptr> %ptrs, <1 x i1> %m, <1 x double> %passthru) {
; RV32V-LABEL: mgather_v1f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v1f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 1, e64, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v1f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vfirst.m a0, v0
; RV32ZVE32F-NEXT: bnez a0, .LBB81_2
; RV32ZVE32F-NEXT: # %bb.1: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a0)
; RV32ZVE32F-NEXT: .LBB81_2: # %else
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v1f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vfirst.m a1, v0
; RV64ZVE32F-NEXT: bnez a1, .LBB81_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: fld fa0, 0(a0)
; RV64ZVE32F-NEXT: .LBB81_2: # %else
; RV64ZVE32F-NEXT: ret
%v = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> %ptrs, i32 8, <1 x i1> %m, <1 x double> %passthru)
ret <1 x double> %v
}
declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>)
define <2 x double> @mgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x double> %passthru) {
; RV32V-LABEL: mgather_v2f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; RV32V-NEXT: vluxei32.v v9, (zero), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v9
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v2f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v2f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a0, v0
; RV32ZVE32F-NEXT: andi a1, a0, 1
; RV32ZVE32F-NEXT: bnez a1, .LBB82_3
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a0, a0, 2
; RV32ZVE32F-NEXT: bnez a0, .LBB82_4
; RV32ZVE32F-NEXT: .LBB82_2: # %else2
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB82_3: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a1)
; RV32ZVE32F-NEXT: andi a0, a0, 2
; RV32ZVE32F-NEXT: beqz a0, .LBB82_2
; RV32ZVE32F-NEXT: .LBB82_4: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a0, v8
; RV32ZVE32F-NEXT: fld fa1, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_v2f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB82_3
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB82_4
; RV64ZVE32F-NEXT: .LBB82_2: # %else2
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB82_3: # %cond.load
; RV64ZVE32F-NEXT: fld fa0, 0(a0)
; RV64ZVE32F-NEXT: andi a2, a2, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB82_2
; RV64ZVE32F-NEXT: .LBB82_4: # %cond.load1
; RV64ZVE32F-NEXT: fld fa1, 0(a1)
; RV64ZVE32F-NEXT: ret
%v = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 8, <2 x i1> %m, <2 x double> %passthru)
ret <2 x double> %v
}
declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>)
define <4 x double> @mgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, <4 x double> %passthru) {
; RV32V-LABEL: mgather_v4f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV32V-NEXT: vluxei32.v v10, (zero), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v10
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v4f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64V-NEXT: vluxei64.v v10, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v4f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB83_6
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB83_7
; RV32ZVE32F-NEXT: .LBB83_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB83_8
; RV32ZVE32F-NEXT: .LBB83_3: # %else5
; RV32ZVE32F-NEXT: andi a1, a1, 8
; RV32ZVE32F-NEXT: beqz a1, .LBB83_5
; RV32ZVE32F-NEXT: .LBB83_4: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
; RV32ZVE32F-NEXT: .LBB83_5: # %else8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB83_6: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB83_2
; RV32ZVE32F-NEXT: .LBB83_7: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB83_3
; RV32ZVE32F-NEXT: .LBB83_8: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, 8
; RV32ZVE32F-NEXT: bnez a1, .LBB83_4
; RV32ZVE32F-NEXT: j .LBB83_5
;
; RV64ZVE32F-LABEL: mgather_v4f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB83_6
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB83_7
; RV64ZVE32F-NEXT: .LBB83_2: # %else2
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: bnez a3, .LBB83_8
; RV64ZVE32F-NEXT: .LBB83_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a2, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB83_5
; RV64ZVE32F-NEXT: .LBB83_4: # %cond.load7
; RV64ZVE32F-NEXT: ld a1, 24(a1)
; RV64ZVE32F-NEXT: fld fa3, 0(a1)
; RV64ZVE32F-NEXT: .LBB83_5: # %else8
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB83_6: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB83_2
; RV64ZVE32F-NEXT: .LBB83_7: # %cond.load1
; RV64ZVE32F-NEXT: ld a3, 8(a1)
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: beqz a3, .LBB83_3
; RV64ZVE32F-NEXT: .LBB83_8: # %cond.load4
; RV64ZVE32F-NEXT: ld a3, 16(a1)
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a2, a2, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB83_4
; RV64ZVE32F-NEXT: j .LBB83_5
%v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %m, <4 x double> %passthru)
ret <4 x double> %v
}
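
; An all-ones mask lets the V configurations emit an unmasked vluxei, while the
; ZVE32F expansion still materializes the mask with vmset.m and branches on it.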
define <4 x double> @mgather_truemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passthru) {
; RV32V-LABEL: mgather_truemask_v4f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32V-NEXT: vluxei32.v v10, (zero), v8
; RV32V-NEXT: vmv.v.v v8, v10
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_truemask_v4f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vluxei64.v v8, (zero), v8
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_truemask_v4f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32ZVE32F-NEXT: vmset.m v9
; RV32ZVE32F-NEXT: vmv.x.s a1, v9
; RV32ZVE32F-NEXT: beqz zero, .LBB84_6
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB84_7
; RV32ZVE32F-NEXT: .LBB84_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB84_8
; RV32ZVE32F-NEXT: .LBB84_3: # %else5
; RV32ZVE32F-NEXT: andi a1, a1, 8
; RV32ZVE32F-NEXT: beqz a1, .LBB84_5
; RV32ZVE32F-NEXT: .LBB84_4: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa3, 0(a1)
; RV32ZVE32F-NEXT: .LBB84_5: # %else8
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB84_6: # %cond.load
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB84_2
; RV32ZVE32F-NEXT: .LBB84_7: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB84_3
; RV32ZVE32F-NEXT: .LBB84_8: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v9, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v9
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, 8
; RV32ZVE32F-NEXT: bnez a1, .LBB84_4
; RV32ZVE32F-NEXT: j .LBB84_5
;
; RV64ZVE32F-LABEL: mgather_truemask_v4f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v8
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: beqz zero, .LBB84_6
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB84_7
; RV64ZVE32F-NEXT: .LBB84_2: # %else2
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: bnez a3, .LBB84_8
; RV64ZVE32F-NEXT: .LBB84_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a2, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB84_5
; RV64ZVE32F-NEXT: .LBB84_4: # %cond.load7
; RV64ZVE32F-NEXT: ld a1, 24(a1)
; RV64ZVE32F-NEXT: fld fa3, 0(a1)
; RV64ZVE32F-NEXT: .LBB84_5: # %else8
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB84_6: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB84_2
; RV64ZVE32F-NEXT: .LBB84_7: # %cond.load1
; RV64ZVE32F-NEXT: ld a3, 8(a1)
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: beqz a3, .LBB84_3
; RV64ZVE32F-NEXT: .LBB84_8: # %cond.load4
; RV64ZVE32F-NEXT: ld a3, 16(a1)
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a2, a2, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB84_4
; RV64ZVE32F-NEXT: j .LBB84_5
%mhead = insertelement <4 x i1> poison, i1 1, i32 0
%mtrue = shufflevector <4 x i1> %mhead, <4 x i1> poison, <4 x i32> zeroinitializer
%v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> %mtrue, <4 x double> %passthru)
ret <4 x double> %v
}
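
; An all-zeros mask folds the gather away: only the passthru operand is
; returned (or stored through the result pointer).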
define <4 x double> @mgather_falsemask_v4f64(<4 x ptr> %ptrs, <4 x double> %passthru) {
; RV32V-LABEL: mgather_falsemask_v4f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vmv2r.v v8, v10
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_falsemask_v4f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vmv2r.v v8, v10
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_falsemask_v4f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_falsemask_v4f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: ret
%v = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %ptrs, i32 8, <4 x i1> zeroinitializer, <4 x double> %passthru)
ret <4 x double> %v
}
declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x double>)
define <8 x double> @mgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (zero), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vluxei64.v v12, (zero), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB86_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB86_11
; RV32ZVE32F-NEXT: .LBB86_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB86_12
; RV32ZVE32F-NEXT: .LBB86_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB86_13
; RV32ZVE32F-NEXT: .LBB86_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB86_14
; RV32ZVE32F-NEXT: .LBB86_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB86_15
; RV32ZVE32F-NEXT: .LBB86_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB86_16
; RV32ZVE32F-NEXT: .LBB86_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB86_9
; RV32ZVE32F-NEXT: .LBB86_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB86_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB86_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB86_2
; RV32ZVE32F-NEXT: .LBB86_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB86_3
; RV32ZVE32F-NEXT: .LBB86_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB86_4
; RV32ZVE32F-NEXT: .LBB86_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB86_5
; RV32ZVE32F-NEXT: .LBB86_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB86_6
; RV32ZVE32F-NEXT: .LBB86_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB86_7
; RV32ZVE32F-NEXT: .LBB86_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB86_8
; RV32ZVE32F-NEXT: j .LBB86_9
;
; RV64ZVE32F-LABEL: mgather_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: bnez a3, .LBB86_10
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB86_11
; RV64ZVE32F-NEXT: .LBB86_2: # %else2
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: bnez a3, .LBB86_12
; RV64ZVE32F-NEXT: .LBB86_3: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB86_13
; RV64ZVE32F-NEXT: .LBB86_4: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB86_14
; RV64ZVE32F-NEXT: .LBB86_5: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB86_15
; RV64ZVE32F-NEXT: .LBB86_6: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: bnez a3, .LBB86_16
; RV64ZVE32F-NEXT: .LBB86_7: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB86_9
; RV64ZVE32F-NEXT: .LBB86_8: # %cond.load19
; RV64ZVE32F-NEXT: ld a1, 56(a1)
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB86_9: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB86_10: # %cond.load
; RV64ZVE32F-NEXT: ld a3, 0(a1)
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB86_2
; RV64ZVE32F-NEXT: .LBB86_11: # %cond.load1
; RV64ZVE32F-NEXT: ld a3, 8(a1)
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: beqz a3, .LBB86_3
; RV64ZVE32F-NEXT: .LBB86_12: # %cond.load4
; RV64ZVE32F-NEXT: ld a3, 16(a1)
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB86_4
; RV64ZVE32F-NEXT: .LBB86_13: # %cond.load7
; RV64ZVE32F-NEXT: ld a3, 24(a1)
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB86_5
; RV64ZVE32F-NEXT: .LBB86_14: # %cond.load10
; RV64ZVE32F-NEXT: ld a3, 32(a1)
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB86_6
; RV64ZVE32F-NEXT: .LBB86_15: # %cond.load13
; RV64ZVE32F-NEXT: ld a3, 40(a1)
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: beqz a3, .LBB86_7
; RV64ZVE32F-NEXT: .LBB86_16: # %cond.load16
; RV64ZVE32F-NEXT: ld a3, 48(a1)
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: bnez a2, .LBB86_8
; RV64ZVE32F-NEXT: j .LBB86_9
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
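
; i8 indices are widened before scaling: vsext.vf4 to i32 on RV32, vsext.vf8 to
; i64 on RV64V, and scalar extraction of each index for RV64ZVE32F.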
define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_v8i8_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf4 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i8_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf8 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i8_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB87_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB87_11
; RV32ZVE32F-NEXT: .LBB87_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB87_12
; RV32ZVE32F-NEXT: .LBB87_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB87_13
; RV32ZVE32F-NEXT: .LBB87_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB87_14
; RV32ZVE32F-NEXT: .LBB87_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB87_15
; RV32ZVE32F-NEXT: .LBB87_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB87_16
; RV32ZVE32F-NEXT: .LBB87_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB87_9
; RV32ZVE32F-NEXT: .LBB87_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB87_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB87_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB87_2
; RV32ZVE32F-NEXT: .LBB87_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB87_3
; RV32ZVE32F-NEXT: .LBB87_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB87_4
; RV32ZVE32F-NEXT: .LBB87_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB87_5
; RV32ZVE32F-NEXT: .LBB87_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB87_6
; RV32ZVE32F-NEXT: .LBB87_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB87_7
; RV32ZVE32F-NEXT: .LBB87_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB87_8
; RV32ZVE32F-NEXT: j .LBB87_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB87_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB87_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB87_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB87_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB87_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB87_15
; RV64ZVE32F-NEXT: .LBB87_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB87_16
; RV64ZVE32F-NEXT: .LBB87_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB87_9
; RV64ZVE32F-NEXT: .LBB87_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB87_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB87_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB87_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB87_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB87_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB87_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB87_6
; RV64ZVE32F-NEXT: .LBB87_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB87_7
; RV64ZVE32F-NEXT: .LBB87_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB87_8
; RV64ZVE32F-NEXT: j .LBB87_9
%ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
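
; An explicit sext of the i8 indices to i64 lowers to the same code as the
; direct i8-index case above.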
define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_sext_v8i8_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf4 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i8_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf8 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB88_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB88_11
; RV32ZVE32F-NEXT: .LBB88_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB88_12
; RV32ZVE32F-NEXT: .LBB88_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB88_13
; RV32ZVE32F-NEXT: .LBB88_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB88_14
; RV32ZVE32F-NEXT: .LBB88_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB88_15
; RV32ZVE32F-NEXT: .LBB88_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB88_16
; RV32ZVE32F-NEXT: .LBB88_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB88_9
; RV32ZVE32F-NEXT: .LBB88_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB88_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB88_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB88_2
; RV32ZVE32F-NEXT: .LBB88_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB88_3
; RV32ZVE32F-NEXT: .LBB88_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB88_4
; RV32ZVE32F-NEXT: .LBB88_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB88_5
; RV32ZVE32F-NEXT: .LBB88_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB88_6
; RV32ZVE32F-NEXT: .LBB88_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB88_7
; RV32ZVE32F-NEXT: .LBB88_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB88_8
; RV32ZVE32F-NEXT: j .LBB88_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB88_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB88_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB88_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB88_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB88_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB88_15
; RV64ZVE32F-NEXT: .LBB88_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB88_16
; RV64ZVE32F-NEXT: .LBB88_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB88_9
; RV64ZVE32F-NEXT: .LBB88_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB88_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB88_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB88_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB88_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB88_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB88_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB88_6
; RV64ZVE32F-NEXT: .LBB88_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB88_7
; RV64ZVE32F-NEXT: .LBB88_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB88_8
; RV64ZVE32F-NEXT: j .LBB88_9
%eidxs = sext <8 x i8> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
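
; Zero-extended i8 indices: the vector configurations switch to vzext, and
; RV64ZVE32F masks each extracted index with 255 before scaling.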
define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vzext.vf4 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vzext.vf8 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf4 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB89_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB89_11
; RV32ZVE32F-NEXT: .LBB89_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB89_12
; RV32ZVE32F-NEXT: .LBB89_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB89_13
; RV32ZVE32F-NEXT: .LBB89_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB89_14
; RV32ZVE32F-NEXT: .LBB89_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB89_15
; RV32ZVE32F-NEXT: .LBB89_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB89_16
; RV32ZVE32F-NEXT: .LBB89_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB89_9
; RV32ZVE32F-NEXT: .LBB89_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB89_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB89_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB89_2
; RV32ZVE32F-NEXT: .LBB89_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB89_3
; RV32ZVE32F-NEXT: .LBB89_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB89_4
; RV32ZVE32F-NEXT: .LBB89_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB89_5
; RV32ZVE32F-NEXT: .LBB89_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB89_6
; RV32ZVE32F-NEXT: .LBB89_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB89_7
; RV32ZVE32F-NEXT: .LBB89_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB89_8
; RV32ZVE32F-NEXT: j .LBB89_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i8_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB89_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB89_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB89_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB89_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB89_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB89_15
; RV64ZVE32F-NEXT: .LBB89_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB89_16
; RV64ZVE32F-NEXT: .LBB89_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB89_9
; RV64ZVE32F-NEXT: .LBB89_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB89_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB89_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB89_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB89_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: andi a2, a2, 255
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB89_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB89_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB89_6
; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB89_7
; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: andi a3, a3, 255
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB89_8
; RV64ZVE32F-NEXT: j .LBB89_9
%eidxs = zext <8 x i8> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
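
; i16 indices: vsext.vf2 to i32 on RV32, vsext.vf4 to i64 on RV64V, and scalar
; extraction for RV64ZVE32F, each followed by a shift by 3 to scale by the f64
; size.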
define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_v8i16_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf2 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i16_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf4 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB90_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB90_11
; RV32ZVE32F-NEXT: .LBB90_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB90_12
; RV32ZVE32F-NEXT: .LBB90_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB90_13
; RV32ZVE32F-NEXT: .LBB90_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB90_14
; RV32ZVE32F-NEXT: .LBB90_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB90_15
; RV32ZVE32F-NEXT: .LBB90_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB90_16
; RV32ZVE32F-NEXT: .LBB90_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB90_9
; RV32ZVE32F-NEXT: .LBB90_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB90_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB90_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB90_2
; RV32ZVE32F-NEXT: .LBB90_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB90_3
; RV32ZVE32F-NEXT: .LBB90_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB90_4
; RV32ZVE32F-NEXT: .LBB90_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB90_5
; RV32ZVE32F-NEXT: .LBB90_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB90_6
; RV32ZVE32F-NEXT: .LBB90_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB90_7
; RV32ZVE32F-NEXT: .LBB90_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB90_8
; RV32ZVE32F-NEXT: j .LBB90_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB90_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB90_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB90_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB90_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB90_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB90_15
; RV64ZVE32F-NEXT: .LBB90_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB90_16
; RV64ZVE32F-NEXT: .LBB90_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB90_9
; RV64ZVE32F-NEXT: .LBB90_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB90_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB90_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB90_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB90_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB90_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB90_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB90_6
; RV64ZVE32F-NEXT: .LBB90_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB90_7
; RV64ZVE32F-NEXT: .LBB90_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB90_8
; RV64ZVE32F-NEXT: j .LBB90_9
%ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
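
; Gather of <8 x double> where the <8 x i16> indices are sign-extended to i64
; before being used as element offsets from %base.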
define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_sext_v8i16_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsext.vf2 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i16_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf4 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB91_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB91_11
; RV32ZVE32F-NEXT: .LBB91_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB91_12
; RV32ZVE32F-NEXT: .LBB91_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB91_13
; RV32ZVE32F-NEXT: .LBB91_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB91_14
; RV32ZVE32F-NEXT: .LBB91_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB91_15
; RV32ZVE32F-NEXT: .LBB91_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB91_16
; RV32ZVE32F-NEXT: .LBB91_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB91_9
; RV32ZVE32F-NEXT: .LBB91_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB91_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB91_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB91_2
; RV32ZVE32F-NEXT: .LBB91_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB91_3
; RV32ZVE32F-NEXT: .LBB91_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB91_4
; RV32ZVE32F-NEXT: .LBB91_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB91_5
; RV32ZVE32F-NEXT: .LBB91_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB91_6
; RV32ZVE32F-NEXT: .LBB91_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB91_7
; RV32ZVE32F-NEXT: .LBB91_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB91_8
; RV32ZVE32F-NEXT: j .LBB91_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB91_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB91_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB91_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB91_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB91_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB91_15
; RV64ZVE32F-NEXT: .LBB91_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB91_16
; RV64ZVE32F-NEXT: .LBB91_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB91_9
; RV64ZVE32F-NEXT: .LBB91_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB91_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB91_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB91_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB91_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB91_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB91_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB91_6
; RV64ZVE32F-NEXT: .LBB91_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB91_7
; RV64ZVE32F-NEXT: .LBB91_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v9
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB91_8
; RV64ZVE32F-NEXT: j .LBB91_9
%eidxs = sext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
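
; Gather of <8 x double> where the <8 x i16> indices are zero-extended to i64;
; the RV64ZVE32F expansion masks each index with 0xffff before scaling it by 8.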
define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_zext_v8i16_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vzext.vf2 v10, v8
; RV32V-NEXT: vsll.vi v8, v10, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vzext.vf4 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vzext.vf2 v10, v8
; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB92_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB92_11
; RV32ZVE32F-NEXT: .LBB92_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB92_12
; RV32ZVE32F-NEXT: .LBB92_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB92_13
; RV32ZVE32F-NEXT: .LBB92_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB92_14
; RV32ZVE32F-NEXT: .LBB92_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB92_15
; RV32ZVE32F-NEXT: .LBB92_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB92_16
; RV32ZVE32F-NEXT: .LBB92_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB92_9
; RV32ZVE32F-NEXT: .LBB92_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB92_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB92_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB92_2
; RV32ZVE32F-NEXT: .LBB92_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB92_3
; RV32ZVE32F-NEXT: .LBB92_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB92_4
; RV32ZVE32F-NEXT: .LBB92_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB92_5
; RV32ZVE32F-NEXT: .LBB92_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB92_6
; RV32ZVE32F-NEXT: .LBB92_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB92_7
; RV32ZVE32F-NEXT: .LBB92_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB92_8
; RV32ZVE32F-NEXT: j .LBB92_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: lui a2, 16
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a4, a3, 1
; RV64ZVE32F-NEXT: addiw a2, a2, -1
; RV64ZVE32F-NEXT: beqz a4, .LBB92_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa0, 0(a4)
; RV64ZVE32F-NEXT: .LBB92_2: # %else
; RV64ZVE32F-NEXT: andi a4, a3, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB92_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa1, 0(a4)
; RV64ZVE32F-NEXT: .LBB92_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: andi a4, a3, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB92_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a4, a3, 8
; RV64ZVE32F-NEXT: bnez a4, .LBB92_15
; RV64ZVE32F-NEXT: .LBB92_6: # %else8
; RV64ZVE32F-NEXT: andi a4, a3, 16
; RV64ZVE32F-NEXT: bnez a4, .LBB92_16
; RV64ZVE32F-NEXT: .LBB92_7: # %else11
; RV64ZVE32F-NEXT: andi a4, a3, 32
; RV64ZVE32F-NEXT: beqz a4, .LBB92_9
; RV64ZVE32F-NEXT: .LBB92_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa5, 0(a4)
; RV64ZVE32F-NEXT: .LBB92_9: # %else14
; RV64ZVE32F-NEXT: andi a4, a3, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB92_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa6, 0(a4)
; RV64ZVE32F-NEXT: .LBB92_11: # %else17
; RV64ZVE32F-NEXT: andi a3, a3, -128
; RV64ZVE32F-NEXT: beqz a3, .LBB92_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: and a2, a3, a2
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB92_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB92_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa2, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 8
; RV64ZVE32F-NEXT: beqz a4, .LBB92_6
; RV64ZVE32F-NEXT: .LBB92_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a4, v8
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa3, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 16
; RV64ZVE32F-NEXT: beqz a4, .LBB92_7
; RV64ZVE32F-NEXT: .LBB92_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a4, v9
; RV64ZVE32F-NEXT: and a4, a4, a2
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa4, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 32
; RV64ZVE32F-NEXT: bnez a4, .LBB92_8
; RV64ZVE32F-NEXT: j .LBB92_9
%eidxs = zext <8 x i16> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
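
; Gather of <8 x double> addressed directly with <8 x i32> element offsets from
; %base.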
define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_v8i32_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v8, v8, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8i32_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf2 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8i32_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB93_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB93_11
; RV32ZVE32F-NEXT: .LBB93_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB93_12
; RV32ZVE32F-NEXT: .LBB93_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB93_13
; RV32ZVE32F-NEXT: .LBB93_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB93_14
; RV32ZVE32F-NEXT: .LBB93_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB93_15
; RV32ZVE32F-NEXT: .LBB93_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB93_16
; RV32ZVE32F-NEXT: .LBB93_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB93_9
; RV32ZVE32F-NEXT: .LBB93_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB93_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB93_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB93_2
; RV32ZVE32F-NEXT: .LBB93_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB93_3
; RV32ZVE32F-NEXT: .LBB93_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB93_4
; RV32ZVE32F-NEXT: .LBB93_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB93_5
; RV32ZVE32F-NEXT: .LBB93_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB93_6
; RV32ZVE32F-NEXT: .LBB93_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB93_7
; RV32ZVE32F-NEXT: .LBB93_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB93_8
; RV32ZVE32F-NEXT: j .LBB93_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB93_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB93_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB93_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB93_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB93_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB93_15
; RV64ZVE32F-NEXT: .LBB93_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB93_16
; RV64ZVE32F-NEXT: .LBB93_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB93_9
; RV64ZVE32F-NEXT: .LBB93_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB93_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB93_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB93_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB93_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB93_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB93_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB93_6
; RV64ZVE32F-NEXT: .LBB93_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB93_7
; RV64ZVE32F-NEXT: .LBB93_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB93_8
; RV64ZVE32F-NEXT: j .LBB93_9
%ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
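
; Gather of <8 x double> where the <8 x i32> indices are sign-extended to i64
; before being used as element offsets from %base.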
define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_sext_v8i32_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v8, v8, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_sext_v8i32_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsext.vf2 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB94_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB94_11
; RV32ZVE32F-NEXT: .LBB94_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB94_12
; RV32ZVE32F-NEXT: .LBB94_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB94_13
; RV32ZVE32F-NEXT: .LBB94_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB94_14
; RV32ZVE32F-NEXT: .LBB94_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB94_15
; RV32ZVE32F-NEXT: .LBB94_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB94_16
; RV32ZVE32F-NEXT: .LBB94_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB94_9
; RV32ZVE32F-NEXT: .LBB94_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB94_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB94_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB94_2
; RV32ZVE32F-NEXT: .LBB94_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB94_3
; RV32ZVE32F-NEXT: .LBB94_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB94_4
; RV32ZVE32F-NEXT: .LBB94_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB94_5
; RV32ZVE32F-NEXT: .LBB94_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB94_6
; RV32ZVE32F-NEXT: .LBB94_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB94_7
; RV32ZVE32F-NEXT: .LBB94_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB94_8
; RV32ZVE32F-NEXT: j .LBB94_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_sext_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB94_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB94_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB94_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB94_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB94_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB94_15
; RV64ZVE32F-NEXT: .LBB94_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB94_16
; RV64ZVE32F-NEXT: .LBB94_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB94_9
; RV64ZVE32F-NEXT: .LBB94_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB94_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB94_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB94_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB94_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB94_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB94_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB94_6
; RV64ZVE32F-NEXT: .LBB94_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB94_7
; RV64ZVE32F-NEXT: .LBB94_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 3
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB94_8
; RV64ZVE32F-NEXT: j .LBB94_9
%eidxs = sext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
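
; Gather of <8 x double> where the <8 x i32> indices are zero-extended to i64;
; the RV64ZVE32F expansion clears the upper bits and scales by 8 with an
; slli-by-32/srli-by-29 pair.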
define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_zext_v8i32_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vsll.vi v8, v8, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i32_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vzext.vf2 v16, v8
; RV64V-NEXT: vsll.vi v8, v16, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB95_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB95_11
; RV32ZVE32F-NEXT: .LBB95_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB95_12
; RV32ZVE32F-NEXT: .LBB95_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB95_13
; RV32ZVE32F-NEXT: .LBB95_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB95_14
; RV32ZVE32F-NEXT: .LBB95_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB95_15
; RV32ZVE32F-NEXT: .LBB95_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB95_16
; RV32ZVE32F-NEXT: .LBB95_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB95_9
; RV32ZVE32F-NEXT: .LBB95_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB95_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB95_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB95_2
; RV32ZVE32F-NEXT: .LBB95_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB95_3
; RV32ZVE32F-NEXT: .LBB95_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB95_4
; RV32ZVE32F-NEXT: .LBB95_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB95_5
; RV32ZVE32F-NEXT: .LBB95_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB95_6
; RV32ZVE32F-NEXT: .LBB95_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB95_7
; RV32ZVE32F-NEXT: .LBB95_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB95_8
; RV32ZVE32F-NEXT: j .LBB95_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i32_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v0
; RV64ZVE32F-NEXT: andi a3, a2, 1
; RV64ZVE32F-NEXT: beqz a3, .LBB95_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa0, 0(a3)
; RV64ZVE32F-NEXT: .LBB95_2: # %else
; RV64ZVE32F-NEXT: andi a3, a2, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB95_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa1, 0(a3)
; RV64ZVE32F-NEXT: .LBB95_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; RV64ZVE32F-NEXT: andi a3, a2, 4
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a3, .LBB95_14
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: bnez a3, .LBB95_15
; RV64ZVE32F-NEXT: .LBB95_6: # %else8
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: bnez a3, .LBB95_16
; RV64ZVE32F-NEXT: .LBB95_7: # %else11
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: beqz a3, .LBB95_9
; RV64ZVE32F-NEXT: .LBB95_8: # %cond.load13
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa5, 0(a3)
; RV64ZVE32F-NEXT: .LBB95_9: # %else14
; RV64ZVE32F-NEXT: andi a3, a2, 64
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: beqz a3, .LBB95_11
; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa6, 0(a3)
; RV64ZVE32F-NEXT: .LBB95_11: # %else17
; RV64ZVE32F-NEXT: andi a2, a2, -128
; RV64ZVE32F-NEXT: beqz a2, .LBB95_13
; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: slli a2, a2, 32
; RV64ZVE32F-NEXT: srli a2, a2, 29
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB95_13: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB95_14: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa2, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 8
; RV64ZVE32F-NEXT: beqz a3, .LBB95_6
; RV64ZVE32F-NEXT: .LBB95_15: # %cond.load7
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a3, v8
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa3, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 16
; RV64ZVE32F-NEXT: beqz a3, .LBB95_7
; RV64ZVE32F-NEXT: .LBB95_16: # %cond.load10
; RV64ZVE32F-NEXT: vmv.x.s a3, v10
; RV64ZVE32F-NEXT: slli a3, a3, 32
; RV64ZVE32F-NEXT: srli a3, a3, 29
; RV64ZVE32F-NEXT: add a3, a1, a3
; RV64ZVE32F-NEXT: fld fa4, 0(a3)
; RV64ZVE32F-NEXT: andi a3, a2, 32
; RV64ZVE32F-NEXT: bnez a3, .LBB95_8
; RV64ZVE32F-NEXT: j .LBB95_9
%eidxs = zext <8 x i32> %idxs to <8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
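
; Gather of <8 x double> addressed with full <8 x i64> element offsets from
; %base.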
define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, <8 x double> %passthru) {
; RV32V-LABEL: mgather_baseidx_v8f64:
; RV32V: # %bb.0:
; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32V-NEXT: vnsrl.wi v16, v8, 0
; RV32V-NEXT: vsll.vi v8, v16, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v8f64:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64V-NEXT: vsll.vi v8, v8, 3
; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV32ZVE32F-LABEL: mgather_baseidx_v8f64:
; RV32ZVE32F: # %bb.0:
; RV32ZVE32F-NEXT: lw a3, 56(a2)
; RV32ZVE32F-NEXT: lw a4, 48(a2)
; RV32ZVE32F-NEXT: lw a5, 40(a2)
; RV32ZVE32F-NEXT: lw a6, 32(a2)
; RV32ZVE32F-NEXT: lw a7, 24(a2)
; RV32ZVE32F-NEXT: lw t0, 0(a2)
; RV32ZVE32F-NEXT: lw t1, 8(a2)
; RV32ZVE32F-NEXT: lw a2, 16(a2)
; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t0
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, t1
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a2
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4
; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a3
; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3
; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a1, v0
; RV32ZVE32F-NEXT: andi a2, a1, 1
; RV32ZVE32F-NEXT: bnez a2, .LBB96_10
; RV32ZVE32F-NEXT: # %bb.1: # %else
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: bnez a2, .LBB96_11
; RV32ZVE32F-NEXT: .LBB96_2: # %else2
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: bnez a2, .LBB96_12
; RV32ZVE32F-NEXT: .LBB96_3: # %else5
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: bnez a2, .LBB96_13
; RV32ZVE32F-NEXT: .LBB96_4: # %else8
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: bnez a2, .LBB96_14
; RV32ZVE32F-NEXT: .LBB96_5: # %else11
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: bnez a2, .LBB96_15
; RV32ZVE32F-NEXT: .LBB96_6: # %else14
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: bnez a2, .LBB96_16
; RV32ZVE32F-NEXT: .LBB96_7: # %else17
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: beqz a1, .LBB96_9
; RV32ZVE32F-NEXT: .LBB96_8: # %cond.load19
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v8, v8, 7
; RV32ZVE32F-NEXT: vmv.x.s a1, v8
; RV32ZVE32F-NEXT: fld fa7, 0(a1)
; RV32ZVE32F-NEXT: .LBB96_9: # %else20
; RV32ZVE32F-NEXT: fsd fa0, 0(a0)
; RV32ZVE32F-NEXT: fsd fa1, 8(a0)
; RV32ZVE32F-NEXT: fsd fa2, 16(a0)
; RV32ZVE32F-NEXT: fsd fa3, 24(a0)
; RV32ZVE32F-NEXT: fsd fa4, 32(a0)
; RV32ZVE32F-NEXT: fsd fa5, 40(a0)
; RV32ZVE32F-NEXT: fsd fa6, 48(a0)
; RV32ZVE32F-NEXT: fsd fa7, 56(a0)
; RV32ZVE32F-NEXT: ret
; RV32ZVE32F-NEXT: .LBB96_10: # %cond.load
; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vmv.x.s a2, v8
; RV32ZVE32F-NEXT: fld fa0, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 2
; RV32ZVE32F-NEXT: beqz a2, .LBB96_2
; RV32ZVE32F-NEXT: .LBB96_11: # %cond.load1
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa1, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 4
; RV32ZVE32F-NEXT: beqz a2, .LBB96_3
; RV32ZVE32F-NEXT: .LBB96_12: # %cond.load4
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa2, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 8
; RV32ZVE32F-NEXT: beqz a2, .LBB96_4
; RV32ZVE32F-NEXT: .LBB96_13: # %cond.load7
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa3, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 16
; RV32ZVE32F-NEXT: beqz a2, .LBB96_5
; RV32ZVE32F-NEXT: .LBB96_14: # %cond.load10
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa4, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 32
; RV32ZVE32F-NEXT: beqz a2, .LBB96_6
; RV32ZVE32F-NEXT: .LBB96_15: # %cond.load13
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 5
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa5, 0(a2)
; RV32ZVE32F-NEXT: andi a2, a1, 64
; RV32ZVE32F-NEXT: beqz a2, .LBB96_7
; RV32ZVE32F-NEXT: .LBB96_16: # %cond.load16
; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 6
; RV32ZVE32F-NEXT: vmv.x.s a2, v10
; RV32ZVE32F-NEXT: fld fa6, 0(a2)
; RV32ZVE32F-NEXT: andi a1, a1, -128
; RV32ZVE32F-NEXT: bnez a1, .LBB96_8
; RV32ZVE32F-NEXT: j .LBB96_9
;
; RV64ZVE32F-LABEL: mgather_baseidx_v8f64:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a3, v0
; RV64ZVE32F-NEXT: andi a4, a3, 1
; RV64ZVE32F-NEXT: bnez a4, .LBB96_10
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a4, a3, 2
; RV64ZVE32F-NEXT: bnez a4, .LBB96_11
; RV64ZVE32F-NEXT: .LBB96_2: # %else2
; RV64ZVE32F-NEXT: andi a4, a3, 4
; RV64ZVE32F-NEXT: bnez a4, .LBB96_12
; RV64ZVE32F-NEXT: .LBB96_3: # %else5
; RV64ZVE32F-NEXT: andi a4, a3, 8
; RV64ZVE32F-NEXT: bnez a4, .LBB96_13
; RV64ZVE32F-NEXT: .LBB96_4: # %else8
; RV64ZVE32F-NEXT: andi a4, a3, 16
; RV64ZVE32F-NEXT: bnez a4, .LBB96_14
; RV64ZVE32F-NEXT: .LBB96_5: # %else11
; RV64ZVE32F-NEXT: andi a4, a3, 32
; RV64ZVE32F-NEXT: bnez a4, .LBB96_15
; RV64ZVE32F-NEXT: .LBB96_6: # %else14
; RV64ZVE32F-NEXT: andi a4, a3, 64
; RV64ZVE32F-NEXT: bnez a4, .LBB96_16
; RV64ZVE32F-NEXT: .LBB96_7: # %else17
; RV64ZVE32F-NEXT: andi a3, a3, -128
; RV64ZVE32F-NEXT: beqz a3, .LBB96_9
; RV64ZVE32F-NEXT: .LBB96_8: # %cond.load19
; RV64ZVE32F-NEXT: ld a2, 56(a2)
; RV64ZVE32F-NEXT: slli a2, a2, 3
; RV64ZVE32F-NEXT: add a1, a1, a2
; RV64ZVE32F-NEXT: fld fa7, 0(a1)
; RV64ZVE32F-NEXT: .LBB96_9: # %else20
; RV64ZVE32F-NEXT: fsd fa0, 0(a0)
; RV64ZVE32F-NEXT: fsd fa1, 8(a0)
; RV64ZVE32F-NEXT: fsd fa2, 16(a0)
; RV64ZVE32F-NEXT: fsd fa3, 24(a0)
; RV64ZVE32F-NEXT: fsd fa4, 32(a0)
; RV64ZVE32F-NEXT: fsd fa5, 40(a0)
; RV64ZVE32F-NEXT: fsd fa6, 48(a0)
; RV64ZVE32F-NEXT: fsd fa7, 56(a0)
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB96_10: # %cond.load
; RV64ZVE32F-NEXT: ld a4, 0(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa0, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 2
; RV64ZVE32F-NEXT: beqz a4, .LBB96_2
; RV64ZVE32F-NEXT: .LBB96_11: # %cond.load1
; RV64ZVE32F-NEXT: ld a4, 8(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa1, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 4
; RV64ZVE32F-NEXT: beqz a4, .LBB96_3
; RV64ZVE32F-NEXT: .LBB96_12: # %cond.load4
; RV64ZVE32F-NEXT: ld a4, 16(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa2, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 8
; RV64ZVE32F-NEXT: beqz a4, .LBB96_4
; RV64ZVE32F-NEXT: .LBB96_13: # %cond.load7
; RV64ZVE32F-NEXT: ld a4, 24(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa3, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 16
; RV64ZVE32F-NEXT: beqz a4, .LBB96_5
; RV64ZVE32F-NEXT: .LBB96_14: # %cond.load10
; RV64ZVE32F-NEXT: ld a4, 32(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa4, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 32
; RV64ZVE32F-NEXT: beqz a4, .LBB96_6
; RV64ZVE32F-NEXT: .LBB96_15: # %cond.load13
; RV64ZVE32F-NEXT: ld a4, 40(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa5, 0(a4)
; RV64ZVE32F-NEXT: andi a4, a3, 64
; RV64ZVE32F-NEXT: beqz a4, .LBB96_7
; RV64ZVE32F-NEXT: .LBB96_16: # %cond.load16
; RV64ZVE32F-NEXT: ld a4, 48(a2)
; RV64ZVE32F-NEXT: slli a4, a4, 3
; RV64ZVE32F-NEXT: add a4, a1, a4
; RV64ZVE32F-NEXT: fld fa6, 0(a4)
; RV64ZVE32F-NEXT: andi a3, a3, -128
; RV64ZVE32F-NEXT: bnez a3, .LBB96_8
; RV64ZVE32F-NEXT: j .LBB96_9
%ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
%v = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %ptrs, i32 8, <8 x i1> %m, <8 x double> %passthru)
ret <8 x double> %v
}
declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>)
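
; Gather of <16 x i8> with <16 x i8> element offsets from %base; the
; RV64ZVE32F lowering expands this to a branch and scalar load per mask bit.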
define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m, <16 x i8> %passthru) {
; RV32-LABEL: mgather_baseidx_v16i8:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vsext.vf4 v12, v8
; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV32-NEXT: vluxei32.v v9, (a0), v12, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v16i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64V-NEXT: vsext.vf8 v16, v8
; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64V-NEXT: vluxei64.v v9, (a0), v16, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v16i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB97_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: .LBB97_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB97_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1
; RV64ZVE32F-NEXT: .LBB97_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB97_25
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB97_26
; RV64ZVE32F-NEXT: .LBB97_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB97_8
; RV64ZVE32F-NEXT: .LBB97_7: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 4
; RV64ZVE32F-NEXT: .LBB97_8: # %else11
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB97_10
; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 5
; RV64ZVE32F-NEXT: .LBB97_10: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB97_27
; RV64ZVE32F-NEXT: # %bb.11: # %else17
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: bnez a2, .LBB97_28
; RV64ZVE32F-NEXT: .LBB97_12: # %else20
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: bnez a2, .LBB97_29
; RV64ZVE32F-NEXT: .LBB97_13: # %else23
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: beqz a2, .LBB97_15
; RV64ZVE32F-NEXT: .LBB97_14: # %cond.load25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 9
; RV64ZVE32F-NEXT: .LBB97_15: # %else26
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 1024
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB97_30
; RV64ZVE32F-NEXT: # %bb.16: # %else29
; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: bltz a2, .LBB97_31
; RV64ZVE32F-NEXT: .LBB97_17: # %else32
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: bltz a2, .LBB97_32
; RV64ZVE32F-NEXT: .LBB97_18: # %else35
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bgez a2, .LBB97_20
; RV64ZVE32F-NEXT: .LBB97_19: # %cond.load37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 13
; RV64ZVE32F-NEXT: .LBB97_20: # %else38
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2
; RV64ZVE32F-NEXT: bgez a2, .LBB97_22
; RV64ZVE32F-NEXT: # %bb.21: # %cond.load40
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 14
; RV64ZVE32F-NEXT: .LBB97_22: # %else41
; RV64ZVE32F-NEXT: lui a2, 1048568
; RV64ZVE32F-NEXT: and a1, a1, a2
; RV64ZVE32F-NEXT: beqz a1, .LBB97_24
; RV64ZVE32F-NEXT: # %bb.23: # %cond.load43
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 15
; RV64ZVE32F-NEXT: .LBB97_24: # %else44
; RV64ZVE32F-NEXT: vmv1r.v v8, v9
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB97_25: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v12, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB97_6
; RV64ZVE32F-NEXT: .LBB97_26: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v11
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB97_7
; RV64ZVE32F-NEXT: j .LBB97_8
; RV64ZVE32F-NEXT: .LBB97_27: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 6
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB97_12
; RV64ZVE32F-NEXT: .LBB97_28: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 7
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB97_13
; RV64ZVE32F-NEXT: .LBB97_29: # %cond.load22
; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 8
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: bnez a2, .LBB97_14
; RV64ZVE32F-NEXT: j .LBB97_15
; RV64ZVE32F-NEXT: .LBB97_30: # %cond.load28
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v11, a2
; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 10
; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: bgez a2, .LBB97_17
; RV64ZVE32F-NEXT: .LBB97_31: # %cond.load31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 11
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: bgez a2, .LBB97_18
; RV64ZVE32F-NEXT: .LBB97_32: # %cond.load34
; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v10
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 12
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bltz a2, .LBB97_19
; RV64ZVE32F-NEXT: j .LBB97_20
%ptrs = getelementptr inbounds i8, ptr %base, <16 x i8> %idxs
%v = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %ptrs, i32 2, <16 x i1> %m, <16 x i8> %passthru)
ret <16 x i8> %v
}

declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>)

define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m, <32 x i8> %passthru) {
; RV32-LABEL: mgather_baseidx_v32i8:
; RV32: # %bb.0:
; RV32-NEXT: li a1, 32
; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT: vsext.vf4 v16, v8
; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, mu
; RV32-NEXT: vluxei32.v v10, (a0), v16, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_v32i8:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64V-NEXT: vsext.vf8 v16, v8
; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; RV64V-NEXT: vmv1r.v v12, v10
; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t
; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64V-NEXT: vslidedown.vi v10, v10, 16
; RV64V-NEXT: vslidedown.vi v8, v8, 16
; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64V-NEXT: vsext.vf8 v16, v8
; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64V-NEXT: vslidedown.vi v0, v0, 2
; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t
; RV64V-NEXT: li a0, 32
; RV64V-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; RV64V-NEXT: vslideup.vi v12, v10, 16
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_baseidx_v32i8:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: beqz a2, .LBB98_2
; RV64ZVE32F-NEXT: # %bb.1: # %cond.load
; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: .LBB98_2: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_4
; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1
; RV64ZVE32F-NEXT: .LBB98_4: # %else2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB98_49
; RV64ZVE32F-NEXT: # %bb.5: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB98_50
; RV64ZVE32F-NEXT: .LBB98_6: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB98_8
; RV64ZVE32F-NEXT: .LBB98_7: # %cond.load10
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 4
; RV64ZVE32F-NEXT: .LBB98_8: # %else11
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB98_10
; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v14, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v14
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 5
; RV64ZVE32F-NEXT: .LBB98_10: # %else14
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB98_51
; RV64ZVE32F-NEXT: # %bb.11: # %else17
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: bnez a2, .LBB98_52
; RV64ZVE32F-NEXT: .LBB98_12: # %else20
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: bnez a2, .LBB98_53
; RV64ZVE32F-NEXT: .LBB98_13: # %else23
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: beqz a2, .LBB98_15
; RV64ZVE32F-NEXT: .LBB98_14: # %cond.load25
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 9
; RV64ZVE32F-NEXT: .LBB98_15: # %else26
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: andi a2, a1, 1024
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB98_17
; RV64ZVE32F-NEXT: # %bb.16: # %cond.load28
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 10
; RV64ZVE32F-NEXT: .LBB98_17: # %else29
; RV64ZVE32F-NEXT: slli a2, a1, 52
; RV64ZVE32F-NEXT: bgez a2, .LBB98_19
; RV64ZVE32F-NEXT: # %bb.18: # %cond.load31
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 11
; RV64ZVE32F-NEXT: .LBB98_19: # %else32
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma
; RV64ZVE32F-NEXT: slli a2, a1, 51
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 16
; RV64ZVE32F-NEXT: bgez a2, .LBB98_21
; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 12
; RV64ZVE32F-NEXT: .LBB98_21: # %else35
; RV64ZVE32F-NEXT: slli a2, a1, 50
; RV64ZVE32F-NEXT: bgez a2, .LBB98_23
; RV64ZVE32F-NEXT: # %bb.22: # %cond.load37
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 13
; RV64ZVE32F-NEXT: .LBB98_23: # %else38
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: slli a2, a1, 49
; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB98_54
; RV64ZVE32F-NEXT: # %bb.24: # %else41
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: bltz a2, .LBB98_55
; RV64ZVE32F-NEXT: .LBB98_25: # %else44
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bltz a2, .LBB98_56
; RV64ZVE32F-NEXT: .LBB98_26: # %else47
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bgez a2, .LBB98_28
; RV64ZVE32F-NEXT: .LBB98_27: # %cond.load49
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 18, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 17
; RV64ZVE32F-NEXT: .LBB98_28: # %else50
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: slli a2, a1, 45
; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB98_57
; RV64ZVE32F-NEXT: # %bb.29: # %else53
; RV64ZVE32F-NEXT: slli a2, a1, 44
; RV64ZVE32F-NEXT: bltz a2, .LBB98_58
; RV64ZVE32F-NEXT: .LBB98_30: # %else56
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bgez a2, .LBB98_32
; RV64ZVE32F-NEXT: .LBB98_31: # %cond.load58
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 20
; RV64ZVE32F-NEXT: .LBB98_32: # %else59
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; RV64ZVE32F-NEXT: slli a2, a1, 42
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8
; RV64ZVE32F-NEXT: bgez a2, .LBB98_34
; RV64ZVE32F-NEXT: # %bb.33: # %cond.load61
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 22, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 21
; RV64ZVE32F-NEXT: .LBB98_34: # %else62
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: slli a2, a1, 41
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB98_59
; RV64ZVE32F-NEXT: # %bb.35: # %else65
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bltz a2, .LBB98_60
; RV64ZVE32F-NEXT: .LBB98_36: # %else68
; RV64ZVE32F-NEXT: slli a2, a1, 39
; RV64ZVE32F-NEXT: bltz a2, .LBB98_61
; RV64ZVE32F-NEXT: .LBB98_37: # %else71
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bgez a2, .LBB98_39
; RV64ZVE32F-NEXT: .LBB98_38: # %cond.load73
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 26, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 25
; RV64ZVE32F-NEXT: .LBB98_39: # %else74
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: slli a2, a1, 37
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
; RV64ZVE32F-NEXT: bltz a2, .LBB98_62
; RV64ZVE32F-NEXT: # %bb.40: # %else77
; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: bltz a2, .LBB98_63
; RV64ZVE32F-NEXT: .LBB98_41: # %else80
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bltz a2, .LBB98_64
; RV64ZVE32F-NEXT: .LBB98_42: # %else83
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bgez a2, .LBB98_44
; RV64ZVE32F-NEXT: .LBB98_43: # %cond.load85
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 30, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 29
; RV64ZVE32F-NEXT: .LBB98_44: # %else86
; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: slli a2, a1, 33
; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
; RV64ZVE32F-NEXT: bgez a2, .LBB98_46
; RV64ZVE32F-NEXT: # %bb.45: # %cond.load88
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 31, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 30
; RV64ZVE32F-NEXT: .LBB98_46: # %else89
; RV64ZVE32F-NEXT: lui a2, 524288
; RV64ZVE32F-NEXT: and a1, a1, a2
; RV64ZVE32F-NEXT: beqz a1, .LBB98_48
; RV64ZVE32F-NEXT: # %bb.47: # %cond.load91
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: add a0, a0, a1
; RV64ZVE32F-NEXT: lbu a0, 0(a0)
; RV64ZVE32F-NEXT: li a1, 32
; RV64ZVE32F-NEXT: vmv.s.x v8, a0
; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 31
; RV64ZVE32F-NEXT: .LBB98_48: # %else92
; RV64ZVE32F-NEXT: vmv2r.v v8, v10
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB98_49: # %cond.load4
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB98_6
; RV64ZVE32F-NEXT: .LBB98_50: # %cond.load7
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB98_7
; RV64ZVE32F-NEXT: j .LBB98_8
; RV64ZVE32F-NEXT: .LBB98_51: # %cond.load16
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 6
; RV64ZVE32F-NEXT: andi a2, a1, 128
; RV64ZVE32F-NEXT: beqz a2, .LBB98_12
; RV64ZVE32F-NEXT: .LBB98_52: # %cond.load19
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v13
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 7
; RV64ZVE32F-NEXT: andi a2, a1, 256
; RV64ZVE32F-NEXT: beqz a2, .LBB98_13
; RV64ZVE32F-NEXT: .LBB98_53: # %cond.load22
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 8
; RV64ZVE32F-NEXT: andi a2, a1, 512
; RV64ZVE32F-NEXT: bnez a2, .LBB98_14
; RV64ZVE32F-NEXT: j .LBB98_15
; RV64ZVE32F-NEXT: .LBB98_54: # %cond.load40
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 14
; RV64ZVE32F-NEXT: slli a2, a1, 48
; RV64ZVE32F-NEXT: bgez a2, .LBB98_25
; RV64ZVE32F-NEXT: .LBB98_55: # %cond.load43
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 15
; RV64ZVE32F-NEXT: slli a2, a1, 47
; RV64ZVE32F-NEXT: bgez a2, .LBB98_26
; RV64ZVE32F-NEXT: .LBB98_56: # %cond.load46
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16
; RV64ZVE32F-NEXT: slli a2, a1, 46
; RV64ZVE32F-NEXT: bltz a2, .LBB98_27
; RV64ZVE32F-NEXT: j .LBB98_28
; RV64ZVE32F-NEXT: .LBB98_57: # %cond.load52
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v14, a2
; RV64ZVE32F-NEXT: vsetivli zero, 19, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 18
; RV64ZVE32F-NEXT: slli a2, a1, 44
; RV64ZVE32F-NEXT: bgez a2, .LBB98_30
; RV64ZVE32F-NEXT: .LBB98_58: # %cond.load55
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v12
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 19
; RV64ZVE32F-NEXT: slli a2, a1, 43
; RV64ZVE32F-NEXT: bltz a2, .LBB98_31
; RV64ZVE32F-NEXT: j .LBB98_32
; RV64ZVE32F-NEXT: .LBB98_59: # %cond.load64
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22
; RV64ZVE32F-NEXT: slli a2, a1, 40
; RV64ZVE32F-NEXT: bgez a2, .LBB98_36
; RV64ZVE32F-NEXT: .LBB98_60: # %cond.load67
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23
; RV64ZVE32F-NEXT: slli a2, a1, 39
; RV64ZVE32F-NEXT: bgez a2, .LBB98_37
; RV64ZVE32F-NEXT: .LBB98_61: # %cond.load70
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24
; RV64ZVE32F-NEXT: slli a2, a1, 38
; RV64ZVE32F-NEXT: bltz a2, .LBB98_38
; RV64ZVE32F-NEXT: j .LBB98_39
; RV64ZVE32F-NEXT: .LBB98_62: # %cond.load76
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 27, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 26
; RV64ZVE32F-NEXT: slli a2, a1, 36
; RV64ZVE32F-NEXT: bgez a2, .LBB98_41
; RV64ZVE32F-NEXT: .LBB98_63: # %cond.load79
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
; RV64ZVE32F-NEXT: vmv.x.s a2, v8
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27
; RV64ZVE32F-NEXT: slli a2, a1, 35
; RV64ZVE32F-NEXT: bgez a2, .LBB98_42
; RV64ZVE32F-NEXT: .LBB98_64: # %cond.load82
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a2, v9
; RV64ZVE32F-NEXT: add a2, a0, a2
; RV64ZVE32F-NEXT: lbu a2, 0(a2)
; RV64ZVE32F-NEXT: li a3, 32
; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v12, a2
; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28
; RV64ZVE32F-NEXT: slli a2, a1, 34
; RV64ZVE32F-NEXT: bltz a2, .LBB98_43
; RV64ZVE32F-NEXT: j .LBB98_44
%ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
%v = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %ptrs, i32 2, <32 x i1> %m, <32 x i8> %passthru)
ret <32 x i8> %v
}

; TODO: This should be a strided load with zero stride
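; The RV32 lowering below already emits the desired form (vlse32.v with the
; zero-stride operand); a comparable RV64V sequence, assuming the same
; selection were applied there, would be roughly:
;   vsetivli zero, 4, e32, m1, ta, ma
;   vlse32.v v8, (a0), zero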
define <4 x i32> @mgather_broadcast_load_unmasked(ptr %base) {
; RV32-LABEL: mgather_broadcast_load_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vlse32.v v8, (a0), zero
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_broadcast_load_unmasked:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vmv.v.i v10, 0
; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64V-NEXT: vluxei64.v v8, (a0), v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_broadcast_load_unmasked:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v8
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: # implicit-def: $v8
; RV64ZVE32F-NEXT: beqz zero, .LBB99_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB99_6
; RV64ZVE32F-NEXT: .LBB99_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB99_7
; RV64ZVE32F-NEXT: .LBB99_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB99_8
; RV64ZVE32F-NEXT: .LBB99_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB99_5: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB99_2
; RV64ZVE32F-NEXT: .LBB99_6: # %cond.load1
; RV64ZVE32F-NEXT: lw a2, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB99_3
; RV64ZVE32F-NEXT: .LBB99_7: # %cond.load4
; RV64ZVE32F-NEXT: lw a2, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB99_4
; RV64ZVE32F-NEXT: .LBB99_8: # %cond.load7
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
ret <4 x i32> %v
}

; Same as previous, but uses an explicit splat instead of a splat-via-gep
define <4 x i32> @mgather_broadcast_load_unmasked2(ptr %base) {
; RV32-LABEL: mgather_broadcast_load_unmasked2:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vluxei32.v v8, (zero), v8
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_broadcast_load_unmasked2:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vmv.v.x v10, a0
; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64V-NEXT: vluxei64.v v8, (zero), v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_broadcast_load_unmasked2:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v8
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: # implicit-def: $v8
; RV64ZVE32F-NEXT: beqz zero, .LBB100_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB100_6
; RV64ZVE32F-NEXT: .LBB100_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB100_7
; RV64ZVE32F-NEXT: .LBB100_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB100_8
; RV64ZVE32F-NEXT: .LBB100_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB100_5: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB100_2
; RV64ZVE32F-NEXT: .LBB100_6: # %cond.load1
; RV64ZVE32F-NEXT: lw a2, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB100_3
; RV64ZVE32F-NEXT: .LBB100_7: # %cond.load4
; RV64ZVE32F-NEXT: lw a2, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB100_4
; RV64ZVE32F-NEXT: .LBB100_8: # %cond.load7
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%ptrhead = insertelement <4 x ptr> poison, ptr %base, i32 0
%ptrs = shufflevector <4 x ptr> %ptrhead, <4 x ptr> poison, <4 x i32> zeroinitializer
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
ret <4 x i32> %v
}

define <4 x i32> @mgather_broadcast_load_masked(ptr %base, <4 x i1> %m) {
; RV32-LABEL: mgather_broadcast_load_masked:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vlse32.v v8, (a0), zero, v0.t
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_broadcast_load_masked:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vmv.v.i v10, 0
; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64V-NEXT: vluxei64.v v8, (a0), v10, v0.t
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_broadcast_load_masked:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
; RV64ZVE32F-NEXT: vmv.x.s a1, v0
; RV64ZVE32F-NEXT: andi a2, a1, 1
; RV64ZVE32F-NEXT: # implicit-def: $v8
; RV64ZVE32F-NEXT: bnez a2, .LBB101_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB101_6
; RV64ZVE32F-NEXT: .LBB101_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB101_7
; RV64ZVE32F-NEXT: .LBB101_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB101_8
; RV64ZVE32F-NEXT: .LBB101_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB101_5: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB101_2
; RV64ZVE32F-NEXT: .LBB101_6: # %cond.load1
; RV64ZVE32F-NEXT: lw a2, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB101_3
; RV64ZVE32F-NEXT: .LBB101_7: # %cond.load4
; RV64ZVE32F-NEXT: lw a2, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB101_4
; RV64ZVE32F-NEXT: .LBB101_8: # %cond.load7
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%ptrs = getelementptr inbounds i8, ptr %base, <4 x i32> zeroinitializer
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x i32> poison)
ret <4 x i32> %v
}

; TODO: Should be recognized as a unit stride load
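; The four offsets are consecutive i32 elements, so a unit-stride lowering
; (a sketch of the desired output, not the current one) would be roughly:
;   vsetivli zero, 4, e32, m1, ta, ma
;   vle32.v v8, (a0)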
define <4 x i32> @mgather_unit_stride_load(ptr %base) {
; RV32-LABEL: mgather_unit_stride_load:
; RV32: # %bb.0:
; RV32-NEXT: li a1, 4
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vlse32.v v8, (a0), a1
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_unit_stride_load:
; RV64V: # %bb.0:
; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64V-NEXT: vid.v v8
; RV64V-NEXT: vsll.vi v10, v8, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64V-NEXT: vluxei64.v v8, (a0), v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_unit_stride_load:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v8
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: # implicit-def: $v8
; RV64ZVE32F-NEXT: beqz zero, .LBB102_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB102_6
; RV64ZVE32F-NEXT: .LBB102_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB102_7
; RV64ZVE32F-NEXT: .LBB102_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB102_8
; RV64ZVE32F-NEXT: .LBB102_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB102_5: # %cond.load
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a0), zero
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB102_2
; RV64ZVE32F-NEXT: .LBB102_6: # %cond.load1
; RV64ZVE32F-NEXT: addi a2, a0, 4
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB102_3
; RV64ZVE32F-NEXT: .LBB102_7: # %cond.load4
; RV64ZVE32F-NEXT: addi a2, a0, 8
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB102_4
; RV64ZVE32F-NEXT: .LBB102_8: # %cond.load7
; RV64ZVE32F-NEXT: addi a0, a0, 12
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
ret <4 x i32> %v
}

; TODO: Recognize as a unit stride load with a 16-byte offset
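; A sketch of the desired output, assuming the 16-byte offset is folded into
; the base address and the access becomes a plain unit-stride load:
;   addi a0, a0, 16
;   vsetivli zero, 4, e32, m1, ta, ma
;   vle32.v v8, (a0)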
define <4 x i32> @mgather_unit_stride_load_with_offset(ptr %base) {
; RV32-LABEL: mgather_unit_stride_load_with_offset:
; RV32: # %bb.0:
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: li a1, 4
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT: vlse32.v v8, (a0), a1
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_unit_stride_load_with_offset:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, %hi(.LCPI103_0)
; RV64V-NEXT: addi a1, a1, %lo(.LCPI103_0)
; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64V-NEXT: vle64.v v10, (a1)
; RV64V-NEXT: vluxei64.v v8, (a0), v10
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_unit_stride_load_with_offset:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v8
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: # implicit-def: $v8
; RV64ZVE32F-NEXT: beqz zero, .LBB103_5
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB103_6
; RV64ZVE32F-NEXT: .LBB103_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB103_7
; RV64ZVE32F-NEXT: .LBB103_3: # %else5
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: bnez a1, .LBB103_8
; RV64ZVE32F-NEXT: .LBB103_4: # %else8
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB103_5: # %cond.load
; RV64ZVE32F-NEXT: addi a2, a0, 16
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vlse32.v v8, (a2), zero
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB103_2
; RV64ZVE32F-NEXT: .LBB103_6: # %cond.load1
; RV64ZVE32F-NEXT: addi a2, a0, 20
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB103_3
; RV64ZVE32F-NEXT: .LBB103_7: # %cond.load4
; RV64ZVE32F-NEXT: addi a2, a0, 24
; RV64ZVE32F-NEXT: lw a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a1, a1, 8
; RV64ZVE32F-NEXT: beqz a1, .LBB103_4
; RV64ZVE32F-NEXT: .LBB103_8: # %cond.load7
; RV64ZVE32F-NEXT: addi a0, a0, 28
; RV64ZVE32F-NEXT: lw a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: ret
%head = insertelement <4 x i1> poison, i1 true, i32 0
%allones = shufflevector <4 x i1> %head, <4 x i1> poison, <4 x i32> zeroinitializer
%ptrs = getelementptr inbounds i32, ptr %base, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%v = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %allones, <4 x i32> poison)
ret <4 x i32> %v
}

; TODO: Recognize as strided load with SEW=32
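; The indices pick adjacent i16 pairs that start 8 bytes apart, so a sketch of
; the desired output is a strided load at SEW=32 (stride of 8 bytes, VL=4):
;   li a1, 8
;   vsetivli zero, 4, e32, m1, ta, ma
;   vlse32.v v8, (a0), a1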
define <8 x i16> @mgather_strided_2xSEW(ptr %base) {
; RV32-LABEL: mgather_strided_2xSEW:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, %hi(.LCPI104_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI104_0)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle32.v v10, (a1)
; RV32-NEXT: vluxei32.v v8, (a0), v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_strided_2xSEW:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, %hi(.LCPI104_0)
; RV64V-NEXT: addi a1, a1, %lo(.LCPI104_0)
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vle64.v v12, (a1)
; RV64V-NEXT: vluxei64.v v8, (a0), v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_strided_2xSEW:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v8
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: # implicit-def: $v8
; RV64ZVE32F-NEXT: beqz zero, .LBB104_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB104_10
; RV64ZVE32F-NEXT: .LBB104_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB104_11
; RV64ZVE32F-NEXT: .LBB104_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB104_12
; RV64ZVE32F-NEXT: .LBB104_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB104_13
; RV64ZVE32F-NEXT: .LBB104_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB104_14
; RV64ZVE32F-NEXT: .LBB104_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: bnez a2, .LBB104_15
; RV64ZVE32F-NEXT: .LBB104_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB104_16
; RV64ZVE32F-NEXT: .LBB104_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB104_9: # %cond.load
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB104_2
; RV64ZVE32F-NEXT: .LBB104_10: # %cond.load1
; RV64ZVE32F-NEXT: addi a2, a0, 2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB104_3
; RV64ZVE32F-NEXT: .LBB104_11: # %cond.load4
; RV64ZVE32F-NEXT: addi a2, a0, 8
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB104_4
; RV64ZVE32F-NEXT: .LBB104_12: # %cond.load7
; RV64ZVE32F-NEXT: addi a2, a0, 10
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB104_5
; RV64ZVE32F-NEXT: .LBB104_13: # %cond.load10
; RV64ZVE32F-NEXT: addi a2, a0, 16
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB104_6
; RV64ZVE32F-NEXT: .LBB104_14: # %cond.load13
; RV64ZVE32F-NEXT: addi a2, a0, 18
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB104_7
; RV64ZVE32F-NEXT: .LBB104_15: # %cond.load16
; RV64ZVE32F-NEXT: addi a2, a0, 24
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB104_8
; RV64ZVE32F-NEXT: .LBB104_16: # %cond.load19
; RV64ZVE32F-NEXT: addi a0, a0, 26
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
; RV64ZVE32F-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i16 0
%allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
ret <8 x i16> %v
}

; TODO: Recognize as indexed load with SEW=32
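; The i16 pairs start at byte offsets 0, 4, 16 and 20, which are not equally
; spaced, so the SEW=32 form is still an indexed load; a sketch, with v10
; assumed to hold those byte offsets (e.g. loaded from a constant pool as in
; the RV32 output below):
;   vsetivli zero, 4, e32, m1, ta, ma
;   vluxei32.v v8, (a0), v10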
define <8 x i16> @mgather_gather_2xSEW(ptr %base) {
; RV32-LABEL: mgather_gather_2xSEW:
; RV32: # %bb.0:
; RV32-NEXT: lui a1, %hi(.LCPI105_0)
; RV32-NEXT: addi a1, a1, %lo(.LCPI105_0)
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT: vle32.v v10, (a1)
; RV32-NEXT: vluxei32.v v8, (a0), v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_gather_2xSEW:
; RV64V: # %bb.0:
; RV64V-NEXT: lui a1, %hi(.LCPI105_0)
; RV64V-NEXT: addi a1, a1, %lo(.LCPI105_0)
; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64V-NEXT: vle64.v v12, (a1)
; RV64V-NEXT: vluxei64.v v8, (a0), v12
; RV64V-NEXT: ret
;
; RV64ZVE32F-LABEL: mgather_gather_2xSEW:
; RV64ZVE32F: # %bb.0:
; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64ZVE32F-NEXT: vmset.m v8
; RV64ZVE32F-NEXT: vmv.x.s a1, v8
; RV64ZVE32F-NEXT: # implicit-def: $v8
; RV64ZVE32F-NEXT: beqz zero, .LBB105_9
; RV64ZVE32F-NEXT: # %bb.1: # %else
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: bnez a2, .LBB105_10
; RV64ZVE32F-NEXT: .LBB105_2: # %else2
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: bnez a2, .LBB105_11
; RV64ZVE32F-NEXT: .LBB105_3: # %else5
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: bnez a2, .LBB105_12
; RV64ZVE32F-NEXT: .LBB105_4: # %else8
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: bnez a2, .LBB105_13
; RV64ZVE32F-NEXT: .LBB105_5: # %else11
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: bnez a2, .LBB105_14
; RV64ZVE32F-NEXT: .LBB105_6: # %else14
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: bnez a2, .LBB105_15
; RV64ZVE32F-NEXT: .LBB105_7: # %else17
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: bnez a1, .LBB105_16
; RV64ZVE32F-NEXT: .LBB105_8: # %else20
; RV64ZVE32F-NEXT: ret
; RV64ZVE32F-NEXT: .LBB105_9: # %cond.load
; RV64ZVE32F-NEXT: vlse16.v v8, (a0), zero
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB105_2
; RV64ZVE32F-NEXT: .LBB105_10: # %cond.load1
; RV64ZVE32F-NEXT: addi a2, a0, 2
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
; RV64ZVE32F-NEXT: beqz a2, .LBB105_3
; RV64ZVE32F-NEXT: .LBB105_11: # %cond.load4
; RV64ZVE32F-NEXT: addi a2, a0, 4
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 3, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 2
; RV64ZVE32F-NEXT: andi a2, a1, 8
; RV64ZVE32F-NEXT: beqz a2, .LBB105_4
; RV64ZVE32F-NEXT: .LBB105_12: # %cond.load7
; RV64ZVE32F-NEXT: addi a2, a0, 6
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3
; RV64ZVE32F-NEXT: andi a2, a1, 16
; RV64ZVE32F-NEXT: beqz a2, .LBB105_5
; RV64ZVE32F-NEXT: .LBB105_13: # %cond.load10
; RV64ZVE32F-NEXT: addi a2, a0, 16
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 4
; RV64ZVE32F-NEXT: andi a2, a1, 32
; RV64ZVE32F-NEXT: beqz a2, .LBB105_6
; RV64ZVE32F-NEXT: .LBB105_14: # %cond.load13
; RV64ZVE32F-NEXT: addi a2, a0, 18
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 5
; RV64ZVE32F-NEXT: andi a2, a1, 64
; RV64ZVE32F-NEXT: beqz a2, .LBB105_7
; RV64ZVE32F-NEXT: .LBB105_15: # %cond.load16
; RV64ZVE32F-NEXT: addi a2, a0, 20
; RV64ZVE32F-NEXT: lh a2, 0(a2)
; RV64ZVE32F-NEXT: vsetivli zero, 7, e16, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 6
; RV64ZVE32F-NEXT: andi a1, a1, -128
; RV64ZVE32F-NEXT: beqz a1, .LBB105_8
; RV64ZVE32F-NEXT: .LBB105_16: # %cond.load19
; RV64ZVE32F-NEXT: addi a0, a0, 22
; RV64ZVE32F-NEXT: lh a0, 0(a0)
; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
; RV64ZVE32F-NEXT: vmv.s.x v9, a0
; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7
; RV64ZVE32F-NEXT: ret
%head = insertelement <8 x i1> poison, i1 true, i16 0
%allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
%ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
%v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %allones, <8 x i16> poison)
ret <8 x i16> %v
}