mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-14 17:56:39 +00:00
[PowerPC] Add Dense Math binary integer outer-Product accumulate to DMR Instructions (#130791)
This commit adds the following Dense Math Facility integer calculation instructions: dmxvi8gerx4, dmxvi8gerx4pp, dmxvi8gerx4spp, pmdmxvi8gerx4, pmdmxvi8gerx4pp, and pmdmxvi8gerx4spp, along with their corresponding intrinsics and tests.
This commit is contained in:
parent
a2d7451a13
commit
22c6674f1d
@ -280,6 +280,13 @@ multiclass PowerPC_MMA_ACC_PP_Intrinsic<list<LLVMType> args> {
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Defines a pair of intrinsics that both return a v1024i1 value and are
// marked IntrNoMem:
//   NAME    - takes exactly the operand types listed in `args`.
//   NAME#pp - takes a leading v1024i1 operand followed by `args`.
multiclass PowerPC_MMA_DMR_PP_Intrinsic<list<LLVMType> args> {
|
||||
def NAME: DefaultAttrsIntrinsic<[llvm_v1024i1_ty], args, [IntrNoMem]>;
|
||||
def pp : DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
|
||||
!listconcat([llvm_v1024i1_ty], args),
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PowerPC Altivec Intrinsic Class Definitions.
|
||||
//
|
||||
@ -1711,6 +1718,20 @@ let TargetPrefix = "ppc" in {
|
||||
[llvm_v512i1_ty, llvm_v16i8_ty, llvm_v16i8_ty,
|
||||
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
// dmxvi8gerx4 and dmxvi8gerx4pp: operands are a v256i1 and a v16i8; the pp
// variant generated by the multiclass additionally takes a v1024i1 first.
defm int_ppc_mma_dmxvi8gerx4 :
|
||||
PowerPC_MMA_DMR_PP_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty]>;
|
||||
// pmdmxvi8gerx4 and pmdmxvi8gerx4pp: same operands followed by three i32
// operands (the selection patterns match them as $XMSK, $YMSK and $PMSK).
defm int_ppc_mma_pmdmxvi8gerx4 :
|
||||
PowerPC_MMA_DMR_PP_Intrinsic<[llvm_v256i1_ty, llvm_v16i8_ty, llvm_i32_ty,
|
||||
llvm_i32_ty, llvm_i32_ty]>;
|
||||
// dmxvi8gerx4spp: takes (v1024i1, v256i1, v16i8) and returns v1024i1.
// Declared directly rather than through PowerPC_MMA_DMR_PP_Intrinsic because
// only this single form is defined here (no variant without the v1024i1
// input).
def int_ppc_mma_dmxvi8gerx4spp :
|
||||
DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
|
||||
[llvm_v1024i1_ty, llvm_v256i1_ty, llvm_v16i8_ty],
|
||||
[IntrNoMem]>;
|
||||
// pmdmxvi8gerx4spp: same as above with three trailing i32 operands (matched
// as $XMSK, $YMSK and $PMSK by the selection patterns).
def int_ppc_mma_pmdmxvi8gerx4spp :
|
||||
DefaultAttrsIntrinsic<[llvm_v1024i1_ty],
|
||||
[llvm_v1024i1_ty, llvm_v256i1_ty, llvm_v16i8_ty,
|
||||
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem]>;
|
||||
}
|
||||
|
||||
// XL Compat intrinsics.
|
||||
|
@ -69,6 +69,96 @@ class XForm_ATB3<bits<6> opcode, bits<5> o, bits<10> xo, dag OOL, dag IOL,
|
||||
let Inst{31} = 0;
|
||||
}
|
||||
|
||||
// 32-bit instruction format with a 3-bit AT field, a 5-bit XAp field and a
// 6-bit XB field, plus an 8-bit extended opcode `xo`.
// Bits assigned by this class:
//   6-8   AT
//   9-10  0
//   11-14 XAp{3-0}
//   15    0
//   16-20 XB{4-0}
//   21-28 xo
//   29    XAp{4}
//   30    XB{5}
//   31    0
// Bits 0-5 are not assigned here; presumably the `I<opcode, ...>` base class
// fills them from `opcode` - confirm against its definition.
class XX3Form_AT3_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
|
||||
string asmstr, InstrItinClass itin,
|
||||
list<dag> pattern>
|
||||
: I<opcode, OOL, IOL, asmstr, itin> {
|
||||
bits<3> AT;
|
||||
bits<5> XAp;
|
||||
bits<6> XB;
|
||||
|
||||
let Pattern = pattern;
|
||||
|
||||
let Inst{6-8} = AT;
|
||||
let Inst{9-10} = 0;
|
||||
let Inst{11-14} = XAp{3-0};
|
||||
let Inst{15} = 0;
|
||||
let Inst{16-20} = XB{4-0};
|
||||
let Inst{21-28} = xo;
|
||||
let Inst{29} = XAp{4};
|
||||
let Inst{30} = XB{5};
|
||||
let Inst{31} = 0;
|
||||
}
|
||||
|
||||
// 64-bit prefixed counterpart of XX3Form_AT3_XAp5B6: a 32-bit prefix word
// carrying three mask immediates, followed by a 32-bit instruction word with
// the same AT / XAp / XB / xo layout shifted by 32 bits.
//
// Prefix word (bits 0-31) as assigned by this class:
//   6-7   3
//   8-11  9
//   12-15 0
//   16-19 PMSK (4 bits)
//   20-27 XMSK (8 bits)
//   28-31 YMSK (4 bits)
// Instruction word (bits 32-63) as assigned by this class:
//   38-40 AT
//   41-42 0
//   43-46 XAp{3-0}
//   47    0
//   48-52 XB{4-0}
//   53-60 xo
//   61    XAp{4}
//   62    XB{5}
//   63    0
// Bits 0-5 and 32-37 are not assigned here; presumably the
// `PI<1, opcode, ...>` base class fills them - confirm against its definition.
//
// XAp is declared 5 bits wide: only XAp{4-0} is encoded below, which matches
// both the "XAp5" in the class name and the field width used by
// XX3Form_AT3_XAp5B6 for the same operand.
class MMIRR_XX3Form_X8YP4_XAp5B6<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
|
||||
string asmstr, InstrItinClass itin,
|
||||
list<dag> pattern>
|
||||
: PI<1, opcode, OOL, IOL, asmstr, itin> {
|
||||
bits<3> AT;
|
||||
bits<5> XAp;
|
||||
bits<6> XB;
|
||||
bits<8> XMSK;
|
||||
bits<4> YMSK;
|
||||
bits<4> PMSK;
|
||||
|
||||
let Pattern = pattern;
|
||||
|
||||
// The prefix.
|
||||
let Inst{6-7} = 3;
|
||||
let Inst{8-11} = 9;
|
||||
let Inst{12-15} = 0;
|
||||
let Inst{16-19} = PMSK;
|
||||
let Inst{20-27} = XMSK;
|
||||
let Inst{28-31} = YMSK;
|
||||
|
||||
// The instruction.
|
||||
let Inst{38-40} = AT;
|
||||
let Inst{41-42} = 0;
|
||||
let Inst{43-46} = XAp{3-0};
|
||||
let Inst{47} = 0;
|
||||
let Inst{48-52} = XB{4-0};
|
||||
let Inst{53-60} = xo;
|
||||
let Inst{61} = XAp{4};
|
||||
let Inst{62} = XB{5};
|
||||
let Inst{63} = 0;
|
||||
}
|
||||
|
||||
// Emits two XX3Form_AT3_XAp5B6 instructions under [MMA, IsISAFuture], both
// writing a `dmr:$AT` result, using the IIC_VecFP itinerary and carrying no
// selection pattern of their own:
//   NAME    - extended opcode `xo | 0x01`; inputs are IOL; $AT is marked
//             earlyclobber; asm string is "<asmbase> <asmstr>".
//   NAME#PP - extended opcode `xo`; takes an extra leading `dmr:$ATi` input
//             that is tied to $AT and excluded from the encoding; asm string
//             is "<asmbase>pp <asmstr>".
multiclass DMR_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
|
||||
string asmstr> {
|
||||
let Predicates = [MMA, IsISAFuture] in {
|
||||
def NAME :
|
||||
XX3Form_AT3_XAp5B6<opcode, !or(xo, 0x01), (outs dmr:$AT), IOL,
|
||||
!strconcat(asmbase#" ", asmstr), IIC_VecFP, []>,
|
||||
RegConstraint<"@earlyclobber $AT">;
|
||||
def PP :
|
||||
XX3Form_AT3_XAp5B6<opcode, xo, (outs dmr:$AT), !con((ins dmr:$ATi), IOL),
|
||||
!strconcat(asmbase#"pp ", asmstr), IIC_VecFP, []>,
|
||||
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
|
||||
}
|
||||
}
|
||||
|
||||
// Emits the two non-prefixed instructions from DMR_UM_XOEO plus their
// prefixed ("pm") counterparts under [MMA, PrefixInstrs, IsISAFuture]:
//   PM#NAME    - extended opcode `xo | 0x01`; $AT is marked earlyclobber.
//   PM#NAME#PP - extended opcode `xo`; extra leading `dmr:$ATi` input tied to
//                $AT and excluded from the encoding.
// The prefixed forms append `u8imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK` to the
// inputs and ", $XMSK, $YMSK, $PMSK" to the asm string.
// NOTE(review): "M448" presumably refers to the 4/4/8-bit mask operands -
// confirm against the naming used by the existing MMA multiclasses.
multiclass DMR_UM_M448_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
|
||||
string asmstr> {
|
||||
defm NAME : DMR_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
|
||||
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
|
||||
def PM#NAME :
|
||||
MMIRR_XX3Form_X8YP4_XAp5B6<
|
||||
opcode, !or(xo, 0x01), (outs dmr:$AT),
|
||||
!con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)),
|
||||
!strconcat("pm"#asmbase#" ", asmstr#", $XMSK, $YMSK, $PMSK"),
|
||||
IIC_VecFP, []>,
|
||||
RegConstraint<"@earlyclobber $AT">;
|
||||
def PM#NAME#PP :
|
||||
MMIRR_XX3Form_X8YP4_XAp5B6<
|
||||
opcode, xo, (outs dmr:$AT),
|
||||
!con((ins dmr:$ATi),
|
||||
!con(IOL, (ins u8imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))),
|
||||
!strconcat("pm"#asmbase#"pp ", asmstr#", $XMSK, $YMSK, $PMSK"),
|
||||
IIC_VecFP, []>,
|
||||
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
|
||||
}
|
||||
}
|
||||
|
||||
let Predicates = [IsISAFuture] in {
|
||||
def DMXXEXTFDMR512 : XX3Form_AT3_XABp5_P1<60, 226,
|
||||
(outs vsrprc:$XAp, vsrprc:$XBp),
|
||||
@ -117,3 +207,56 @@ let Predicates = [IsISAFuture] in {
|
||||
"dmsetdmrz $AT", NoItinerary,
|
||||
[(set v1024i1:$AT, (int_ppc_mma_dmsetdmrz))]>;
|
||||
}
|
||||
|
||||
// MMA+ accumulating/non-accumulating instructions.
|
||||
|
||||
// DMXVI8GERX4, DMXVI8GERX4PP, PMDMXVI8GERX4, PMDMXVI8GERX4PP
|
||||
// Primary opcode 59, base extended opcode 10: the multiclass gives the pp
// forms xo = 10 and the non-pp forms xo = 10 | 1 = 11.
defm DMXVI8GERX4 : DMR_UM_M448_XOEO<59, 10, (ins vsrprc:$XAp, vsrc:$XB),
|
||||
"dmxvi8gerx4", "$AT, $XAp, $XB">;
|
||||
|
||||
// dmxvi8gerx4spp: primary opcode 59, extended opcode 98. The `dmr:$ATi`
// input is tied to the $AT result and excluded from the encoding. Defined
// standalone (not via DMR_UM_M448_XOEO) and uses IIC_VecGeneral rather than
// the IIC_VecFP itinerary used by that multiclass.
let Predicates = [MMA, IsISAFuture] in {
|
||||
def DMXVI8GERX4SPP :
|
||||
XX3Form_AT3_XAp5B6<59, 98, (outs dmr:$AT), (ins dmr:$ATi, vsrprc:$XAp, vsrc:$XB),
|
||||
"dmxvi8gerx4spp $AT, $XAp, $XB", IIC_VecGeneral, []>,
|
||||
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
|
||||
}
|
||||
|
||||
// pmdmxvi8gerx4spp: prefixed form of dmxvi8gerx4spp (primary opcode 59,
// extended opcode 98) with the additional $XMSK (u8imm), $YMSK (u4imm) and
// $PMSK (u4imm) operands. `dmr:$ATi` is tied to $AT and excluded from the
// encoding.
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
|
||||
def PMDMXVI8GERX4SPP :
|
||||
MMIRR_XX3Form_X8YP4_XAp5B6<59, 98, (outs dmr:$AT),
|
||||
(ins dmr:$ATi, vsrprc:$XAp,vsrc:$XB, u8imm:$XMSK,
|
||||
u4imm:$YMSK, u4imm:$PMSK),
|
||||
"pmdmxvi8gerx4spp $AT, $XAp, $XB, $XMSK, $YMSK, $PMSK",
|
||||
IIC_VecGeneral, []>,
|
||||
RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
|
||||
}
|
||||
|
||||
// MMA+ Intrinsics
|
||||
// Selection patterns mapping the non-prefixed intrinsics one-to-one onto
// DMXVI8GERX4, DMXVI8GERX4PP and DMXVI8GERX4SPP. The v16i8 $XB operand is
// passed through RCCp.BToVSRC (defined elsewhere; presumably a register-class
// copy of $XB into VSRC - confirm against its definition).
let Predicates = [MMA, IsISAFuture] in {
|
||||
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4 v256i1:$XAp, v16i8:$XB)),
|
||||
(DMXVI8GERX4 $XAp, RCCp.BToVSRC)>;
|
||||
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
|
||||
(DMXVI8GERX4PP $ATi, $XAp, RCCp.BToVSRC)>;
|
||||
|
||||
def : Pat<(v1024i1 (int_ppc_mma_dmxvi8gerx4spp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB)),
|
||||
(DMXVI8GERX4SPP $ATi, $XAp, RCCp.BToVSRC)>;
|
||||
}
|
||||
|
||||
// Selection patterns mapping the prefixed intrinsics one-to-one onto
// PMDMXVI8GERX4, PMDMXVI8GERX4PP and PMDMXVI8GERX4SPP. The three trailing
// i32 operands only match when they satisfy Msk8Imm ($XMSK) and Msk4Imm
// ($YMSK, $PMSK). The v16i8 $XB operand is passed through RCCp.BToVSRC
// (defined elsewhere; presumably a register-class copy of $XB into VSRC -
// confirm against its definition).
let Predicates = [MMA, PrefixInstrs, IsISAFuture] in {
|
||||
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4 v256i1:$XAp, v16i8:$XB, Msk8Imm:$XMSK,
|
||||
Msk4Imm:$YMSK, Msk4Imm:$PMSK)),
|
||||
(PMDMXVI8GERX4 $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
|
||||
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
|
||||
|
||||
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4pp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
|
||||
Msk8Imm:$XMSK, Msk4Imm:$YMSK,
|
||||
Msk4Imm:$PMSK)),
|
||||
(PMDMXVI8GERX4PP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
|
||||
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
|
||||
|
||||
def : Pat<(v1024i1 (int_ppc_mma_pmdmxvi8gerx4spp v1024i1:$ATi, v256i1:$XAp, v16i8:$XB,
|
||||
Msk8Imm:$XMSK, Msk4Imm:$YMSK,
|
||||
Msk4Imm:$PMSK)),
|
||||
(PMDMXVI8GERX4SPP $ATi, $XAp, RCCp.BToVSRC, Msk8Imm:$XMSK,
|
||||
Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
|
||||
}
|
||||
|
@ -3474,6 +3474,13 @@ class PPCAsmPseudo<string asm, dag iops>
|
||||
let TSFlags{10} = MemriOp;
|
||||
}
|
||||
|
||||
// Mask immediates for MMA instructions (2, 4 and 8 bits).
// Each ImmLeaf accepts an i32 immediate only if it fits in the stated number
// of unsigned bits (isUInt<N>).
|
||||
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
|
||||
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
|
||||
def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;
|
||||
|
||||
// Predicate that holds when the subtarget reports hasMMA().
def MMA : Predicate<"Subtarget->hasMMA()">;
|
||||
|
||||
// Prefixed instructions may require access to the above defs at a later
|
||||
// time so we include this after the def.
|
||||
include "PPCInstrP10.td"
|
||||
|
@ -1,12 +1,4 @@
|
||||
|
||||
// Mask immediates for MMA instructions (2, 4 and 8 bits).
|
||||
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
|
||||
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
|
||||
def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;
|
||||
|
||||
def MMA : Predicate<"Subtarget->hasMMA()">;
|
||||
|
||||
|
||||
// Multiclass definitions for MMA accumulator instructions.
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
|
287
llvm/test/CodeGen/PowerPC/dmf-outer-product.ll
Normal file
287
llvm/test/CodeGen/PowerPC/dmf-outer-product.ll
Normal file
@ -0,0 +1,287 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=future -enable-subreg-liveness -ppc-asm-full-reg-names \
|
||||
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
|
||||
; RUN: -mcpu=future -enable-subreg-liveness -ppc-asm-full-reg-names \
|
||||
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
|
||||
|
||||
declare <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1>, <16 x i8>)
|
||||
|
||||
; Non-accumulating form: loads a <256 x i1> from %vpp and a <16 x i8> from
; %vcp, calls llvm.ppc.mma.dmxvi8gerx4 and stores the <1024 x i1> result to
; %resp.
define void @test_dmxvi8gerx4(ptr %vpp, ptr %vcp, ptr %resp) {
|
||||
; CHECK-LABEL: test_dmxvi8gerx4:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv v3, 0(r3)
|
||||
; CHECK-NEXT: lxv vs0, 0(r4)
|
||||
; CHECK-NEXT: lxv v2, 16(r3)
|
||||
; CHECK-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0
|
||||
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
|
||||
; CHECK-NEXT: stxvp vsp34, 96(r5)
|
||||
; CHECK-NEXT: stxvp vsp36, 64(r5)
|
||||
; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
|
||||
; CHECK-NEXT: stxvp vsp34, 32(r5)
|
||||
; CHECK-NEXT: stxvp vsp36, 0(r5)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test_dmxvi8gerx4:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv v3, 16(r3)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r4)
|
||||
; CHECK-BE-NEXT: lxv v2, 0(r3)
|
||||
; CHECK-BE-NEXT: dmxvi8gerx4 dmr0, vsp34, vs0
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%v1 = load <256 x i1>, ptr %vpp, align 32
|
||||
%v2 = load <16 x i8>, ptr %vcp, align 32
|
||||
%call = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4(<256 x i1> %v1, <16 x i8> %v2)
|
||||
store <1024 x i1> %call, ptr %resp, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1>, <256 x i1>, <16 x i8>)
|
||||
|
||||
; Form with a <1024 x i1> input: loads it from %vop, loads a <256 x i1> from
; %vpp and a <16 x i8> from %vcp, calls llvm.ppc.mma.dmxvi8gerx4pp and stores
; the <1024 x i1> result to %resp.
define void @test_dmxvi8gerx4pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
|
||||
; CHECK-LABEL: test_dmxvi8gerx4pp:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxvp vsp34, 0(r3)
|
||||
; CHECK-NEXT: lxvp vsp36, 32(r3)
|
||||
; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
|
||||
; CHECK-NEXT: lxvp vsp34, 64(r3)
|
||||
; CHECK-NEXT: lxvp vsp36, 96(r3)
|
||||
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
|
||||
; CHECK-NEXT: lxv v3, 0(r4)
|
||||
; CHECK-NEXT: lxv vs0, 0(r5)
|
||||
; CHECK-NEXT: lxv v2, 16(r4)
|
||||
; CHECK-NEXT: dmxvi8gerx4pp dmr0, vsp34, vs0
|
||||
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
|
||||
; CHECK-NEXT: stxvp vsp34, 96(r6)
|
||||
; CHECK-NEXT: stxvp vsp36, 64(r6)
|
||||
; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
|
||||
; CHECK-NEXT: stxvp vsp34, 32(r6)
|
||||
; CHECK-NEXT: stxvp vsp36, 0(r6)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test_dmxvi8gerx4pp:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
|
||||
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
|
||||
; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
|
||||
; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
|
||||
; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
|
||||
; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
|
||||
; CHECK-BE-NEXT: lxv v3, 16(r4)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r5)
|
||||
; CHECK-BE-NEXT: lxv v2, 0(r4)
|
||||
; CHECK-BE-NEXT: dmxvi8gerx4pp dmr0, vsp34, vs0
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%v.dmr = load <1024 x i1>, ptr %vop, align 64
|
||||
%v1 = load <256 x i1>, ptr %vpp, align 32
|
||||
%v2 = load <16 x i8>, ptr %vcp, align 32
|
||||
%call = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4pp(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2)
|
||||
store <1024 x i1> %call, ptr %resp, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1>, <256 x i1>, <16 x i8>)
|
||||
|
||||
; spp form: loads a <1024 x i1> from %vop, a <256 x i1> from %vpp and a
; <16 x i8> from %vcp, calls llvm.ppc.mma.dmxvi8gerx4spp and stores the
; <1024 x i1> result to %resp.
define void @test_dmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
|
||||
; CHECK-LABEL: test_dmxvi8gerx4spp:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxvp vsp34, 0(r3)
|
||||
; CHECK-NEXT: lxvp vsp36, 32(r3)
|
||||
; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
|
||||
; CHECK-NEXT: lxvp vsp34, 64(r3)
|
||||
; CHECK-NEXT: lxvp vsp36, 96(r3)
|
||||
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
|
||||
; CHECK-NEXT: lxv v3, 0(r4)
|
||||
; CHECK-NEXT: lxv vs0, 0(r5)
|
||||
; CHECK-NEXT: lxv v2, 16(r4)
|
||||
; CHECK-NEXT: dmxvi8gerx4spp dmr0, vsp34, vs0
|
||||
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
|
||||
; CHECK-NEXT: stxvp vsp34, 96(r6)
|
||||
; CHECK-NEXT: stxvp vsp36, 64(r6)
|
||||
; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
|
||||
; CHECK-NEXT: stxvp vsp34, 32(r6)
|
||||
; CHECK-NEXT: stxvp vsp36, 0(r6)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test_dmxvi8gerx4spp:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
|
||||
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
|
||||
; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
|
||||
; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
|
||||
; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
|
||||
; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
|
||||
; CHECK-BE-NEXT: lxv v3, 16(r4)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r5)
|
||||
; CHECK-BE-NEXT: lxv v2, 0(r4)
|
||||
; CHECK-BE-NEXT: dmxvi8gerx4spp dmr0, vsp34, vs0
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%v.dmr = load <1024 x i1>, ptr %vop, align 64
|
||||
%v1 = load <256 x i1>, ptr %vpp, align 32
|
||||
%v2 = load <16 x i8>, ptr %vcp, align 32
|
||||
%call = tail call <1024 x i1> @llvm.ppc.mma.dmxvi8gerx4spp(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2)
|
||||
store <1024 x i1> %call, ptr %resp, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1>, <256 x i1>, <16 x i8>, i32, i32, i32)
|
||||
|
||||
; Prefixed pp form: same loads and store as the dmxvi8gerx4pp test, calling
; llvm.ppc.mma.pmdmxvi8gerx4pp with the constant mask operands 42, 7 and 9,
; which are expected to appear unchanged in the emitted instruction.
define void @test_pmdmxvi8gerx4pp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
|
||||
; CHECK-LABEL: test_pmdmxvi8gerx4pp:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxvp vsp34, 0(r3)
|
||||
; CHECK-NEXT: lxvp vsp36, 32(r3)
|
||||
; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
|
||||
; CHECK-NEXT: lxvp vsp34, 64(r3)
|
||||
; CHECK-NEXT: lxvp vsp36, 96(r3)
|
||||
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
|
||||
; CHECK-NEXT: lxv v3, 0(r4)
|
||||
; CHECK-NEXT: lxv vs0, 0(r5)
|
||||
; CHECK-NEXT: lxv v2, 16(r4)
|
||||
; CHECK-NEXT: pmdmxvi8gerx4pp dmr0, vsp34, vs0, 42, 7, 9
|
||||
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
|
||||
; CHECK-NEXT: stxvp vsp34, 96(r6)
|
||||
; CHECK-NEXT: stxvp vsp36, 64(r6)
|
||||
; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
|
||||
; CHECK-NEXT: stxvp vsp34, 32(r6)
|
||||
; CHECK-NEXT: stxvp vsp36, 0(r6)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test_pmdmxvi8gerx4pp:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
|
||||
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
|
||||
; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
|
||||
; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
|
||||
; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
|
||||
; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
|
||||
; CHECK-BE-NEXT: lxv v3, 16(r4)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r5)
|
||||
; CHECK-BE-NEXT: lxv v2, 0(r4)
|
||||
; CHECK-BE-NEXT: pmdmxvi8gerx4pp dmr0, vsp34, vs0, 42, 7, 9
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%v.dmr = load <1024 x i1>, ptr %vop, align 64
|
||||
%v1 = load <256 x i1>, ptr %vpp, align 32
|
||||
%v2 = load <16 x i8>, ptr %vcp, align 32
|
||||
%call = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4pp(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2, i32 42, i32 7, i32 9)
|
||||
store <1024 x i1> %call, ptr %resp, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1>, <16 x i8>, i32, i32, i32)
|
||||
|
||||
; Prefixed non-accumulating form: loads a <256 x i1> from %vpp and a
; <16 x i8> from %vcp, calls llvm.ppc.mma.pmdmxvi8gerx4 with the constant mask
; operands 55, 5 and 10, and stores the <1024 x i1> result to %resp.
define void @test_pmdmxvi8gerx4(ptr %vpp, ptr %vcp, ptr %resp) {
|
||||
; CHECK-LABEL: test_pmdmxvi8gerx4:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxv v3, 0(r3)
|
||||
; CHECK-NEXT: lxv vs0, 0(r4)
|
||||
; CHECK-NEXT: lxv v2, 16(r3)
|
||||
; CHECK-NEXT: pmdmxvi8gerx4 dmr0, vsp34, vs0, 55, 5, 10
|
||||
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
|
||||
; CHECK-NEXT: stxvp vsp34, 96(r5)
|
||||
; CHECK-NEXT: stxvp vsp36, 64(r5)
|
||||
; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
|
||||
; CHECK-NEXT: stxvp vsp34, 32(r5)
|
||||
; CHECK-NEXT: stxvp vsp36, 0(r5)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test_pmdmxvi8gerx4:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxv v3, 16(r3)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r4)
|
||||
; CHECK-BE-NEXT: lxv v2, 0(r3)
|
||||
; CHECK-BE-NEXT: pmdmxvi8gerx4 dmr0, vsp34, vs0, 55, 5, 10
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 96(r5)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 64(r5)
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 32(r5)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 0(r5)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%v1 = load <256 x i1>, ptr %vpp, align 32
|
||||
%v2 = load <16 x i8>, ptr %vcp, align 32
|
||||
%call = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4(<256 x i1> %v1, <16 x i8> %v2, i32 55, i32 5, i32 10)
|
||||
store <1024 x i1> %call, ptr %resp, align 64
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1>, <256 x i1>, <16 x i8>, i32, i32, i32)
|
||||
|
||||
; Prefixed spp form: loads a <1024 x i1> from %vop, a <256 x i1> from %vpp and
; a <16 x i8> from %vcp, calls llvm.ppc.mma.pmdmxvi8gerx4spp with the constant
; mask operands 100, 6 and 12, and stores the <1024 x i1> result to %resp.
define dso_local void @test_pmdmxvi8gerx4spp(ptr %vop, ptr %vpp, ptr %vcp, ptr %resp) {
|
||||
; CHECK-LABEL: test_pmdmxvi8gerx4spp:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: lxvp vsp34, 0(r3)
|
||||
; CHECK-NEXT: lxvp vsp36, 32(r3)
|
||||
; CHECK-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
|
||||
; CHECK-NEXT: lxvp vsp34, 64(r3)
|
||||
; CHECK-NEXT: lxvp vsp36, 96(r3)
|
||||
; CHECK-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
|
||||
; CHECK-NEXT: lxv v3, 0(r4)
|
||||
; CHECK-NEXT: lxv vs0, 0(r5)
|
||||
; CHECK-NEXT: lxv v2, 16(r4)
|
||||
; CHECK-NEXT: pmdmxvi8gerx4spp dmr0, vsp34, vs0, 100, 6, 12
|
||||
; CHECK-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
|
||||
; CHECK-NEXT: stxvp vsp34, 96(r6)
|
||||
; CHECK-NEXT: stxvp vsp36, 64(r6)
|
||||
; CHECK-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
|
||||
; CHECK-NEXT: stxvp vsp34, 32(r6)
|
||||
; CHECK-NEXT: stxvp vsp36, 0(r6)
|
||||
; CHECK-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: test_pmdmxvi8gerx4spp:
|
||||
; CHECK-BE: # %bb.0: # %entry
|
||||
; CHECK-BE-NEXT: lxvp vsp34, 96(r3)
|
||||
; CHECK-BE-NEXT: lxvp vsp36, 64(r3)
|
||||
; CHECK-BE-NEXT: dmxxinstfdmr512 wacc_hi0, vsp36, vsp34, 1
|
||||
; CHECK-BE-NEXT: lxvp vsp34, 32(r3)
|
||||
; CHECK-BE-NEXT: lxvp vsp36, 0(r3)
|
||||
; CHECK-BE-NEXT: dmxxinstfdmr512 wacc0, vsp36, vsp34, 0
|
||||
; CHECK-BE-NEXT: lxv v3, 16(r4)
|
||||
; CHECK-BE-NEXT: lxv vs0, 0(r5)
|
||||
; CHECK-BE-NEXT: lxv v2, 0(r4)
|
||||
; CHECK-BE-NEXT: pmdmxvi8gerx4spp dmr0, vsp34, vs0, 100, 6, 12
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 wacc_hi0, vsp34, vsp36, 1
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 96(r6)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 64(r6)
|
||||
; CHECK-BE-NEXT: dmxxextfdmr512 wacc0, vsp34, vsp36, 0
|
||||
; CHECK-BE-NEXT: stxvp vsp36, 32(r6)
|
||||
; CHECK-BE-NEXT: stxvp vsp34, 0(r6)
|
||||
; CHECK-BE-NEXT: blr
|
||||
entry:
|
||||
%v.dmr = load <1024 x i1>, ptr %vop, align 64
|
||||
%v1 = load <256 x i1>, ptr %vpp, align 32
|
||||
%v2 = load <16 x i8>, ptr %vcp, align 32
|
||||
%call = tail call <1024 x i1> @llvm.ppc.mma.pmdmxvi8gerx4spp(<1024 x i1> %v.dmr, <256 x i1> %v1, <16 x i8> %v2, i32 100, i32 6, i32 12)
|
||||
store <1024 x i1> %call, ptr %resp, align 64
|
||||
ret void
|
||||
}
|
@ -75,3 +75,21 @@
|
||||
|
||||
#CHECK: stxvprll 6, 0, 1
|
||||
0x7c 0xc0 0x0d 0xda
|
||||
|
||||
#CHECK: dmxvi8gerx4 1, 2, 4
|
||||
0xec,0x82,0x20,0x58
|
||||
|
||||
#CHECK: dmxvi8gerx4pp 1, 0, 2
|
||||
0xec,0x80,0x10,0x50
|
||||
|
||||
#CHECK: pmdmxvi8gerx4 0, 2, 4, 8, 4, 4
|
||||
0x07,0x90,0x40,0x84,0xec,0x02,0x20,0x58
|
||||
|
||||
#CHECK: pmdmxvi8gerx4pp 1, 0, 4, 8, 4, 4
|
||||
0x07,0x90,0x40,0x84,0xec,0x80,0x20,0x50
|
||||
|
||||
#CHECK: dmxvi8gerx4spp 1, 2, 4
|
||||
0xec,0x82,0x23,0x10
|
||||
|
||||
#CHECK: pmdmxvi8gerx4spp 0, 2, 4, 8, 4, 4
|
||||
[0x07,0x90,0x40,0x84,0xec,0x02,0x23,0x10]
|
||||
|
@ -69,3 +69,21 @@
|
||||
|
||||
#CHECK: stxvprll 6, 0, 1
|
||||
0xda 0x0d 0xc0 0x7c
|
||||
|
||||
#CHECK: dmxvi8gerx4 1, 2, 4
|
||||
0x58,0x20,0x82,0xec
|
||||
|
||||
#CHECK: dmxvi8gerx4pp 1, 0, 2
|
||||
0x50,0x10,0x80,0xec
|
||||
|
||||
#CHECK: pmdmxvi8gerx4 0, 2, 4, 8, 4, 4
|
||||
0x84,0x40,0x90,0x07,0x58,0x20,0x02,0xec
|
||||
|
||||
#CHECK: pmdmxvi8gerx4pp 1, 0, 4, 8, 4, 4
|
||||
0x84,0x40,0x90,0x07,0x50,0x20,0x80,0xec
|
||||
|
||||
#CHECK: dmxvi8gerx4spp 1, 2, 4
|
||||
0x10,0x23,0x82,0xec
|
||||
|
||||
#CHECK: pmdmxvi8gerx4spp 0, 2, 4, 8, 4, 4
|
||||
0x84,0x40,0x90,0x07,0x10,0x23,0x02,0xec
|
||||
|
@ -96,3 +96,33 @@
|
||||
# CHECK-BE: stxvprll 6, 0, 1 # encoding: [0x7c,0xc0,0x0d,0xda]
|
||||
# CHECK-LE: stxvprll 6, 0, 1 # encoding: [0xda,0x0d,0xc0,0x7c]
|
||||
stxvprll 6, 0, 1
|
||||
|
||||
dmxvi8gerx4 1, 2, 4
|
||||
# CHECK-BE: dmxvi8gerx4 1, 2, 4 # encoding: [0xec,0x82,0x20,0x58]
|
||||
# CHECK-LE: dmxvi8gerx4 1, 2, 4 # encoding: [0x58,0x20,0x82,0xec]
|
||||
|
||||
dmxvi8gerx4pp 1, 0, 2
|
||||
# CHECK-BE: dmxvi8gerx4pp 1, 0, 2 # encoding: [0xec,0x80,0x10,0x50]
|
||||
# CHECK-LE: dmxvi8gerx4pp 1, 0, 2 # encoding: [0x50,0x10,0x80,0xec]
|
||||
|
||||
pmdmxvi8gerx4 0, 2, 4, 8, 4, 4
|
||||
# CHECK-BE: pmdmxvi8gerx4 0, 2, 4, 8, 4, 4 # encoding: [0x07,0x90,0x40,0x84,
|
||||
# CHECK-BE-SAME: 0xec,0x02,0x20,0x58]
|
||||
# CHECK-LE: pmdmxvi8gerx4 0, 2, 4, 8, 4, 4 # encoding: [0x84,0x40,0x90,0x07,
|
||||
# CHECK-LE-SAME: 0x58,0x20,0x02,0xec]
|
||||
|
||||
pmdmxvi8gerx4pp 1, 0, 4, 8, 4, 4
|
||||
#CHECK-BE: pmdmxvi8gerx4pp 1, 0, 4, 8, 4, 4 # encoding: [0x07,0x90,0x40,0x84,
|
||||
#CHECK-BE-SAME: 0xec,0x80,0x20,0x50]
|
||||
#CHECK-LE: pmdmxvi8gerx4pp 1, 0, 4, 8, 4, 4 # encoding: [0x84,0x40,0x90,0x07,
|
||||
#CHECK-LE-SAME: 0x50,0x20,0x80,0xec]
|
||||
|
||||
dmxvi8gerx4spp 1, 2, 4
|
||||
#CHECK-BE: dmxvi8gerx4spp 1, 2, 4 # encoding: [0xec,0x82,0x23,0x10]
|
||||
#CHECK-LE: dmxvi8gerx4spp 1, 2, 4 # encoding: [0x10,0x23,0x82,0xec]
|
||||
|
||||
pmdmxvi8gerx4spp 0, 2, 4, 8, 4, 4
|
||||
#CHECK-BE: pmdmxvi8gerx4spp 0, 2, 4, 8, 4, 4 # encoding: [0x07,0x90,0x40,0x84,
|
||||
#CHECK-BE-SAME: 0xec,0x02,0x23,0x10]
|
||||
#CHECK-LE: pmdmxvi8gerx4spp 0, 2, 4, 8, 4, 4 # encoding: [0x84,0x40,0x90,0x07,
|
||||
#CHECK-LE-SAME: 0x10,0x23,0x02,0xec]
|
||||
|
Loading…
x
Reference in New Issue
Block a user