2020-04-23 14:26:07 +09:00
|
|
|
//===-- VOP3PInstructions.td - Vector Instruction Definitions -------------===//
|
2017-02-27 18:49:11 +00:00
|
|
|
//
|
2019-01-19 08:50:56 +00:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2017-02-27 18:49:11 +00:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// VOP3P Classes
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2022-05-24 13:31:09 -04:00
|
|
|
// Profile for instructions that use the VOP3P encoding. Derives from
// VOP3_Profile, sets IsVOP3P, and lets the user opt in to the VOP3 DPP
// extension through HasDPP (disabled by default).
class VOP3P_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
                    bit HasDPP = 0> : VOP3_Profile<P, Features> {
  let IsVOP3P = 1;
  let HasExtVOP3DPP = HasDPP;
  // We do not want to print src modifiers for vop3p because the bits are
  // overloaded in meaning and the logic in printOperandAndFPInputMods is
  // wrong for vop3p
  let AsmVOP3DPPBase = AsmVOP3P;
}
|
|
|
|
|
2021-04-15 17:41:04 -04:00
|
|
|
// Used for FMA_MIX* and MAD_MIX* insts
// Their operands are only sort of f16 operands. Depending on
// op_sel_hi, these may be interpreted as f32. The inline immediate
// values are really f16 converted to f32, so we treat these as f16
// operands.
//
// useTiedOutput appends a $vdst_in operand to the inputs (see `mods`).
// The DPP extension is always requested from VOP3P_Profile for this profile.
class VOP3P_Mix_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
                        bit useTiedOutput = 0> : VOP3P_Profile<P, Features, 1> {
  bit UseTiedOutput = useTiedOutput;

  // Source operands of the regular (non-DPP) form.
  dag srcs =
        (ins FP16InputMods:$src0_modifiers, VCSrc_f16:$src0,
             FP16InputMods:$src1_modifiers, VCSrc_f16:$src1,
             FP16InputMods:$src2_modifiers, VCSrc_f16:$src2);
  // Source operands of the DPP form; only src0 differs from `srcs`.
  dag dpp_srcs =
        (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0,
             FP16InputMods:$src1_modifiers, VCSrc_f16:$src1,
             FP16InputMods:$src2_modifiers, VCSrc_f16:$src2);

  // FIXME: clampmod0 misbehaves with the non-default vdst_in
  // following it. For now workaround this by requiring clamp
  // in tied patterns. This should use undef_tied_input, but it
  // seems underdeveloped and doesn't apply the right register
  // class constraints.
  dag mods = !con(!if(UseTiedOutput, (ins clampmod:$clamp, VGPR_32:$vdst_in),
                                     (ins clampmod0:$clamp)),
                  (ins op_sel0:$op_sel, op_sel_hi0:$op_sel_hi));
  // We use Ins64 because that is the one which populates InOperandList
  // due to the logic in class VOP3_Pseudo
  let Ins64 = !con(srcs, mods);
  let InsVOP3Base = !con(dpp_srcs, mods);
  let Asm64 =
    "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$op_sel$op_sel_hi$clamp";
  let AsmVOP3DPPBase = Asm64;
}
|
|
|
|
|
|
|
|
// Defines the VOP3P pseudo NAME for OpName.
//   - If P.HasModifiers is set, the selection pattern comes from
//     getVOP3PModPat (IsDOT is forwarded as both of its trailing arguments);
//     otherwise it comes from getVOP3Pat.
//   - If the profile has HasExtVOP3DPP set, a NAME_dpp VOP3_DPP_Pseudo is
//     defined as well, predicated on isGFX11Plus.
multiclass VOP3PInst<string OpName, VOPProfile P,
                     SDPatternOperator node = null_frag, bit IsDOT = 0> {
  def NAME : VOP3P_Pseudo<OpName, P,
                          !if (P.HasModifiers,
                               getVOP3PModPat<P, node, IsDOT, IsDOT>.ret,
                               getVOP3Pat<P, node>.ret)>;
  let SubtargetPredicate = isGFX11Plus in {
  if P.HasExtVOP3DPP then
    def _dpp : VOP3_DPP_Pseudo<OpName, P> {
      let VOP3P = 1;
      let PseudoInstr = OpName#"_dpp";
    }
  } // end SubtargetPredicate = isGFX11Plus
}
|
|
|
|
|
2017-07-07 14:29:06 +00:00
|
|
|
// Non-packed instructions that use the VOP3P encoding.
// VOP3 neg/abs and VOP3P opsel/opsel_hi modifiers are allowed.
//
// No selection pattern is attached to the pseudos defined here. When
// P.UseTiedOutput is set, $vdst is tied to $vdst_in and $vdst_in is excluded
// from the encoding; the same constraints are applied to the _dpp variant,
// which is only defined when the profile has HasExtVOP3DPP set.
multiclass VOP3_VOP3PInst<string OpName, VOP3P_Mix_Profile P> {
  def NAME : VOP3P_Pseudo<OpName, P> {
    let Constraints = !if(P.UseTiedOutput, "$vdst = $vdst_in", "");
    let DisableEncoding = !if(P.UseTiedOutput, "$vdst_in", "");
  }
  let SubtargetPredicate = isGFX11Plus in {
  if P.HasExtVOP3DPP then
    def _dpp : VOP3_DPP_Pseudo<OpName, P> {
      let VOP3P = 1;
      let PseudoInstr = OpName#"_dpp";
      let Constraints = !if(P.UseTiedOutput, "$vdst = $vdst_in", "");
      let DisableEncoding = !if(P.UseTiedOutput, "$vdst_in", "");
    }
  } // end SubtargetPredicate = isGFX11Plus
}
|
2017-02-27 18:49:11 +00:00
|
|
|
|
|
|
|
// Packed 16-bit instructions that are marked commutable. Instructions with no
// SDPatternOperator argument fall back to VOP3PInst's default (null_frag).
let isCommutable = 1 in {
defm V_PK_MAD_I16 : VOP3PInst<"v_pk_mad_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;
defm V_PK_MAD_U16 : VOP3PInst<"v_pk_mad_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16_V2I16>>;

let FPDPRounding = 1 in {
defm V_PK_FMA_F16 : VOP3PInst<"v_pk_fma_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16_V2F16>, any_fma>;
defm V_PK_ADD_F16 : VOP3PInst<"v_pk_add_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, any_fadd>;
defm V_PK_MUL_F16 : VOP3PInst<"v_pk_mul_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, any_fmul>;
} // End FPDPRounding = 1
defm V_PK_MAX_F16 : VOP3PInst<"v_pk_max_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, fmaxnum_like>;
defm V_PK_MIN_F16 : VOP3PInst<"v_pk_min_f16", VOP3P_Profile<VOP_V2F16_V2F16_V2F16>, fminnum_like>;

defm V_PK_ADD_U16 : VOP3PInst<"v_pk_add_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, add>;
defm V_PK_ADD_I16 : VOP3PInst<"v_pk_add_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>>;
defm V_PK_MUL_LO_U16 : VOP3PInst<"v_pk_mul_lo_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, mul>;

defm V_PK_MIN_I16 : VOP3PInst<"v_pk_min_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, smin>;
defm V_PK_MIN_U16 : VOP3PInst<"v_pk_min_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, umin>;
defm V_PK_MAX_I16 : VOP3PInst<"v_pk_max_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, smax>;
defm V_PK_MAX_U16 : VOP3PInst<"v_pk_max_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, umax>;
} // End isCommutable = 1
|
|
|
|
|
2022-05-24 13:31:09 -04:00
|
|
|
// Packed 16-bit subtracts and reversed-operand shifts. These are defined
// outside the isCommutable block above.
defm V_PK_SUB_U16 : VOP3PInst<"v_pk_sub_u16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>>;
defm V_PK_SUB_I16 : VOP3PInst<"v_pk_sub_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, sub>;

defm V_PK_LSHLREV_B16 : VOP3PInst<"v_pk_lshlrev_b16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, clshl_rev_16>;
defm V_PK_ASHRREV_I16 : VOP3PInst<"v_pk_ashrrev_i16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, cashr_rev_16>;
defm V_PK_LSHRREV_B16 : VOP3PInst<"v_pk_lshrrev_b16", VOP3P_Profile<VOP_V2I16_V2I16_V2I16>, clshr_rev_16>;
|
2017-02-27 18:49:11 +00:00
|
|
|
|
2019-06-19 23:37:43 +00:00
|
|
|
|
2020-07-12 14:16:36 -04:00
|
|
|
let SubtargetPredicate = HasVOP3PInsts in {

// Undo sub x, c -> add x, -c canonicalization since c is more likely
// an inline immediate than -c.
// The constant will be emitted as a mov, and folded later.
// TODO: We could directly encode the immediate now
def : GCNPat<
  (add (v2i16 (VOP3PMods v2i16:$src0, i32:$src0_modifiers)), NegSubInlineConstV216:$src1),
  (V_PK_SUB_U16 $src0_modifiers, $src0, SRCMODS.OP_SEL_1, NegSubInlineConstV216:$src1)
>;

// Integer operations with clamp bit set.
// Maps the two-operand v2i16 node `pat` onto `inst` with DSTCLAMP.ENABLE,
// forwarding both sources and their VOP3PMods modifiers.
class VOP3PSatPat<SDPatternOperator pat, Instruction inst> : GCNPat<
  (pat (v2i16 (VOP3PMods v2i16:$src0, i32:$src0_modifiers)),
       (v2i16 (VOP3PMods v2i16:$src1, i32:$src1_modifiers))),
  (inst $src0_modifiers, $src0, $src1_modifiers, $src1, DSTCLAMP.ENABLE)
>;

def : VOP3PSatPat<uaddsat, V_PK_ADD_U16>;
def : VOP3PSatPat<saddsat, V_PK_ADD_I16>;
def : VOP3PSatPat<usubsat, V_PK_SUB_U16>;
def : VOP3PSatPat<ssubsat, V_PK_SUB_I16>;
} // End SubtargetPredicate = HasVOP3PInsts
|
|
|
|
|
2018-04-30 19:08:16 +00:00
|
|
|
// Selection patterns for the mixlo/mixhi f16-result forms of a mad/fma-like
// node whose three f32 inputs are matched through VOP3PMadMixMods:
//   1. scalar f16 result                    -> mixlo_inst, no clamp
//   2. build_vector(elt0, result)           -> mixhi_inst, no clamp, elt0 tied in
//   3. build_vector(elt0, clamp(result))    -> mixhi_inst, clamp enabled
//   4. clamp(build_vector(lo, hi))          -> mixhi_inst fed by mixlo_inst,
//                                              both with clamp enabled
multiclass MadFmaMixPats<SDPatternOperator fma_like,
                         Instruction mixlo_inst,
                         Instruction mixhi_inst> {
  def : GCNPat <
    (f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
                            (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
                            (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
    (mixlo_inst $src0_modifiers, $src0,
                $src1_modifiers, $src1,
                $src2_modifiers, $src2,
                DSTCLAMP.NONE,
                (i32 (IMPLICIT_DEF)))
  >;

  // FIXME: Special case handling for mixhi (especially for clamp)
  // because dealing with the write to high half of the register is
  // difficult.
  def : GCNPat <
    (build_vector f16:$elt0, (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
                                                (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
                                                (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers))))),
    (v2f16 (mixhi_inst $src0_modifiers, $src0,
                       $src1_modifiers, $src1,
                       $src2_modifiers, $src2,
                       DSTCLAMP.NONE,
                       $elt0))
  >;

  def : GCNPat <
    (build_vector
      f16:$elt0,
      (AMDGPUclamp (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
                                      (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
                                      (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_modifiers)))))),
    (v2f16 (mixhi_inst $src0_modifiers, $src0,
                       $src1_modifiers, $src1,
                       $src2_modifiers, $src2,
                       DSTCLAMP.ENABLE,
                       $elt0))
  >;

  def : GCNPat <
    (AMDGPUclamp (build_vector
      (fpround (fma_like (f32 (VOP3PMadMixMods f16:$lo_src0, i32:$lo_src0_modifiers)),
                         (f32 (VOP3PMadMixMods f16:$lo_src1, i32:$lo_src1_modifiers)),
                         (f32 (VOP3PMadMixMods f16:$lo_src2, i32:$lo_src2_modifiers)))),
      (fpround (fma_like (f32 (VOP3PMadMixMods f16:$hi_src0, i32:$hi_src0_modifiers)),
                         (f32 (VOP3PMadMixMods f16:$hi_src1, i32:$hi_src1_modifiers)),
                         (f32 (VOP3PMadMixMods f16:$hi_src2, i32:$hi_src2_modifiers)))))),
    (v2f16 (mixhi_inst $hi_src0_modifiers, $hi_src0,
                       $hi_src1_modifiers, $hi_src1,
                       $hi_src2_modifiers, $hi_src2,
                       DSTCLAMP.ENABLE,
                       (mixlo_inst $lo_src0_modifiers, $lo_src0,
                                   $lo_src1_modifiers, $lo_src1,
                                   $lo_src2_modifiers, $lo_src2,
                                   DSTCLAMP.ENABLE,
                                   (i32 (IMPLICIT_DEF)))))
  >;
}
|
2017-10-25 07:00:51 +00:00
|
|
|
|
|
|
|
let SubtargetPredicate = HasMadMixInsts in {

// These are VOP3a-like opcodes which accept no omod.
// Size of src arguments (16/32) is controlled by op_sel_hi.
// For 16-bit src arguments their location (hi/lo) is controlled by op_sel.
let isCommutable = 1, mayRaiseFPException = 0 in {
defm V_MAD_MIX_F32 : VOP3_VOP3PInst<"v_mad_mix_f32", VOP3P_Mix_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;

let FPDPRounding = 1 in {
// Clamp modifier is applied after conversion to f16.
// The mixlo/mixhi profiles pass useTiedOutput = 1.
defm V_MAD_MIXLO_F16 : VOP3_VOP3PInst<"v_mad_mixlo_f16", VOP3P_Mix_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL, 1>>;

let ClampLo = 0, ClampHi = 1 in {
defm V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3P_Mix_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL, 1>>;
}
} // End FPDPRounding = 1
}

defm : MadFmaMixPats<fmad, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
} // End SubtargetPredicate = HasMadMixInsts
|
2017-09-20 20:28:39 +00:00
|
|
|
|
2017-09-20 21:01:24 +00:00
|
|
|
|
2018-04-30 19:08:16 +00:00
|
|
|
// Essentially the same as the mad_mix versions
let SubtargetPredicate = HasFmaMixInsts in {
let isCommutable = 1 in {
defm V_FMA_MIX_F32 : VOP3_VOP3PInst<"v_fma_mix_f32", VOP3P_Mix_Profile<VOP_F32_F16_F16_F16, VOP3_OPSEL>>;

let FPDPRounding = 1 in {
// Clamp modifier is applied after conversion to f16.
// The mixlo/mixhi profiles pass useTiedOutput = 1.
defm V_FMA_MIXLO_F16 : VOP3_VOP3PInst<"v_fma_mixlo_f16", VOP3P_Mix_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL, 1>>;

let ClampLo = 0, ClampHi = 1 in {
defm V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3P_Mix_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL, 1>>;
}
} // End FPDPRounding = 1
}

defm : MadFmaMixPats<fma, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
} // End SubtargetPredicate = HasFmaMixInsts
|
2017-09-20 21:01:24 +00:00
|
|
|
|
2018-10-04 16:57:37 +00:00
|
|
|
// Defines patterns that extract signed 4bit from each Idx[0].
// Each Idx is [bit offset, left-shift amount]; the two always sum to 28, so
// the shl moves the 4-bit field to bits 31:28 and the sra by 28 brings it
// back down sign-extended. Offset 28 is not listed here.
foreach Idx = [[0,28],[4,24],[8,20],[12,16],[16,12],[20,8],[24,4]] in
  def ExtractSigned4bit_#Idx[0] : PatFrag<(ops node:$src),
                                          (sra (shl node:$src, (i32 Idx[1])), (i32 28))>;
|
2018-08-29 16:31:18 +00:00
|
|
|
|
2018-10-04 16:57:37 +00:00
|
|
|
// Defines code pattern that extracts U(unsigned/signed) 4/8bit from FromBitIndex.
//
// BitMask is 15 for a 4-bit field and 255 for an 8-bit field. Three cases:
//   - last element (FromBitIndex == 28, or FromBitIndex == 24 with
//     BitMask == 255): a single shift right, logical (srl) when U is set and
//     arithmetic (sra) otherwise.
//   - first element (FromBitIndex == 0): unsigned masks with BitMask; signed
//     4-bit uses ExtractSigned4bit_0; signed 8-bit uses sext_inreg ..., i8.
//   - any other element: unsigned shifts right then masks; signed 4-bit uses
//     ExtractSigned4bit_<FromBitIndex>; signed 8-bit applies sext_inreg ..., i8
//     to the right-shifted value.
class Extract<int FromBitIndex, int BitMask, bit U>: PatFrag<
  (ops node:$src),
  !if (!or (!and (!eq (BitMask, 255), !eq (FromBitIndex, 24)), !eq (FromBitIndex, 28)), // last element
       !if (U, (srl node:$src, (i32 FromBitIndex)), (sra node:$src, (i32 FromBitIndex))),
       !if (!eq (FromBitIndex, 0), // first element
            !if (U, (and node:$src, (i32 BitMask)),
                 !if (!eq (BitMask, 15), (!cast<PatFrag>("ExtractSigned4bit_"#FromBitIndex) node:$src),
                                         (sext_inreg node:$src, i8))),
            !if (U, (and (srl node:$src, (i32 FromBitIndex)), (i32 BitMask)),
                 !if (!eq (BitMask, 15), (!cast<PatFrag>("ExtractSigned4bit_"#FromBitIndex) node:$src),
                                         (sext_inreg (srl node:$src, (i32 FromBitIndex)), i8)))))>;
|
|
|
|
|
|
|
|
|
|
|
|
// For each signedness ("I" signed, "U" unsigned) and each of the four bytes
// of a 32-bit value, define the byte extractor <Type><Index>_8bit and the
// one-use 24-bit multiply Mul<Type>_Elt<Index> of the matching bytes of two
// sources.
foreach Type = ["I", "U"] in
  foreach Index = 0-3 in {
    // Defines patterns that extract each Index'ed 8bit from an unsigned
    // 32bit scalar value;
    // (!shl(Index, 3) is the bit offset Index * 8.)
    def Type#Index#"_8bit" : Extract<!shl(Index, 3), 255, !eq (Type, "U")>;

    // Defines multiplication patterns where the multiplication is happening on each
    // Index'ed 8bit of a 32bit scalar value.

    def Mul#Type#_Elt#Index : PatFrag<
      (ops node:$src0, node:$src1),
      (!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), AMDGPUmul_i24_oneuse, AMDGPUmul_u24_oneuse))
                            (!cast<Extract>(Type#Index#"_8bit") node:$src0),
                            (!cast<Extract>(Type#Index#"_8bit") node:$src1))>;
  }
|
2018-09-18 16:59:48 +00:00
|
|
|
|
|
|
|
// Different variants of dot8 patterns cause a huge increase in the compile time.
// Define non-associative/commutative add/mul to prevent permutation in the dot8
// pattern.
// Each node is declared with only an opcode and a type profile, plus a
// HasOneUseBinOp wrapper around it.
def NonACAdd                   : SDNode<"ISD::ADD"       , SDTIntBinOp>;
def NonACAdd_oneuse            : HasOneUseBinOp<NonACAdd>;

def NonACAMDGPUmul_u24         : SDNode<"AMDGPUISD::MUL_U24"       , SDTIntBinOp>;
def NonACAMDGPUmul_u24_oneuse  : HasOneUseBinOp<NonACAMDGPUmul_u24>;

def NonACAMDGPUmul_i24         : SDNode<"AMDGPUISD::MUL_I24"       , SDTIntBinOp>;
def NonACAMDGPUmul_i24_oneuse  : HasOneUseBinOp<NonACAMDGPUmul_i24>;
|
|
|
|
|
|
|
|
// For each signedness ("I" signed, "U" unsigned) and each of the eight 4-bit
// fields of a 32-bit value, define the extractor <Type><Index>_4bit and the
// one-use multiply Mul<Type><Index>_4bit built on the NonAC* mul nodes.
foreach Type = ["I", "U"] in
  foreach Index = 0-7 in {
    // Defines patterns that extract each Index'ed 4bit from an unsigned
    // 32bit scalar value;
    // (!shl(Index, 2) is the bit offset Index * 4.)
    def Type#Index#"_4bit" : Extract<!shl(Index, 2), 15, !eq (Type, "U")>;

    // Defines multiplication patterns where the multiplication is happening on each
    // Index'ed 4bit of a 32bit scalar value.
    def Mul#Type#Index#"_4bit" : PatFrag<
      (ops node:$src0, node:$src1),
      (!cast<HasOneUseBinOp>(!if (!eq (Type, "I"), NonACAMDGPUmul_i24_oneuse, NonACAMDGPUmul_u24_oneuse))
                            (!cast<Extract>(Type#Index#"_4bit") node:$src0),
                            (!cast<Extract>(Type#Index#"_4bit") node:$src1))>;
  }
|
2018-08-29 16:31:18 +00:00
|
|
|
|
2018-08-21 16:21:15 +00:00
|
|
|
// Matches src2 + hi16(src0) * hi16(src1) + lo16(src0) * lo16(src1), with the
// halves taken unsigned (srl by 16 / and with 65535), and selects Inst.
// The pattern inherits Inst's SubtargetPredicate.
// NOTE(review): the (i32 8) modifier operands and the trailing (i1 0) are
// presumably the default op_sel_hi source modifiers and "no clamp" - confirm.
class UDot2Pat<Instruction Inst> : GCNPat <
  (add (add_oneuse (AMDGPUmul_u24_oneuse (srl i32:$src0, (i32 16)),
                                         (srl i32:$src1, (i32 16))), i32:$src2),
       (AMDGPUmul_u24_oneuse (and i32:$src0, (i32 65535)),
                             (and i32:$src1, (i32 65535)))
   ),
  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
  let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
}
|
2018-08-21 16:21:15 +00:00
|
|
|
|
|
|
|
// Signed counterpart of UDot2Pat: the high halves are taken with sra by 16
// and the low halves with sext_inreg ..., i16. The pattern inherits Inst's
// SubtargetPredicate.
// NOTE(review): the (i32 8) modifier operands and the trailing (i1 0) are
// presumably the default op_sel_hi source modifiers and "no clamp" - confirm.
class SDot2Pat<Instruction Inst> : GCNPat <
  (add (add_oneuse (AMDGPUmul_i24_oneuse (sra i32:$src0, (i32 16)),
                                         (sra i32:$src1, (i32 16))), i32:$src2),
       (AMDGPUmul_i24_oneuse (sext_inreg i32:$src0, i16),
                             (sext_inreg i32:$src1, i16))),
  (Inst (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))> {
  let SubtargetPredicate = !cast<VOP_Pseudo>(Inst).SubtargetPredicate;
}
|
2018-08-21 16:21:15 +00:00
|
|
|
|
2019-09-17 17:56:13 +00:00
|
|
|
// Dot-product instructions, grouped by the subtarget feature that gates them.
// Every defm passes 1 as VOP3PInst's trailing IsDOT argument.
let IsDOT = 1 in {
let SubtargetPredicate = HasDot2Insts in {

defm V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16",
  VOP3P_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2, 1>;
defm V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16",
  VOP3P_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2, 1>;

} // End SubtargetPredicate = HasDot2Insts

let SubtargetPredicate = HasDot7Insts in {

defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
  VOP3P_Profile<VOP_F32_V2F16_V2F16_F32, VOP3_REGULAR, /*HasDPP*/ 1>,
  AMDGPUfdot2, /*IsDOT*/ 1>;
defm V_DOT4_U32_U8  : VOP3PInst<"v_dot4_u32_u8",
  VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
defm V_DOT8_U32_U4  : VOP3PInst<"v_dot8_u32_u4",
  VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;

} // End SubtargetPredicate = HasDot7Insts

let SubtargetPredicate = HasDot1Insts in {

defm V_DOT4_I32_I8  : VOP3PInst<"v_dot4_i32_i8",
  VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot4, 1>;
defm V_DOT8_I32_I4  : VOP3PInst<"v_dot8_i32_i4",
  VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_sdot8, 1>;

} // End SubtargetPredicate = HasDot1Insts

let SubtargetPredicate = HasDot8Insts in {

defm V_DOT2_F32_BF16 : VOP3PInst<"v_dot2_f32_bf16",
  VOP3P_Profile<VOP_F32_V2I16_V2I16_F32, VOP3_REGULAR, /*HasDPP*/ 1>,
  null_frag, 1>;

} // End SubtargetPredicate = HasDot8Insts

} // End let IsDOT = 1
|
2019-02-09 00:34:21 +00:00
|
|
|
|
2022-05-24 13:31:09 -04:00
|
|
|
// Defines a dot-iu instruction NAME through VOP3PInst (with no selection
// pattern of its own, since null_frag is passed) plus a separate GCNPat that
// selects it from intrinsic_node.
multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
  let IsDOT = 1 in
  defm NAME : VOP3PInst<OpName, VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>,
                        null_frag, 1>;
  // Dot-iu instructions consider input as signed if imod neg bits are set. Thus
  // Dot-iu Intrinsics have extra operands and require separate codegen pattern.
  // src0/src1 modifiers come from DotIUVOP3PMods; src2 gets the constant
  // modifier (i32 8) and the intrinsic's clamp immediate is forwarded.
  def : GCNPat < (intrinsic_node (DotIUVOP3PMods i32:$src0_mods), i32:$src0,
                                 (DotIUVOP3PMods i32:$src1_mods), i32:$src1,
                                 i32:$src2, (i1 timm:$clamp)),
                 (!cast<Instruction>(NAME) $src0_mods, i32:$src0,
                                           $src1_mods, i32:$src1,
                                           (i32 8), i32:$src2, i1:$clamp)
  >;
}
|
|
|
|
|
|
|
|
// Dot-iu instructions. null_frag is passed as the intrinsic node for both.
let SubtargetPredicate = HasDot8Insts in {
defm V_DOT4_I32_IU8 : VOP3PDOTIUInst<"v_dot4_i32_iu8", null_frag>;
defm V_DOT8_I32_IU4 : VOP3PDOTIUInst<"v_dot8_i32_iu4", null_frag>;
} // End SubtargetPredicate = HasDot8Insts
|
|
|
|
|
2018-08-21 16:21:15 +00:00
|
|
|
// Select the 16-bit dot2 instructions from the expanded mul/add form.
def : UDot2Pat<V_DOT2_U32_U16>;
def : SDot2Pat<V_DOT2_I32_I16>;
|
|
|
|
|
2018-10-04 16:57:37 +00:00
|
|
|
// dot4 selection: !foldl builds a left-nested add_oneuse chain that starts at
// $src2 and adds Mul<Type>_Elt0 .. Mul<Type>_Elt3 of $src0/$src1 in order.
// The pattern uses the SubtargetPredicate of the instruction it selects
// (V_DOT4_U32_U8 or V_DOT4_I32_I8).
foreach Type = ["U", "I"] in
  let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT4_"#Type#"32_"#Type#8).SubtargetPredicate in
  def : GCNPat <
    !cast<dag>(!foldl((i32 i32:$src2), [0, 1, 2, 3], lhs, y,
                      (add_oneuse lhs, (!cast<PatFrag>("Mul"#Type#"_Elt"#y) i32:$src0, i32:$src1)))),
    (!cast<VOP3P_Pseudo>("V_DOT4_"#Type#"32_"#Type#8) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
|
2018-08-29 16:31:18 +00:00
|
|
|
|
2018-10-04 16:57:37 +00:00
|
|
|
// dot8 selection: the chain starts with add_oneuse($src2, Mul<Type>0_4bit)
// and then !foldl adds Mul<Type>1_4bit .. Mul<Type>7_4bit in order using
// NonACAdd_oneuse. The pattern uses the SubtargetPredicate of the
// instruction it selects (V_DOT8_U32_U4 or V_DOT8_I32_I4).
foreach Type = ["U", "I"] in
  let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
  def : GCNPat <
    !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
                      [1, 2, 3, 4, 5, 6, 7], lhs, y,
                      (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
    (!cast<VOP3P_Pseudo>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
|
2018-09-18 16:59:48 +00:00
|
|
|
|
2018-11-01 22:48:19 +00:00
|
|
|
// Different variants of dot8 code-gen dag patterns are not generated through table-gen due to a huge increase
// in the compile time. Directly handle the pattern generated by the FE here.
// Same shape as the in-order dot8 pattern, except that the folded element
// order is 7, 1, 2, 3, 4, 5, 6 (element 0 still seeds the chain with $src2).
foreach Type = ["U", "I"] in
  let SubtargetPredicate = !cast<VOP_Pseudo>("V_DOT8_"#Type#"32_"#Type#4).SubtargetPredicate in
  def : GCNPat <
    !cast<dag>(!foldl((add_oneuse i32:$src2, (!cast<PatFrag>("Mul"#Type#"0_4bit") i32:$src0, i32:$src1)),
                      [7, 1, 2, 3, 4, 5, 6], lhs, y,
                      (NonACAdd_oneuse lhs, (!cast<PatFrag>("Mul"#Type#y#"_4bit") i32:$src0, i32:$src1)))),
    (!cast<VOP3P_Pseudo>("V_DOT8_"#Type#"32_"#Type#4) (i32 8), $src0, (i32 8), $src1, (i32 8), $src2, (i1 0))>;
|
2018-11-01 22:48:19 +00:00
|
|
|
|
2019-07-09 21:43:09 +00:00
|
|
|
// Destination operands over the AGPR_32 / AReg_* register classes.
def ADst_32   : VOPDstOperand<AGPR_32>;
def ADst_64   : VOPDstOperand<AReg_64>;
def ADst_128  : VOPDstOperand<AReg_128>;
def ADst_256  : VOPDstOperand<AReg_256>;
def ADst_512  : VOPDstOperand<AReg_512>;
def ADst_1024 : VOPDstOperand<AReg_1024>;
// Destination operands over the VReg_* register classes.
def VDst_64   : VOPDstOperand<VReg_64>;
def VDst_128  : VOPDstOperand<VReg_128>;
def VDst_256  : VOPDstOperand<VReg_256>;
def VDst_512  : VOPDstOperand<VReg_512>;
def VDst_1024 : VOPDstOperand<VReg_1024>;
|
2019-07-09 21:43:09 +00:00
|
|
|
|
2022-05-24 13:31:09 -04:00
|
|
|
// i32 <- i32 VOP3_MAI profile whose source operand is ARegSrc_32; the
// destination operand is left at the inherited default.
def VOPProfileAccRead : VOP3P_Profile<VOP_I32_I32, VOP3_MAI> {
  let Src0RC64 = ARegSrc_32;
}

// i32 <- i32 VOP3_MAI profile whose destination is ADst_32 and whose source
// operand is VCSrc_b32.
def VOPProfileAccWrite : VOP3P_Profile<VOP_I32_I32, VOP3_MAI> {
  let DstRC = ADst_32;
  let Src0RC64 = VCSrc_b32;
}
|
|
|
|
|
|
|
|
// Profile for MAI (VOP3_MAI) instructions.
//   _SrcRC  - register operand used for src2.
//   _DstRC  - register operand used for the destination.
//   SrcABRC - register operand used for both src0 and src1
//             (AVSrc_32 by default).
// op_sel, clamp, integer clamp, omod and source modifiers are all disabled;
// the instruction instead takes cbsz, abid and blgp operands.
class VOPProfileMAI<VOPProfile P, RegisterOperand _SrcRC, RegisterOperand _DstRC,
                    RegisterOperand SrcABRC = AVSrc_32>
  : VOP3P_Profile<P, VOP3_MAI> {
  let DstRC = _DstRC;
  let Src0RC64 = SrcABRC;
  let Src1RC64 = SrcABRC;
  let Src2RC64 = _SrcRC;
  let HasOpSel = 0;
  let HasClamp = 0;
  let HasIntClamp = 0;
  let HasOMod = 0;
  let HasModifiers = 0;
  let Asm64 = "$vdst, $src0, $src1, $src2$cbsz$abid$blgp";
  let AsmVOP3DPPBase = Asm64;
  let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, Src2RC64:$src2, cbsz:$cbsz, abid:$abid, blgp:$blgp);
  let InsVOP3Base = Ins64;
  // Dst and SrcC cannot partially overlap if SrcC/Dst is bigger than 4 VGPRs.
  // We then create two versions of the instruction: with tied dst and src2
  // and with the earlyclobber flag on the dst. This is stricter than the
  // actual HW restriction. In particular earlyclobber also affects src0 and
  // src1 allocation which is not required.
  bit NoDstOverlap = !gt(DstVT.Size, 128);
}
|
|
|
|
|
2022-03-21 12:59:33 -07:00
|
|
|
// Profile for SMFMAC instructions, built on VOPProfileMAI.
// _DstRC is passed as both the src2 and destination operand of the base
// profile, _SrcARC as its src0/src1 operand, and src1 is then overridden with
// _SrcBRC. src2 takes the destination value type, an extra VRegSrc_32 $idx
// input is added, and the assembly string has no $src2 or $blgp.
class VOPProfileSMFMAC<VOPProfile P, RegisterOperand _DstRC,
                       RegisterOperand _SrcARC, RegisterOperand _SrcBRC>
  : VOPProfileMAI<P, _DstRC, _DstRC, _SrcARC> {
  let Src1RC64 = _SrcBRC;
  let Src2VT = DstVT;
  let Asm64 = " $vdst, $src0, $src1, $idx$cbsz$abid";
  let Outs64 = (outs DstRC:$vdst);
  let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, VRegSrc_32:$idx, cbsz:$cbsz, abid:$abid, Src2RC64:$src2);
}
|
|
|
|
|
2019-07-09 21:43:09 +00:00
|
|
|
// MAI profiles with AISrc_* src2 operands and ADst_* destinations.
// Entries that pass a fourth argument use AVSrc_64 for src0/src1.
def VOPProfileMAI_F32_F32_X4     : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32,       AISrc_128_f32,  ADst_128>;
def VOPProfileMAI_F32_F32_X16    : VOPProfileMAI<VOP_V16F32_F32_F32_V16F32,     AISrc_512_f32,  ADst_512>;
def VOPProfileMAI_F32_F32_X32    : VOPProfileMAI<VOP_V32F32_F32_F32_V32F32,     AISrc_1024_f32, ADst_1024>;
def VOPProfileMAI_I32_I32_X4     : VOPProfileMAI<VOP_V4I32_I32_I32_V4I32,       AISrc_128_b32,  ADst_128>;
def VOPProfileMAI_I32_I32_X16    : VOPProfileMAI<VOP_V16I32_I32_I32_V16I32,     AISrc_512_b32,  ADst_512>;
def VOPProfileMAI_I32_I32_X32    : VOPProfileMAI<VOP_V32I32_I32_I32_V32I32,     AISrc_1024_b32, ADst_1024>;
def VOPProfileMAI_F32_V2I16_X4   : VOPProfileMAI<VOP_V4F32_V2I16_V2I16_V4F32,   AISrc_128_b32,  ADst_128>;
def VOPProfileMAI_F32_V2I16_X16  : VOPProfileMAI<VOP_V16F32_V2I16_V2I16_V16F32, AISrc_512_b32,  ADst_512>;
def VOPProfileMAI_F32_V2I16_X32  : VOPProfileMAI<VOP_V32F32_V2I16_V2I16_V32F32, AISrc_1024_b32, ADst_1024>;
def VOPProfileMAI_F32_V4F16_X4   : VOPProfileMAI<VOP_V4F32_V4F16_V4F16_V4F32,   AISrc_128_b32,  ADst_128,  AVSrc_64>;
def VOPProfileMAI_F32_V4F16_X16  : VOPProfileMAI<VOP_V16F32_V4F16_V4F16_V16F32, AISrc_512_b32,  ADst_512,  AVSrc_64>;
def VOPProfileMAI_F32_V4F16_X32  : VOPProfileMAI<VOP_V32F32_V4F16_V4F16_V32F32, AISrc_1024_b32, ADst_1024, AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X4   : VOPProfileMAI<VOP_V4F32_V4I16_V4I16_V4F32,   AISrc_128_b32,  ADst_128,  AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X16  : VOPProfileMAI<VOP_V16F32_V4I16_V4I16_V16F32, AISrc_512_b32,  ADst_512,  AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X32  : VOPProfileMAI<VOP_V32F32_V4I16_V4I16_V32F32, AISrc_1024_b32, ADst_1024, AVSrc_64>;
def VOPProfileMAI_F64_16X16X4F64 : VOPProfileMAI<VOP_V4F64_F64_F64_V4F64,       AISrc_256_f64,  ADst_256,  AVSrc_64>;
def VOPProfileMAI_F64_4X4X4F64   : VOPProfileMAI<VOP_F64_F64_F64_F64,           AISrc_64_f64,   ADst_64,   AVSrc_64>;
def VOPProfileMAI_I32_I64_X16    : VOPProfileMAI<VOP_V4I32_I64_I64_V4I32,       AISrc_128_b32,  ADst_128,  AVSrc_64>;
def VOPProfileMAI_I32_I64_X32    : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32,     AISrc_512_b32,  ADst_512,  AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X16  : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32,   AISrc_128_b32,  ADst_128,  AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X32  : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, AISrc_512_b32,  ADst_512,  AVSrc_64>;
|
2021-02-17 13:37:46 -08:00
|
|
|
|
|
|
|
// _VCD counterparts of the MAI profiles: same value types, but with VISrc_*
// src2 operands and VDst_* destinations instead of AISrc_* / ADst_*.
def VOPProfileMAI_F32_F32_X4_VCD     : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32,       VISrc_128_f32,  VDst_128>;
def VOPProfileMAI_F32_F32_X16_VCD    : VOPProfileMAI<VOP_V16F32_F32_F32_V16F32,     VISrc_512_f32,  VDst_512>;
def VOPProfileMAI_F32_F32_X32_VCD    : VOPProfileMAI<VOP_V32F32_F32_F32_V32F32,     VISrc_1024_f32, VDst_1024>;
def VOPProfileMAI_I32_I32_X4_VCD     : VOPProfileMAI<VOP_V4I32_I32_I32_V4I32,       VISrc_128_b32,  VDst_128>;
def VOPProfileMAI_I32_I32_X16_VCD    : VOPProfileMAI<VOP_V16I32_I32_I32_V16I32,     VISrc_512_b32,  VDst_512>;
def VOPProfileMAI_I32_I32_X32_VCD    : VOPProfileMAI<VOP_V32I32_I32_I32_V32I32,     VISrc_1024_b32, VDst_1024>;
def VOPProfileMAI_F32_V2I16_X4_VCD   : VOPProfileMAI<VOP_V4F32_V2I16_V2I16_V4F32,   VISrc_128_b32,  VDst_128>;
def VOPProfileMAI_F32_V2I16_X16_VCD  : VOPProfileMAI<VOP_V16F32_V2I16_V2I16_V16F32, VISrc_512_b32,  VDst_512>;
def VOPProfileMAI_F32_V2I16_X32_VCD  : VOPProfileMAI<VOP_V32F32_V2I16_V2I16_V32F32, VISrc_1024_b32, VDst_1024>;
def VOPProfileMAI_F32_V4F16_X4_VCD   : VOPProfileMAI<VOP_V4F32_V4F16_V4F16_V4F32,   VISrc_128_b32,  VDst_128,  AVSrc_64>;
def VOPProfileMAI_F32_V4F16_X16_VCD  : VOPProfileMAI<VOP_V16F32_V4F16_V4F16_V16F32, VISrc_512_b32,  VDst_512,  AVSrc_64>;
def VOPProfileMAI_F32_V4F16_X32_VCD  : VOPProfileMAI<VOP_V32F32_V4F16_V4F16_V32F32, VISrc_1024_b32, VDst_1024, AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X4_VCD   : VOPProfileMAI<VOP_V4F32_V4I16_V4I16_V4F32,   VISrc_128_b32,  VDst_128,  AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X16_VCD  : VOPProfileMAI<VOP_V16F32_V4I16_V4I16_V16F32, VISrc_512_b32,  VDst_512,  AVSrc_64>;
def VOPProfileMAI_F32_V4I16_X32_VCD  : VOPProfileMAI<VOP_V32F32_V4I16_V4I16_V32F32, VISrc_1024_b32, VDst_1024, AVSrc_64>;
def VOPProfileMAI_F64_16X16X4F64_VCD : VOPProfileMAI<VOP_V4F64_F64_F64_V4F64,       VISrc_256_f64,  VDst_256,  AVSrc_64>;
def VOPProfileMAI_F64_4X4X4F64_VCD   : VOPProfileMAI<VOP_F64_F64_F64_F64,           VISrc_64_f64,   VDst_64,   AVSrc_64>;
def VOPProfileMAI_I32_I64_X16_VCD    : VOPProfileMAI<VOP_V4I32_I64_I64_V4I32,       VISrc_128_b32,  VDst_128,  AVSrc_64>;
def VOPProfileMAI_I32_I64_X32_VCD    : VOPProfileMAI<VOP_V16I32_I64_I64_V16I32,     VISrc_512_b32,  VDst_512,  AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X16_VCD  : VOPProfileMAI<VOP_V4F32_V2F32_V2F32_V4F32,   VISrc_128_b32,  VDst_128,  AVSrc_64>;
def VOPProfileMAI_F32_V2F32_X32_VCD  : VOPProfileMAI<VOP_V16F32_V2F32_V2F32_V16F32, VISrc_512_b32,  VDst_512,  AVSrc_64>;
|
2019-07-09 21:43:09 +00:00
|
|
|
|
2022-05-18 15:37:30 +03:00
|
|
|
// Profiles for the V_SMFMAC_* pseudos. The SMFMACInst multiclass looks them up
// by name ("VOPProfileSMFMAC_" # P).
def VOPProfileSMFMAC_F32_16X16X32_F16
    : VOPProfileSMFMAC<VOP_V4F32_V4F16_V8F16_I32, AVDst_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_32X32X16_F16
    : VOPProfileSMFMAC<VOP_V16F32_V4F16_V8F16_I32, AVDst_512, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_16X16X32_I16
    : VOPProfileSMFMAC<VOP_V4F32_V4I16_V8I16_I32, AVDst_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_F32_32X32X16_I16
    : VOPProfileSMFMAC<VOP_V16F32_V4I16_V8I16_I32, AVDst_512, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_I32_16X16X64_I8
    : VOPProfileSMFMAC<VOP_V4I32_V2I32_V4I32_I32, AVDst_128, AVSrc_64, AVSrc_128>;
def VOPProfileSMFMAC_I32_32X32X32_I8
    : VOPProfileSMFMAC<VOP_V16I32_V2I32_V4I32_I32, AVDst_512, AVSrc_64, AVSrc_128>;
|
2022-03-21 12:59:33 -07:00
|
|
|
|
2022-01-21 12:12:31 -08:00
|
|
|
// Mix-in that records two fields on an MFMA instruction:
//   IsMac - copy of is_mac (the MAIInst multiclass passes 1 for its "_mac"
//           defs and 0 for the others).
//   FMAOp - copy of Name (the MAIInst multiclass passes the name of the
//           matching non-"_mac" def).
class MFMATable<bit is_mac, string Name> {
  bit IsMac = is_mac;
  string FMAOp = Name;
}
|
|
|
|
|
2022-01-12 16:03:16 -08:00
|
|
|
// Pattern fragment that forwards six operands unchanged to Op and guards the
// match with the C++ predicate snippet `pred`.
class MAIFrag<SDPatternOperator Op, code pred>
    : PatFrag<(ops node:$src0, node:$src1, node:$src2,
                   node:$cbsz, node:$abid, node:$blgp),
              (Op $src0, $src1, $src2, $cbsz, $abid, $blgp),
              pred>;
|
|
|
|
|
|
|
|
// MAIFrag that matches only when the function's SIMachineFunctionInfo reports
// mayNeedAGPRs(). The same condition is given twice: once as the PatFrag
// predicate and once as GISelPredicateCode.
let GISelPredicateCode = [{ return MF.getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs(); }] in
class AgprMAIFrag<SDPatternOperator Op>
    : MAIFrag<Op, [{ return MF->getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs(); }]>;
|
|
|
|
|
|
|
|
// Complement of AgprMAIFrag: matches only when mayNeedAGPRs() is false. The
// condition is again given both as the PatFrag predicate and as
// GISelPredicateCode.
let GISelPredicateCode = [{ return !MF.getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs(); }] in
class VgprMAIFrag<SDPatternOperator Op>
    : MAIFrag<Op, [{ return !MF->getInfo<SIMachineFunctionInfo>()->mayNeedAGPRs(); }]>;
|
|
|
|
|
2019-07-09 21:43:09 +00:00
|
|
|
let Predicates = [HasMAIInsts] in {
|
2020-07-01 16:34:51 -04:00
|
|
|
|
|
|
|
// Both accvgpr moves are flagged as cheap-as-a-move and rematerializable;
// only the write is additionally flagged isMoveImm.
let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
  defm V_ACCVGPR_READ_B32 : VOP3Inst<"v_accvgpr_read_b32", VOPProfileAccRead>;

  let isMoveImm = 1 in
  defm V_ACCVGPR_WRITE_B32 : VOP3Inst<"v_accvgpr_write_b32", VOPProfileAccWrite>;
} // End isAsCheapAsAMove = 1, isReMaterializable = 1
|
2019-07-09 21:43:09 +00:00
|
|
|
|
2022-03-23 09:12:34 -07:00
|
|
|
// Base class of the MAI pseudos defined in this file. On top of VOP3InstBase
// it carries the three fields that MAIInstInfoTable lists in its `Fields`:
//   Opcode        - the record itself, cast to Instruction.
//   is_dgemm      - 0 by default; set via an enclosing `let`.
//   is_gfx940_xdl - 0 by default; set via an enclosing `let`.
class MAIInst<string OpName, VOPProfile P, SDPatternOperator node>
    : VOP3InstBase<OpName, P, node> {
  Instruction Opcode = !cast<Instruction>(NAME);
  bit is_dgemm = 0;
  bit is_gfx940_xdl = 0;
}
|
|
|
|
|
2022-01-21 12:12:31 -08:00
|
|
|
// Defines the pseudo instructions for one MFMA operation.
//
//   OpName        - assembly mnemonic.
//   P             - profile suffix; "VOPProfileMAI_" # P and
//                   "VOPProfileMAI_" # P # "_VCD" must both exist.
//   node          - intrinsic selected by the patterns.
//   NoDstOverlap  - defaults to the NoDstOverlap field of the profile.
//
// Always defined:
//   <NAME>_e64, <NAME>_vgprcd_e64 (the latter requires isGFX90APlus).
// When NoDstOverlap is set, those two get "@earlyclobber $vdst" and null_frag
// instead of a selection pattern, and two more defs are added that tie $vdst
// to $src2 and carry the patterns:
//   <NAME>_mac_e64, <NAME>_mac_vgprcd_e64 (the latter requires isGFX90APlus).
// MFMATable links every "_mac" def to the name of its non-"_mac" counterpart.
multiclass MAIInst<string OpName, string P, SDPatternOperator node,
                   bit NoDstOverlap = !cast<VOPProfileMAI>("VOPProfileMAI_" # P).NoDstOverlap> {
  // Profile of the base forms and profile of the "_vgprcd" forms.
  defvar BasePfl = !cast<VOPProfileMAI>("VOPProfileMAI_" # P);
  defvar VcdPfl = !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD");

  let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in {
    // FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported.
    let Constraints = !if(NoDstOverlap, "@earlyclobber $vdst", "") in {
      def _e64 : MAIInst<OpName, BasePfl,
                         !if(NoDstOverlap, null_frag, AgprMAIFrag<node>)>,
                 MFMATable<0, NAME # "_e64">;

      let SubtargetPredicate = isGFX90APlus, Mnemonic = OpName in
      def _vgprcd_e64 : MAIInst<OpName # "_vgprcd", VcdPfl,
                                !if(NoDstOverlap, null_frag, VgprMAIFrag<node>)>,
                        MFMATable<0, NAME # "_vgprcd_e64">;
    }

    // BoolToList yields the iteration list, so whether the tied "_mac" forms
    // below are emitted depends on NoDstOverlap.
    foreach _ = BoolToList<NoDstOverlap>.ret in {
      let Constraints = !if(NoDstOverlap, "$vdst = $src2", ""),
          isConvertibleToThreeAddress = NoDstOverlap,
          Mnemonic = OpName in {
        def "_mac_e64" : MAIInst<OpName # "_mac", BasePfl, AgprMAIFrag<node>>,
                         MFMATable<1, NAME # "_e64">;

        let SubtargetPredicate = isGFX90APlus in
        def _mac_vgprcd_e64 : MAIInst<OpName # "_mac_vgprcd", VcdPfl,
                                      VgprMAIFrag<node>>,
                              MFMATable<1, NAME # "_vgprcd_e64">;
      }
    }
  } // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1
}
|
|
|
|
|
|
|
|
// MFMA pseudos instantiated with the default is_gfx940_xdl = 0.
defm V_MFMA_F32_4X4X1F32   : MAIInst<"v_mfma_f32_4x4x1f32",   "F32_F32_X4",  int_amdgcn_mfma_f32_4x4x1f32>;
defm V_MFMA_F32_16X16X1F32 : MAIInst<"v_mfma_f32_16x16x1f32", "F32_F32_X16", int_amdgcn_mfma_f32_16x16x1f32>;
defm V_MFMA_F32_16X16X4F32 : MAIInst<"v_mfma_f32_16x16x4f32", "F32_F32_X4",  int_amdgcn_mfma_f32_16x16x4f32>;
defm V_MFMA_F32_32X32X1F32 : MAIInst<"v_mfma_f32_32x32x1f32", "F32_F32_X32", int_amdgcn_mfma_f32_32x32x1f32>;
defm V_MFMA_F32_32X32X2F32 : MAIInst<"v_mfma_f32_32x32x2f32", "F32_F32_X16", int_amdgcn_mfma_f32_32x32x2f32>;

// MFMA pseudos flagged is_gfx940_xdl.
let is_gfx940_xdl = 1 in {
defm V_MFMA_F32_4X4X4F16    : MAIInst<"v_mfma_f32_4x4x4f16",    "F32_V4F16_X4",  int_amdgcn_mfma_f32_4x4x4f16>;
defm V_MFMA_I32_4X4X4I8     : MAIInst<"v_mfma_i32_4x4x4i8",     "I32_I32_X4",    int_amdgcn_mfma_i32_4x4x4i8>;
defm V_MFMA_F32_16X16X4F16  : MAIInst<"v_mfma_f32_16x16x4f16",  "F32_V4F16_X16", int_amdgcn_mfma_f32_16x16x4f16>;
defm V_MFMA_F32_16X16X16F16 : MAIInst<"v_mfma_f32_16x16x16f16", "F32_V4F16_X4",  int_amdgcn_mfma_f32_16x16x16f16>;
defm V_MFMA_I32_16X16X4I8   : MAIInst<"v_mfma_i32_16x16x4i8",   "I32_I32_X16",   int_amdgcn_mfma_i32_16x16x4i8>;
defm V_MFMA_F32_32X32X4F16  : MAIInst<"v_mfma_f32_32x32x4f16",  "F32_V4F16_X32", int_amdgcn_mfma_f32_32x32x4f16>;
defm V_MFMA_F32_32X32X8F16  : MAIInst<"v_mfma_f32_32x32x8f16",  "F32_V4F16_X16", int_amdgcn_mfma_f32_32x32x8f16>;
defm V_MFMA_I32_32X32X4I8   : MAIInst<"v_mfma_i32_32x32x4i8",   "I32_I32_X32",   int_amdgcn_mfma_i32_32x32x4i8>;
} // End is_gfx940_xdl = 1

// MFMA pseudos guarded by the isGFX908orGFX90A predicate.
let Predicates = [isGFX908orGFX90A] in {
defm V_MFMA_I32_16X16X16I8  : MAIInst<"v_mfma_i32_16x16x16i8",  "I32_I32_X4",    int_amdgcn_mfma_i32_16x16x16i8>;
defm V_MFMA_I32_32X32X8I8   : MAIInst<"v_mfma_i32_32x32x8i8",   "I32_I32_X16",   int_amdgcn_mfma_i32_32x32x8i8>;
defm V_MFMA_F32_4X4X2BF16   : MAIInst<"v_mfma_f32_4x4x2bf16",   "F32_V2I16_X4",  int_amdgcn_mfma_f32_4x4x2bf16>;
defm V_MFMA_F32_16X16X2BF16 : MAIInst<"v_mfma_f32_16x16x2bf16", "F32_V2I16_X16", int_amdgcn_mfma_f32_16x16x2bf16>;
defm V_MFMA_F32_16X16X8BF16 : MAIInst<"v_mfma_f32_16x16x8bf16", "F32_V2I16_X4",  int_amdgcn_mfma_f32_16x16x8bf16>;
defm V_MFMA_F32_32X32X2BF16 : MAIInst<"v_mfma_f32_32x32x2bf16", "F32_V2I16_X32", int_amdgcn_mfma_f32_32x32x2bf16>;
defm V_MFMA_F32_32X32X4BF16 : MAIInst<"v_mfma_f32_32x32x4bf16", "F32_V2I16_X16", int_amdgcn_mfma_f32_32x32x4bf16>;
} // End Predicates = [isGFX908orGFX90A]
|
2019-07-09 21:43:09 +00:00
|
|
|
|
|
|
|
} // End Predicates = [HasMAIInsts]
|
|
|
|
|
2021-02-17 13:37:46 -08:00
|
|
|
// MFMA pseudos guarded by isGFX90APlus: the "_1k" bf16 forms are flagged
// is_gfx940_xdl, the f64 forms are flagged is_dgemm.
let Predicates = [isGFX90APlus] in {
  let is_gfx940_xdl = 1 in {
  defm V_MFMA_F32_32X32X4BF16_1K  : MAIInst<"v_mfma_f32_32x32x4bf16_1k",  "F32_V4I16_X32", int_amdgcn_mfma_f32_32x32x4bf16_1k>;
  defm V_MFMA_F32_16X16X4BF16_1K  : MAIInst<"v_mfma_f32_16x16x4bf16_1k",  "F32_V4I16_X16", int_amdgcn_mfma_f32_16x16x4bf16_1k>;
  defm V_MFMA_F32_4X4X4BF16_1K    : MAIInst<"v_mfma_f32_4x4x4bf16_1k",    "F32_V4I16_X4",  int_amdgcn_mfma_f32_4x4x4bf16_1k>;
  defm V_MFMA_F32_32X32X8BF16_1K  : MAIInst<"v_mfma_f32_32x32x8bf16_1k",  "F32_V4I16_X16", int_amdgcn_mfma_f32_32x32x8bf16_1k>;
  defm V_MFMA_F32_16X16X16BF16_1K : MAIInst<"v_mfma_f32_16x16x16bf16_1k", "F32_V4I16_X4",  int_amdgcn_mfma_f32_16x16x16bf16_1k>;
  } // End is_gfx940_xdl = 1

  let is_dgemm = 1 in {
  defm V_MFMA_F64_16X16X4F64      : MAIInst<"v_mfma_f64_16x16x4f64",      "F64_16X16X4F64", int_amdgcn_mfma_f64_16x16x4f64>;
  defm V_MFMA_F64_4X4X4F64        : MAIInst<"v_mfma_f64_4x4x4f64",        "F64_4X4X4F64",   int_amdgcn_mfma_f64_4x4x4f64>;
  } // End is_dgemm = 1
} // End Predicates = [isGFX90APlus]
|
|
|
|
|
2022-03-23 09:12:34 -07:00
|
|
|
// MFMA pseudos guarded by isGFX940Plus; all of them are flagged is_gfx940_xdl.
let Predicates = [isGFX940Plus], is_gfx940_xdl = 1 in {
  defm V_MFMA_I32_32X32X16I8  : MAIInst<"v_mfma_i32_32x32x16i8",  "I32_I64_X32",   int_amdgcn_mfma_i32_32x32x16_i8>;
  defm V_MFMA_I32_16X16X32I8  : MAIInst<"v_mfma_i32_16x16x32i8",  "I32_I64_X16",   int_amdgcn_mfma_i32_16x16x32_i8>;
  defm V_MFMA_F32_16X16X8XF32 : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>;
  defm V_MFMA_F32_32X32X4XF32 : MAIInst<"v_mfma_f32_32x32x4xf32", "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>;
} // End Predicates = [isGFX940Plus], is_gfx940_xdl = 1
|
2022-03-18 12:33:21 -07:00
|
|
|
|
2022-03-21 12:59:33 -07:00
|
|
|
// Defines <NAME>_e64 for one SMFMAC operation, using the profile
// "VOPProfileSMFMAC_" # P and `node` as the selection pattern operator.
// $vdst is tied to $src2 and $src2 is listed in DisableEncoding; the def is
// flagged is_gfx940_xdl.
multiclass SMFMACInst<string OpName, string P, SDPatternOperator node> {
  let Constraints = "$vdst = $src2", DisableEncoding = "$src2",
      isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1,
      is_gfx940_xdl = 1 in
  def _e64 : MAIInst<OpName, !cast<VOPProfileSMFMAC>("VOPProfileSMFMAC_" # P), node>;
}
|
|
|
|
|
|
|
|
// SMFMAC pseudos. Note that the bf16 forms use the "*_I16" profiles.
let SubtargetPredicate = isGFX940Plus in {
  defm V_SMFMAC_F32_16X16X32_F16  : SMFMACInst<"v_smfmac_f32_16x16x32_f16",  "F32_16X16X32_F16", int_amdgcn_smfmac_f32_16x16x32_f16>;
  defm V_SMFMAC_F32_32X32X16_F16  : SMFMACInst<"v_smfmac_f32_32x32x16_f16",  "F32_32X32X16_F16", int_amdgcn_smfmac_f32_32x32x16_f16>;
  defm V_SMFMAC_F32_16X16X32_BF16 : SMFMACInst<"v_smfmac_f32_16x16x32_bf16", "F32_16X16X32_I16", int_amdgcn_smfmac_f32_16x16x32_bf16>;
  defm V_SMFMAC_F32_32X32X16_BF16 : SMFMACInst<"v_smfmac_f32_32x32x16_bf16", "F32_32X32X16_I16", int_amdgcn_smfmac_f32_32x32x16_bf16>;
  defm V_SMFMAC_I32_16X16X64_I8   : SMFMACInst<"v_smfmac_i32_16x16x64_i8",   "I32_16X16X64_I8",  int_amdgcn_smfmac_i32_16x16x64_i8>;
  defm V_SMFMAC_I32_32X32X32_I8   : SMFMACInst<"v_smfmac_i32_32x32x32_i8",   "I32_32X32X32_I8",  int_amdgcn_smfmac_i32_32x32x32_i8>;
} // End SubtargetPredicate = isGFX940Plus
|
|
|
|
|
2022-03-23 09:12:34 -07:00
|
|
|
// Searchable table over every record derived from MAIInst. Each row exposes
// the Opcode, is_dgemm and is_gfx940_xdl fields and is keyed by Opcode; the
// generated lookup function is named getMAIInstInfoHelper.
def MAIInstInfoTable : GenericTable {
  let FilterClass = "MAIInst";
  let CppTypeName = "MAIInstInfo";
  let Fields = ["Opcode", "is_dgemm", "is_gfx940_xdl"];

  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getMAIInstInfoHelper";
}
|
|
|
|
|
2021-02-17 13:37:46 -08:00
|
|
|
// Packed 32-bit pseudos. All four are marked commutable; V_PK_MOV_B32 is
// instantiated without a selection pattern operator.
let SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1 in {
  defm V_PK_FMA_F32 : VOP3PInst<"v_pk_fma_f32",
                                VOP3P_Profile<VOP_V2F32_V2F32_V2F32_V2F32, VOP3_PACKED>,
                                any_fma>;
  defm V_PK_MUL_F32 : VOP3PInst<"v_pk_mul_f32",
                                VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>,
                                any_fmul>;
  defm V_PK_ADD_F32 : VOP3PInst<"v_pk_add_f32",
                                VOP3P_Profile<VOP_V2F32_V2F32_V2F32, VOP3_PACKED>,
                                any_fadd>;
  defm V_PK_MOV_B32 : VOP3PInst<"v_pk_mov_b32",
                                VOP3P_Profile<VOP_V2I32_V2I32_V2I32, VOP3_PACKED>>;
} // End SubtargetPredicate = HasPackedFP32Ops, isCommutable = 1
|
|
|
|
|
2019-07-09 21:43:09 +00:00
|
|
|
// Accept the accvgpr move mnemonics without the "_b32" suffix.
def : MnemonicAlias<"v_accvgpr_read",  "v_accvgpr_read_b32">;
def : MnemonicAlias<"v_accvgpr_write", "v_accvgpr_write_b32">;
|
|
|
|
|
2021-01-07 13:56:02 -05:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Begin Real Encodings
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2022-05-24 13:31:09 -04:00
|
|
|
// Real DPP16 encoding of a VOP3P instruction.
//   op        - 7-bit opcode.
//   ps        - DPP pseudo this real instruction is tied to via SIMCInstr.
//   subtarget - encoding family passed to SIMCInstr.
//   opName    - name forwarded to VOP3P_DPP; defaults to the pseudo's OpName.
class VOP3P_DPP16<bits<7> op, VOP_DPP_Pseudo ps, int subtarget,
                  string opName = ps.OpName>
    : VOP3P_DPP<op, opName, ps.Pfl, 1>, SIMCInstr<ps.PseudoInstr, subtarget> {
  // Properties copied from the pseudo.
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let Uses = ps.Uses;
  let SchedRW = ps.SchedRW;
  let OtherPredicates = ps.OtherPredicates;

  // Both predicates are fixed to HasDPP16 rather than copied from the pseudo.
  let AssemblerPredicate = HasDPP16;
  let SubtargetPredicate = HasDPP16;
}
|
|
|
|
|
|
|
|
// Base of the real DPP8 encodings of VOP3P instructions.
//   op     - 7-bit opcode.
//   ps     - pseudo whose profile and properties are reused.
//   opName - name forwarded to VOP3P_DPP8; defaults to the pseudo's OpName.
// No predicates other than OtherPredicates are set here.
class VOP3P_DPP8_Base<bits<7> op, VOP_Pseudo ps, string opName = ps.OpName>
    : VOP3P_DPP8<op, opName, ps.Pfl> {
  // Properties copied from the pseudo.
  let hasSideEffects = ps.hasSideEffects;
  let Defs = ps.Defs;
  let Uses = ps.Uses;
  let SchedRW = ps.SchedRW;
  let OtherPredicates = ps.OtherPredicates;
}
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// GFX11.
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
let AssemblerPredicate = isGFX11Plus,
|
|
|
|
DecoderNamespace = "GFX11" in {
|
|
|
|
|
|
|
|
// Defines <NAME>_gfx11, the GFX11 real encoding of the VOP3P pseudo named
// backing_ps_name. asmName defaults to the Mnemonic of the pseudo called
// NAME (not backing_ps_name).
multiclass VOP3P_Real_gfx11<bits<7> op, string backing_ps_name = NAME,
                            string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
  defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
  def _gfx11 : VOP3P_Real<ps, SIEncodingFamily.GFX11, asmName>,
               VOP3Pe_gfx11<op, ps.Pfl>;
}
|
|
|
|
|
|
|
|
// Defines <NAME>_dpp_gfx11, the GFX11 DPP16 real encoding. The DPP pseudo is
// looked up as backing_ps_name # "_dpp"; the assembly string is asmName
// followed by the AsmVOP3DPP16 string of the non-DPP pseudo's profile.
multiclass VOP3P_Real_dpp_gfx11<bits<7> op, string backing_ps_name = NAME,
                                string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
  defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
  defvar dpp_ps = !cast<VOP_DPP_Pseudo>(backing_ps_name #"_dpp");
  def _dpp_gfx11 : VOP3P_DPP16<op, dpp_ps, SIEncodingFamily.GFX11> {
    let AsmString = asmName #ps.Pfl.AsmVOP3DPP16;
    let DecoderNamespace = "DPPGFX11";
  }
}
|
|
|
|
|
|
|
|
// Defines <NAME>_dpp8_gfx11, the GFX11 DPP8 real encoding. The assembly
// string is asmName followed by the AsmVOP3DPP8 string of the pseudo's
// profile.
multiclass VOP3P_Real_dpp8_gfx11<bits<7> op, string backing_ps_name = NAME,
                                 string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic> {
  defvar ps = !cast<VOP3P_Pseudo>(backing_ps_name);
  def _dpp8_gfx11 : VOP3P_DPP8_Base<op, ps> {
    let AsmString = asmName #ps.Pfl.AsmVOP3DPP8;
    let DecoderNamespace = "DPP8GFX11";
  }
}
|
|
|
|
|
|
|
|
// Convenience multiclass: emits the plain, DPP16 and DPP8 GFX11 real
// encodings for one pseudo by combining the three multiclasses above.
multiclass VOP3P_Realtriple_gfx11<bits<7> op, string backing_ps_name = NAME,
                                  string asmName = !cast<VOP3P_Pseudo>(NAME).Mnemonic>
    : VOP3P_Real_gfx11<op, backing_ps_name, asmName>,
      VOP3P_Real_dpp_gfx11<op, backing_ps_name, asmName>,
      VOP3P_Real_dpp8_gfx11<op, backing_ps_name, asmName>;
|
|
|
|
} // End AssemblerPredicate = isGFX11Plus, DecoderNamespace = "GFX11"
|
|
|
|
|
|
|
|
// Dot-product instructions that get only a GFX11 real encoding here.
defm V_DOT4_I32_IU8  : VOP3P_Real_gfx11 <0x16>;
defm V_DOT8_I32_IU4  : VOP3P_Real_gfx11 <0x18>;
defm V_DOT2_F32_BF16 : VOP3P_Real_gfx11 <0x1a>;
|
|
|
|
|
2021-01-07 13:56:02 -05:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// GFX8 (VI)
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2020-09-30 15:01:33 -07:00
|
|
|
// Defines <NAME>_vi, the VI-family real encoding of the VOP3 pseudo NAME.
// The def is assembled under HasVOP3PInsts, decoded in the "GFX8" namespace
// and has its VOP3P flag set.
multiclass VOP3P_Real_vi<bits<7> op> {
  def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
            VOP3Pe<op, !cast<VOP3_Pseudo>(NAME).Pfl> {
    let AssemblerPredicate = HasVOP3PInsts;
    let DecoderNamespace = "GFX8";
    let VOP3P = 1;
  }
}
|
|
|
|
|
2020-09-30 15:01:33 -07:00
|
|
|
// Defines <NAME>_vi, the VI-family real encoding of the pseudo <NAME>_e64,
// using the VOP3Pe_MAI encoding with its last template argument left unset.
// Encoding bits 14, 59 and 60 (the op_sel_hi bits) are also left unset.
multiclass VOP3P_Real_MAI<bits<7> op> {
  def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
            VOP3Pe_MAI<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl, ?> {
    let AssemblerPredicate = HasMAIInsts;
    let DecoderNamespace = "GFX8";

    let Inst{14} = ?; // op_sel_hi(2)
    let Inst{59} = ?; // op_sel_hi(0)
    let Inst{60} = ?; // op_sel_hi(1)
  }
}
|
|
|
|
|
2022-01-21 12:12:31 -08:00
|
|
|
// GFX90A real encodings of one MFMA operation. Two defs are produced, both
// restricted to isGFX90AOnly and decoded in the "GFX90A" namespace:
//   <NAME>_gfx90a_acd - from the pseudo <NAME>_e64,        VOP3Pe_MAI<..., 1>
//   <NAME>_gfx90a_vcd - from the pseudo <NAME>_vgprcd_e64, VOP3Pe_MAI<..., 0>
// The enclosing `let` gives both an empty Constraints string.
let Constraints = "" in {
multiclass VOP3P_Real_MFMA_gfx90a<bits<7> op> {
  let SubtargetPredicate = isGFX90AOnly,
      AssemblerPredicate = isGFX90AOnly,
      DecoderNamespace = "GFX90A" in {
    def _gfx90a_acd
        : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.GFX90A>,
          VOP3Pe_MAI<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl, 1>;

    def _gfx90a_vcd
        : VOP3P_Real<!cast<VOP3_Pseudo>(NAME # "_vgprcd_e64"), SIEncodingFamily.GFX90A>,
          VOP3Pe_MAI<op, !cast<VOP3_Pseudo>(NAME # "_vgprcd_e64").Pfl, 0>;
  } // End SubtargetPredicate = isGFX90AOnly, AssemblerPredicate = isGFX90AOnly,
    //     DecoderNamespace = "GFX90A"
}
} // End Constraints = ""
|
|
|
|
|
|
|
|
// Emits assembler aliases that let the mnemonic NameTo be used for the gfx940
// real instructions of operation Op (<Op>_gfx940_acd and <Op>_gfx940_vcd).
// Nothing is emitted when NameFrom and NameTo are equal. The remaining
// template arguments default to the two pseudos of Op and their profiles.
multiclass VOP3P_Real_MFMA_gfx940_aliases<string NameFrom, string NameTo, string Op,
                                          VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(Op # "_e64"),
                                          VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(Op # "_vgprcd" # "_e64"),
                                          VOPProfile Pfl_ACD = PS_ACD.Pfl,
                                          VOPProfile Pfl_VCD = PS_VCD.Pfl> {
  let Predicates = [isGFX940Plus] in {
    foreach _ = BoolToList<!ne(NameFrom, NameTo)>.ret in {
      // Alias for the "_acd" real instruction.
      def : InstAlias<NameTo # " " # PS_ACD.AsmOperands,
                      (!cast<VOP3P_Real>(Op # "_gfx940_acd")
                         Pfl_ACD.DstRC:$vdst,
                         Pfl_ACD.Src0RC64:$src0, Pfl_ACD.Src1RC64:$src1, Pfl_ACD.Src2RC64:$src2,
                         cbsz:$cbsz, abid:$abid, blgp:$blgp)>,
            PredicateControl;

      // Alias for the "_vcd" real instruction.
      def : InstAlias<NameTo # " " # PS_VCD.AsmOperands,
                      (!cast<VOP3P_Real>(Op # "_gfx940_vcd")
                         Pfl_VCD.DstRC:$vdst,
                         Pfl_VCD.Src0RC64:$src0, Pfl_VCD.Src1RC64:$src1, Pfl_VCD.Src2RC64:$src2,
                         cbsz:$cbsz, abid:$abid, blgp:$blgp)>,
            PredicateControl;
    }
  } // End Predicates = [isGFX940Plus]
}
|
|
|
|
|
|
|
|
// GFX940 real encodings of one MFMA operation.
//   op   - 7-bit opcode.
//   Name - mnemonic printed for both real instructions; defaults to the
//          Mnemonic of the pseudo <NAME>_e64.
// Produces <NAME>_gfx940_acd and <NAME>_gfx940_vcd. Both share one AsmString,
// built from Name and the operand string of the ACD pseudo. Aliases are then
// added so that the pseudo's own mnemonic, and that mnemonic with every
// "_1k" removed (when that differs), are still accepted.
multiclass VOP3P_Real_MFMA_gfx940<bits<7> op, string Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
                                  VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(NAME # "_e64"),
                                  VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64")> {
  let SubtargetPredicate = isGFX940Plus,
      AssemblerPredicate = isGFX940Plus,
      DecoderNamespace = "GFX9",
      AsmString = Name # PS_ACD.AsmOperands,
      Constraints = "" in {
    def _gfx940_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX940>,
                      VOP3Pe_MAI<op, PS_ACD.Pfl, 1>;

    def _gfx940_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX940>,
                      VOP3Pe_MAI<op, PS_VCD.Pfl, 0>;
  } // End SubtargetPredicate = isGFX940Plus, AssemblerPredicate = isGFX940Plus,
    //     DecoderNamespace = "GFX9", AsmString, Constraints = ""

  // Alias from the pseudo's mnemonic (a no-op when it equals Name).
  defm : VOP3P_Real_MFMA_gfx940_aliases<Name, PS_ACD.Mnemonic, NAME>;

  // Alias from the pseudo's mnemonic with "_1k" stripped, emitted only when
  // stripping actually changes the mnemonic.
  defvar MnemonicNo1k = !subst("_1k", "", PS_ACD.Mnemonic);
  foreach _ = BoolToList<!ne(MnemonicNo1k, PS_ACD.Mnemonic)>.ret in
  defm : VOP3P_Real_MFMA_gfx940_aliases<Name, MnemonicNo1k, NAME>;
}
|
|
|
|
|
|
|
|
// All real encodings of one MFMA operation: the gfx90a and gfx940 defs from
// the two parent multiclasses plus <NAME>_vi, a VI-family encoding of the
// pseudo <NAME>_e64 with an empty Constraints string. GFX940Name is the
// mnemonic handed to the gfx940 multiclass.
multiclass VOP3P_Real_MFMA<bits<7> op, string GFX940Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic>
    : VOP3P_Real_MFMA_gfx90a<op>,
      VOP3P_Real_MFMA_gfx940<op, GFX940Name> {
  def _vi : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
            VOP3Pe_MAI<op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl, ?> {
    let AssemblerPredicate = HasMAIInsts;
    let DecoderNamespace = "GFX8";
    let Constraints = "";
  }
}
|
|
|
|
|
2022-05-19 13:40:48 +03:00
|
|
|
// Defines <NAME>_gfx940, the real encoding of the SMFMAC pseudo <NAME>_e64,
// and a mnemonic alias from `alias` to the pseudo's Mnemonic. The real def
// uses the VI encoding family and the "GFX8" decoder namespace while being
// assembled only under isGFX940Plus.
multiclass VOP3P_Real_SMFMAC<bits<7> op, string alias> {
  def _gfx940 : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
                VOP3Pe_SMFMAC<op> {
    let AssemblerPredicate = isGFX940Plus;
    let DecoderNamespace = "GFX8";
  }

  def : MnemonicAlias<alias, !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic>;
}
|
|
|
|
|
2020-09-30 15:01:33 -07:00
|
|
|
// VI-family real encodings of the packed 16-bit instructions, opcodes
// 0x00-0x12.
defm V_PK_MAD_I16     : VOP3P_Real_vi <0x00>;
defm V_PK_MUL_LO_U16  : VOP3P_Real_vi <0x01>;
defm V_PK_ADD_I16     : VOP3P_Real_vi <0x02>;
defm V_PK_SUB_I16     : VOP3P_Real_vi <0x03>;
defm V_PK_LSHLREV_B16 : VOP3P_Real_vi <0x04>;
defm V_PK_LSHRREV_B16 : VOP3P_Real_vi <0x05>;
defm V_PK_ASHRREV_I16 : VOP3P_Real_vi <0x06>;
defm V_PK_MAX_I16     : VOP3P_Real_vi <0x07>;
defm V_PK_MIN_I16     : VOP3P_Real_vi <0x08>;
defm V_PK_MAD_U16     : VOP3P_Real_vi <0x09>;

defm V_PK_ADD_U16     : VOP3P_Real_vi <0x0a>;
defm V_PK_SUB_U16     : VOP3P_Real_vi <0x0b>;
defm V_PK_MAX_U16     : VOP3P_Real_vi <0x0c>;
defm V_PK_MIN_U16     : VOP3P_Real_vi <0x0d>;
defm V_PK_FMA_F16     : VOP3P_Real_vi <0x0e>;
defm V_PK_ADD_F16     : VOP3P_Real_vi <0x0f>;
defm V_PK_MUL_F16     : VOP3P_Real_vi <0x10>;
defm V_PK_MIN_F16     : VOP3P_Real_vi <0x11>;
defm V_PK_MAX_F16     : VOP3P_Real_vi <0x12>;
|
2017-02-27 18:49:11 +00:00
|
|
|
|
2018-04-30 19:08:16 +00:00
|
|
|
|
|
|
|
// mad_mix real encodings, opcodes 0x20-0x22.
let SubtargetPredicate = HasMadMixInsts in {
defm V_MAD_MIX_F32   : VOP3P_Real_vi <0x20>;
defm V_MAD_MIXLO_F16 : VOP3P_Real_vi <0x21>;
defm V_MAD_MIXHI_F16 : VOP3P_Real_vi <0x22>;
} // End SubtargetPredicate = HasMadMixInsts

// fma_mix real encodings; they reuse opcodes 0x20-0x22.
let SubtargetPredicate = HasFmaMixInsts in {
let DecoderNamespace = "GFX9_DL" in {
// The mad_mix instructions were renamed and their behaviors changed,
// but the opcode stayed the same so we need to put these in a
// different DecoderNamespace to avoid the ambiguity.
defm V_FMA_MIX_F32   : VOP3P_Real_vi <0x20>;
defm V_FMA_MIXLO_F16 : VOP3P_Real_vi <0x21>;
defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x22>;
} // End DecoderNamespace = "GFX9_DL"
} // End SubtargetPredicate = HasFmaMixInsts
|
|
|
|
|
|
|
|
|
2019-02-09 00:34:21 +00:00
|
|
|
// VI-family real encodings of the dot-product instructions, grouped by the
// subtarget predicate that guards them.
let SubtargetPredicate = HasDot2Insts in {

defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;

} // End SubtargetPredicate = HasDot2Insts

let SubtargetPredicate = HasDot7Insts in {

defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
defm V_DOT4_U32_U8  : VOP3P_Real_vi <0x29>;
defm V_DOT8_U32_U4  : VOP3P_Real_vi <0x2b>;

} // End SubtargetPredicate = HasDot7Insts

let SubtargetPredicate = HasDot1Insts in {

defm V_DOT4_I32_I8  : VOP3P_Real_vi <0x28>;
defm V_DOT8_I32_I4  : VOP3P_Real_vi <0x2a>;

} // End SubtargetPredicate = HasDot1Insts
|
2019-04-26 17:56:03 +00:00
|
|
|
|
2019-07-09 21:43:09 +00:00
|
|
|
// Real encodings of the MAI instructions. The string argument, where given,
// is the mnemonic passed through to the gfx940 real instructions.
let SubtargetPredicate = HasMAIInsts in {

defm V_ACCVGPR_READ_B32  : VOP3P_Real_MAI <0x58>;
defm V_ACCVGPR_WRITE_B32 : VOP3P_Real_MAI <0x59>;
defm V_MFMA_F32_32X32X1F32  : VOP3P_Real_MFMA <0x40, "v_mfma_f32_32x32x1_2b_f32">;
defm V_MFMA_F32_16X16X1F32  : VOP3P_Real_MFMA <0x41, "v_mfma_f32_16x16x1_4b_f32">;
defm V_MFMA_F32_4X4X1F32    : VOP3P_Real_MFMA <0x42, "v_mfma_f32_4x4x1_16b_f32">;
defm V_MFMA_F32_32X32X2F32  : VOP3P_Real_MFMA <0x44, "v_mfma_f32_32x32x2_f32">;
defm V_MFMA_F32_16X16X4F32  : VOP3P_Real_MFMA <0x45, "v_mfma_f32_16x16x4_f32">;
defm V_MFMA_F32_32X32X4F16  : VOP3P_Real_MFMA <0x48, "v_mfma_f32_32x32x4_2b_f16">;
defm V_MFMA_F32_16X16X4F16  : VOP3P_Real_MFMA <0x49, "v_mfma_f32_16x16x4_4b_f16">;
defm V_MFMA_F32_4X4X4F16    : VOP3P_Real_MFMA <0x4a, "v_mfma_f32_4x4x4_16b_f16">;
defm V_MFMA_F32_32X32X8F16  : VOP3P_Real_MFMA <0x4c, "v_mfma_f32_32x32x8_f16">;
defm V_MFMA_F32_16X16X16F16 : VOP3P_Real_MFMA <0x4d, "v_mfma_f32_16x16x16_f16">;
defm V_MFMA_I32_32X32X4I8   : VOP3P_Real_MFMA <0x50, "v_mfma_i32_32x32x4_2b_i8">;
defm V_MFMA_I32_16X16X4I8   : VOP3P_Real_MFMA <0x51, "v_mfma_i32_16x16x4_4b_i8">;
defm V_MFMA_I32_4X4X4I8     : VOP3P_Real_MFMA <0x52, "v_mfma_i32_4x4x4_16b_i8">;

// These keep the default gfx940 mnemonic (no string argument).
let SubtargetPredicate = isGFX908orGFX90A in {
defm V_MFMA_I32_16X16X16I8  : VOP3P_Real_MFMA <0x55>;
defm V_MFMA_I32_32X32X8I8   : VOP3P_Real_MFMA <0x54>;
defm V_MFMA_F32_32X32X2BF16 : VOP3P_Real_MFMA <0x68>;
defm V_MFMA_F32_16X16X2BF16 : VOP3P_Real_MFMA <0x69>;
defm V_MFMA_F32_4X4X2BF16   : VOP3P_Real_MFMA <0x6b>;
defm V_MFMA_F32_32X32X4BF16 : VOP3P_Real_MFMA <0x6c>;
defm V_MFMA_F32_16X16X8BF16 : VOP3P_Real_MFMA <0x6d>;
} // End SubtargetPredicate = isGFX908orGFX90A

} // End SubtargetPredicate = HasMAIInsts
|
|
|
|
|
2021-02-17 13:37:46 -08:00
|
|
|
// gfx90a-only real encodings.
defm V_MFMA_F32_32X32X4BF16_1K  : VOP3P_Real_MFMA_gfx90a <0x63>;
defm V_MFMA_F32_16X16X4BF16_1K  : VOP3P_Real_MFMA_gfx90a <0x64>;
defm V_MFMA_F32_4X4X4BF16_1K    : VOP3P_Real_MFMA_gfx90a <0x65>;
defm V_MFMA_F32_32X32X8BF16_1K  : VOP3P_Real_MFMA_gfx90a <0x66>;
defm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx90a <0x67>;
defm V_MFMA_F64_16X16X4F64      : VOP3P_Real_MFMA_gfx90a <0x6e>;
defm V_MFMA_F64_4X4X4F64        : VOP3P_Real_MFMA_gfx90a <0x6f>;

// gfx940 real encodings with their gfx940 mnemonics.
defm V_MFMA_I32_32X32X16I8      : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">;
defm V_MFMA_I32_16X16X32I8      : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">;
defm V_MFMA_F32_16X16X8XF32     : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">;
defm V_MFMA_F32_32X32X4XF32     : VOP3P_Real_MFMA_gfx940 <0x3f, "v_mfma_f32_32x32x4_xf32">;

// The "_1k" bf16 operations use different opcodes here than in the gfx90a
// list above.
defm V_MFMA_F32_32X32X4BF16_1K  : VOP3P_Real_MFMA_gfx940 <0x5d, "v_mfma_f32_32x32x4_2b_bf16">;
defm V_MFMA_F32_16X16X4BF16_1K  : VOP3P_Real_MFMA_gfx940 <0x5e, "v_mfma_f32_16x16x4_4b_bf16">;
defm V_MFMA_F32_4X4X4BF16_1K    : VOP3P_Real_MFMA_gfx940 <0x5f, "v_mfma_f32_4x4x4_16b_bf16">;
defm V_MFMA_F32_32X32X8BF16_1K  : VOP3P_Real_MFMA_gfx940 <0x60, "v_mfma_f32_32x32x8_bf16">;
defm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx940 <0x61, "v_mfma_f32_16x16x16_bf16">;

defm V_MFMA_F64_16X16X4F64      : VOP3P_Real_MFMA_gfx940 <0x6e, "v_mfma_f64_16x16x4_f64">;
defm V_MFMA_F64_4X4X4F64        : VOP3P_Real_MFMA_gfx940 <0x6f, "v_mfma_f64_4x4x4_4b_f64">;

// SMFMAC real encodings; the string is an additional accepted mnemonic.
defm V_SMFMAC_F32_16X16X32_F16  : VOP3P_Real_SMFMAC <0x62, "v_smfmac_f32_16x16x32f16">;
defm V_SMFMAC_F32_32X32X16_F16  : VOP3P_Real_SMFMAC <0x64, "v_smfmac_f32_32x32x16f16">;
defm V_SMFMAC_F32_16X16X32_BF16 : VOP3P_Real_SMFMAC <0x66, "v_smfmac_f32_16x16x32bf16">;
defm V_SMFMAC_F32_32X32X16_BF16 : VOP3P_Real_SMFMAC <0x68, "v_smfmac_f32_32x32x16bf16">;
defm V_SMFMAC_I32_16X16X64_I8   : VOP3P_Real_SMFMAC <0x6a, "v_smfmac_i32_16x16x64i8">;
defm V_SMFMAC_I32_32X32X32_I8   : VOP3P_Real_SMFMAC <0x6c, "v_smfmac_i32_32x32x32i8">;
|
2022-03-21 12:59:33 -07:00
|
|
|
|
2021-02-17 13:37:46 -08:00
|
|
|
// VI-family real encodings of the packed 32-bit instructions, opcodes
// 0x30-0x33.
let SubtargetPredicate = HasPackedFP32Ops in {
  defm V_PK_FMA_F32 : VOP3P_Real_vi <0x30>;
  defm V_PK_MUL_F32 : VOP3P_Real_vi <0x31>;
  defm V_PK_ADD_F32 : VOP3P_Real_vi <0x32>;
  defm V_PK_MOV_B32 : VOP3P_Real_vi <0x33>;
} // End SubtargetPredicate = HasPackedFP32Ops
|
|
|
|
|
2019-04-26 17:56:03 +00:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// GFX10.
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2022-05-24 13:31:09 -04:00
|
|
|
// Defines <NAME>_gfx10, the GFX10 real encoding of the VOP3P pseudo NAME.
// The enclosing `let` restricts it to isGFX10Only, places it in the "GFX10"
// decoder namespace and sets the VOP3P flag.
let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10", VOP3P = 1 in {
  multiclass VOP3P_Real_gfx10<bits<7> op> {
    def _gfx10 : VOP3P_Real<!cast<VOP3P_Pseudo>(NAME), SIEncodingFamily.GFX10>,
                 VOP3Pe_gfx10<op, !cast<VOP3P_Pseudo>(NAME).Pfl>;
  }
} // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10", VOP3P = 1
|
|
|
|
|
|
|
|
// Same opcode on GFX10 and GFX11: plain real encodings only.
multiclass VOP3P_Real_gfx10_gfx11<bits<7> op>
    : VOP3P_Real_gfx10<op>,
      VOP3P_Real_gfx11<op>;

// Same opcode on GFX10 and GFX11, with the GFX11 side also getting the DPP16
// and DPP8 real encodings.
multiclass VOP3P_Real_gfx10_gfx11_Triple<bits<7> op>
    : VOP3P_Real_gfx10<op>,
      VOP3P_Realtriple_gfx11<op>;
|
|
|
|
|
|
|
|
// GFX10/GFX11 real encodings of the packed 16-bit instructions, opcodes
// 0x00-0x12.
defm V_PK_MAD_I16     : VOP3P_Real_gfx10_gfx11<0x00>;
defm V_PK_MUL_LO_U16  : VOP3P_Real_gfx10_gfx11<0x01>;
defm V_PK_ADD_I16     : VOP3P_Real_gfx10_gfx11<0x02>;
defm V_PK_SUB_I16     : VOP3P_Real_gfx10_gfx11<0x03>;
defm V_PK_LSHLREV_B16 : VOP3P_Real_gfx10_gfx11<0x04>;
defm V_PK_LSHRREV_B16 : VOP3P_Real_gfx10_gfx11<0x05>;
defm V_PK_ASHRREV_I16 : VOP3P_Real_gfx10_gfx11<0x06>;
defm V_PK_MAX_I16     : VOP3P_Real_gfx10_gfx11<0x07>;
defm V_PK_MIN_I16     : VOP3P_Real_gfx10_gfx11<0x08>;
defm V_PK_MAD_U16     : VOP3P_Real_gfx10_gfx11<0x09>;
defm V_PK_ADD_U16     : VOP3P_Real_gfx10_gfx11<0x0a>;
defm V_PK_SUB_U16     : VOP3P_Real_gfx10_gfx11<0x0b>;
defm V_PK_MAX_U16     : VOP3P_Real_gfx10_gfx11<0x0c>;
defm V_PK_MIN_U16     : VOP3P_Real_gfx10_gfx11<0x0d>;
defm V_PK_FMA_F16     : VOP3P_Real_gfx10_gfx11<0x0e>;
defm V_PK_ADD_F16     : VOP3P_Real_gfx10_gfx11<0x0f>;
defm V_PK_MUL_F16     : VOP3P_Real_gfx10_gfx11<0x10>;
defm V_PK_MIN_F16     : VOP3P_Real_gfx10_gfx11<0x11>;
defm V_PK_MAX_F16     : VOP3P_Real_gfx10_gfx11<0x12>;

// fma_mix: the GFX11 side also gets DPP16/DPP8 encodings.
defm V_FMA_MIX_F32    : VOP3P_Real_gfx10_gfx11_Triple <0x20>;
defm V_FMA_MIXLO_F16  : VOP3P_Real_gfx10_gfx11_Triple <0x21>;
defm V_FMA_MIXHI_F16  : VOP3P_Real_gfx10_gfx11_Triple <0x22>;
|
2019-06-14 00:33:31 +00:00
|
|
|
|
|
|
|
// GFX10 (and, where the multiclass says so, GFX11) real encodings of the
// dot-product instructions, grouped by the guarding subtarget predicate.
let SubtargetPredicate = HasDot2Insts in {

defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;

} // End SubtargetPredicate = HasDot2Insts

let SubtargetPredicate = HasDot7Insts in {

defm V_DOT2_F32_F16 : VOP3P_Real_gfx10_gfx11_Triple <0x13>;
defm V_DOT4_U32_U8  : VOP3P_Real_gfx10_gfx11 <0x17>;
defm V_DOT8_U32_U4  : VOP3P_Real_gfx10_gfx11 <0x19>;

} // End SubtargetPredicate = HasDot7Insts

let SubtargetPredicate = HasDot1Insts in {

defm V_DOT4_I32_I8  : VOP3P_Real_gfx10 <0x16>;
defm V_DOT8_I32_I4  : VOP3P_Real_gfx10 <0x18>;

} // End SubtargetPredicate = HasDot1Insts
|