mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-26 10:26:06 +00:00
[SDAG][NVPTX] Add TLI hook to get preferred FP->INT opcode (#132470)
Extract the logic for choosing FP_TO_UINT vs FP_TO_SINT opcodes into a TLI hook. This hook can be overridden by targets that prefer not to use the default behavior of replacing FP_TO_UINT with FP_TO_SINT when both are custom. Implement an override for NVPTX to only change opcode when FP_TO_UINT is not legal and FP_TO_SINT is legal.
This commit is contained in:
parent
8b34986072
commit
c13436e516
@ -3464,6 +3464,34 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get the preferred opcode for FP_TO_XINT nodes.
|
||||
// By default, this checks if the provded operation is an illegal FP_TO_UINT
|
||||
// and if so, checks if FP_TO_SINT is legal or custom for use as a
|
||||
// replacement. If both UINT and SINT conversions are Custom, we choose SINT
|
||||
// by default because that's the right thing on PPC.
|
||||
virtual unsigned getPreferredFPToIntOpcode(unsigned Op, EVT FromVT,
|
||||
EVT ToVT) const {
|
||||
if (isOperationLegal(Op, ToVT))
|
||||
return Op;
|
||||
switch (Op) {
|
||||
case ISD::FP_TO_UINT:
|
||||
if (isOperationLegalOrCustom(ISD::FP_TO_SINT, ToVT))
|
||||
return ISD::FP_TO_SINT;
|
||||
break;
|
||||
case ISD::STRICT_FP_TO_UINT:
|
||||
if (isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, ToVT))
|
||||
return ISD::STRICT_FP_TO_SINT;
|
||||
break;
|
||||
case ISD::VP_FP_TO_UINT:
|
||||
if (isOperationLegalOrCustom(ISD::VP_FP_TO_SINT, ToVT))
|
||||
return ISD::VP_FP_TO_SINT;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return Op;
|
||||
}
|
||||
|
||||
/// Create the IR node for the given complex deinterleaving operation.
|
||||
/// If one cannot be created using all the given inputs, nullptr should be
|
||||
/// returned.
|
||||
|
@ -849,28 +849,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
|
||||
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
|
||||
unsigned NewOpc = N->getOpcode();
|
||||
unsigned NewOpc =
|
||||
TLI.getPreferredFPToIntOpcode(N->getOpcode(), N->getValueType(0), NVT);
|
||||
SDLoc dl(N);
|
||||
|
||||
// If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
|
||||
// not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
|
||||
// and SINT conversions are Custom, there is no way to tell which is
|
||||
// preferable. We choose SINT because that's the right thing on PPC.)
|
||||
if (N->getOpcode() == ISD::FP_TO_UINT &&
|
||||
!TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
|
||||
TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
|
||||
NewOpc = ISD::FP_TO_SINT;
|
||||
|
||||
if (N->getOpcode() == ISD::STRICT_FP_TO_UINT &&
|
||||
!TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) &&
|
||||
TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
|
||||
NewOpc = ISD::STRICT_FP_TO_SINT;
|
||||
|
||||
if (N->getOpcode() == ISD::VP_FP_TO_UINT &&
|
||||
!TLI.isOperationLegal(ISD::VP_FP_TO_UINT, NVT) &&
|
||||
TLI.isOperationLegalOrCustom(ISD::VP_FP_TO_SINT, NVT))
|
||||
NewOpc = ISD::VP_FP_TO_SINT;
|
||||
|
||||
SDValue Res;
|
||||
if (N->isStrictFPOpcode()) {
|
||||
Res = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
|
||||
|
@ -6214,6 +6214,33 @@ Instruction *NVPTXTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Rather than default to SINT when both UINT and SINT are custom, we only
|
||||
// change the opcode when UINT is not legal and SINT is. UINT is preferred when
|
||||
// both are custom since unsigned CVT instructions can lead to slightly better
|
||||
// SASS code with fewer instructions.
|
||||
unsigned NVPTXTargetLowering::getPreferredFPToIntOpcode(unsigned Op, EVT FromVT,
|
||||
EVT ToVT) const {
|
||||
if (isOperationLegal(Op, ToVT))
|
||||
return Op;
|
||||
switch (Op) {
|
||||
case ISD::FP_TO_UINT:
|
||||
if (isOperationLegal(ISD::FP_TO_SINT, ToVT))
|
||||
return ISD::FP_TO_SINT;
|
||||
break;
|
||||
case ISD::STRICT_FP_TO_UINT:
|
||||
if (isOperationLegal(ISD::STRICT_FP_TO_SINT, ToVT))
|
||||
return ISD::STRICT_FP_TO_SINT;
|
||||
break;
|
||||
case ISD::VP_FP_TO_UINT:
|
||||
if (isOperationLegal(ISD::VP_FP_TO_SINT, ToVT))
|
||||
return ISD::VP_FP_TO_SINT;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return Op;
|
||||
}
|
||||
|
||||
// Pin NVPTXTargetObjectFile's vtables to this file.
|
||||
NVPTXTargetObjectFile::~NVPTXTargetObjectFile() = default;
|
||||
|
||||
|
@ -282,6 +282,9 @@ public:
|
||||
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
|
||||
AtomicOrdering Ord) const override;
|
||||
|
||||
unsigned getPreferredFPToIntOpcode(unsigned Op, EVT FromVT,
|
||||
EVT ToVT) const override;
|
||||
|
||||
private:
|
||||
const NVPTXSubtarget &STI; // cache the subtarget here
|
||||
mutable unsigned GlobalUniqueCallSite;
|
||||
|
134
llvm/test/CodeGen/NVPTX/convert-fp-i8.ll
Normal file
134
llvm/test/CodeGen/NVPTX/convert-fp-i8.ll
Normal file
@ -0,0 +1,134 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_70 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 | FileCheck %s
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s
|
||||
|
||||
define i8 @cvt_u8_f32(float %x) {
|
||||
; CHECK-LABEL: cvt_u8_f32(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-NEXT: .reg .f32 %f<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.f32 %f1, [cvt_u8_f32_param_0];
|
||||
; CHECK-NEXT: cvt.rzi.u16.f32 %rs1, %f1;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fptoui float %x to i8
|
||||
ret i8 %a
|
||||
}
|
||||
|
||||
define i8 @cvt_u8_f64(double %x) {
|
||||
; CHECK-LABEL: cvt_u8_f64(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<2>;
|
||||
; CHECK-NEXT: .reg .f64 %fd<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.f64 %fd1, [cvt_u8_f64_param_0];
|
||||
; CHECK-NEXT: cvt.rzi.u16.f64 %rs1, %fd1;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fptoui double %x to i8
|
||||
ret i8 %a
|
||||
}
|
||||
|
||||
define float @cvt_f32_i8(i8 %x) {
|
||||
; CHECK-LABEL: cvt_f32_i8(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .f32 %f<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.u8 %rs1, [cvt_f32_i8_param_0];
|
||||
; CHECK-NEXT: cvt.rn.f32.u16 %f1, %rs1;
|
||||
; CHECK-NEXT: st.param.f32 [func_retval0], %f1;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = uitofp i8 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define double @cvt_f64_i8(i8 %x) {
|
||||
; CHECK-LABEL: cvt_f64_i8(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .f64 %fd<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.u8 %rs1, [cvt_f64_i8_param_0];
|
||||
; CHECK-NEXT: cvt.rn.f64.u16 %fd1, %rs1;
|
||||
; CHECK-NEXT: st.param.f64 [func_retval0], %fd1;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = uitofp i8 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define float @cvt_f32_s8(i8 %x) {
|
||||
; CHECK-LABEL: cvt_f32_s8(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .f32 %f<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.s8 %rs1, [cvt_f32_s8_param_0];
|
||||
; CHECK-NEXT: cvt.rn.f32.s16 %f1, %rs1;
|
||||
; CHECK-NEXT: st.param.f32 [func_retval0], %f1;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = sitofp i8 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define double @cvt_f64_s8(i8 %x) {
|
||||
; CHECK-LABEL: cvt_f64_s8(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .f64 %fd<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.s8 %rs1, [cvt_f64_s8_param_0];
|
||||
; CHECK-NEXT: cvt.rn.f64.s16 %fd1, %rs1;
|
||||
; CHECK-NEXT: st.param.f64 [func_retval0], %fd1;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = sitofp i8 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define i8 @cvt_s8_f32(float %x) {
|
||||
; CHECK-LABEL: cvt_s8_f32(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<3>;
|
||||
; CHECK-NEXT: .reg .f32 %f<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.f32 %f1, [cvt_s8_f32_param_0];
|
||||
; CHECK-NEXT: cvt.rzi.s16.f32 %rs1, %f1;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
|
||||
; CHECK-NEXT: and.b32 %r2, %r1, 255;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fptosi float %x to i8
|
||||
ret i8 %a
|
||||
}
|
||||
|
||||
define i8 @cvt_s8_f64(double %x) {
|
||||
; CHECK-LABEL: cvt_s8_f64(
|
||||
; CHECK: {
|
||||
; CHECK-NEXT: .reg .b16 %rs<2>;
|
||||
; CHECK-NEXT: .reg .b32 %r<3>;
|
||||
; CHECK-NEXT: .reg .f64 %fd<2>;
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: // %bb.0:
|
||||
; CHECK-NEXT: ld.param.f64 %fd1, [cvt_s8_f64_param_0];
|
||||
; CHECK-NEXT: cvt.rzi.s16.f64 %rs1, %fd1;
|
||||
; CHECK-NEXT: cvt.u32.u16 %r1, %rs1;
|
||||
; CHECK-NEXT: and.b32 %r2, %r1, 255;
|
||||
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
|
||||
; CHECK-NEXT: ret;
|
||||
%a = fptosi double %x to i8
|
||||
ret i8 %a
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user