mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-17 18:26:37 +00:00
[Clang][AMDGPU] Add __builtin_amdgcn_cvt_off_f32_i4 (#133741)
This built-in maps to `V_CVT_OFF_F32_I4` which treats its input as a 4-bit signed integer and returns `0.0625f * src`. SWDEV-518861
This commit is contained in:
parent
540dd89778
commit
0375ef07c3
@ -167,6 +167,7 @@ Non-comprehensive list of changes in this release
|
||||
|
||||
- Support parsing the `cc` operand modifier and alias it to the `c` modifier (#GH127719).
|
||||
- Added `__builtin_elementwise_exp10`.
|
||||
- For AMDGPU targets, added `__builtin_amdgcn_cvt_off_f32_i4` that maps to the `v_cvt_off_f32_i4` instruction.
|
||||
|
||||
New Compiler Flags
|
||||
------------------
|
||||
|
@ -140,6 +140,7 @@ BUILTIN(__builtin_amdgcn_cvt_pknorm_u16, "E2Usff", "nc")
|
||||
BUILTIN(__builtin_amdgcn_cvt_pk_i16, "E2sii", "nc")
|
||||
BUILTIN(__builtin_amdgcn_cvt_pk_u16, "E2UsUiUi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_cvt_pk_u8_f32, "UifUiUi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_cvt_off_f32_i4, "fi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_sad_u8, "UiUiUiUi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_msad_u8, "UiUiUiUi", "nc")
|
||||
BUILTIN(__builtin_amdgcn_sad_hi_u8, "UiUiUiUi", "nc")
|
||||
|
27
clang/test/CodeGenOpenCL/builtins-amdgcn-cvt-off-f32-i4.cl
Normal file
27
clang/test/CodeGenOpenCL/builtins-amdgcn-cvt-off-f32-i4.cl
Normal file
@ -0,0 +1,27 @@
|
||||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
|
||||
// RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL1.2 \
|
||||
// RUN: -emit-llvm -o - | FileCheck %s
|
||||
|
||||
// CHECK-LABEL: @test_builtin_amdgcn_cvt_off_f32_i4_ui(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: store i32 [[N:%.*]], ptr addrspace(5) [[N_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.cvt.off.f32.i4(i32 [[TMP0]])
|
||||
// CHECK-NEXT: ret float [[TMP1]]
|
||||
//
|
||||
float test_builtin_amdgcn_cvt_off_f32_i4_ui(unsigned n) {
|
||||
return __builtin_amdgcn_cvt_off_f32_i4(n);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @test_builtin_amdgcn_cvt_off_f32_i4_i(
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
|
||||
// CHECK-NEXT: store i32 [[N:%.*]], ptr addrspace(5) [[N_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[N_ADDR]], align 4
|
||||
// CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.cvt.off.f32.i4(i32 [[TMP0]])
|
||||
// CHECK-NEXT: ret float [[TMP1]]
|
||||
//
|
||||
float test_builtin_amdgcn_cvt_off_f32_i4_i(int n) {
|
||||
return __builtin_amdgcn_cvt_off_f32_i4(n);
|
||||
}
|
@ -0,0 +1,8 @@
|
||||
// RUN: %clang_cc1 -triple amdgcn-- -verify -S -o - %s
|
||||
|
||||
void test_builtin_amdgcn_cvt_off_f32_i4(int n) {
|
||||
struct A{ unsigned x; } a;
|
||||
__builtin_amdgcn_cvt_off_f32_i4(n, n); // expected-error {{too many arguments to function call, expected 1, have 2}}
|
||||
__builtin_amdgcn_cvt_off_f32_i4(); // expected-error {{too few arguments to function call, expected 1, have 0}}
|
||||
__builtin_amdgcn_cvt_off_f32_i4(a); // expected-error {{passing '__private struct A' to parameter of incompatible type 'int'}}
|
||||
}
|
@ -3375,6 +3375,12 @@ def int_amdgcn_cvt_sr_fp8_f32 : ClangBuiltin<"__builtin_amdgcn_cvt_sr_fp8_f32">,
|
||||
[llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, ImmArg<ArgIndex<3>>]>;
|
||||
|
||||
// llvm.amdgcn.cvt.off.f32.i4 int srcA
|
||||
def int_amdgcn_cvt_off_f32_i4: ClangBuiltin<"__builtin_amdgcn_cvt_off_f32_i4">,
|
||||
DefaultAttrsIntrinsic<[llvm_float_ty],
|
||||
[llvm_i32_ty],
|
||||
[IntrNoMem, IntrSpeculatable]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// gfx950 intrinsics
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -6042,6 +6042,7 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
|
||||
// TODO: Handle more intrinsics
|
||||
switch (IntrinsicID) {
|
||||
case Intrinsic::amdgcn_cubeid:
|
||||
case Intrinsic::amdgcn_cvt_off_f32_i4:
|
||||
return true;
|
||||
|
||||
case Intrinsic::amdgcn_frexp_mant: {
|
||||
|
@ -729,6 +729,29 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
|
||||
|
||||
break;
|
||||
}
|
||||
case Intrinsic::amdgcn_cvt_off_f32_i4: {
|
||||
Value* Arg = II.getArgOperand(0);
|
||||
Type *Ty = II.getType();
|
||||
|
||||
if (isa<PoisonValue>(Arg))
|
||||
return IC.replaceInstUsesWith(II, PoisonValue::get(Ty));
|
||||
|
||||
if(IC.getSimplifyQuery().isUndefValue(Arg))
|
||||
return IC.replaceInstUsesWith(II, Constant::getNullValue(Ty));
|
||||
|
||||
ConstantInt *CArg = dyn_cast<ConstantInt>(II.getArgOperand(0));
|
||||
if (!CArg)
|
||||
break;
|
||||
|
||||
// Tabulated 0.0625 * (sext (CArg & 0xf)).
|
||||
constexpr size_t ResValsSize = 16;
|
||||
static constexpr float ResVals[ResValsSize] = {
|
||||
0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
|
||||
-0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625};
|
||||
Constant *Res =
|
||||
ConstantFP::get(Ty, ResVals[CArg->getZExtValue() & (ResValsSize - 1)]);
|
||||
return IC.replaceInstUsesWith(II, Res);
|
||||
}
|
||||
case Intrinsic::amdgcn_ubfe:
|
||||
case Intrinsic::amdgcn_sbfe: {
|
||||
// Decompose simple cases into standard shifts.
|
||||
|
@ -4585,6 +4585,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||
case Intrinsic::amdgcn_dot4_f32_bf8_bf8:
|
||||
case Intrinsic::amdgcn_cvt_f32_fp8:
|
||||
case Intrinsic::amdgcn_cvt_f32_bf8:
|
||||
case Intrinsic::amdgcn_cvt_off_f32_i4:
|
||||
case Intrinsic::amdgcn_cvt_pk_f32_fp8:
|
||||
case Intrinsic::amdgcn_cvt_pk_f32_bf8:
|
||||
case Intrinsic::amdgcn_cvt_pk_fp8_f32:
|
||||
|
@ -317,7 +317,7 @@ defm V_CVT_F32_BF16 : VOP1Inst_t16 <"v_cvt_f32_bf16", VOP_F32_BF16>;
|
||||
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
|
||||
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
|
||||
defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
|
||||
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
|
||||
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32, int_amdgcn_cvt_off_f32_i4>;
|
||||
} // End ReadsModeReg = 0, mayRaiseFPException = 0
|
||||
} // End SchedRW = [WriteFloatCvt]
|
||||
|
||||
|
27
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.off.f32.i4.ll
Normal file
27
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.off.f32.i4.ll
Normal file
@ -0,0 +1,27 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc --global-isel=0 -mtriple=amdgcn -mcpu=tahiti %s -o - | FileCheck %s
|
||||
; RUN: llc --global-isel=1 -mtriple=amdgcn -mcpu=tahiti %s -o - | FileCheck %s
|
||||
; RUN: llc --global-isel=0 -mtriple=amdgcn -mcpu=tonga %s -o - | FileCheck %s
|
||||
; RUN: llc --global-isel=1 -mtriple=amdgcn -mcpu=tonga %s -o - | FileCheck %s
|
||||
; RUN: llc --global-isel=0 -mtriple=amdgcn -mcpu=gfx90a %s -o - | FileCheck %s
|
||||
; RUN: llc --global-isel=1 -mtriple=amdgcn -mcpu=gfx90a %s -o - | FileCheck %s
|
||||
|
||||
declare float @llvm.amdgcn.cvt.off.f32.i4(i32)
|
||||
|
||||
define amdgpu_cs float @cvt_var(i32 %a) {
|
||||
; CHECK-LABEL: cvt_var:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: v_cvt_off_f32_i4_e32 v0, v0
|
||||
; CHECK-NEXT: ; return to shader part epilog
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 %a)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define amdgpu_cs float @cvt_imm() {
|
||||
; CHECK-LABEL: cvt_imm:
|
||||
; CHECK: ; %bb.0:
|
||||
; CHECK-NEXT: v_cvt_off_f32_i4_e32 v0, 4
|
||||
; CHECK-NEXT: ; return to shader part epilog
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 4)
|
||||
ret float %ret
|
||||
}
|
@ -0,0 +1,183 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine < %s | FileCheck %s
|
||||
|
||||
@gv = constant i32 0
|
||||
|
||||
define float @cvt_var(i32 %a) {
|
||||
; CHECK-LABEL: define float @cvt_var(
|
||||
; CHECK-SAME: i32 [[A:%.*]]) {
|
||||
; CHECK-NEXT: [[RET:%.*]] = call float @llvm.amdgcn.cvt.off.f32.i4(i32 [[A]])
|
||||
; CHECK-NEXT: ret float [[RET]]
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 %a)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_0() {
|
||||
; CHECK-LABEL: define float @cvt_imm_0() {
|
||||
; CHECK-NEXT: ret float 0.000000e+00
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 0)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_1() {
|
||||
; CHECK-LABEL: define float @cvt_imm_1() {
|
||||
; CHECK-NEXT: ret float 6.250000e-02
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 1)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_2() {
|
||||
; CHECK-LABEL: define float @cvt_imm_2() {
|
||||
; CHECK-NEXT: ret float 1.250000e-01
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 2)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_3() {
|
||||
; CHECK-LABEL: define float @cvt_imm_3() {
|
||||
; CHECK-NEXT: ret float 1.875000e-01
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 3)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_4() {
|
||||
; CHECK-LABEL: define float @cvt_imm_4() {
|
||||
; CHECK-NEXT: ret float 2.500000e-01
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 4)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_5() {
|
||||
; CHECK-LABEL: define float @cvt_imm_5() {
|
||||
; CHECK-NEXT: ret float 3.125000e-01
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 5)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_6() {
|
||||
; CHECK-LABEL: define float @cvt_imm_6() {
|
||||
; CHECK-NEXT: ret float 3.750000e-01
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 6)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_7() {
|
||||
; CHECK-LABEL: define float @cvt_imm_7() {
|
||||
; CHECK-NEXT: ret float 4.375000e-01
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 7)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_8() {
|
||||
; CHECK-LABEL: define float @cvt_imm_8() {
|
||||
; CHECK-NEXT: ret float -5.000000e-01
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 8)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_9() {
|
||||
; CHECK-LABEL: define float @cvt_imm_9() {
|
||||
; CHECK-NEXT: ret float -4.375000e-01
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 9)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_10() {
|
||||
; CHECK-LABEL: define float @cvt_imm_10() {
|
||||
; CHECK-NEXT: ret float -3.750000e-01
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 10)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_11() {
|
||||
; CHECK-LABEL: define float @cvt_imm_11() {
|
||||
; CHECK-NEXT: ret float -3.125000e-01
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 11)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_12() {
|
||||
; CHECK-LABEL: define float @cvt_imm_12() {
|
||||
; CHECK-NEXT: ret float -2.500000e-01
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 12)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_13() {
|
||||
; CHECK-LABEL: define float @cvt_imm_13() {
|
||||
; CHECK-NEXT: ret float -1.875000e-01
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 13)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_14() {
|
||||
; CHECK-LABEL: define float @cvt_imm_14() {
|
||||
; CHECK-NEXT: ret float -1.250000e-01
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 14)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_15() {
|
||||
; CHECK-LABEL: define float @cvt_imm_15() {
|
||||
; CHECK-NEXT: ret float -6.250000e-02
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 15)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_underflow() {
|
||||
; CHECK-LABEL: define float @cvt_imm_underflow() {
|
||||
; CHECK-NEXT: ret float -6.250000e-02
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 -1)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_imm_overflow() {
|
||||
; CHECK-LABEL: define float @cvt_imm_overflow() {
|
||||
; CHECK-NEXT: ret float 0.000000e+00
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 16)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_poison() {
|
||||
; CHECK-LABEL: define float @cvt_poison() {
|
||||
; CHECK-NEXT: ret float poison
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 poison)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_undef() {
|
||||
; CHECK-LABEL: define float @cvt_undef() {
|
||||
; CHECK-NEXT: ret float 0.000000e+00
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 undef)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define float @cvt_constexpr() {
|
||||
; CHECK-LABEL: define float @cvt_constexpr() {
|
||||
; CHECK-NEXT: [[RET:%.*]] = call float @llvm.amdgcn.cvt.off.f32.i4(i32 ptrtoint (ptr @gv to i32))
|
||||
; CHECK-NEXT: ret float [[RET]]
|
||||
;
|
||||
%ret = call float @llvm.amdgcn.cvt.off.f32.i4(i32 ptrtoint (ptr @gv to i32))
|
||||
ret float %ret
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user