Fix scalar overload name constructed by ReplaceWithVeclib.cpp (#111095)

ReplaceWithVeclib.cpp would construct overload name using all the
arguments in the intrinsic, but overloads should only be constructed
from arguments for which isVectorIntrinsicWithOverloadTypeAtArg returns
true, including the return type first (index -1).

Additionally,
- skip when `Intrinsic::not_intrinsic`, otherwise
`isVectorIntrinsicWithOverloadTypeAtArg` asserts for some
IntrinsicCalls.

Unblocks translation for pow and atan2 intrinsics.

Fixes #111093
This commit is contained in:
Tex Riddell 2024-10-11 14:38:35 -07:00 committed by GitHub
parent 8b46d40221
commit 82b40fd4fd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 31 additions and 23 deletions

View File

@ -100,20 +100,34 @@ static void replaceWithTLIFunction(IntrinsicInst *II, VFInfo &Info,
static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
IntrinsicInst *II) {
assert(II != nullptr && "Intrinsic cannot be null");
Intrinsic::ID IID = II->getIntrinsicID();
Type *RetTy = II->getType();
Type *ScalarRetTy = RetTy->getScalarType();
// At the moment VFABI assumes the return type is always widened unless it is
// a void type.
auto *VTy = dyn_cast<VectorType>(II->getType());
auto *VTy = dyn_cast<VectorType>(RetTy);
ElementCount EC(VTy ? VTy->getElementCount() : ElementCount::getFixed(0));
// OloadTys collects types used in scalar intrinsic overload name.
SmallVector<Type *, 3> OloadTys;
if (!RetTy->isVoidTy() && isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
OloadTys.push_back(ScalarRetTy);
// Compute the argument types of the corresponding scalar call and check that
// all vector operands match the previously found EC.
SmallVector<Type *, 8> ScalarArgTypes;
Intrinsic::ID IID = II->getIntrinsicID();
for (auto Arg : enumerate(II->args())) {
auto *ArgTy = Arg.value()->getType();
bool IsOloadTy = isVectorIntrinsicWithOverloadTypeAtArg(IID, Arg.index());
if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) {
ScalarArgTypes.push_back(ArgTy);
if (IsOloadTy)
OloadTys.push_back(ArgTy);
} else if (auto *VectorArgTy = dyn_cast<VectorType>(ArgTy)) {
ScalarArgTypes.push_back(VectorArgTy->getElementType());
auto *ScalarArgTy = VectorArgTy->getElementType();
ScalarArgTypes.push_back(ScalarArgTy);
if (IsOloadTy)
OloadTys.push_back(ScalarArgTy);
// When return type is void, set EC to the first vector argument, and
// disallow vector arguments with different ECs.
if (EC.isZero())
@ -129,7 +143,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// using scalar argument types.
std::string ScalarName =
Intrinsic::isOverloaded(IID)
? Intrinsic::getName(IID, ScalarArgTypes, II->getModule())
? Intrinsic::getName(IID, OloadTys, II->getModule())
: Intrinsic::getName(IID).str();
// Try to find the mapping for the scalar version of this intrinsic and the
@ -146,7 +160,6 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
// Replace the call to the intrinsic with a call to the vector library
// function.
Type *ScalarRetTy = II->getType()->getScalarType();
FunctionType *ScalarFTy =
FunctionType::get(ScalarRetTy, ScalarArgTypes, /*isVarArg*/ false);
const std::string MangledName = VD->getVectorFunctionABIVariantString();
@ -193,6 +206,8 @@ static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
for (auto &I : instructions(F)) {
// Process only intrinsic calls that return void or a vector.
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
if (II->getIntrinsicID() == Intrinsic::not_intrinsic)
continue;
if (!II->getType()->isVectorTy() && !II->getType()->isVoidTy())
continue;

View File

@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
;.
; CHECK: @llvm.compiler.used = appending global [60 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vtanq_f64, ptr @armpl_vtanq_f32, ptr @armpl_svtan_f64_x, ptr @armpl_svtan_f32_x, ptr @armpl_vacosq_f64, ptr @armpl_vacosq_f32, ptr @armpl_svacos_f64_x, ptr @armpl_svacos_f32_x, ptr @armpl_vasinq_f64, ptr @armpl_vasinq_f32, ptr @armpl_svasin_f64_x, ptr @armpl_svasin_f32_x, ptr @armpl_vatanq_f64, ptr @armpl_vatanq_f32, ptr @armpl_svatan_f64_x, ptr @armpl_svatan_f32_x, ptr @armpl_vcoshq_f64, ptr @armpl_vcoshq_f32, ptr @armpl_svcosh_f64_x, ptr @armpl_svcosh_f32_x, ptr @armpl_vsinhq_f64, ptr @armpl_vsinhq_f32, ptr @armpl_svsinh_f64_x, ptr @armpl_svsinh_f32_x, ptr @armpl_vtanhq_f64, ptr @armpl_vtanhq_f32, ptr @armpl_svtanh_f64_x, ptr @armpl_svtanh_f32_x], section "llvm.metadata"
; CHECK: @llvm.compiler.used = appending global [64 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_svcos_f64_x, ptr @armpl_svcos_f32_x, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_svexp_f64_x, ptr @armpl_svexp_f32_x, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_svexp10_f64_x, ptr @armpl_svexp10_f32_x, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_svexp2_f64_x, ptr @armpl_svexp2_f32_x, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_svlog_f64_x, ptr @armpl_svlog_f32_x, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32, ptr @armpl_svlog10_f64_x, ptr @armpl_svlog10_f32_x, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_svlog2_f64_x, ptr @armpl_svlog2_f32_x, ptr @armpl_vpowq_f64, ptr @armpl_vpowq_f32, ptr @armpl_svpow_f64_x, ptr @armpl_svpow_f32_x, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_svsin_f64_x, ptr @armpl_svsin_f32_x, ptr @armpl_vtanq_f64, ptr @armpl_vtanq_f32, ptr @armpl_svtan_f64_x, ptr @armpl_svtan_f32_x, ptr @armpl_vacosq_f64, ptr @armpl_vacosq_f32, ptr @armpl_svacos_f64_x, ptr @armpl_svacos_f32_x, ptr @armpl_vasinq_f64, ptr @armpl_vasinq_f32, ptr @armpl_svasin_f64_x, ptr @armpl_svasin_f32_x, ptr @armpl_vatanq_f64, ptr @armpl_vatanq_f32, ptr @armpl_svatan_f64_x, ptr @armpl_svatan_f32_x, ptr @armpl_vcoshq_f64, ptr @armpl_vcoshq_f32, ptr @armpl_svcosh_f64_x, ptr @armpl_svcosh_f32_x, ptr @armpl_vsinhq_f64, ptr @armpl_vsinhq_f32, ptr @armpl_svsinh_f64_x, ptr @armpl_svsinh_f32_x, ptr @armpl_vtanhq_f64, ptr @armpl_vtanhq_f32, ptr @armpl_svtanh_f64_x, ptr @armpl_svtanh_f32_x], section "llvm.metadata"
;.
define <2 x double> @llvm_cos_f64(<2 x double> %in) {
@ -333,17 +333,10 @@ declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
declare <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
;
; There is a bug in the replace-with-veclib pass, and for intrinsics which take
; more than one arguments, but has just one overloaded type, it incorrectly
; reconstructs the scalar name, for pow specifically it is searching for:
; llvm.pow.f64.f64 and llvm.pow.f32.f32
;
define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %power) {
; CHECK-LABEL: define <2 x double> @llvm_pow_f64
; CHECK-SAME: (<2 x double> [[IN:%.*]], <2 x double> [[POWER:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.pow.v2f64(<2 x double> [[IN]], <2 x double> [[POWER]])
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vpowq_f64(<2 x double> [[IN]], <2 x double> [[POWER]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %power)
@ -353,7 +346,7 @@ define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %power) {
define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %power) {
; CHECK-LABEL: define <4 x float> @llvm_pow_f32
; CHECK-SAME: (<4 x float> [[IN:%.*]], <4 x float> [[POWER:%.*]]) {
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.pow.v4f32(<4 x float> [[IN]], <4 x float> [[POWER]])
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vpowq_f32(<4 x float> [[IN]], <4 x float> [[POWER]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %power)
@ -363,7 +356,7 @@ define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %power) {
define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vscale x 2 x double> %power) #0 {
; CHECK-LABEL: define <vscale x 2 x double> @llvm_pow_vscale_f64
; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POWER:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> [[IN]], <vscale x 2 x double> [[POWER]])
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x double> [[POWER]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> %in, <vscale x 2 x double> %power)
@ -373,7 +366,7 @@ define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vs
define <vscale x 4 x float> @llvm_pow_vscale_f32(<vscale x 4 x float> %in, <vscale x 4 x float> %power) #0 {
; CHECK-LABEL: define <vscale x 4 x float> @llvm_pow_vscale_f32
; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POWER:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> [[IN]], <vscale x 4 x float> [[POWER]])
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x float> [[POWER]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %in, <vscale x 4 x float> %power)

View File

@ -4,7 +4,7 @@
target triple = "aarch64-unknown-linux-gnu"
;.
; CHECK: @llvm.compiler.used = appending global [30 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxv_acos, ptr @_ZGVsMxv_acosf, ptr @_ZGVsMxv_asin, ptr @_ZGVsMxv_asinf, ptr @_ZGVsMxv_atan, ptr @_ZGVsMxv_atanf, ptr @_ZGVsMxv_cosh, ptr @_ZGVsMxv_coshf, ptr @_ZGVsMxv_sinh, ptr @_ZGVsMxv_sinhf, ptr @_ZGVsMxv_tanh, ptr @_ZGVsMxv_tanhf], section "llvm.metadata"
; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @_ZGVsMxv_cos, ptr @_ZGVsMxv_cosf, ptr @_ZGVsMxv_exp, ptr @_ZGVsMxv_expf, ptr @_ZGVsMxv_exp10, ptr @_ZGVsMxv_exp10f, ptr @_ZGVsMxv_exp2, ptr @_ZGVsMxv_exp2f, ptr @_ZGVsMxv_log, ptr @_ZGVsMxv_logf, ptr @_ZGVsMxv_log10, ptr @_ZGVsMxv_log10f, ptr @_ZGVsMxv_log2, ptr @_ZGVsMxv_log2f, ptr @_ZGVsMxvv_pow, ptr @_ZGVsMxvv_powf, ptr @_ZGVsMxv_sin, ptr @_ZGVsMxv_sinf, ptr @_ZGVsMxv_tan, ptr @_ZGVsMxv_tanf, ptr @_ZGVsMxv_acos, ptr @_ZGVsMxv_acosf, ptr @_ZGVsMxv_asin, ptr @_ZGVsMxv_asinf, ptr @_ZGVsMxv_atan, ptr @_ZGVsMxv_atanf, ptr @_ZGVsMxv_cosh, ptr @_ZGVsMxv_coshf, ptr @_ZGVsMxv_sinh, ptr @_ZGVsMxv_sinhf, ptr @_ZGVsMxv_tanh, ptr @_ZGVsMxv_tanhf], section "llvm.metadata"
;.
define <vscale x 2 x double> @llvm_ceil_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_ceil_vscale_f64(
@ -278,7 +278,7 @@ define <vscale x 4 x float> @llvm_nearbyint_vscale_f32(<vscale x 4 x float> %in)
define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vscale x 2 x double> %pow) {
; CHECK-LABEL: @llvm_pow_vscale_f64(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POW:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> [[POW:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
;
%1 = call fast <vscale x 2 x double> @llvm.pow.nxv2f64(<vscale x 2 x double> %in, <vscale x 2 x double> %pow)
@ -287,7 +287,7 @@ define <vscale x 2 x double> @llvm_pow_vscale_f64(<vscale x 2 x double> %in, <vs
define <vscale x 4 x float> @llvm_pow_vscale_f32(<vscale x 4 x float> %in, <vscale x 4 x float> %pow) {
; CHECK-LABEL: @llvm_pow_vscale_f32(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POW:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> [[POW:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
;
%1 = call fast <vscale x 4 x float> @llvm.pow.nxv4f32(<vscale x 4 x float> %in, <vscale x 4 x float> %pow)

View File

@ -4,7 +4,7 @@
target triple = "aarch64-unknown-linux-gnu"
;.
; CHECK: @llvm.compiler.used = appending global [30 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2v_acos, ptr @_ZGVnN4v_acosf, ptr @_ZGVnN2v_asin, ptr @_ZGVnN4v_asinf, ptr @_ZGVnN2v_atan, ptr @_ZGVnN4v_atanf, ptr @_ZGVnN2v_cosh, ptr @_ZGVnN4v_coshf, ptr @_ZGVnN2v_sinh, ptr @_ZGVnN4v_sinhf, ptr @_ZGVnN2v_tanh, ptr @_ZGVnN4v_tanhf], section "llvm.metadata"
; CHECK: @llvm.compiler.used = appending global [32 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2vv_pow, ptr @_ZGVnN4vv_powf, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf, ptr @_ZGVnN2v_tan, ptr @_ZGVnN4v_tanf, ptr @_ZGVnN2v_acos, ptr @_ZGVnN4v_acosf, ptr @_ZGVnN2v_asin, ptr @_ZGVnN4v_asinf, ptr @_ZGVnN2v_atan, ptr @_ZGVnN4v_atanf, ptr @_ZGVnN2v_cosh, ptr @_ZGVnN4v_coshf, ptr @_ZGVnN2v_sinh, ptr @_ZGVnN4v_sinhf, ptr @_ZGVnN2v_tanh, ptr @_ZGVnN4v_tanhf], section "llvm.metadata"
;.
define <2 x double> @llvm_ceil_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_ceil_f64(
@ -278,7 +278,7 @@ define <4 x float> @llvm_nearbyint_f32(<4 x float> %in) {
define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %pow) {
; CHECK-LABEL: @llvm_pow_f64(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.pow.v2f64(<2 x double> [[IN:%.*]], <2 x double> [[POW:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2vv_pow(<2 x double> [[IN:%.*]], <2 x double> [[POW:%.*]])
; CHECK-NEXT: ret <2 x double> [[TMP1]]
;
%1 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %in, <2 x double> %pow)
@ -287,7 +287,7 @@ define <2 x double> @llvm_pow_f64(<2 x double> %in, <2 x double> %pow) {
define <4 x float> @llvm_pow_f32(<4 x float> %in, <4 x float> %pow) {
; CHECK-LABEL: @llvm_pow_f32(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.pow.v4f32(<4 x float> [[IN:%.*]], <4 x float> [[POW:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4vv_powf(<4 x float> [[IN:%.*]], <4 x float> [[POW:%.*]])
; CHECK-NEXT: ret <4 x float> [[TMP1]]
;
%1 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %in, <4 x float> %pow)