mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-14 17:06:38 +00:00
[WebAssembly] Add intrinsics to wasm_simd128.h for all FP16 instructions (#106465)
Getting this to work required a few additional changes: - Add builtins for any instructions that can't currently be expressed in plain C. - Add support for the saturating versions of fp_to_sint_I16x8 and fp_to_uint_I16x8; other vector sizes supported this already. - Support bitcast of f16x8 to v128, which is needed to return a __f16x8 as v128_t.
This commit is contained in:
parent
206b5aff44
commit
5703d8572f
@ -124,6 +124,7 @@ TARGET_BUILTIN(__builtin_wasm_bitmask_i16x8, "UiV8s", "nc", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_bitmask_i32x4, "UiV4i", "nc", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_bitmask_i64x2, "UiV2LLi", "nc", "simd128")
|
||||
|
||||
TARGET_BUILTIN(__builtin_wasm_abs_f16x8, "V8hV8h", "nc", "fp16")
|
||||
TARGET_BUILTIN(__builtin_wasm_abs_f32x4, "V4fV4f", "nc", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_abs_f64x2, "V2dV2d", "nc", "simd128")
|
||||
|
||||
@ -140,6 +141,10 @@ TARGET_BUILTIN(__builtin_wasm_max_f16x8, "V8hV8hV8h", "nc", "fp16")
|
||||
TARGET_BUILTIN(__builtin_wasm_pmin_f16x8, "V8hV8hV8h", "nc", "fp16")
|
||||
TARGET_BUILTIN(__builtin_wasm_pmax_f16x8, "V8hV8hV8h", "nc", "fp16")
|
||||
|
||||
TARGET_BUILTIN(__builtin_wasm_ceil_f16x8, "V8hV8h", "nc", "fp16")
|
||||
TARGET_BUILTIN(__builtin_wasm_floor_f16x8, "V8hV8h", "nc", "fp16")
|
||||
TARGET_BUILTIN(__builtin_wasm_trunc_f16x8, "V8hV8h", "nc", "fp16")
|
||||
TARGET_BUILTIN(__builtin_wasm_nearest_f16x8, "V8hV8h", "nc", "fp16")
|
||||
TARGET_BUILTIN(__builtin_wasm_ceil_f32x4, "V4fV4f", "nc", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_floor_f32x4, "V4fV4f", "nc", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_trunc_f32x4, "V4fV4f", "nc", "simd128")
|
||||
@ -151,9 +156,13 @@ TARGET_BUILTIN(__builtin_wasm_nearest_f64x2, "V2dV2d", "nc", "simd128")
|
||||
|
||||
TARGET_BUILTIN(__builtin_wasm_dot_s_i32x4_i16x8, "V4iV8sV8s", "nc", "simd128")
|
||||
|
||||
TARGET_BUILTIN(__builtin_wasm_sqrt_f16x8, "V8hV8h", "nc", "fp16")
|
||||
TARGET_BUILTIN(__builtin_wasm_sqrt_f32x4, "V4fV4f", "nc", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_sqrt_f64x2, "V2dV2d", "nc", "simd128")
|
||||
|
||||
// The f16x8 -> i16x8 saturating truncations are only lowered when the FP16
// feature is available, so gate them on "fp16" like the other f16x8 builtins
// rather than on plain "simd128".
TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i16x8_f16x8, "V8sV8h", "nc", "fp16")
TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i16x8_f16x8, "V8sV8h", "nc", "fp16")
|
||||
|
||||
TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i32x4_f32x4, "V4iV4f", "nc", "simd128")
|
||||
TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i32x4_f32x4, "V4iV4f", "nc", "simd128")
|
||||
|
||||
|
@ -21211,6 +21211,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
|
||||
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
|
||||
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
|
||||
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
|
||||
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i16x8_f16x8:
|
||||
case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: {
|
||||
Value *Src = EmitScalarExpr(E->getArg(0));
|
||||
llvm::Type *ResT = ConvertType(E->getType());
|
||||
@ -21222,6 +21223,7 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
|
||||
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
|
||||
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
|
||||
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
|
||||
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i16x8_f16x8:
|
||||
case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: {
|
||||
Value *Src = EmitScalarExpr(E->getArg(0));
|
||||
llvm::Type *ResT = ConvertType(E->getType());
|
||||
@ -21269,6 +21271,10 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
|
||||
CGM.getIntrinsic(Intrinsic::wasm_pmax, ConvertType(E->getType()));
|
||||
return Builder.CreateCall(Callee, {LHS, RHS});
|
||||
}
|
||||
case WebAssembly::BI__builtin_wasm_ceil_f16x8:
|
||||
case WebAssembly::BI__builtin_wasm_floor_f16x8:
|
||||
case WebAssembly::BI__builtin_wasm_trunc_f16x8:
|
||||
case WebAssembly::BI__builtin_wasm_nearest_f16x8:
|
||||
case WebAssembly::BI__builtin_wasm_ceil_f32x4:
|
||||
case WebAssembly::BI__builtin_wasm_floor_f32x4:
|
||||
case WebAssembly::BI__builtin_wasm_trunc_f32x4:
|
||||
@ -21279,18 +21285,22 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
|
||||
case WebAssembly::BI__builtin_wasm_nearest_f64x2: {
|
||||
unsigned IntNo;
|
||||
switch (BuiltinID) {
|
||||
case WebAssembly::BI__builtin_wasm_ceil_f16x8:
|
||||
case WebAssembly::BI__builtin_wasm_ceil_f32x4:
|
||||
case WebAssembly::BI__builtin_wasm_ceil_f64x2:
|
||||
IntNo = Intrinsic::ceil;
|
||||
break;
|
||||
case WebAssembly::BI__builtin_wasm_floor_f16x8:
|
||||
case WebAssembly::BI__builtin_wasm_floor_f32x4:
|
||||
case WebAssembly::BI__builtin_wasm_floor_f64x2:
|
||||
IntNo = Intrinsic::floor;
|
||||
break;
|
||||
case WebAssembly::BI__builtin_wasm_trunc_f16x8:
|
||||
case WebAssembly::BI__builtin_wasm_trunc_f32x4:
|
||||
case WebAssembly::BI__builtin_wasm_trunc_f64x2:
|
||||
IntNo = Intrinsic::trunc;
|
||||
break;
|
||||
case WebAssembly::BI__builtin_wasm_nearest_f16x8:
|
||||
case WebAssembly::BI__builtin_wasm_nearest_f32x4:
|
||||
case WebAssembly::BI__builtin_wasm_nearest_f64x2:
|
||||
IntNo = Intrinsic::nearbyint;
|
||||
@ -21489,12 +21499,14 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
|
||||
CGM.getIntrinsic(Intrinsic::wasm_bitmask, Vec->getType());
|
||||
return Builder.CreateCall(Callee, {Vec});
|
||||
}
|
||||
case WebAssembly::BI__builtin_wasm_abs_f16x8:
|
||||
case WebAssembly::BI__builtin_wasm_abs_f32x4:
|
||||
case WebAssembly::BI__builtin_wasm_abs_f64x2: {
|
||||
Value *Vec = EmitScalarExpr(E->getArg(0));
|
||||
Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
|
||||
return Builder.CreateCall(Callee, {Vec});
|
||||
}
|
||||
case WebAssembly::BI__builtin_wasm_sqrt_f16x8:
|
||||
case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
|
||||
case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
|
||||
Value *Vec = EmitScalarExpr(E->getArg(0));
|
||||
|
@ -33,6 +33,7 @@ typedef unsigned long long __u64x2
|
||||
__attribute__((__vector_size__(16), __aligned__(16)));
|
||||
typedef float __f32x4 __attribute__((__vector_size__(16), __aligned__(16)));
|
||||
typedef double __f64x2 __attribute__((__vector_size__(16), __aligned__(16)));
|
||||
typedef __fp16 __f16x8 __attribute__((__vector_size__(16), __aligned__(16)));
|
||||
|
||||
typedef signed char __i8x8 __attribute__((__vector_size__(8), __aligned__(8)));
|
||||
typedef unsigned char __u8x8
|
||||
@ -1878,6 +1879,152 @@ wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t __a, v128_t __b, v128_t __c) {
|
||||
(__i8x16)__a, (__i8x16)__b, (__i32x4)__c);
|
||||
}
|
||||
|
||||
// FP16 intrinsics
|
||||
#define __FP16_FN_ATTRS \
|
||||
__attribute__((__always_inline__, __nodebug__, __target__("fp16"), \
|
||||
__min_vector_width__(128)))
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_splat(float __a) {
|
||||
return (v128_t)__builtin_wasm_splat_f16x8(__a);
|
||||
}
|
||||
|
||||
static __inline__ float __FP16_FN_ATTRS wasm_f16x8_extract_lane(v128_t __a,
|
||||
int __i)
|
||||
__REQUIRE_CONSTANT(__i) {
|
||||
return __builtin_wasm_extract_lane_f16x8((__f16x8)__a, __i);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_replace_lane(v128_t __a,
|
||||
int __i,
|
||||
float __b)
|
||||
__REQUIRE_CONSTANT(__i) {
|
||||
return (v128_t)__builtin_wasm_replace_lane_f16x8((__f16x8)__a, __i, __b);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_abs(v128_t __a) {
|
||||
return (v128_t)__builtin_wasm_abs_f16x8((__f16x8)__a);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_neg(v128_t __a) {
|
||||
return (v128_t)(-(__f16x8)__a);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_sqrt(v128_t __a) {
|
||||
return (v128_t)__builtin_wasm_sqrt_f16x8((__f16x8)__a);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ceil(v128_t __a) {
|
||||
return (v128_t)__builtin_wasm_ceil_f16x8((__f16x8)__a);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_floor(v128_t __a) {
|
||||
return (v128_t)__builtin_wasm_floor_f16x8((__f16x8)__a);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_trunc(v128_t __a) {
|
||||
return (v128_t)__builtin_wasm_trunc_f16x8((__f16x8)__a);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_nearest(v128_t __a) {
|
||||
return (v128_t)__builtin_wasm_nearest_f16x8((__f16x8)__a);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_eq(v128_t __a, v128_t __b) {
|
||||
return (v128_t)((__f16x8)__a == (__f16x8)__b);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ne(v128_t __a, v128_t __b) {
|
||||
return (v128_t)((__f16x8)__a != (__f16x8)__b);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_lt(v128_t __a, v128_t __b) {
|
||||
return (v128_t)((__f16x8)__a < (__f16x8)__b);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_gt(v128_t __a, v128_t __b) {
|
||||
return (v128_t)((__f16x8)__a > (__f16x8)__b);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_le(v128_t __a, v128_t __b) {
|
||||
return (v128_t)((__f16x8)__a <= (__f16x8)__b);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ge(v128_t __a, v128_t __b) {
|
||||
return (v128_t)((__f16x8)__a >= (__f16x8)__b);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_add(v128_t __a,
|
||||
v128_t __b) {
|
||||
return (v128_t)((__f16x8)__a + (__f16x8)__b);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_sub(v128_t __a,
|
||||
v128_t __b) {
|
||||
return (v128_t)((__f16x8)__a - (__f16x8)__b);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_mul(v128_t __a,
|
||||
v128_t __b) {
|
||||
return (v128_t)((__f16x8)__a * (__f16x8)__b);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_div(v128_t __a,
|
||||
v128_t __b) {
|
||||
return (v128_t)((__f16x8)__a / (__f16x8)__b);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_min(v128_t __a,
|
||||
v128_t __b) {
|
||||
return (v128_t)__builtin_wasm_min_f16x8((__f16x8)__a, (__f16x8)__b);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_max(v128_t __a,
|
||||
v128_t __b) {
|
||||
return (v128_t)__builtin_wasm_max_f16x8((__f16x8)__a, (__f16x8)__b);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_pmin(v128_t __a,
|
||||
v128_t __b) {
|
||||
return (v128_t)__builtin_wasm_pmin_f16x8((__f16x8)__a, (__f16x8)__b);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_pmax(v128_t __a,
|
||||
v128_t __b) {
|
||||
return (v128_t)__builtin_wasm_pmax_f16x8((__f16x8)__a, (__f16x8)__b);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS
|
||||
wasm_i16x8_trunc_sat_f16x8(v128_t __a) {
|
||||
return (v128_t)__builtin_wasm_trunc_saturate_s_i16x8_f16x8((__f16x8)__a);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS
|
||||
wasm_u16x8_trunc_sat_f16x8(v128_t __a) {
|
||||
return (v128_t)__builtin_wasm_trunc_saturate_u_i16x8_f16x8((__f16x8)__a);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_i16x8(v128_t __a) {
|
||||
return (v128_t) __builtin_convertvector((__i16x8)__a, __f16x8);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_u16x8(v128_t __a) {
|
||||
return (v128_t) __builtin_convertvector((__u16x8)__a, __f16x8);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_madd(v128_t __a,
|
||||
v128_t __b,
|
||||
v128_t __c) {
|
||||
return (v128_t)__builtin_wasm_relaxed_madd_f16x8((__f16x8)__a, (__f16x8)__b,
|
||||
(__f16x8)__c);
|
||||
}
|
||||
|
||||
static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_nmadd(v128_t __a,
|
||||
v128_t __b,
|
||||
v128_t __c) {
|
||||
return (v128_t)__builtin_wasm_relaxed_nmadd_f16x8((__f16x8)__a, (__f16x8)__b,
|
||||
(__f16x8)__c);
|
||||
}
|
||||
|
||||
// Deprecated intrinsics
|
||||
|
||||
static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i8x16_swizzle")
|
||||
|
@ -2,7 +2,7 @@
|
||||
// expected-no-diagnostics
|
||||
|
||||
// RUN: %clang %s -O2 -S -o - -target wasm32-unknown-unknown \
|
||||
// RUN: -msimd128 -mrelaxed-simd -Wcast-qual -Werror | FileCheck %s
|
||||
// RUN: -msimd128 -mrelaxed-simd -mfp16 -Wcast-qual -Werror | FileCheck %s
|
||||
|
||||
#include <wasm_simd128.h>
|
||||
|
||||
@ -1385,3 +1385,139 @@ v128_t test_i16x8_relaxed_dot_i8x16_i7x16(v128_t a, v128_t b) {
|
||||
v128_t test_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t a, v128_t b, v128_t c) {
|
||||
return wasm_i32x4_relaxed_dot_i8x16_i7x16_add(a, b, c);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_f16x8_splat:
|
||||
// CHECK: f16x8.splat{{$}}
|
||||
v128_t test_f16x8_splat(float a) { return wasm_f16x8_splat(a); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_extract_lane:
|
||||
// CHECK: f16x8.extract_lane 7{{$}}
|
||||
// wasm_f16x8_extract_lane returns a float; return it unconverted so the
// function body contains only the lane extraction being checked.
float test_f16x8_extract_lane(v128_t a) {
  return wasm_f16x8_extract_lane(a, 7);
}
|
||||
|
||||
// CHECK-LABEL: test_f16x8_replace_lane:
|
||||
// CHECK: f16x8.replace_lane 7{{$}}
|
||||
v128_t test_f16x8_replace_lane(v128_t a, float b) {
|
||||
return wasm_f16x8_replace_lane(a, 7, b);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_f16x8_abs:
|
||||
// CHECK: f16x8.abs{{$}}
|
||||
v128_t test_f16x8_abs(v128_t a) { return wasm_f16x8_abs(a); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_neg:
|
||||
// CHECK: f16x8.neg{{$}}
|
||||
v128_t test_f16x8_neg(v128_t a) { return wasm_f16x8_neg(a); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_sqrt:
|
||||
// CHECK: f16x8.sqrt{{$}}
|
||||
v128_t test_f16x8_sqrt(v128_t a) { return wasm_f16x8_sqrt(a); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_ceil:
|
||||
// CHECK: f16x8.ceil{{$}}
|
||||
v128_t test_f16x8_ceil(v128_t a) { return wasm_f16x8_ceil(a); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_floor:
|
||||
// CHECK: f16x8.floor{{$}}
|
||||
v128_t test_f16x8_floor(v128_t a) { return wasm_f16x8_floor(a); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_trunc:
|
||||
// CHECK: f16x8.trunc{{$}}
|
||||
v128_t test_f16x8_trunc(v128_t a) { return wasm_f16x8_trunc(a); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_nearest:
|
||||
// CHECK: f16x8.nearest{{$}}
|
||||
v128_t test_f16x8_nearest(v128_t a) { return wasm_f16x8_nearest(a); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_add:
|
||||
// CHECK: f16x8.add{{$}}
|
||||
v128_t test_f16x8_add(v128_t a, v128_t b) { return wasm_f16x8_add(a, b); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_sub:
|
||||
// CHECK: f16x8.sub{{$}}
|
||||
v128_t test_f16x8_sub(v128_t a, v128_t b) { return wasm_f16x8_sub(a, b); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_mul:
|
||||
// CHECK: f16x8.mul{{$}}
|
||||
v128_t test_f16x8_mul(v128_t a, v128_t b) { return wasm_f16x8_mul(a, b); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_div:
|
||||
// CHECK: f16x8.div{{$}}
|
||||
v128_t test_f16x8_div(v128_t a, v128_t b) { return wasm_f16x8_div(a, b); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_min:
|
||||
// CHECK: f16x8.min{{$}}
|
||||
v128_t test_f16x8_min(v128_t a, v128_t b) { return wasm_f16x8_min(a, b); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_max:
|
||||
// CHECK: f16x8.max{{$}}
|
||||
v128_t test_f16x8_max(v128_t a, v128_t b) { return wasm_f16x8_max(a, b); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_pmin:
|
||||
// CHECK: f16x8.pmin{{$}}
|
||||
v128_t test_f16x8_pmin(v128_t a, v128_t b) { return wasm_f16x8_pmin(a, b); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_pmax:
|
||||
// CHECK: f16x8.pmax{{$}}
|
||||
v128_t test_f16x8_pmax(v128_t a, v128_t b) { return wasm_f16x8_pmax(a, b); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_eq:
|
||||
// CHECK: f16x8.eq{{$}}
|
||||
v128_t test_f16x8_eq(v128_t a, v128_t b) { return wasm_f16x8_eq(a, b); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_ne:
|
||||
// CHECK: f16x8.ne{{$}}
|
||||
v128_t test_f16x8_ne(v128_t a, v128_t b) { return wasm_f16x8_ne(a, b); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_lt:
|
||||
// CHECK: f16x8.lt{{$}}
|
||||
v128_t test_f16x8_lt(v128_t a, v128_t b) { return wasm_f16x8_lt(a, b); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_gt:
|
||||
// CHECK: f16x8.gt{{$}}
|
||||
v128_t test_f16x8_gt(v128_t a, v128_t b) { return wasm_f16x8_gt(a, b); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_le:
|
||||
// CHECK: f16x8.le{{$}}
|
||||
v128_t test_f16x8_le(v128_t a, v128_t b) { return wasm_f16x8_le(a, b); }
|
||||
|
||||
// CHECK-LABEL: test_f16x8_ge:
|
||||
// CHECK: f16x8.ge{{$}}
|
||||
v128_t test_f16x8_ge(v128_t a, v128_t b) { return wasm_f16x8_ge(a, b); }
|
||||
|
||||
// CHECK-LABEL: test_i16x8_trunc_sat_f16x8:
|
||||
// CHECK: i16x8.trunc_sat_f16x8_s{{$}}
|
||||
v128_t test_i16x8_trunc_sat_f16x8(v128_t a) {
|
||||
return wasm_i16x8_trunc_sat_f16x8(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_u16x8_trunc_sat_f16x8:
|
||||
// CHECK: i16x8.trunc_sat_f16x8_u{{$}}
|
||||
v128_t test_u16x8_trunc_sat_f16x8(v128_t a) {
|
||||
return wasm_u16x8_trunc_sat_f16x8(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_f16x8_convert_i16x8:
|
||||
// CHECK: f16x8.convert_i16x8_s{{$}}
|
||||
v128_t test_f16x8_convert_i16x8(v128_t a) {
|
||||
return wasm_f16x8_convert_i16x8(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_f16x8_convert_u16x8:
|
||||
// CHECK: f16x8.convert_i16x8_u{{$}}
|
||||
v128_t test_f16x8_convert_u16x8(v128_t a) {
|
||||
return wasm_f16x8_convert_u16x8(a);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_f16x8_relaxed_madd:
|
||||
// CHECK: f16x8.relaxed_madd{{$}}
|
||||
v128_t test_f16x8_relaxed_madd(v128_t a, v128_t b, v128_t c) {
|
||||
return wasm_f16x8_relaxed_madd(a, b, c);
|
||||
}
|
||||
|
||||
// CHECK-LABEL: test_f16x8_relaxed_nmadd:
|
||||
// CHECK: f16x8.relaxed_nmadd{{$}}
|
||||
v128_t test_f16x8_relaxed_nmadd(v128_t a, v128_t b, v128_t c) {
|
||||
return wasm_f16x8_relaxed_nmadd(a, b, c);
|
||||
}
|
||||
|
@ -275,8 +275,12 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
|
||||
setOperationAction(Op, T, Expand);
|
||||
|
||||
// But saturating fp_to_int conversions are
|
||||
for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
|
||||
for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) {
|
||||
setOperationAction(Op, MVT::v4i32, Custom);
|
||||
if (Subtarget->hasFP16()) {
|
||||
setOperationAction(Op, MVT::v8i16, Custom);
|
||||
}
|
||||
}
|
||||
|
||||
// Support vector extending
|
||||
for (auto T : MVT::integer_fixedlen_vector_valuetypes()) {
|
||||
@ -2475,6 +2479,9 @@ SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
|
||||
if (ResT == MVT::v4i32 && SatVT == MVT::i32)
|
||||
return Op;
|
||||
|
||||
if (ResT == MVT::v8i16 && SatVT == MVT::i16)
|
||||
return Op;
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
|
@ -165,8 +165,9 @@ def F16x8 : Vec {
|
||||
let prefix = "f16x8";
|
||||
}
|
||||
|
||||
// TODO: Include F16x8 here when half precision is better supported.
|
||||
defvar AllVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2];
|
||||
// TODO: Remove StdVecs when F16x8 works everywhere StdVecs is used.
|
||||
defvar StdVecs = [I8x16, I16x8, I32x4, I64x2, F32x4, F64x2];
|
||||
defvar AllVecs = !listconcat(StdVecs, [F16x8]);
|
||||
defvar IntVecs = [I8x16, I16x8, I32x4, I64x2];
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -188,7 +189,7 @@ defm LOAD_V128_A64 :
|
||||
}
|
||||
|
||||
// Def load patterns from WebAssemblyInstrMemory.td for vector types
|
||||
foreach vec = AllVecs in {
|
||||
foreach vec = StdVecs in {
|
||||
defm : LoadPat<vec.vt, load, "LOAD_V128">;
|
||||
}
|
||||
|
||||
@ -217,7 +218,7 @@ defm "" : SIMDLoadSplat<16, 8>;
|
||||
defm "" : SIMDLoadSplat<32, 9>;
|
||||
defm "" : SIMDLoadSplat<64, 10>;
|
||||
|
||||
foreach vec = AllVecs in {
|
||||
foreach vec = StdVecs in {
|
||||
defvar inst = "LOAD"#vec.lane_bits#"_SPLAT";
|
||||
defm : LoadPat<vec.vt,
|
||||
PatFrag<(ops node:$addr), (splat_vector (vec.lane_vt (vec.lane_load node:$addr)))>,
|
||||
@ -389,7 +390,7 @@ defm STORE_V128_A64 :
|
||||
}
|
||||
|
||||
// Def store patterns from WebAssemblyInstrMemory.td for vector types
|
||||
foreach vec = AllVecs in {
|
||||
foreach vec = StdVecs in {
|
||||
defm : StorePat<vec.vt, store, "STORE_V128">;
|
||||
}
|
||||
|
||||
@ -513,7 +514,7 @@ defm "" : ConstVec<F64x2,
|
||||
"$i0, $i1">;
|
||||
|
||||
// Match splat(x) -> const.v128(x, ..., x)
|
||||
foreach vec = AllVecs in {
|
||||
foreach vec = StdVecs in {
|
||||
defvar numEls = !div(vec.vt.Size, vec.lane_bits);
|
||||
defvar isFloat = !or(!eq(vec.lane_vt, f32), !eq(vec.lane_vt, f64));
|
||||
defvar immKind = !if(isFloat, fpimm, imm);
|
||||
@ -557,7 +558,7 @@ defm SHUFFLE :
|
||||
// Shuffles after custom lowering
|
||||
def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
|
||||
def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
|
||||
foreach vec = AllVecs in {
|
||||
foreach vec = StdVecs in {
|
||||
// The @llvm.wasm.shuffle intrinsic has immediate arguments that become TargetConstants.
|
||||
def : Pat<(vec.vt (wasm_shuffle (vec.vt V128:$x), (vec.vt V128:$y),
|
||||
(i32 timm:$m0), (i32 timm:$m1),
|
||||
@ -627,7 +628,7 @@ defm SPLAT_F16x8 :
|
||||
"f16x8.splat\t$dst, $x", "f16x8.splat", 0x120>;
|
||||
|
||||
// scalar_to_vector leaves high lanes undefined, so can be a splat
|
||||
foreach vec = AllVecs in
|
||||
foreach vec = StdVecs in
|
||||
def : Pat<(vec.vt (scalar_to_vector (vec.lane_vt vec.lane_rc:$x))),
|
||||
(!cast<Instruction>("SPLAT_"#vec) $x)>;
|
||||
|
||||
@ -880,7 +881,7 @@ defm BITSELECT :
|
||||
SIMD_I<(outs V128:$dst), (ins V128:$v1, V128:$v2, V128:$c), (outs), (ins), [],
|
||||
"v128.bitselect\t$dst, $v1, $v2, $c", "v128.bitselect", 82>;
|
||||
|
||||
foreach vec = AllVecs in
|
||||
foreach vec = StdVecs in
|
||||
def : Pat<(vec.vt (int_wasm_bitselect
|
||||
(vec.vt V128:$v1), (vec.vt V128:$v2), (vec.vt V128:$c))),
|
||||
(BITSELECT $v1, $v2, $c)>;
|
||||
@ -906,7 +907,7 @@ def : Pat<(vec.vt (xor (and (xor (vec.vt V128:$v1), (vec.vt V128:$v2)),
|
||||
(BITSELECT $v2, $v1, $c)>;
|
||||
|
||||
// Also implement vselect in terms of bitselect
|
||||
foreach vec = AllVecs in
|
||||
foreach vec = StdVecs in
|
||||
def : Pat<(vec.vt (vselect
|
||||
(vec.int_vt V128:$c), (vec.vt V128:$v1), (vec.vt V128:$v2))),
|
||||
(BITSELECT $v1, $v2, $c)>;
|
||||
@ -916,7 +917,7 @@ defm SELECT_V128 :
|
||||
I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs, I32:$cond), (outs), (ins), [],
|
||||
"v128.select\t$dst, $lhs, $rhs, $cond", "v128.select", 0x1b>;
|
||||
|
||||
foreach vec = AllVecs in {
|
||||
foreach vec = StdVecs in {
|
||||
def : Pat<(select I32:$cond, (vec.vt V128:$lhs), (vec.vt V128:$rhs)),
|
||||
(SELECT_V128 $lhs, $rhs, $cond)>;
|
||||
|
||||
@ -1370,6 +1371,11 @@ def trunc_u_sat32 : PatFrag<(ops node:$x), (fp_to_uint_sat $x, i32)>;
|
||||
def : Pat<(v4i32 (trunc_s_sat32 (v4f32 V128:$src))), (fp_to_sint_I32x4 $src)>;
|
||||
def : Pat<(v4i32 (trunc_u_sat32 (v4f32 V128:$src))), (fp_to_uint_I32x4 $src)>;
|
||||
|
||||
def trunc_s_sat16 : PatFrag<(ops node:$x), (fp_to_sint_sat $x, i16)>;
|
||||
def trunc_u_sat16 : PatFrag<(ops node:$x), (fp_to_uint_sat $x, i16)>;
|
||||
def : Pat<(v8i16 (trunc_s_sat16 (v8f16 V128:$src))), (fp_to_sint_I16x8 $src)>;
|
||||
def : Pat<(v8i16 (trunc_u_sat16 (v8f16 V128:$src))), (fp_to_uint_I16x8 $src)>;
|
||||
|
||||
def trunc_sat_zero_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
|
||||
def trunc_sat_zero_s :
|
||||
SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_S", trunc_sat_zero_t>;
|
||||
|
@ -290,3 +290,21 @@ define <8 x i16> @trunc_sat_u_v8i16(<8 x half> %x) {
|
||||
%a = fptoui <8 x half> %x to <8 x i16>
|
||||
ret <8 x i16> %a
|
||||
}
|
||||
|
||||
; Saturating signed <8 x half> -> <8 x i16> conversion.
; The result register is captured in R on the defining line so that the
; following `return` check does not depend on a value captured by an earlier
; test in the file.
define <8 x i16> @trunc_sat_s_v8i16_sat(<8 x half> %x) {
; CHECK-LABEL: trunc_sat_s_v8i16_sat:
; CHECK: .functype trunc_sat_s_v8i16_sat (v128) -> (v128)
; CHECK-NEXT: i16x8.trunc_sat_f16x8_s $push[[R:[0-9]+]]=, $0
; CHECK-NEXT: return $pop[[R]]{{$}}
  %a = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> %x)
  ret <8 x i16> %a
}
|
||||
|
||||
; Saturating unsigned <8 x half> -> <8 x i16> conversion.
; The result register is captured in R on the defining line so that the
; following `return` check does not depend on a value captured by an earlier
; test in the file.
define <8 x i16> @trunc_sat_u_v8i16_sat(<8 x half> %x) {
; CHECK-LABEL: trunc_sat_u_v8i16_sat:
; CHECK: .functype trunc_sat_u_v8i16_sat (v128) -> (v128)
; CHECK-NEXT: i16x8.trunc_sat_f16x8_u $push[[R:[0-9]+]]=, $0
; CHECK-NEXT: return $pop[[R]]{{$}}
  %a = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> %x)
  ret <8 x i16> %a
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user