Momchil Velikov c2172431c7
[AArch64] Implements FP8 SVE intrinsics for dot-product (#118125)
This patch adds the following intrinsics:

* 8-bit floating-point dot product to single-precision.

// Only if (__ARM_FEATURE_SVE2 && __ARM_FEATURE_FP8DOT4) ||
__ARM_FEATURE_SSVE_FP8DOT4
svfloat32_t svdot[_f32_mf8]_fpm(svfloat32_t zda, svmfloat8_t zn,
svmfloat8_t zm, fpm_t fpm);
svfloat32_t svdot[_n_f32_mf8]_fpm(svfloat32_t zda, svmfloat8_t zn,
mfloat8_t zm, fpm_t fpm);

* 8-bit floating-point indexed dot product to single-precision.

// Only if (__ARM_FEATURE_SVE2 && __ARM_FEATURE_FP8DOT4) ||
__ARM_FEATURE_SSVE_FP8DOT4
svfloat32_t svdot_lane[_f32_mf8]_fpm(svfloat32_t zda, svmfloat8_t zn,
svmfloat8_t zm,
                                       uint64_t imm0_3, fpm_t fpm);

* 8-bit floating-point dot product to half-precision.

// Only if (__ARM_FEATURE_SVE2 && __ARM_FEATURE_FP8DOT2) ||
__ARM_FEATURE_SSVE_FP8DOT2
svfloat16_t svdot[_f16_mf8]_fpm(svfloat16_t zda, svmfloat8_t zn,
svmfloat8_t zm, fpm_t fpm);
svfloat16_t svdot[_n_f16_mf8]_fpm(svfloat16_t zda, svmfloat8_t zn,
mfloat8_t zm, fpm_t fpm);

* 8-bit floating-point indexed dot product to half-precision.

// Only if (__ARM_FEATURE_SVE2 && __ARM_FEATURE_FP8DOT2) ||
__ARM_FEATURE_SSVE_FP8DOT2
svfloat16_t svdot_lane[_f16_mf8]_fpm(svfloat16_t zda, svmfloat8_t zn,
svmfloat8_t zm,
                                       uint64_t imm0_7, fpm_t fpm);
2024-12-13 14:06:54 +00:00
..