mirror of
https://github.com/llvm/llvm-project.git
synced 2025-05-16 17:26:04 +00:00

This is a continuation of my patches to inform the X86 backend about what the largest IR types are in the function so that we can restrict the backend type legalizer to prevent 512-bit vectors on SKX when -mprefer-vector-width=256 is specified if no explicit 512 bit vectors were specified by the user. This patch updates the vector width based on the argument and return types of the current function and from the types of any functions it calls. This is intended to make sure the backend type legalizer doesn't disturb any types that are required for ABI. Differential Revision: https://reviews.llvm.org/D52441 llvm-svn: 345168
81 lines
3.8 KiB
C
81 lines
3.8 KiB
C
// RUN: %clang_cc1 -triple arm64-apple-ios7 -target-feature +neon -ffreestanding -fallow-half-arguments-and-returns -S -o - -disable-O0-optnone -emit-llvm %s | opt -S -mem2reg | FileCheck %s
|
|
|
|
#include <arm_neon.h>
|
|
|
|
// vdupq_n_f64 -> dup.2d v0, v0[0]
|
|
//
|
|
// CHECK-LABEL: define <2 x double> @test_vdupq_n_f64(double %w) #0 {
|
|
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %w, i32 0
|
|
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %w, i32 1
|
|
// CHECK: ret <2 x double> [[VECINIT1_I]]
|
|
// Passes the scalar `w` to vdupq_n_f64 and returns the resulting float64x2_t.
// The FileCheck patterns preceding this function expect the IR to build the
// <2 x double> result with two insertelement instructions (lanes 0 and 1),
// both inserting %w, and to carry attribute group #0.
float64x2_t test_vdupq_n_f64(float64_t w) {
|
|
return vdupq_n_f64(w);
|
|
}
|
|
|
|
// might as well test this while we're here
|
|
// vdupq_n_f32 -> dup.4s v0, v0[0]
|
|
// CHECK-LABEL: define <4 x float> @test_vdupq_n_f32(float %w) #0 {
|
|
// CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %w, i32 0
|
|
// CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %w, i32 1
|
|
// CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %w, i32 2
|
|
// CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %w, i32 3
|
|
// CHECK: ret <4 x float> [[VECINIT3_I]]
|
|
// Passes the scalar `w` to vdupq_n_f32 and returns the resulting float32x4_t.
// The FileCheck patterns preceding this function expect a chain of four
// insertelement instructions (lanes 0 through 3), each inserting %w into a
// <4 x float>, with the last one being the returned value.
float32x4_t test_vdupq_n_f32(float32_t w) {
|
|
return vdupq_n_f32(w);
|
|
}
|
|
|
|
// vdupq_lane_f64 -> dup.2d v0, v0[0]
|
|
// this was in <rdar://problem/11778405>, but had already been implemented,
|
|
// test anyway
|
|
// CHECK-LABEL: define <2 x double> @test_vdupq_lane_f64(<1 x double> %V) #0 {
|
|
// CHECK: [[SHUFFLE:%.*]] = shufflevector <1 x double> %V, <1 x double> %V, <2 x i32> zeroinitializer
|
|
// CHECK: ret <2 x double> [[SHUFFLE]]
|
|
// Calls vdupq_lane_f64 on the one-element vector `V` with lane index 0 and
// returns the float64x2_t result. The FileCheck patterns preceding this
// function expect a single shufflevector of %V with itself using a
// zeroinitializer <2 x i32> mask, whose result is returned directly.
float64x2_t test_vdupq_lane_f64(float64x1_t V) {
|
|
return vdupq_lane_f64(V, 0);
|
|
}
|
|
|
|
// vmovq_n_f64 -> dup Vd.2d,X0
|
|
// this wasn't in <rdar://problem/11778405>, but it was between the vdups
|
|
// CHECK-LABEL: define <2 x double> @test_vmovq_n_f64(double %w) #0 {
|
|
// CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %w, i32 0
|
|
// CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %w, i32 1
|
|
// CHECK: ret <2 x double> [[VECINIT1_I]]
|
|
// Passes the scalar `w` to vmovq_n_f64 and returns the resulting float64x2_t.
// The FileCheck patterns preceding this function expect the same IR shape as
// for test_vdupq_n_f64: two insertelement instructions (lanes 0 and 1)
// inserting %w into a <2 x double>.
float64x2_t test_vmovq_n_f64(float64_t w) {
|
|
return vmovq_n_f64(w);
|
|
}
|
|
|
|
// CHECK-LABEL: define <4 x half> @test_vmov_n_f16(half* %a1) #1 {
|
|
// CHECK: [[TMP0:%.*]] = load half, half* %a1, align 2
|
|
// CHECK: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
|
|
// CHECK: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
|
|
// CHECK: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
|
|
// CHECK: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
|
|
// CHECK: ret <4 x half> [[VECINIT3]]
|
|
// Dereferences `a1` and passes the loaded float16_t to vmov_n_f16, returning
// the float16x4_t result. The FileCheck patterns preceding this function
// expect one `load half` (align 2) followed by four insertelement
// instructions (lanes 0 through 3) that all reuse the loaded value.
// This is the only function in the file matched against attribute group #1,
// which the final pattern of the file ties to "min-legal-vector-width"="64".
float16x4_t test_vmov_n_f16(float16_t *a1) {
|
|
return vmov_n_f16(*a1);
|
|
}
|
|
|
|
/*
|
|
float64x1_t test_vmov_n_f64(float64_t a1) {
|
|
return vmov_n_f64(a1);
|
|
}
|
|
*/
|
|
|
|
// CHECK-LABEL: define <8 x half> @test_vmovq_n_f16(half* %a1) #0 {
|
|
// CHECK: [[TMP0:%.*]] = load half, half* %a1, align 2
|
|
// CHECK: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
|
|
// CHECK: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
|
|
// CHECK: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
|
|
// CHECK: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
|
|
// CHECK: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
|
|
// CHECK: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
|
|
// CHECK: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
|
|
// CHECK: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
|
|
// CHECK: ret <8 x half> [[VECINIT7]]
|
|
// Dereferences `a1` and passes the loaded float16_t to vmovq_n_f16, returning
// the float16x8_t result. The FileCheck patterns preceding this function
// expect one `load half` (align 2) followed by eight insertelement
// instructions (lanes 0 through 7) that all reuse the loaded value, and
// attribute group #0 ("min-legal-vector-width"="128" per the end of the file).
float16x8_t test_vmovq_n_f16(float16_t *a1) {
|
|
return vmovq_n_f16(*a1);
|
|
}
|
|
|
|
// CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="128"
|
|
// CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="64"
|