mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-18 12:16:49 +00:00
[LLVM][AArch64][SVE] Mark DUP immediate instructions with isAsCheapAsAMove. (#133945)
Doing this means we'll regenerate an immediate rather than copy the result of an existing one, reducing instruction dependency chains.
This commit is contained in:
parent
cb0d1305d1
commit
ee4e8197fa
@ -2113,6 +2113,7 @@ class sve_int_dup_mask_imm<string asm>
|
||||
|
||||
let DecoderMethod = "DecodeSVELogicalImmInstruction";
|
||||
let hasSideEffects = 0;
|
||||
let isAsCheapAsAMove = 1;
|
||||
let isReMaterializable = 1;
|
||||
let Uses = [VG];
|
||||
}
|
||||
@ -5118,6 +5119,7 @@ class sve_int_dup_imm<bits<2> sz8_64, string asm,
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
let hasSideEffects = 0;
|
||||
let isAsCheapAsAMove = 1;
|
||||
let isReMaterializable = 1;
|
||||
let Uses = [VG];
|
||||
}
|
||||
@ -5161,6 +5163,7 @@ class sve_int_dup_fpimm<bits<2> sz8_64, Operand fpimmtype,
|
||||
let Inst{4-0} = Zd;
|
||||
|
||||
let hasSideEffects = 0;
|
||||
let isAsCheapAsAMove = 1;
|
||||
let isReMaterializable = 1;
|
||||
let Uses = [VG];
|
||||
}
|
||||
|
@ -51,20 +51,20 @@ define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4
|
||||
; CHECK-LABEL: mul_add_mull:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z24.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z25.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z26.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z27.d, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z25.d, z24.d
|
||||
; CHECK-NEXT: mov z26.d, z24.d
|
||||
; CHECK-NEXT: mov z27.d, z24.d
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #0
|
||||
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #0
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #0
|
||||
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #90
|
||||
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #90
|
||||
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z5.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #90
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #90
|
||||
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #90
|
||||
; CHECK-NEXT: fadd z1.d, z26.d, z24.d
|
||||
; CHECK-NEXT: fadd z0.d, z25.d, z27.d
|
||||
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z5.d, #90
|
||||
; CHECK-NEXT: fadd z0.d, z24.d, z27.d
|
||||
; CHECK-NEXT: fadd z1.d, z25.d, z26.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
|
||||
@ -102,20 +102,20 @@ define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4
|
||||
; CHECK-LABEL: mul_sub_mull:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z24.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z25.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z26.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z27.d, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z25.d, z24.d
|
||||
; CHECK-NEXT: mov z26.d, z24.d
|
||||
; CHECK-NEXT: mov z27.d, z24.d
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #0
|
||||
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #0
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #0
|
||||
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #90
|
||||
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #90
|
||||
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z5.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #90
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #90
|
||||
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z4.d, #90
|
||||
; CHECK-NEXT: fsub z1.d, z26.d, z24.d
|
||||
; CHECK-NEXT: fsub z0.d, z25.d, z27.d
|
||||
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z5.d, #90
|
||||
; CHECK-NEXT: fsub z0.d, z24.d, z27.d
|
||||
; CHECK-NEXT: fsub z1.d, z25.d, z26.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
|
||||
@ -153,20 +153,20 @@ define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x
|
||||
; CHECK-LABEL: mul_conj_mull:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z24.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z25.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z26.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z27.d, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z25.d, z24.d
|
||||
; CHECK-NEXT: mov z26.d, z24.d
|
||||
; CHECK-NEXT: mov z27.d, z24.d
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #0
|
||||
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #0
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #0
|
||||
; CHECK-NEXT: fcmla z27.d, p0/m, z4.d, z6.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z2.d, z0.d, #90
|
||||
; CHECK-NEXT: fcmla z26.d, p0/m, z3.d, z1.d, #90
|
||||
; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z7.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z2.d, z0.d, #90
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z3.d, z1.d, #90
|
||||
; CHECK-NEXT: fcmla z27.d, p0/m, z4.d, z6.d, #270
|
||||
; CHECK-NEXT: fadd z1.d, z26.d, z24.d
|
||||
; CHECK-NEXT: fadd z0.d, z25.d, z27.d
|
||||
; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z7.d, #270
|
||||
; CHECK-NEXT: fadd z0.d, z24.d, z27.d
|
||||
; CHECK-NEXT: fadd z1.d, z25.d, z26.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
|
||||
|
@ -42,18 +42,18 @@ define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4
|
||||
; CHECK-LABEL: mul_add_mull:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z24.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z25.d, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z25.d, z24.d
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #90
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
|
||||
; CHECK-NEXT: mov z1.d, z24.d
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
|
||||
; CHECK-NEXT: mov z0.d, z25.d
|
||||
; CHECK-NEXT: mov z1.d, z24.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
|
||||
@ -91,18 +91,18 @@ define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4
|
||||
; CHECK-LABEL: mul_sub_mull:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z24.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z25.d, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z25.d, z24.d
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #270
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #270
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #270
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #180
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #180
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #180
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
|
||||
; CHECK-NEXT: mov z1.d, z24.d
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
|
||||
; CHECK-NEXT: mov z0.d, z25.d
|
||||
; CHECK-NEXT: mov z1.d, z24.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
|
||||
@ -140,18 +140,18 @@ define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x
|
||||
; CHECK-LABEL: mul_conj_mull:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z24.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z25.d, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z25.d, z24.d
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z6.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z6.d, #270
|
||||
; CHECK-NEXT: mov z1.d, z24.d
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
|
||||
; CHECK-NEXT: mov z0.d, z25.d
|
||||
; CHECK-NEXT: mov z1.d, z24.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
|
||||
|
@ -73,14 +73,14 @@ define <vscale x 16 x half> @complex_mul_v16f16(<vscale x 16 x half> %a, <vscale
|
||||
; CHECK-LABEL: complex_mul_v16f16:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z4.h, #0 // =0x0
|
||||
; CHECK-NEXT: mov z5.h, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: mov z5.d, z4.d
|
||||
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #0
|
||||
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #0
|
||||
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #90
|
||||
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #0
|
||||
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #90
|
||||
; CHECK-NEXT: mov z1.d, z4.d
|
||||
; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #90
|
||||
; CHECK-NEXT: mov z0.d, z5.d
|
||||
; CHECK-NEXT: mov z1.d, z4.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%a.deinterleaved = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %a)
|
||||
@ -104,22 +104,22 @@ define <vscale x 32 x half> @complex_mul_v32f16(<vscale x 32 x half> %a, <vscale
|
||||
; CHECK-LABEL: complex_mul_v32f16:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z24.h, #0 // =0x0
|
||||
; CHECK-NEXT: mov z25.h, #0 // =0x0
|
||||
; CHECK-NEXT: mov z26.h, #0 // =0x0
|
||||
; CHECK-NEXT: mov z27.h, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.h
|
||||
; CHECK-NEXT: mov z25.d, z24.d
|
||||
; CHECK-NEXT: mov z26.d, z24.d
|
||||
; CHECK-NEXT: mov z27.d, z24.d
|
||||
; CHECK-NEXT: fcmla z24.h, p0/m, z7.h, z3.h, #0
|
||||
; CHECK-NEXT: fcmla z25.h, p0/m, z4.h, z0.h, #0
|
||||
; CHECK-NEXT: fcmla z26.h, p0/m, z5.h, z1.h, #0
|
||||
; CHECK-NEXT: fcmla z24.h, p0/m, z4.h, z0.h, #0
|
||||
; CHECK-NEXT: fcmla z25.h, p0/m, z5.h, z1.h, #0
|
||||
; CHECK-NEXT: fcmla z27.h, p0/m, z6.h, z2.h, #0
|
||||
; CHECK-NEXT: fcmla z24.h, p0/m, z7.h, z3.h, #90
|
||||
; CHECK-NEXT: fcmla z25.h, p0/m, z4.h, z0.h, #90
|
||||
; CHECK-NEXT: fcmla z26.h, p0/m, z5.h, z1.h, #90
|
||||
; CHECK-NEXT: fcmla z26.h, p0/m, z7.h, z3.h, #0
|
||||
; CHECK-NEXT: fcmla z24.h, p0/m, z4.h, z0.h, #90
|
||||
; CHECK-NEXT: fcmla z25.h, p0/m, z5.h, z1.h, #90
|
||||
; CHECK-NEXT: fcmla z27.h, p0/m, z6.h, z2.h, #90
|
||||
; CHECK-NEXT: mov z3.d, z24.d
|
||||
; CHECK-NEXT: mov z0.d, z25.d
|
||||
; CHECK-NEXT: mov z1.d, z26.d
|
||||
; CHECK-NEXT: fcmla z26.h, p0/m, z7.h, z3.h, #90
|
||||
; CHECK-NEXT: mov z0.d, z24.d
|
||||
; CHECK-NEXT: mov z1.d, z25.d
|
||||
; CHECK-NEXT: mov z2.d, z27.d
|
||||
; CHECK-NEXT: mov z3.d, z26.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%a.deinterleaved = tail call { <vscale x 16 x half>, <vscale x 16 x half> } @llvm.vector.deinterleave2.nxv32f16(<vscale x 32 x half> %a)
|
||||
|
@ -35,14 +35,14 @@ define <vscale x 8 x float> @complex_mul_v8f32(<vscale x 8 x float> %a, <vscale
|
||||
; CHECK-LABEL: complex_mul_v8f32:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z4.s, #0 // =0x0
|
||||
; CHECK-NEXT: mov z5.s, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: mov z5.d, z4.d
|
||||
; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #0
|
||||
; CHECK-NEXT: fcmla z5.s, p0/m, z2.s, z0.s, #0
|
||||
; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #90
|
||||
; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #0
|
||||
; CHECK-NEXT: fcmla z5.s, p0/m, z2.s, z0.s, #90
|
||||
; CHECK-NEXT: mov z1.d, z4.d
|
||||
; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #90
|
||||
; CHECK-NEXT: mov z0.d, z5.d
|
||||
; CHECK-NEXT: mov z1.d, z4.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%a.deinterleaved = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float> %a)
|
||||
@ -66,22 +66,22 @@ define <vscale x 16 x float> @complex_mul_v16f32(<vscale x 16 x float> %a, <vsca
|
||||
; CHECK-LABEL: complex_mul_v16f32:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z24.s, #0 // =0x0
|
||||
; CHECK-NEXT: mov z25.s, #0 // =0x0
|
||||
; CHECK-NEXT: mov z26.s, #0 // =0x0
|
||||
; CHECK-NEXT: mov z27.s, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: mov z25.d, z24.d
|
||||
; CHECK-NEXT: mov z26.d, z24.d
|
||||
; CHECK-NEXT: mov z27.d, z24.d
|
||||
; CHECK-NEXT: fcmla z24.s, p0/m, z7.s, z3.s, #0
|
||||
; CHECK-NEXT: fcmla z25.s, p0/m, z4.s, z0.s, #0
|
||||
; CHECK-NEXT: fcmla z26.s, p0/m, z5.s, z1.s, #0
|
||||
; CHECK-NEXT: fcmla z24.s, p0/m, z4.s, z0.s, #0
|
||||
; CHECK-NEXT: fcmla z25.s, p0/m, z5.s, z1.s, #0
|
||||
; CHECK-NEXT: fcmla z27.s, p0/m, z6.s, z2.s, #0
|
||||
; CHECK-NEXT: fcmla z24.s, p0/m, z7.s, z3.s, #90
|
||||
; CHECK-NEXT: fcmla z25.s, p0/m, z4.s, z0.s, #90
|
||||
; CHECK-NEXT: fcmla z26.s, p0/m, z5.s, z1.s, #90
|
||||
; CHECK-NEXT: fcmla z26.s, p0/m, z7.s, z3.s, #0
|
||||
; CHECK-NEXT: fcmla z24.s, p0/m, z4.s, z0.s, #90
|
||||
; CHECK-NEXT: fcmla z25.s, p0/m, z5.s, z1.s, #90
|
||||
; CHECK-NEXT: fcmla z27.s, p0/m, z6.s, z2.s, #90
|
||||
; CHECK-NEXT: mov z3.d, z24.d
|
||||
; CHECK-NEXT: mov z0.d, z25.d
|
||||
; CHECK-NEXT: mov z1.d, z26.d
|
||||
; CHECK-NEXT: fcmla z26.s, p0/m, z7.s, z3.s, #90
|
||||
; CHECK-NEXT: mov z0.d, z24.d
|
||||
; CHECK-NEXT: mov z1.d, z25.d
|
||||
; CHECK-NEXT: mov z2.d, z27.d
|
||||
; CHECK-NEXT: mov z3.d, z26.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%a.deinterleaved = tail call { <vscale x 8 x float>, <vscale x 8 x float> } @llvm.vector.deinterleave2.nxv16f32(<vscale x 16 x float> %a)
|
||||
|
@ -35,14 +35,14 @@ define <vscale x 4 x double> @complex_mul_v4f64(<vscale x 4 x double> %a, <vscal
|
||||
; CHECK-LABEL: complex_mul_v4f64:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z4.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z5.d, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z5.d, z4.d
|
||||
; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #0
|
||||
; CHECK-NEXT: fcmla z5.d, p0/m, z2.d, z0.d, #0
|
||||
; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #90
|
||||
; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #0
|
||||
; CHECK-NEXT: fcmla z5.d, p0/m, z2.d, z0.d, #90
|
||||
; CHECK-NEXT: mov z1.d, z4.d
|
||||
; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #90
|
||||
; CHECK-NEXT: mov z0.d, z5.d
|
||||
; CHECK-NEXT: mov z1.d, z4.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%a.deinterleaved = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
|
||||
@ -66,22 +66,22 @@ define <vscale x 8 x double> @complex_mul_v8f64(<vscale x 8 x double> %a, <vscal
|
||||
; CHECK-LABEL: complex_mul_v8f64:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z24.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z25.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z26.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z27.d, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z25.d, z24.d
|
||||
; CHECK-NEXT: mov z26.d, z24.d
|
||||
; CHECK-NEXT: mov z27.d, z24.d
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z3.d, #0
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z0.d, #0
|
||||
; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z1.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z4.d, z0.d, #0
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z5.d, z1.d, #0
|
||||
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z2.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z3.d, #90
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z0.d, #90
|
||||
; CHECK-NEXT: fcmla z26.d, p0/m, z5.d, z1.d, #90
|
||||
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z3.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z4.d, z0.d, #90
|
||||
; CHECK-NEXT: fcmla z25.d, p0/m, z5.d, z1.d, #90
|
||||
; CHECK-NEXT: fcmla z27.d, p0/m, z6.d, z2.d, #90
|
||||
; CHECK-NEXT: mov z3.d, z24.d
|
||||
; CHECK-NEXT: mov z0.d, z25.d
|
||||
; CHECK-NEXT: mov z1.d, z26.d
|
||||
; CHECK-NEXT: fcmla z26.d, p0/m, z7.d, z3.d, #90
|
||||
; CHECK-NEXT: mov z0.d, z24.d
|
||||
; CHECK-NEXT: mov z1.d, z25.d
|
||||
; CHECK-NEXT: mov z2.d, z27.d
|
||||
; CHECK-NEXT: mov z3.d, z26.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%a.deinterleaved = tail call { <vscale x 4 x double>, <vscale x 4 x double> } @llvm.vector.deinterleave2.nxv8f64(<vscale x 8 x double> %a)
|
||||
|
@ -72,13 +72,13 @@ define <vscale x 16 x i16> @complex_mul_v16i16(<vscale x 16 x i16> %a, <vscale x
|
||||
; CHECK-LABEL: complex_mul_v16i16:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z4.h, #0 // =0x0
|
||||
; CHECK-NEXT: mov z5.d, z4.d
|
||||
; CHECK-NEXT: cmla z4.h, z3.h, z1.h, #0
|
||||
; CHECK-NEXT: mov z5.h, #0 // =0x0
|
||||
; CHECK-NEXT: cmla z5.h, z2.h, z0.h, #0
|
||||
; CHECK-NEXT: cmla z4.h, z3.h, z1.h, #90
|
||||
; CHECK-NEXT: cmla z4.h, z3.h, z1.h, #0
|
||||
; CHECK-NEXT: cmla z5.h, z2.h, z0.h, #90
|
||||
; CHECK-NEXT: mov z1.d, z4.d
|
||||
; CHECK-NEXT: cmla z4.h, z3.h, z1.h, #90
|
||||
; CHECK-NEXT: mov z0.d, z5.d
|
||||
; CHECK-NEXT: mov z1.d, z4.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%a.deinterleaved = tail call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %a)
|
||||
@ -102,21 +102,21 @@ define <vscale x 32 x i16> @complex_mul_v32i16(<vscale x 32 x i16> %a, <vscale x
|
||||
; CHECK-LABEL: complex_mul_v32i16:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z24.h, #0 // =0x0
|
||||
; CHECK-NEXT: mov z25.d, z24.d
|
||||
; CHECK-NEXT: mov z26.d, z24.d
|
||||
; CHECK-NEXT: mov z27.d, z24.d
|
||||
; CHECK-NEXT: cmla z24.h, z7.h, z3.h, #0
|
||||
; CHECK-NEXT: cmla z25.h, z4.h, z0.h, #0
|
||||
; CHECK-NEXT: cmla z26.h, z5.h, z1.h, #0
|
||||
; CHECK-NEXT: mov z25.h, #0 // =0x0
|
||||
; CHECK-NEXT: mov z26.h, #0 // =0x0
|
||||
; CHECK-NEXT: mov z27.h, #0 // =0x0
|
||||
; CHECK-NEXT: cmla z24.h, z4.h, z0.h, #0
|
||||
; CHECK-NEXT: cmla z25.h, z5.h, z1.h, #0
|
||||
; CHECK-NEXT: cmla z27.h, z6.h, z2.h, #0
|
||||
; CHECK-NEXT: cmla z24.h, z7.h, z3.h, #90
|
||||
; CHECK-NEXT: cmla z25.h, z4.h, z0.h, #90
|
||||
; CHECK-NEXT: cmla z26.h, z5.h, z1.h, #90
|
||||
; CHECK-NEXT: cmla z26.h, z7.h, z3.h, #0
|
||||
; CHECK-NEXT: cmla z24.h, z4.h, z0.h, #90
|
||||
; CHECK-NEXT: cmla z25.h, z5.h, z1.h, #90
|
||||
; CHECK-NEXT: cmla z27.h, z6.h, z2.h, #90
|
||||
; CHECK-NEXT: mov z3.d, z24.d
|
||||
; CHECK-NEXT: mov z0.d, z25.d
|
||||
; CHECK-NEXT: mov z1.d, z26.d
|
||||
; CHECK-NEXT: cmla z26.h, z7.h, z3.h, #90
|
||||
; CHECK-NEXT: mov z0.d, z24.d
|
||||
; CHECK-NEXT: mov z1.d, z25.d
|
||||
; CHECK-NEXT: mov z2.d, z27.d
|
||||
; CHECK-NEXT: mov z3.d, z26.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%a.deinterleaved = tail call { <vscale x 16 x i16>, <vscale x 16 x i16> } @llvm.vector.deinterleave2.nxv32i16(<vscale x 32 x i16> %a)
|
||||
|
@ -34,13 +34,13 @@ define <vscale x 8 x i32> @complex_mul_v8i32(<vscale x 8 x i32> %a, <vscale x 8
|
||||
; CHECK-LABEL: complex_mul_v8i32:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z4.s, #0 // =0x0
|
||||
; CHECK-NEXT: mov z5.d, z4.d
|
||||
; CHECK-NEXT: cmla z4.s, z3.s, z1.s, #0
|
||||
; CHECK-NEXT: mov z5.s, #0 // =0x0
|
||||
; CHECK-NEXT: cmla z5.s, z2.s, z0.s, #0
|
||||
; CHECK-NEXT: cmla z4.s, z3.s, z1.s, #90
|
||||
; CHECK-NEXT: cmla z4.s, z3.s, z1.s, #0
|
||||
; CHECK-NEXT: cmla z5.s, z2.s, z0.s, #90
|
||||
; CHECK-NEXT: mov z1.d, z4.d
|
||||
; CHECK-NEXT: cmla z4.s, z3.s, z1.s, #90
|
||||
; CHECK-NEXT: mov z0.d, z5.d
|
||||
; CHECK-NEXT: mov z1.d, z4.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%a.deinterleaved = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> %a)
|
||||
@ -64,21 +64,21 @@ define <vscale x 16 x i32> @complex_mul_v16i32(<vscale x 16 x i32> %a, <vscale x
|
||||
; CHECK-LABEL: complex_mul_v16i32:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z24.s, #0 // =0x0
|
||||
; CHECK-NEXT: mov z25.d, z24.d
|
||||
; CHECK-NEXT: mov z26.d, z24.d
|
||||
; CHECK-NEXT: mov z27.d, z24.d
|
||||
; CHECK-NEXT: cmla z24.s, z7.s, z3.s, #0
|
||||
; CHECK-NEXT: cmla z25.s, z4.s, z0.s, #0
|
||||
; CHECK-NEXT: cmla z26.s, z5.s, z1.s, #0
|
||||
; CHECK-NEXT: mov z25.s, #0 // =0x0
|
||||
; CHECK-NEXT: mov z26.s, #0 // =0x0
|
||||
; CHECK-NEXT: mov z27.s, #0 // =0x0
|
||||
; CHECK-NEXT: cmla z24.s, z4.s, z0.s, #0
|
||||
; CHECK-NEXT: cmla z25.s, z5.s, z1.s, #0
|
||||
; CHECK-NEXT: cmla z27.s, z6.s, z2.s, #0
|
||||
; CHECK-NEXT: cmla z24.s, z7.s, z3.s, #90
|
||||
; CHECK-NEXT: cmla z25.s, z4.s, z0.s, #90
|
||||
; CHECK-NEXT: cmla z26.s, z5.s, z1.s, #90
|
||||
; CHECK-NEXT: cmla z26.s, z7.s, z3.s, #0
|
||||
; CHECK-NEXT: cmla z24.s, z4.s, z0.s, #90
|
||||
; CHECK-NEXT: cmla z25.s, z5.s, z1.s, #90
|
||||
; CHECK-NEXT: cmla z27.s, z6.s, z2.s, #90
|
||||
; CHECK-NEXT: mov z3.d, z24.d
|
||||
; CHECK-NEXT: mov z0.d, z25.d
|
||||
; CHECK-NEXT: mov z1.d, z26.d
|
||||
; CHECK-NEXT: cmla z26.s, z7.s, z3.s, #90
|
||||
; CHECK-NEXT: mov z0.d, z24.d
|
||||
; CHECK-NEXT: mov z1.d, z25.d
|
||||
; CHECK-NEXT: mov z2.d, z27.d
|
||||
; CHECK-NEXT: mov z3.d, z26.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%a.deinterleaved = tail call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> %a)
|
||||
|
@ -34,13 +34,13 @@ define <vscale x 4 x i64> @complex_mul_v4i64(<vscale x 4 x i64> %a, <vscale x 4
|
||||
; CHECK-LABEL: complex_mul_v4i64:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z4.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z5.d, z4.d
|
||||
; CHECK-NEXT: cmla z4.d, z3.d, z1.d, #0
|
||||
; CHECK-NEXT: mov z5.d, #0 // =0x0
|
||||
; CHECK-NEXT: cmla z5.d, z2.d, z0.d, #0
|
||||
; CHECK-NEXT: cmla z4.d, z3.d, z1.d, #90
|
||||
; CHECK-NEXT: cmla z4.d, z3.d, z1.d, #0
|
||||
; CHECK-NEXT: cmla z5.d, z2.d, z0.d, #90
|
||||
; CHECK-NEXT: mov z1.d, z4.d
|
||||
; CHECK-NEXT: cmla z4.d, z3.d, z1.d, #90
|
||||
; CHECK-NEXT: mov z0.d, z5.d
|
||||
; CHECK-NEXT: mov z1.d, z4.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%a.deinterleaved = tail call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.vector.deinterleave2.nxv4i64(<vscale x 4 x i64> %a)
|
||||
@ -64,21 +64,21 @@ define <vscale x 8 x i64> @complex_mul_v8i64(<vscale x 8 x i64> %a, <vscale x 8
|
||||
; CHECK-LABEL: complex_mul_v8i64:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z24.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z25.d, z24.d
|
||||
; CHECK-NEXT: mov z26.d, z24.d
|
||||
; CHECK-NEXT: mov z27.d, z24.d
|
||||
; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #0
|
||||
; CHECK-NEXT: cmla z25.d, z4.d, z0.d, #0
|
||||
; CHECK-NEXT: cmla z26.d, z5.d, z1.d, #0
|
||||
; CHECK-NEXT: mov z25.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z26.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z27.d, #0 // =0x0
|
||||
; CHECK-NEXT: cmla z24.d, z4.d, z0.d, #0
|
||||
; CHECK-NEXT: cmla z25.d, z5.d, z1.d, #0
|
||||
; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #0
|
||||
; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #90
|
||||
; CHECK-NEXT: cmla z25.d, z4.d, z0.d, #90
|
||||
; CHECK-NEXT: cmla z26.d, z5.d, z1.d, #90
|
||||
; CHECK-NEXT: cmla z26.d, z7.d, z3.d, #0
|
||||
; CHECK-NEXT: cmla z24.d, z4.d, z0.d, #90
|
||||
; CHECK-NEXT: cmla z25.d, z5.d, z1.d, #90
|
||||
; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #90
|
||||
; CHECK-NEXT: mov z3.d, z24.d
|
||||
; CHECK-NEXT: mov z0.d, z25.d
|
||||
; CHECK-NEXT: mov z1.d, z26.d
|
||||
; CHECK-NEXT: cmla z26.d, z7.d, z3.d, #90
|
||||
; CHECK-NEXT: mov z0.d, z24.d
|
||||
; CHECK-NEXT: mov z1.d, z25.d
|
||||
; CHECK-NEXT: mov z2.d, z27.d
|
||||
; CHECK-NEXT: mov z3.d, z26.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%a.deinterleaved = tail call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %a)
|
||||
@ -102,21 +102,21 @@ define <vscale x 8 x i64> @complex_minus_mul_v8i64(<vscale x 8 x i64> %a, <vscal
|
||||
; CHECK-LABEL: complex_minus_mul_v8i64:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z24.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z25.d, z24.d
|
||||
; CHECK-NEXT: mov z26.d, z24.d
|
||||
; CHECK-NEXT: mov z27.d, z24.d
|
||||
; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #270
|
||||
; CHECK-NEXT: cmla z25.d, z4.d, z0.d, #270
|
||||
; CHECK-NEXT: cmla z26.d, z5.d, z1.d, #270
|
||||
; CHECK-NEXT: mov z25.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z26.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z27.d, #0 // =0x0
|
||||
; CHECK-NEXT: cmla z24.d, z4.d, z0.d, #270
|
||||
; CHECK-NEXT: cmla z25.d, z5.d, z1.d, #270
|
||||
; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #270
|
||||
; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #180
|
||||
; CHECK-NEXT: cmla z25.d, z4.d, z0.d, #180
|
||||
; CHECK-NEXT: cmla z26.d, z5.d, z1.d, #180
|
||||
; CHECK-NEXT: cmla z26.d, z7.d, z3.d, #270
|
||||
; CHECK-NEXT: cmla z24.d, z4.d, z0.d, #180
|
||||
; CHECK-NEXT: cmla z25.d, z5.d, z1.d, #180
|
||||
; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #180
|
||||
; CHECK-NEXT: mov z3.d, z24.d
|
||||
; CHECK-NEXT: mov z0.d, z25.d
|
||||
; CHECK-NEXT: mov z1.d, z26.d
|
||||
; CHECK-NEXT: cmla z26.d, z7.d, z3.d, #180
|
||||
; CHECK-NEXT: mov z0.d, z24.d
|
||||
; CHECK-NEXT: mov z1.d, z25.d
|
||||
; CHECK-NEXT: mov z2.d, z27.d
|
||||
; CHECK-NEXT: mov z3.d, z26.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%a.deinterleaved = tail call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %a)
|
||||
|
@ -8,24 +8,24 @@ target triple = "aarch64"
|
||||
define <vscale x 4 x double> @complex_mul_const(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
|
||||
; CHECK-LABEL: complex_mul_const:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z5.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z4.d, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: fmov z7.d, #3.00000000
|
||||
; CHECK-NEXT: fmov z24.d, #11.00000000
|
||||
; CHECK-NEXT: mov z6.d, z4.d
|
||||
; CHECK-NEXT: mov z5.d, z4.d
|
||||
; CHECK-NEXT: fcmla z6.d, p0/m, z1.d, z3.d, #0
|
||||
; CHECK-NEXT: fcmla z5.d, p0/m, z0.d, z2.d, #0
|
||||
; CHECK-NEXT: fcmla z6.d, p0/m, z1.d, z3.d, #90
|
||||
; CHECK-NEXT: zip2 z1.d, z24.d, z7.d
|
||||
; CHECK-NEXT: fcmla z5.d, p0/m, z0.d, z2.d, #90
|
||||
; CHECK-NEXT: zip1 z2.d, z24.d, z7.d
|
||||
; CHECK-NEXT: mov z0.d, z4.d
|
||||
; CHECK-NEXT: fcmla z4.d, p0/m, z6.d, z1.d, #0
|
||||
; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z2.d, #0
|
||||
; CHECK-NEXT: fcmla z4.d, p0/m, z6.d, z1.d, #90
|
||||
; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z2.d, #90
|
||||
; CHECK-NEXT: mov z1.d, z4.d
|
||||
; CHECK-NEXT: fmov z6.d, #3.00000000
|
||||
; CHECK-NEXT: fmov z7.d, #11.00000000
|
||||
; CHECK-NEXT: fcmla z5.d, p0/m, z1.d, z3.d, #0
|
||||
; CHECK-NEXT: fcmla z4.d, p0/m, z0.d, z2.d, #0
|
||||
; CHECK-NEXT: fcmla z5.d, p0/m, z1.d, z3.d, #90
|
||||
; CHECK-NEXT: fcmla z4.d, p0/m, z0.d, z2.d, #90
|
||||
; CHECK-NEXT: mov z2.d, #0 // =0x0
|
||||
; CHECK-NEXT: zip2 z1.d, z7.d, z6.d
|
||||
; CHECK-NEXT: zip1 z3.d, z7.d, z6.d
|
||||
; CHECK-NEXT: mov z0.d, #0 // =0x0
|
||||
; CHECK-NEXT: fcmla z2.d, p0/m, z5.d, z1.d, #0
|
||||
; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z3.d, #0
|
||||
; CHECK-NEXT: fcmla z2.d, p0/m, z5.d, z1.d, #90
|
||||
; CHECK-NEXT: fcmla z0.d, p0/m, z4.d, z3.d, #90
|
||||
; CHECK-NEXT: mov z1.d, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
|
||||
@ -56,25 +56,24 @@ define <vscale x 4 x double> @complex_mul_non_const(<vscale x 4 x double> %a, <v
|
||||
; CHECK-LABEL: complex_mul_non_const:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z6.d, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z7.d, #0 // =0x0
|
||||
; CHECK-NEXT: // kill: def $d5 killed $d5 def $z5
|
||||
; CHECK-NEXT: // kill: def $d4 killed $d4 def $z4
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: mov z5.d, d5
|
||||
; CHECK-NEXT: mov z4.d, d4
|
||||
; CHECK-NEXT: mov z24.d, z6.d
|
||||
; CHECK-NEXT: mov z7.d, z6.d
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
|
||||
; CHECK-NEXT: fcmla z7.d, p0/m, z0.d, z2.d, #0
|
||||
; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
|
||||
; CHECK-NEXT: zip2 z1.d, z4.d, z5.d
|
||||
; CHECK-NEXT: fcmla z7.d, p0/m, z0.d, z2.d, #90
|
||||
; CHECK-NEXT: fcmla z6.d, p0/m, z0.d, z2.d, #0
|
||||
; CHECK-NEXT: fcmla z7.d, p0/m, z1.d, z3.d, #0
|
||||
; CHECK-NEXT: zip2 z24.d, z4.d, z5.d
|
||||
; CHECK-NEXT: fcmla z6.d, p0/m, z0.d, z2.d, #90
|
||||
; CHECK-NEXT: fcmla z7.d, p0/m, z1.d, z3.d, #90
|
||||
; CHECK-NEXT: zip1 z2.d, z4.d, z5.d
|
||||
; CHECK-NEXT: mov z0.d, z6.d
|
||||
; CHECK-NEXT: fcmla z6.d, p0/m, z24.d, z1.d, #0
|
||||
; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z2.d, #0
|
||||
; CHECK-NEXT: fcmla z6.d, p0/m, z24.d, z1.d, #90
|
||||
; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z2.d, #90
|
||||
; CHECK-NEXT: mov z1.d, z6.d
|
||||
; CHECK-NEXT: mov z1.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z0.d, #0 // =0x0
|
||||
; CHECK-NEXT: fcmla z0.d, p0/m, z6.d, z2.d, #0
|
||||
; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z24.d, #0
|
||||
; CHECK-NEXT: fcmla z0.d, p0/m, z6.d, z2.d, #90
|
||||
; CHECK-NEXT: fcmla z1.d, p0/m, z7.d, z24.d, #90
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%c.coerce.fca.0.extract = extractvalue [2 x double] %c, 0
|
||||
|
@ -438,21 +438,20 @@ define <vscale x 2 x double> @extract_col_q_v2f64(<vscale x 2 x double> %zd, <vs
|
||||
define <vscale x 4 x i32> @test_sink_offset_operand(<vscale x 4 x i1> %pg, i32 %base, i32 %N) {
|
||||
; CHECK-LABEL: test_sink_offset_operand:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z0.s, #0 // =0x0
|
||||
; CHECK-NEXT: mov w12, w0
|
||||
; CHECK-NEXT: .LBB26_1: // %for.body
|
||||
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
|
||||
; CHECK-NEXT: mov z1.d, z0.d
|
||||
; CHECK-NEXT: mov z2.d, z0.d
|
||||
; CHECK-NEXT: mov z0.s, #0 // =0x0
|
||||
; CHECK-NEXT: mov z1.s, #0 // =0x0
|
||||
; CHECK-NEXT: subs w1, w1, #3
|
||||
; CHECK-NEXT: mov z3.d, z0.d
|
||||
; CHECK-NEXT: mov z1.s, p0/m, za0h.s[w12, 0]
|
||||
; CHECK-NEXT: mov z2.s, p0/m, za0h.s[w12, 1]
|
||||
; CHECK-NEXT: mov z3.s, p0/m, za0h.s[w12, 2]
|
||||
; CHECK-NEXT: mov z2.s, #0 // =0x0
|
||||
; CHECK-NEXT: mov z0.s, p0/m, za0h.s[w12, 0]
|
||||
; CHECK-NEXT: mov z1.s, p0/m, za0h.s[w12, 1]
|
||||
; CHECK-NEXT: mov z2.s, p0/m, za0h.s[w12, 2]
|
||||
; CHECK-NEXT: b.ne .LBB26_1
|
||||
; CHECK-NEXT: // %bb.2: // %exit
|
||||
; CHECK-NEXT: add z0.s, z1.s, z2.s
|
||||
; CHECK-NEXT: add z0.s, z0.s, z3.s
|
||||
; CHECK-NEXT: add z0.s, z0.s, z1.s
|
||||
; CHECK-NEXT: add z0.s, z0.s, z2.s
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%add1 = add i32 %base, 1
|
||||
|
@ -29,58 +29,58 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
|
||||
; CHECK-NEXT: tbnz w1, #0, .LBB1_2
|
||||
; CHECK-NEXT: // %bb.1: // %vector.body
|
||||
; CHECK-NEXT: mov z0.b, #0 // =0x0
|
||||
; CHECK-NEXT: mov z1.b, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: umov w8, v0.b[8]
|
||||
; CHECK-NEXT: mov v1.16b, v0.16b
|
||||
; CHECK-NEXT: mov v1.b[1], v0.b[1]
|
||||
; CHECK-NEXT: fmov s2, w8
|
||||
; CHECK-NEXT: mov v2.b[1], v0.b[9]
|
||||
; CHECK-NEXT: mov v1.b[2], v0.b[2]
|
||||
; CHECK-NEXT: mov v2.b[2], v0.b[10]
|
||||
; CHECK-NEXT: mov v2.b[1], v0.b[9]
|
||||
; CHECK-NEXT: mov v1.b[3], v0.b[3]
|
||||
; CHECK-NEXT: mov v2.b[3], v0.b[11]
|
||||
; CHECK-NEXT: mov v2.b[2], v0.b[10]
|
||||
; CHECK-NEXT: mov v1.b[4], v0.b[4]
|
||||
; CHECK-NEXT: mov v2.b[4], v0.b[12]
|
||||
; CHECK-NEXT: mov v2.b[3], v0.b[11]
|
||||
; CHECK-NEXT: mov v1.b[5], v0.b[5]
|
||||
; CHECK-NEXT: mov v2.b[5], v0.b[13]
|
||||
; CHECK-NEXT: mov v2.b[4], v0.b[12]
|
||||
; CHECK-NEXT: mov v1.b[6], v0.b[6]
|
||||
; CHECK-NEXT: mov v2.b[6], v0.b[14]
|
||||
; CHECK-NEXT: mov v2.b[5], v0.b[13]
|
||||
; CHECK-NEXT: mov v1.b[7], v0.b[7]
|
||||
; CHECK-NEXT: mov v2.b[6], v0.b[14]
|
||||
; CHECK-NEXT: uunpklo z1.h, z1.b
|
||||
; CHECK-NEXT: mov v2.b[7], v0.b[15]
|
||||
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
|
||||
; CHECK-NEXT: uunpklo z1.h, z1.b
|
||||
; CHECK-NEXT: uunpklo z1.s, z1.h
|
||||
; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-NEXT: uunpklo z0.h, z0.b
|
||||
; CHECK-NEXT: uunpklo z2.h, z2.b
|
||||
; CHECK-NEXT: uunpklo z1.s, z1.h
|
||||
; CHECK-NEXT: lsl z1.s, z1.s, #31
|
||||
; CHECK-NEXT: uunpklo z3.h, z3.b
|
||||
; CHECK-NEXT: uunpklo z0.s, z0.h
|
||||
; CHECK-NEXT: uunpklo z2.s, z2.h
|
||||
; CHECK-NEXT: lsl z1.s, z1.s, #31
|
||||
; CHECK-NEXT: asr z1.s, z1.s, #31
|
||||
; CHECK-NEXT: uunpklo z3.s, z3.h
|
||||
; CHECK-NEXT: lsl z0.s, z0.s, #31
|
||||
; CHECK-NEXT: asr z1.s, z1.s, #31
|
||||
; CHECK-NEXT: and z1.s, z1.s, #0x1
|
||||
; CHECK-NEXT: lsl z2.s, z2.s, #31
|
||||
; CHECK-NEXT: asr z0.s, z0.s, #31
|
||||
; CHECK-NEXT: and z1.s, z1.s, #0x1
|
||||
; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
|
||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||
; CHECK-NEXT: lsl z3.s, z3.s, #31
|
||||
; CHECK-NEXT: asr z2.s, z2.s, #31
|
||||
; CHECK-NEXT: and z0.s, z0.s, #0x1
|
||||
; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
|
||||
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
|
||||
; CHECK-NEXT: asr z3.s, z3.s, #31
|
||||
; CHECK-NEXT: and z2.s, z2.s, #0x1
|
||||
; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
|
||||
; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0
|
||||
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, #2, mul vl]
|
||||
; CHECK-NEXT: and z3.s, z3.s, #0x1
|
||||
; CHECK-NEXT: cmpne p4.s, p0/z, z2.s, #0
|
||||
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, #1, mul vl]
|
||||
; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x0]
|
||||
; CHECK-NEXT: cmpne p3.s, p0/z, z3.s, #0
|
||||
; CHECK-NEXT: ld1w { z3.s }, p0/z, [x0, #3, mul vl]
|
||||
; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0
|
||||
; CHECK-NEXT: mov z2.s, p4/m, #0 // =0x0
|
||||
; CHECK-NEXT: st1w { z1.s }, p0, [x0]
|
||||
; CHECK-NEXT: st1w { z0.s }, p0, [x0, #2, mul vl]
|
||||
; CHECK-NEXT: mov z3.s, p3/m, #0 // =0x0
|
||||
; CHECK-NEXT: st1w { z2.s }, p0, [x0, #1, mul vl]
|
||||
|
@ -5,16 +5,16 @@ define void @main(ptr %0) {
|
||||
; CHECK-LABEL: main:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: mov z0.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z1.d, #0 // =0x0
|
||||
; CHECK-NEXT: ptrue p0.d, vl1
|
||||
; CHECK-NEXT: mov z1.d, z0.d
|
||||
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
|
||||
; CHECK-NEXT: uzp1 v1.2s, v0.2s, v1.2s
|
||||
; CHECK-NEXT: neg v1.2s, v1.2s
|
||||
; CHECK-NEXT: smov x8, v1.s[0]
|
||||
; CHECK-NEXT: smov x9, v1.s[1]
|
||||
; CHECK-NEXT: mov z0.d, p0/m, x8
|
||||
; CHECK-NEXT: mov z0.d, p0/m, x9
|
||||
; CHECK-NEXT: str z0, [x0]
|
||||
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
|
||||
; CHECK-NEXT: uzp1 v0.2s, v1.2s, v0.2s
|
||||
; CHECK-NEXT: neg v0.2s, v0.2s
|
||||
; CHECK-NEXT: smov x8, v0.s[0]
|
||||
; CHECK-NEXT: smov x9, v0.s[1]
|
||||
; CHECK-NEXT: mov z1.d, p0/m, x8
|
||||
; CHECK-NEXT: mov z1.d, p0/m, x9
|
||||
; CHECK-NEXT: str z1, [x0]
|
||||
; CHECK-NEXT: ret
|
||||
"entry":
|
||||
%1 = bitcast <vscale x 2 x i64> zeroinitializer to <vscale x 4 x i32>
|
||||
|
@ -331,12 +331,12 @@ define <vscale x 4 x double> @scvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
|
||||
define <vscale x 4 x double> @scvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
|
||||
; CHECK-LABEL: scvtf_d_nxv4i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z1.d, #0 // =0x0
|
||||
; CHECK-NEXT: punpklo p1.h, p0.b
|
||||
; CHECK-NEXT: punpkhi p0.h, p0.b
|
||||
; CHECK-NEXT: mov z0.d, z1.d
|
||||
; CHECK-NEXT: fmov z1.d, p0/m, #-1.00000000
|
||||
; CHECK-NEXT: fmov z0.d, p1/m, #-1.00000000
|
||||
; CHECK-NEXT: fmov z1.d, p0/m, #-1.00000000
|
||||
; CHECK-NEXT: ret
|
||||
%res = sitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
|
||||
ret <vscale x 4 x double> %res
|
||||
@ -392,12 +392,12 @@ define <vscale x 4 x double> @ucvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
|
||||
define <vscale x 4 x double> @ucvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
|
||||
; CHECK-LABEL: ucvtf_d_nxv4i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.d, #0 // =0x0
|
||||
; CHECK-NEXT: mov z1.d, #0 // =0x0
|
||||
; CHECK-NEXT: punpklo p1.h, p0.b
|
||||
; CHECK-NEXT: punpkhi p0.h, p0.b
|
||||
; CHECK-NEXT: mov z0.d, z1.d
|
||||
; CHECK-NEXT: fmov z1.d, p0/m, #1.00000000
|
||||
; CHECK-NEXT: fmov z0.d, p1/m, #1.00000000
|
||||
; CHECK-NEXT: fmov z1.d, p0/m, #1.00000000
|
||||
; CHECK-NEXT: ret
|
||||
%res = uitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
|
||||
ret <vscale x 4 x double> %res
|
||||
|
@ -419,7 +419,6 @@ define <1 x i64> @insertelement_v1i64(<1 x i64> %op1) {
|
||||
; CHECK-LABEL: insertelement_v1i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.d, #5 // =0x5
|
||||
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
;
|
||||
; NONEON-NOSVE-LABEL: insertelement_v1i64:
|
||||
|
@ -39,7 +39,6 @@ define <2 x i64> @fixed_vec_zero_constant() {
|
||||
; CHECK-LABEL: fixed_vec_zero_constant:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.d, #0 // =0x0
|
||||
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
;
|
||||
; NONEON-NOSVE-LABEL: fixed_vec_zero_constant:
|
||||
@ -54,7 +53,6 @@ define <2 x double> @fixed_vec_fp_zero_constant() {
|
||||
; CHECK-LABEL: fixed_vec_fp_zero_constant:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: mov z0.d, #0 // =0x0
|
||||
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
|
||||
; CHECK-NEXT: ret
|
||||
;
|
||||
; NONEON-NOSVE-LABEL: fixed_vec_fp_zero_constant:
|
||||
|
Loading…
x
Reference in New Issue
Block a user