mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-29 02:36:07 +00:00

PowerPC subtargets prior to Power9 provide their scheduling information through the legacy itinerary mechanism. This patch rewrites the TableGen file to define that scheduling information with the newer SchedModel approach, which can improve some benchmarks. Reviewed By: shchenz Differential Revision: https://reviews.llvm.org/D154488
123 lines
4.0 KiB
LLVM
123 lines
4.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;
; Purpose: the functions in this file extract integer elements from a vector,
; convert them to double with uitofp, and pack the results into a <2 x double>.
; The llc output is checked for -mcpu=pwr9 and -mcpu=pwr8, each on the
; powerpc64 and powerpc64le triples, using one FileCheck prefix per
; configuration.
|
|
; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
|
|
; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
|
|
; RUN: -check-prefix=P9BE
|
|
; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
|
|
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
|
|
; RUN: -check-prefix=P9LE
|
|
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
|
|
; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
|
|
; RUN: -check-prefix=P8BE
|
|
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
|
|
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
|
|
; RUN: -check-prefix=P8LE
|
|
; test1: take halfword elements 0 and 1 of %a, convert each one to double
; as an unsigned value, and return them as lanes 0 and 1 of a <2 x double>.
define dso_local <2 x double> @test1(<8 x i16> %a) {
; P9BE-LABEL: test1:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: vextractuh v3, v2, 0
; P9BE-NEXT: vextractuh v2, v2, 2
; P9BE-NEXT: xscvuxddp f0, v3
; P9BE-NEXT: xscvuxddp f1, v2
; P9BE-NEXT: xxmrghd v2, vs0, vs1
; P9BE-NEXT: blr
;
; P9LE-LABEL: test1:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: vextractuh v3, v2, 14
; P9LE-NEXT: vextractuh v2, v2, 12
; P9LE-NEXT: xscvuxddp f0, v3
; P9LE-NEXT: xscvuxddp f1, v2
; P9LE-NEXT: xxmrghd v2, vs1, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: test1:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: mfvsrd r3, v2
; P8BE-NEXT: rldicl r4, r3, 16, 48
; P8BE-NEXT: rldicl r3, r3, 32, 48
; P8BE-NEXT: clrlwi r4, r4, 16
; P8BE-NEXT: clrlwi r3, r3, 16
; P8BE-NEXT: mtfprwz f0, r4
; P8BE-NEXT: mtfprwz f1, r3
; P8BE-NEXT: xscvuxddp f0, f0
; P8BE-NEXT: xscvuxddp f1, f1
; P8BE-NEXT: xxmrghd v2, vs0, vs1
; P8BE-NEXT: blr
;
; P8LE-LABEL: test1:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: mffprd r3, f0
; P8LE-NEXT: clrldi r4, r3, 48
; P8LE-NEXT: rldicl r3, r3, 48, 48
; P8LE-NEXT: clrlwi r4, r4, 16
; P8LE-NEXT: clrlwi r3, r3, 16
; P8LE-NEXT: mtfprwz f0, r4
; P8LE-NEXT: mtfprwz f1, r3
; P8LE-NEXT: xscvuxddp f0, f0
; P8LE-NEXT: xscvuxddp f1, f1
; P8LE-NEXT: xxmrghd v2, vs1, vs0
; P8LE-NEXT: blr
entry:
  ; Lane 0 of the result: element 0 of %a, converted unsigned i16 -> double.
  %half0 = extractelement <8 x i16> %a, i32 0
  %dbl0 = uitofp i16 %half0 to double
  %lane0 = insertelement <2 x double> undef, double %dbl0, i32 0
  ; Lane 1 of the result: element 1 of %a, converted the same way.
  %half1 = extractelement <8 x i16> %a, i32 1
  %dbl1 = uitofp i16 %half1 to double
  %result = insertelement <2 x double> %lane0, double %dbl1, i32 1
  ret <2 x double> %result
}
|
|
|
|
; test2: take word element 0 of %a and word element 1 of %b, convert each one
; to double as an unsigned value, and return them as lanes 0 and 1 of a
; <2 x double>.
define dso_local <2 x double> @test2(<4 x i32> %a, <4 x i32> %b) {
; P9BE-LABEL: test2:
; P9BE: # %bb.0: # %entry
; P9BE-NEXT: xxextractuw f0, v2, 0
; P9BE-NEXT: xxextractuw f1, v3, 4
; P9BE-NEXT: xscvuxddp f0, f0
; P9BE-NEXT: xscvuxddp f1, f1
; P9BE-NEXT: xxmrghd v2, vs0, vs1
; P9BE-NEXT: blr
;
; P9LE-LABEL: test2:
; P9LE: # %bb.0: # %entry
; P9LE-NEXT: xxextractuw f0, v2, 12
; P9LE-NEXT: xxextractuw f1, v3, 8
; P9LE-NEXT: xscvuxddp f0, f0
; P9LE-NEXT: xscvuxddp f1, f1
; P9LE-NEXT: xxmrghd v2, vs1, vs0
; P9LE-NEXT: blr
;
; P8BE-LABEL: test2:
; P8BE: # %bb.0: # %entry
; P8BE-NEXT: xxsldwi vs0, v2, v2, 3
; P8BE-NEXT: mffprwz r3, f0
; P8BE-NEXT: mtfprwz f0, r3
; P8BE-NEXT: mfvsrwz r3, v3
; P8BE-NEXT: mtfprwz f1, r3
; P8BE-NEXT: xscvuxddp f0, f0
; P8BE-NEXT: xscvuxddp f1, f1
; P8BE-NEXT: xxmrghd v2, vs0, vs1
; P8BE-NEXT: blr
;
; P8LE-LABEL: test2:
; P8LE: # %bb.0: # %entry
; P8LE-NEXT: xxswapd vs0, v2
; P8LE-NEXT: xxsldwi vs1, v3, v3, 1
; P8LE-NEXT: mffprwz r3, f0
; P8LE-NEXT: mtfprwz f0, r3
; P8LE-NEXT: mffprwz r3, f1
; P8LE-NEXT: mtfprwz f1, r3
; P8LE-NEXT: xscvuxddp f0, f0
; P8LE-NEXT: xscvuxddp f1, f1
; P8LE-NEXT: xxmrghd v2, vs1, vs0
; P8LE-NEXT: blr
entry:
  ; Lane 0 of the result: element 0 of %a, converted unsigned i32 -> double.
  %word0 = extractelement <4 x i32> %a, i32 0
  %fp0 = uitofp i32 %word0 to double
  %partial = insertelement <2 x double> undef, double %fp0, i32 0
  ; Lane 1 of the result: element 1 of the second operand %b.
  %word1 = extractelement <4 x i32> %b, i32 1
  %fp1 = uitofp i32 %word1 to double
  %packed = insertelement <2 x double> %partial, double %fp1, i32 1
  ret <2 x double> %packed
}
|