[X86] Improve F16C CVT schedules on SNB/HSW/BDW

Add a complete IvyBridge schedule for the F16C conversions (IvyBridge was the first CPU to support F16C and shares the SandyBridge model), and split the rr/rm schedules as they usually have very different port usage.

On Haswell/Broadwell, the CVTPH2PS schedules (WriteCvtPH2PS*) use Port1, not Port0.

Confirmed by comparing against a mixture of Agner Fog's instruction tables and uops.info measurements.
This commit is contained in:
Simon Pilgrim 2024-11-24 17:04:53 +00:00
parent 6cfaddfd52
commit 0a6d797c20
7 changed files with 65 additions and 61 deletions

View File

@ -393,11 +393,11 @@ defm : BWWriteResPair<WriteCvtPD2PS, [BWPort1,BWPort5], 4, [1,1], 2, 5>;
defm : BWWriteResPair<WriteCvtPD2PSY, [BWPort1,BWPort5], 6, [1,1], 2, 6>;
defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>;
defm : X86WriteRes<WriteCvtPH2PS, [BWPort0,BWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [BWPort0,BWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PS, [BWPort1,BWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [BWPort1,BWPort5], 2, [1,1], 2>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZ>;
defm : X86WriteRes<WriteCvtPH2PSLd, [BWPort0,BWPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort0,BWPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSLd, [BWPort1,BWPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort1,BWPort23], 6, [1,1], 2>;
defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>;
defm : X86WriteRes<WriteCvtPS2PH, [BWPort1,BWPort5], 4, [1,1], 2>;

View File

@ -393,12 +393,12 @@ defm : HWWriteResPair<WriteCvtPD2PS, [HWPort1,HWPort5], 4, [1,1], 2, 6>;
defm : HWWriteResPair<WriteCvtPD2PSY, [HWPort1,HWPort5], 6, [1,1], 2, 6>;
defm : HWWriteResPair<WriteCvtPD2PSZ, [HWPort1,HWPort5], 4, [1,1], 2, 6>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPH2PS, [HWPort0,HWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [HWPort0,HWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZ, [HWPort0,HWPort5], 2, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPH2PSLd, [HWPort0,HWPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [HWPort0,HWPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZLd, [HWPort0,HWPort23], 7, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPH2PS, [HWPort1,HWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [HWPort1,HWPort5], 2, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZ, [HWPort1,HWPort5], 2, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPH2PSLd, [HWPort1,HWPort23], 6, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [HWPort1,HWPort23], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZLd, [HWPort1,HWPort23], 7, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PH, [HWPort1,HWPort5], 4, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [HWPort1,HWPort5], 6, [1,1], 2>;

View File

@ -361,16 +361,20 @@ defm : SBWriteResPair<WriteCvtPD2PS, [SBPort1,SBPort5], 4, [1,1], 2, 6>;
defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>;
defm : SBWriteResPair<WriteCvtPD2PSZ, [SBPort1,SBPort5], 4, [1,1], 2, 7>; // Unsupported = 1
defm : SBWriteResPair<WriteCvtPH2PS, [SBPort1], 3>;
defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>;
defm : SBWriteResPair<WriteCvtPH2PSZ, [SBPort1], 3>; // Unsupported = 1
// F16C Instructions (IvyBridge+)
defm : X86WriteRes<WriteCvtPH2PS, [SBPort0,SBPort5], 3, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSY, [SBPort0,SBPort5], 3, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSZ, [SBPort0,SBPort5], 3, [1,1], 2>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPH2PSLd, [SBPort0,SBPort23], 8, [1,1], 2>;
defm : X86WriteRes<WriteCvtPH2PSYLd, [SBPort0,SBPort5,SBPort23], 8, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtPH2PSZLd, [SBPort0,SBPort5,SBPort23], 8, [1,1,1], 3>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PH, [SBPort1], 3, [1], 1>;
defm : X86WriteRes<WriteCvtPS2PHY, [SBPort1], 3, [1], 1>;
defm : X86WriteRes<WriteCvtPS2PHZ, [SBPort1], 3, [1], 1>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PHSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>;
defm : X86WriteRes<WriteCvtPS2PHZSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PH, [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtPS2PHY, [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>;
defm : X86WriteRes<WriteCvtPS2PHZ, [SBPort0,SBPort1,SBPort5], 10, [1,1,1], 3>; // Unsupported = 1
defm : X86WriteRes<WriteCvtPS2PHSt, [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHYSt, [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>;
defm : X86WriteRes<WriteCvtPS2PHZSt, [SBPort0,SBPort1,SBPort23,SBPort4], 13, [1,1,1,1], 4>; // Unsupported = 1
// Vector integer operations.
defm : X86WriteRes<WriteVecLoad, [SBPort23], 5, [1], 1>;

View File

@ -45,14 +45,14 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 4.00 - 0.67
# CHECK-NEXT: - - - 8.00 1.67 1.67 2.00 4.00 - 0.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.33 0.33 1.00 - - 0.33 vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2

View File

@ -22,14 +22,14 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %ymm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %ymm0, (%rax)
# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: 3 8 1.00 * vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %ymm0, %xmm2
# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
@ -43,15 +43,15 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - - 8.00 2.00 - 2.00 2.00
# CHECK-NEXT: - - 8.00 4.00 2.00 5.00 2.00 2.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %ymm0, %xmm2
# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax)
# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2
# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax)

View File

@ -45,14 +45,14 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 6.00 - 0.67
# CHECK-NEXT: - - - 8.00 1.67 1.67 2.00 6.00 - 0.67
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: - - - 1.00 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2

View File

@ -22,14 +22,14 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %ymm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %ymm0, (%rax)
# CHECK-NEXT: 2 3 1.00 vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: 3 8 1.00 * vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: 3 10 1.00 vcvtps2ph $0, %ymm0, %xmm2
# CHECK-NEXT: 4 13 1.00 * vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
@ -43,15 +43,15 @@ vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - - 8.00 2.00 - 2.00 2.00
# CHECK-NEXT: - - 8.00 4.00 2.00 5.00 2.00 2.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %ymm0, %xmm2
# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax)
# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - 1.00 - - vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2
# CHECK-NEXT: - - 1.00 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax)