[X86] AMD Zen 5 Initial enablement

This commit is contained in:
Ganesh Gopalasubramanian 2024-09-16 11:16:14 +00:00 committed by Tobias Hieta
parent 82e85b62da
commit 149a150b50
30 changed files with 238 additions and 4 deletions

View File

@ -723,6 +723,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
case CK_ZNVER4:
defineCPUMacros(Builder, "znver4");
break;
case CK_ZNVER5:
defineCPUMacros(Builder, "znver5");
break;
case CK_Geode:
defineCPUMacros(Builder, "geode");
break;
@ -1613,6 +1616,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
case CK_ZNVER2:
case CK_ZNVER3:
case CK_ZNVER4:
case CK_ZNVER5:
// Deprecated
case CK_x86_64:
case CK_x86_64_v2:

View File

@ -205,4 +205,5 @@ void verifycpustrings(void) {
(void)__builtin_cpu_is("znver2");
(void)__builtin_cpu_is("znver3");
(void)__builtin_cpu_is("znver4");
(void)__builtin_cpu_is("znver5");
}

View File

@ -242,6 +242,10 @@
// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver4 2>&1 \
// RUN: | FileCheck %s -check-prefix=znver4
// znver4: "-target-cpu" "znver4"
//
// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver5 2>&1 \
// RUN: | FileCheck %s -check-prefix=znver5
// znver5: "-target-cpu" "znver5"
// RUN: %clang -target x86_64 -c -### %s -march=x86-64 2>&1 | FileCheck %s --check-prefix=x86-64
// x86-64: "-target-cpu" "x86-64"

View File

@ -38,5 +38,6 @@
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver2 -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver3 -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver4 -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver5 -verify %s
//
// expected-no-diagnostics

View File

@ -13,19 +13,19 @@
// RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
// X86: error: unknown target CPU 'not-a-cpu'
// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}}
// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, znver5, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}}
// RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64
// X86_64: error: unknown target CPU 'not-a-cpu'
// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}}
// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, znver5, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}}
// RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86
// TUNE_X86: error: unknown target CPU 'not-a-cpu'
// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}}
// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, znver5, x86-64, geode{{$}}
// RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64
// TUNE_X86_64: error: unknown target CPU 'not-a-cpu'
// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}}
// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, znver5, x86-64, geode{{$}}
// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
// NVPTX: error: unknown target CPU 'not-a-cpu'

View File

@ -3923,6 +3923,148 @@
// CHECK_ZNVER4_M64: #define __znver4 1
// CHECK_ZNVER4_M64: #define __znver4__ 1
// RUN: %clang -march=znver5 -m32 -E -dM %s -o - 2>&1 \
// RUN: -target i386-unknown-linux \
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER5_M32
// CHECK_ZNVER5_M32-NOT: #define __3dNOW_A__ 1
// CHECK_ZNVER5_M32-NOT: #define __3dNOW__ 1
// CHECK_ZNVER5_M32: #define __ADX__ 1
// CHECK_ZNVER5_M32: #define __AES__ 1
// CHECK_ZNVER5_M32: #define __AVX2__ 1
// CHECK_ZNVER5_M32: #define __AVX512BF16__ 1
// CHECK_ZNVER5_M32: #define __AVX512BITALG__ 1
// CHECK_ZNVER5_M32: #define __AVX512BW__ 1
// CHECK_ZNVER5_M32: #define __AVX512CD__ 1
// CHECK_ZNVER5_M32: #define __AVX512DQ__ 1
// CHECK_ZNVER5_M32: #define __AVX512F__ 1
// CHECK_ZNVER5_M32: #define __AVX512IFMA__ 1
// CHECK_ZNVER5_M32: #define __AVX512VBMI2__ 1
// CHECK_ZNVER5_M32: #define __AVX512VBMI__ 1
// CHECK_ZNVER5_M32: #define __AVX512VL__ 1
// CHECK_ZNVER5_M32: #define __AVX512VNNI__ 1
// CHECK_ZNVER5_M32: #define __AVX512VP2INTERSECT__ 1
// CHECK_ZNVER5_M32: #define __AVX512VPOPCNTDQ__ 1
// CHECK_ZNVER5_M32: #define __AVXVNNI__ 1
// CHECK_ZNVER5_M32: #define __AVX__ 1
// CHECK_ZNVER5_M32: #define __BMI2__ 1
// CHECK_ZNVER5_M32: #define __BMI__ 1
// CHECK_ZNVER5_M32: #define __CLFLUSHOPT__ 1
// CHECK_ZNVER5_M32: #define __CLWB__ 1
// CHECK_ZNVER5_M32: #define __CLZERO__ 1
// CHECK_ZNVER5_M32: #define __F16C__ 1
// CHECK_ZNVER5_M32-NOT: #define __FMA4__ 1
// CHECK_ZNVER5_M32: #define __FMA__ 1
// CHECK_ZNVER5_M32: #define __FSGSBASE__ 1
// CHECK_ZNVER5_M32: #define __GFNI__ 1
// CHECK_ZNVER5_M32: #define __LZCNT__ 1
// CHECK_ZNVER5_M32: #define __MMX__ 1
// CHECK_ZNVER5_M32: #define __MOVDIR64B__ 1
// CHECK_ZNVER5_M32: #define __MOVDIRI__ 1
// CHECK_ZNVER5_M32: #define __PCLMUL__ 1
// CHECK_ZNVER5_M32: #define __PKU__ 1
// CHECK_ZNVER5_M32: #define __POPCNT__ 1
// CHECK_ZNVER5_M32: #define __PREFETCHI__ 1
// CHECK_ZNVER5_M32: #define __PRFCHW__ 1
// CHECK_ZNVER5_M32: #define __RDPID__ 1
// CHECK_ZNVER5_M32: #define __RDPRU__ 1
// CHECK_ZNVER5_M32: #define __RDRND__ 1
// CHECK_ZNVER5_M32: #define __RDSEED__ 1
// CHECK_ZNVER5_M32: #define __SHA__ 1
// CHECK_ZNVER5_M32: #define __SSE2_MATH__ 1
// CHECK_ZNVER5_M32: #define __SSE2__ 1
// CHECK_ZNVER5_M32: #define __SSE3__ 1
// CHECK_ZNVER5_M32: #define __SSE4A__ 1
// CHECK_ZNVER5_M32: #define __SSE4_1__ 1
// CHECK_ZNVER5_M32: #define __SSE4_2__ 1
// CHECK_ZNVER5_M32: #define __SSE_MATH__ 1
// CHECK_ZNVER5_M32: #define __SSE__ 1
// CHECK_ZNVER5_M32: #define __SSSE3__ 1
// CHECK_ZNVER5_M32-NOT: #define __TBM__ 1
// CHECK_ZNVER5_M32: #define __WBNOINVD__ 1
// CHECK_ZNVER5_M32-NOT: #define __XOP__ 1
// CHECK_ZNVER5_M32: #define __XSAVEC__ 1
// CHECK_ZNVER5_M32: #define __XSAVEOPT__ 1
// CHECK_ZNVER5_M32: #define __XSAVES__ 1
// CHECK_ZNVER5_M32: #define __XSAVE__ 1
// CHECK_ZNVER5_M32: #define __i386 1
// CHECK_ZNVER5_M32: #define __i386__ 1
// CHECK_ZNVER5_M32: #define __tune_znver5__ 1
// CHECK_ZNVER5_M32: #define __znver5 1
// CHECK_ZNVER5_M32: #define __znver5__ 1
// RUN: %clang -march=znver5 -m64 -E -dM %s -o - 2>&1 \
// RUN: -target i386-unknown-linux \
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER5_M64
// CHECK_ZNVER5_M64-NOT: #define __3dNOW_A__ 1
// CHECK_ZNVER5_M64-NOT: #define __3dNOW__ 1
// CHECK_ZNVER5_M64: #define __ADX__ 1
// CHECK_ZNVER5_M64: #define __AES__ 1
// CHECK_ZNVER5_M64: #define __AVX2__ 1
// CHECK_ZNVER5_M64: #define __AVX512BF16__ 1
// CHECK_ZNVER5_M64: #define __AVX512BITALG__ 1
// CHECK_ZNVER5_M64: #define __AVX512BW__ 1
// CHECK_ZNVER5_M64: #define __AVX512CD__ 1
// CHECK_ZNVER5_M64: #define __AVX512DQ__ 1
// CHECK_ZNVER5_M64: #define __AVX512F__ 1
// CHECK_ZNVER5_M64: #define __AVX512IFMA__ 1
// CHECK_ZNVER5_M64: #define __AVX512VBMI2__ 1
// CHECK_ZNVER5_M64: #define __AVX512VBMI__ 1
// CHECK_ZNVER5_M64: #define __AVX512VL__ 1
// CHECK_ZNVER5_M64: #define __AVX512VNNI__ 1
// CHECK_ZNVER5_M64: #define __AVX512VP2INTERSECT__ 1
// CHECK_ZNVER5_M64: #define __AVX512VPOPCNTDQ__ 1
// CHECK_ZNVER5_M64: #define __AVXVNNI__ 1
// CHECK_ZNVER5_M64: #define __AVX__ 1
// CHECK_ZNVER5_M64: #define __BMI2__ 1
// CHECK_ZNVER5_M64: #define __BMI__ 1
// CHECK_ZNVER5_M64: #define __CLFLUSHOPT__ 1
// CHECK_ZNVER5_M64: #define __CLWB__ 1
// CHECK_ZNVER5_M64: #define __CLZERO__ 1
// CHECK_ZNVER5_M64: #define __F16C__ 1
// CHECK_ZNVER5_M64-NOT: #define __FMA4__ 1
// CHECK_ZNVER5_M64: #define __FMA__ 1
// CHECK_ZNVER5_M64: #define __FSGSBASE__ 1
// CHECK_ZNVER5_M64: #define __GFNI__ 1
// CHECK_ZNVER5_M64: #define __LZCNT__ 1
// CHECK_ZNVER5_M64: #define __MMX__ 1
// CHECK_ZNVER5_M64: #define __MOVDIR64B__ 1
// CHECK_ZNVER5_M64: #define __MOVDIRI__ 1
// CHECK_ZNVER5_M64: #define __PCLMUL__ 1
// CHECK_ZNVER5_M64: #define __PKU__ 1
// CHECK_ZNVER5_M64: #define __POPCNT__ 1
// CHECK_ZNVER5_M64: #define __PREFETCHI__ 1
// CHECK_ZNVER5_M64: #define __PRFCHW__ 1
// CHECK_ZNVER5_M64: #define __RDPID__ 1
// CHECK_ZNVER5_M64: #define __RDPRU__ 1
// CHECK_ZNVER5_M64: #define __RDRND__ 1
// CHECK_ZNVER5_M64: #define __RDSEED__ 1
// CHECK_ZNVER5_M64: #define __SHA__ 1
// CHECK_ZNVER5_M64: #define __SSE2_MATH__ 1
// CHECK_ZNVER5_M64: #define __SSE2__ 1
// CHECK_ZNVER5_M64: #define __SSE3__ 1
// CHECK_ZNVER5_M64: #define __SSE4A__ 1
// CHECK_ZNVER5_M64: #define __SSE4_1__ 1
// CHECK_ZNVER5_M64: #define __SSE4_2__ 1
// CHECK_ZNVER5_M64: #define __SSE_MATH__ 1
// CHECK_ZNVER5_M64: #define __SSE__ 1
// CHECK_ZNVER5_M64: #define __SSSE3__ 1
// CHECK_ZNVER5_M64-NOT: #define __TBM__ 1
// CHECK_ZNVER5_M64: #define __VAES__ 1
// CHECK_ZNVER5_M64: #define __VPCLMULQDQ__ 1
// CHECK_ZNVER5_M64: #define __WBNOINVD__ 1
// CHECK_ZNVER5_M64-NOT: #define __XOP__ 1
// CHECK_ZNVER5_M64: #define __XSAVEC__ 1
// CHECK_ZNVER5_M64: #define __XSAVEOPT__ 1
// CHECK_ZNVER5_M64: #define __XSAVES__ 1
// CHECK_ZNVER5_M64: #define __XSAVE__ 1
// CHECK_ZNVER5_M64: #define __amd64 1
// CHECK_ZNVER5_M64: #define __amd64__ 1
// CHECK_ZNVER5_M64: #define __tune_znver5__ 1
// CHECK_ZNVER5_M64: #define __x86_64 1
// CHECK_ZNVER5_M64: #define __x86_64__ 1
// CHECK_ZNVER5_M64: #define __znver5 1
// CHECK_ZNVER5_M64: #define __znver5__ 1
// End X86/GCC/Linux tests ------------------
// Begin PPC/GCC/Linux tests ----------------

View File

@ -59,6 +59,7 @@ enum ProcessorTypes {
INTEL_SIERRAFOREST,
INTEL_GRANDRIDGE,
INTEL_CLEARWATERFOREST,
AMDFAM1AH,
CPU_TYPE_MAX
};
@ -97,6 +98,7 @@ enum ProcessorSubtypes {
INTEL_COREI7_ARROWLAKE,
INTEL_COREI7_ARROWLAKE_S,
INTEL_COREI7_PANTHERLAKE,
AMDFAM1AH_ZNVER5,
CPU_SUBTYPE_MAX
};
@ -803,6 +805,24 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
break; // "znver4"
}
break; // family 19h
case 26: // AMD family 1Ah (26 == 0x1A).
// Name and type are set for every family 1Ah model, recognized or not.
CPU = "znver5";
*Type = AMDFAM1AH;
if (Model <= 0x77) {
// Only the model ranges listed here also receive the znver5 subtype.
// Models 00h-0Fh (Breithorn).
// Models 10h-1Fh (Breithorn-Dense).
// Models 20h-2Fh (Strix 1).
// Models 30h-37h (Strix 2).
// Models 38h-3Fh (Strix 3).
// Models 40h-4Fh (Granite Ridge).
// Models 50h-5Fh (Weisshorn).
// Models 60h-6Fh (Krackan1).
// Models 70h-77h (Sarlak).
CPU = "znver5";
*Subtype = AMDFAM1AH_ZNVER5;
break; // "znver5"
}
// Models above 77h fall through here with *Subtype left unmodified.
break; // family 1Ah
default:
break; // Unknown AMD CPU.
}

View File

@ -49,11 +49,13 @@ X86_CPU_TYPE(ZHAOXIN_FAM7H, "zhaoxin_fam7h")
X86_CPU_TYPE(INTEL_SIERRAFOREST, "sierraforest")
X86_CPU_TYPE(INTEL_GRANDRIDGE, "grandridge")
X86_CPU_TYPE(INTEL_CLEARWATERFOREST, "clearwaterforest")
X86_CPU_TYPE(AMDFAM1AH, "amdfam1ah")
// Alternate names supported by __builtin_cpu_is and target multiversioning.
X86_CPU_TYPE_ALIAS(INTEL_BONNELL, "atom")
X86_CPU_TYPE_ALIAS(AMDFAM10H, "amdfam10")
X86_CPU_TYPE_ALIAS(AMDFAM15H, "amdfam15")
X86_CPU_TYPE_ALIAS(AMDFAM1AH, "amdfam1a")
X86_CPU_TYPE_ALIAS(INTEL_SILVERMONT, "slm")
#undef X86_CPU_TYPE_ALIAS
@ -104,6 +106,7 @@ X86_CPU_SUBTYPE(INTEL_COREI7_GRANITERAPIDS_D,"graniterapids-d")
X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE, "arrowlake")
X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE_S, "arrowlake-s")
X86_CPU_SUBTYPE(INTEL_COREI7_PANTHERLAKE, "pantherlake")
X86_CPU_SUBTYPE(AMDFAM1AH_ZNVER5, "znver5")
// Alternate names supported by __builtin_cpu_is and target multiversioning.
X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake")

View File

@ -147,6 +147,7 @@ enum CPUKind {
CK_x86_64_v3,
CK_x86_64_v4,
CK_Geode,
CK_ZNVER5,
};
/// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if

View File

@ -1543,6 +1543,19 @@ def ProcessorFeatures {
FeatureVPOPCNTDQ];
list<SubtargetFeature> ZN4Features =
!listconcat(ZN3Features, ZN4AdditionalFeatures);
// Zen 5 reuses the Zen 4 tuning list unchanged.
list<SubtargetFeature> ZN5Tuning = ZN4Tuning;
// Features that Zen 5 adds on top of the Zen 4 feature list.
list<SubtargetFeature> ZN5AdditionalFeatures = [FeatureVNNI,
FeatureMOVDIRI,
FeatureMOVDIR64B,
FeatureVP2INTERSECT,
FeaturePREFETCHI,
FeatureAVXVNNI
];
// Complete Zen 5 feature list: ZN4Features followed by the Zen 5 additions.
list<SubtargetFeature> ZN5Features =
!listconcat(ZN4Features, ZN5AdditionalFeatures);
}
//===----------------------------------------------------------------------===//
@ -1892,6 +1905,8 @@ def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
ProcessorFeatures.ZN3Tuning>;
def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
ProcessorFeatures.ZN4Tuning>;
// znver5 reuses Znver4Model; only its feature and tuning lists are
// ZN5-specific.
def : ProcModel<"znver5", Znver4Model, ProcessorFeatures.ZN5Features,
ProcessorFeatures.ZN5Tuning>;
def : Proc<"geode", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;

View File

@ -350,3 +350,4 @@ def ZnVer4PfmCounters : ProcPfmCounters {
let ValidationCounters = DefaultAMDPfmValidationCounters;
}
def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>;
// znver5 is bound to the same counter definitions as znver4
// (ZnVer4PfmCounters).
def : PfmCountersBinding<"znver5", ZnVer4PfmCounters>;

View File

@ -1213,6 +1213,25 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
break; // "znver4"
}
break; // family 19h
case 26: // AMD family 1Ah (26 == 0x1A).
// Name and type are set for every family 1Ah model, recognized or not.
CPU = "znver5";
*Type = X86::AMDFAM1AH;
if (Model <= 0x77) {
// Only the model ranges listed here also receive the znver5 subtype.
// Models 00h-0Fh (Breithorn).
// Models 10h-1Fh (Breithorn-Dense).
// Models 20h-2Fh (Strix 1).
// Models 30h-37h (Strix 2).
// Models 38h-3Fh (Strix 3).
// Models 40h-4Fh (Granite Ridge).
// Models 50h-5Fh (Weisshorn).
// Models 60h-6Fh (Krackan1).
// Models 70h-77h (Sarlak).
CPU = "znver5";
*Subtype = X86::AMDFAM1AH_ZNVER5;
break; // "znver5"
}
// Models above 77h fall through here with *Subtype left unmodified.
break; // family 1Ah
default:
break; // Unknown AMD CPU.
}

View File

@ -238,6 +238,10 @@ static constexpr FeatureBitset FeaturesZNVER4 =
FeatureAVX512BITALG | FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 |
FeatureGFNI | FeatureSHSTK;
// Zen 5 feature set: everything in FeaturesZNVER4 plus AVX-VNNI, MOVDIRI,
// MOVDIR64B, AVX512-VP2INTERSECT and PREFETCHI. Each addition is listed
// exactly once; OR-ing a feature in twice has no effect on the bitset.
static constexpr FeatureBitset FeaturesZNVER5 =
    FeaturesZNVER4 | FeatureAVXVNNI | FeatureMOVDIRI | FeatureMOVDIR64B |
    FeatureAVX512VP2INTERSECT | FeaturePREFETCHI;
// D151696 transplanted Mangling and OnlyForCPUDispatchSpecific from
// X86TargetParser.def to here. They are assigned in the following ways:
// 1. Copy the mangling from the original CPU_SPECIFIC MACROs. If no, assign
@ -417,6 +421,7 @@ constexpr ProcInfo Processors[] = {
{ {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2, '\0', false },
{ {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3, '\0', false },
{ {"znver4"}, CK_ZNVER4, FEATURE_AVX512VBMI2, FeaturesZNVER4, '\0', false },
{ {"znver5"}, CK_ZNVER5, FEATURE_AVX512VP2INTERSECT, FeaturesZNVER5, '\0', false },
// Generic 64-bit processor.
{ {"x86-64"}, CK_x86_64, FEATURE_SSE2 , FeaturesX86_64, '\0', false },
{ {"x86-64-v2"}, CK_x86_64_v2, FEATURE_SSE4_2 , FeaturesX86_64_V2, '\0', false },

View File

@ -23,6 +23,7 @@
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; Additional tests for 64-bit divide bypass

View File

@ -13,6 +13,7 @@
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefixes=X64,X64-FAST
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=X64,X64-FAST
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=X64,X64-FAST
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s --check-prefixes=X64,X64-FAST
define i1 @cmp16_reg_eq_reg(i16 %a0, i16 %a1) {
; X86-GENERIC-LABEL: cmp16_reg_eq_reg:

View File

@ -29,6 +29,7 @@
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver4 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver5 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
define void @foo() {
ret void

View File

@ -6,6 +6,7 @@
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 -fast-isel | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 -fast-isel | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 -fast-isel | FileCheck %s --check-prefix=X64
define void @rdpru_asm() {
; X86-LABEL: rdpru_asm:

View File

@ -3,6 +3,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-server | FileCheck %s --check-prefixes=CHECK,CHECK-ICX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-V4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
define <4 x i32> @shuf_rot_v4i32_1032(<4 x i32> %x) {

View File

@ -50,6 +50,7 @@
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver2 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver4 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver5 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512
; Other chips with slow unaligned memory accesses

View File

@ -6,6 +6,7 @@
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver1 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X86-64
define float @f32_no_daz(float %f) #0 {

View File

@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-V4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
define <8 x double> @transform_VPERMILPSZrr(<8 x double> %a) nounwind {
; CHECK-LABEL: transform_VPERMILPSZrr:

View File

@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-V4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
define <16 x float> @transform_VPERMILPSZrr(<16 x float> %a) nounwind {
; CHECK-LABEL: transform_VPERMILPSZrr:

View File

@ -5,6 +5,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-V4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
define <16 x float> @transform_VUNPCKLPDZrr(<16 x float> %a, <16 x float> %b) nounwind {

View File

@ -5,6 +5,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-V4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
define <16 x float> @transform_VUNPCKLPSZrr(<16 x float> %a, <16 x float> %b) nounwind {
; CHECK-LABEL: transform_VUNPCKLPSZrr:

View File

@ -8,6 +8,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver2 | FileCheck %s --check-prefixes=FAST
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver3 | FileCheck %s --check-prefixes=FAST
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=FAST
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=FAST
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell | FileCheck %s --check-prefixes=FAST
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=skx | FileCheck %s --check-prefixes=FAST

View File

@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+fast-dpwssd | FileCheck %s
define <16 x i32> @vpdpwssd_test(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2) {

View File

@ -16,6 +16,7 @@
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver2 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s
; Verify that for the X86_64 processors that are known to have poor latency
; double precision shift instructions we do not generate 'shld' or 'shrd'

View File

@ -19,6 +19,8 @@
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver3 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver4 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver4 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver5 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver5 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=westmere %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s

View File

@ -1,6 +1,7 @@
; RUN: opt -passes=debugify,loop-unroll -mcpu=znver3 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s
; RUN: opt -passes=debugify,loop-unroll -mcpu=znver3 -pass-remarks=TTI -pass-remarks-analysis=TTI < %s -S 2>&1 | FileCheck --check-prefixes=ALL,TTI %s
; RUN: opt -passes=debugify,loop-unroll -mcpu=znver4 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s
; RUN: opt -passes=debugify,loop-unroll -mcpu=znver5 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s
; RUN: opt -passes=debugify,loop-unroll -mcpu=znver3 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 --try-experimental-debuginfo-iterators | FileCheck --check-prefixes=ALL,UNROLL %s

View File

@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver4 -S < %s | FileCheck %s
; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver5 -S < %s | FileCheck %s
define internal i32 @testfunc() {
; CHECK-LABEL: define internal i32 @testfunc