mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-27 05:56:08 +00:00

This is an alternative to D157485 and a prerequisite feature for supporting AVX10. AVX10 Architecture Specification: https://cdrdv2.intel.com/v1/dl/getContent/784267 AVX10 Technical Paper: https://cdrdv2.intel.com/v1/dl/getContent/784343 RFC: https://discourse.llvm.org/t/rfc-design-for-avx10-feature-support/72661 Based on feedback from the LLVM and GCC communities, we have agreed to start by supporting `-m[no-]evex512` on the existing AVX512 features. The option `-mno-evex512` can be used with `-mavx512xxx` to build binaries that can run on both legacy AVX512 targets and AVX10-256. There is still debate about what the expected behavior should be when this option and `-mavx512xxx` are used together with `-mavx10.1-256`. We decided to defer support for `-mavx10.1` until we reach consensus. Alternatively, we may start by supporting AVX10.2 and not provide any AVX10.1 options. Reviewed By: RKSimon, skan Differential Revision: https://reviews.llvm.org/D159250
443 lines
16 KiB
C++
443 lines
16 KiB
C++
//===-- X86Subtarget.h - Define Subtarget for the X86 ----------*- C++ -*--===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file declares the X86 specific subclass of TargetSubtargetInfo.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_LIB_TARGET_X86_X86SUBTARGET_H
|
|
#define LLVM_LIB_TARGET_X86_X86SUBTARGET_H
|
|
|
|
#include "X86FrameLowering.h"
|
|
#include "X86ISelLowering.h"
|
|
#include "X86InstrInfo.h"
|
|
#include "X86SelectionDAGInfo.h"
|
|
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
|
|
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
|
#include "llvm/IR/CallingConv.h"
|
|
#include "llvm/TargetParser/Triple.h"
|
|
#include <climits>
|
|
#include <memory>
|
|
|
|
#define GET_SUBTARGETINFO_HEADER
|
|
#include "X86GenSubtargetInfo.inc"
|
|
|
|
namespace llvm {
|
|
|
|
class CallLowering;
|
|
class GlobalValue;
|
|
class InstructionSelector;
|
|
class LegalizerInfo;
|
|
class RegisterBankInfo;
|
|
class StringRef;
|
|
class TargetMachine;
|
|
|
|
/// The X86 backend supports a number of different styles of PIC.
|
|
///
|
|
namespace PICStyles {
|
|
|
|
/// Enumerates the PIC addressing styles a subtarget can be configured with.
enum class Style {
|
|
StubPIC, // Used on i386-darwin in pic mode.
|
|
GOT, // Used on 32-bit ELF when in pic mode.
|
|
RIPRel, // Used on X86-64 when in pic mode.
|
|
None // Set when not in pic mode.
|
|
};
|
|
|
|
} // end namespace PICStyles
|
|
|
|
class X86Subtarget final : public X86GenSubtargetInfo {
|
|
enum X86SSEEnum {
|
|
NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
|
|
};
|
|
|
|
enum X863DNowEnum {
|
|
NoThreeDNow, MMX, ThreeDNow, ThreeDNowA
|
|
};
|
|
|
|
/// Which PIC style to use
|
|
PICStyles::Style PICStyle;
|
|
|
|
const TargetMachine &TM;
|
|
|
|
/// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512, or none supported.
|
|
X86SSEEnum X86SSELevel = NoSSE;
|
|
|
|
/// MMX, 3DNow, 3DNow Athlon, or none supported.
|
|
X863DNowEnum X863DNowLevel = NoThreeDNow;
|
|
|
|
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
|
|
bool ATTRIBUTE = DEFAULT;
|
|
#include "X86GenSubtargetInfo.inc"
|
|
/// The minimum alignment known to hold of the stack frame on
|
|
/// entry to the function and which must be maintained by every function.
|
|
Align stackAlignment = Align(4);
|
|
|
|
Align TileConfigAlignment = Align(4);
|
|
|
|
/// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
|
|
///
|
|
// FIXME: this is a known good value for Yonah. How about others?
|
|
unsigned MaxInlineSizeThreshold = 128;
|
|
|
|
/// What processor and OS we're targeting.
|
|
Triple TargetTriple;
|
|
|
|
/// GlobalISel related APIs.
|
|
std::unique_ptr<CallLowering> CallLoweringInfo;
|
|
std::unique_ptr<LegalizerInfo> Legalizer;
|
|
std::unique_ptr<RegisterBankInfo> RegBankInfo;
|
|
std::unique_ptr<InstructionSelector> InstSelector;
|
|
|
|
/// Override the stack alignment.
|
|
MaybeAlign StackAlignOverride;
|
|
|
|
/// Preferred vector width from function attribute.
|
|
unsigned PreferVectorWidthOverride;
|
|
|
|
/// Resolved preferred vector width from function attribute and subtarget
|
|
/// features.
|
|
unsigned PreferVectorWidth = UINT32_MAX;
|
|
|
|
/// Required vector width from function attribute.
|
|
unsigned RequiredVectorWidth;
|
|
|
|
X86SelectionDAGInfo TSInfo;
|
|
// Ordering here is important. X86InstrInfo initializes X86RegisterInfo which
|
|
// X86TargetLowering needs.
|
|
X86InstrInfo InstrInfo;
|
|
X86TargetLowering TLInfo;
|
|
X86FrameLowering FrameLowering;
|
|
|
|
public:
|
|
/// This constructor initializes the data members to match that
|
|
/// of the specified triple.
|
|
///
|
|
X86Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS,
|
|
const X86TargetMachine &TM, MaybeAlign StackAlignOverride,
|
|
unsigned PreferVectorWidthOverride,
|
|
unsigned RequiredVectorWidth);
|
|
|
|
const X86TargetLowering *getTargetLowering() const override {
|
|
return &TLInfo;
|
|
}
|
|
|
|
const X86InstrInfo *getInstrInfo() const override { return &InstrInfo; }
|
|
|
|
const X86FrameLowering *getFrameLowering() const override {
|
|
return &FrameLowering;
|
|
}
|
|
|
|
const X86SelectionDAGInfo *getSelectionDAGInfo() const override {
|
|
return &TSInfo;
|
|
}
|
|
|
|
const X86RegisterInfo *getRegisterInfo() const override {
|
|
return &getInstrInfo()->getRegisterInfo();
|
|
}
|
|
|
|
unsigned getTileConfigSize() const { return 64; }
|
|
Align getTileConfigAlignment() const { return TileConfigAlignment; }
|
|
|
|
/// Returns the minimum alignment known to hold of the
|
|
/// stack frame on entry to the function and which must be maintained by every
|
|
/// function for this subtarget.
|
|
Align getStackAlignment() const { return stackAlignment; }
|
|
|
|
/// Returns the maximum memset / memcpy size
|
|
/// that still makes it profitable to inline the call.
|
|
unsigned getMaxInlineSizeThreshold() const { return MaxInlineSizeThreshold; }
|
|
|
|
/// ParseSubtargetFeatures - Parses features string setting specified
|
|
/// subtarget options. Definition of function is auto generated by tblgen.
|
|
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
|
|
|
|
/// Methods used by Global ISel
|
|
const CallLowering *getCallLowering() const override;
|
|
InstructionSelector *getInstructionSelector() const override;
|
|
const LegalizerInfo *getLegalizerInfo() const override;
|
|
const RegisterBankInfo *getRegBankInfo() const override;
|
|
|
|
private:
|
|
/// Initialize the full set of dependencies so we can use an initializer
|
|
/// list for X86Subtarget.
|
|
X86Subtarget &initializeSubtargetDependencies(StringRef CPU,
|
|
StringRef TuneCPU,
|
|
StringRef FS);
|
|
void initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
|
|
|
|
public:
|
|
|
|
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
|
|
bool GETTER() const { return ATTRIBUTE; }
|
|
#include "X86GenSubtargetInfo.inc"
|
|
|
|
/// Is this x86_64 with the ILP32 programming model (x32 ABI)?
|
|
bool isTarget64BitILP32() const {
|
|
return Is64Bit && (TargetTriple.isX32() || TargetTriple.isOSNaCl());
|
|
}
|
|
|
|
/// Is this x86_64 with the LP64 programming model (standard AMD64, no x32)?
|
|
bool isTarget64BitLP64() const {
|
|
return Is64Bit && (!TargetTriple.isX32() && !TargetTriple.isOSNaCl());
|
|
}
|
|
|
|
PICStyles::Style getPICStyle() const { return PICStyle; }
|
|
void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
|
|
|
|
bool canUseCMPXCHG8B() const { return hasCX8(); }
|
|
bool canUseCMPXCHG16B() const {
|
|
// CX16 is just the CPUID bit, instruction requires 64-bit mode too.
|
|
return hasCX16() && is64Bit();
|
|
}
|
|
// SSE codegen depends on cmovs, and all SSE1+ processors support them.
|
|
// All 64-bit processors support cmov.
|
|
bool canUseCMOV() const { return hasCMOV() || hasSSE1() || is64Bit(); }
|
|
bool hasSSE1() const { return X86SSELevel >= SSE1; }
|
|
bool hasSSE2() const { return X86SSELevel >= SSE2; }
|
|
bool hasSSE3() const { return X86SSELevel >= SSE3; }
|
|
bool hasSSSE3() const { return X86SSELevel >= SSSE3; }
|
|
bool hasSSE41() const { return X86SSELevel >= SSE41; }
|
|
bool hasSSE42() const { return X86SSELevel >= SSE42; }
|
|
bool hasAVX() const { return X86SSELevel >= AVX; }
|
|
bool hasAVX2() const { return X86SSELevel >= AVX2; }
|
|
bool hasAVX512() const { return X86SSELevel >= AVX512; }
|
|
bool hasInt256() const { return hasAVX2(); }
|
|
bool hasMMX() const { return X863DNowLevel >= MMX; }
|
|
bool hasThreeDNow() const { return X863DNowLevel >= ThreeDNow; }
|
|
bool hasThreeDNowA() const { return X863DNowLevel >= ThreeDNowA; }
|
|
bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
|
|
bool hasPrefetchW() const {
|
|
// The PREFETCHW instruction was added with 3DNow but later CPUs gave it
|
|
// its own CPUID bit as part of deprecating 3DNow. Intel eventually added
|
|
// it and KNL has another that prefetches to L2 cache. We assume the
|
|
// L1 version exists if the L2 version does.
|
|
return hasThreeDNow() || hasPRFCHW() || hasPREFETCHWT1();
|
|
}
|
|
bool hasSSEPrefetch() const {
|
|
// We implicitly enable these when we have a write prefix supporting cache
|
|
// level OR if we have prfchw, but don't already have a read prefetch from
|
|
// 3dnow.
|
|
return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1() ||
|
|
hasPREFETCHI();
|
|
}
|
|
bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }
|
|
// These are generic getters that OR together all of the thunk types
|
|
// supported by the subtarget. Therefore useIndirectThunk*() will return true
|
|
// if any respective thunk feature is enabled.
|
|
bool useIndirectThunkCalls() const {
|
|
return useRetpolineIndirectCalls() || useLVIControlFlowIntegrity();
|
|
}
|
|
bool useIndirectThunkBranches() const {
|
|
return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
|
|
}
|
|
|
|
unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
|
|
unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
|
|
|
|
// Helper functions to determine when we should allow widening to 512-bit
|
|
// during codegen.
|
|
// TODO: Currently we're always allowing widening on CPUs without VLX,
|
|
// because for many cases we don't have a better option.
|
|
bool canExtendTo512DQ() const {
|
|
return hasAVX512() && (!hasVLX() || getPreferVectorWidth() >= 512);
|
|
}
|
|
bool canExtendTo512BW() const {
|
|
return hasBWI() && canExtendTo512DQ();
|
|
}
|
|
|
|
bool hasNoDomainDelay() const { return NoDomainDelay; }
|
|
bool hasNoDomainDelayMov() const {
|
|
return hasNoDomainDelay() || NoDomainDelayMov;
|
|
}
|
|
bool hasNoDomainDelayBlend() const {
|
|
return hasNoDomainDelay() || NoDomainDelayBlend;
|
|
}
|
|
bool hasNoDomainDelayShuffle() const {
|
|
return hasNoDomainDelay() || NoDomainDelayShuffle;
|
|
}
|
|
|
|
// If there are no 512-bit vectors and we prefer not to use 512-bit registers,
|
|
// disable them in the legalizer.
|
|
bool useAVX512Regs() const {
|
|
return hasAVX512() && hasEVEX512() &&
|
|
(canExtendTo512DQ() || RequiredVectorWidth > 256);
|
|
}
|
|
|
|
bool useLight256BitInstructions() const {
|
|
return getPreferVectorWidth() >= 256 || AllowLight256Bit;
|
|
}
|
|
|
|
bool useBWIRegs() const {
|
|
return hasBWI() && useAVX512Regs();
|
|
}
|
|
|
|
bool isXRaySupported() const override { return is64Bit(); }
|
|
|
|
/// Use clflush if we have SSE2 or we're on x86-64 (even if we asked for
|
|
/// no-sse2). There isn't any reason to disable it if the target processor
|
|
/// supports it.
|
|
bool hasCLFLUSH() const { return hasSSE2() || is64Bit(); }
|
|
|
|
/// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
|
|
/// no-sse2). There isn't any reason to disable it if the target processor
|
|
/// supports it.
|
|
bool hasMFence() const { return hasSSE2() || is64Bit(); }
|
|
|
|
const Triple &getTargetTriple() const { return TargetTriple; }
|
|
|
|
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
|
|
bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
|
|
bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
|
|
bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
|
|
bool isTargetPS() const { return TargetTriple.isPS(); }
|
|
|
|
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
|
|
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
|
|
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
|
|
|
|
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
|
|
bool isTargetKFreeBSD() const { return TargetTriple.isOSKFreeBSD(); }
|
|
bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); }
|
|
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
|
|
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
|
|
bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); }
|
|
bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); }
|
|
bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); }
|
|
bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
|
|
|
|
bool isTargetWindowsMSVC() const {
|
|
return TargetTriple.isWindowsMSVCEnvironment();
|
|
}
|
|
|
|
bool isTargetWindowsCoreCLR() const {
|
|
return TargetTriple.isWindowsCoreCLREnvironment();
|
|
}
|
|
|
|
bool isTargetWindowsCygwin() const {
|
|
return TargetTriple.isWindowsCygwinEnvironment();
|
|
}
|
|
|
|
bool isTargetWindowsGNU() const {
|
|
return TargetTriple.isWindowsGNUEnvironment();
|
|
}
|
|
|
|
bool isTargetWindowsItanium() const {
|
|
return TargetTriple.isWindowsItaniumEnvironment();
|
|
}
|
|
|
|
bool isTargetCygMing() const { return TargetTriple.isOSCygMing(); }
|
|
|
|
bool isOSWindows() const { return TargetTriple.isOSWindows(); }
|
|
|
|
bool isTargetWin64() const { return Is64Bit && isOSWindows(); }
|
|
|
|
bool isTargetWin32() const { return !Is64Bit && isOSWindows(); }
|
|
|
|
bool isPICStyleGOT() const { return PICStyle == PICStyles::Style::GOT; }
|
|
bool isPICStyleRIPRel() const { return PICStyle == PICStyles::Style::RIPRel; }
|
|
|
|
bool isPICStyleStubPIC() const {
|
|
return PICStyle == PICStyles::Style::StubPIC;
|
|
}
|
|
|
|
bool isPositionIndependent() const;
|
|
|
|
/// Returns true if calls using convention \p CC follow the Win64 calling
/// convention on this subtarget.
bool isCallingConvWin64(CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::Win64:
    // Explicitly selects the Win64 convention, even on non-Windows targets.
    return true;

  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Tail:
  case CallingConv::Swift:
  case CallingConv::SwiftTail:
  case CallingConv::X86_FastCall:
  case CallingConv::X86_StdCall:
  case CallingConv::X86_ThisCall:
  case CallingConv::X86_VectorCall:
  case CallingConv::Intel_OCL_BI:
    // On Win64 all of these simply use the default convention, so the answer
    // depends only on whether we are targeting 64-bit Windows.
    return isTargetWin64();

  case CallingConv::X86_64_SysV: // Selects SysV, even on Windows targets.
  default:                       // Unknown convention: not Win64.
    return false;
  }
}
|
|
|
|
/// Classify a global variable reference for the current subtarget according
|
|
/// to how we should reference it in a non-pcrel context.
|
|
unsigned char classifyLocalReference(const GlobalValue *GV) const;
|
|
|
|
unsigned char classifyGlobalReference(const GlobalValue *GV,
|
|
const Module &M) const;
|
|
unsigned char classifyGlobalReference(const GlobalValue *GV) const;
|
|
|
|
/// Classify a global function reference for the current subtarget.
|
|
unsigned char classifyGlobalFunctionReference(const GlobalValue *GV,
|
|
const Module &M) const;
|
|
unsigned char
|
|
classifyGlobalFunctionReference(const GlobalValue *GV) const override;
|
|
|
|
/// Classify a blockaddress reference for the current subtarget according to
|
|
/// how we should reference it in a non-pcrel context.
|
|
unsigned char classifyBlockAddressReference() const;
|
|
|
|
/// Return true if the subtarget allows calls to immediate address.
|
|
bool isLegalToCallImmediateAddr() const;
|
|
|
|
/// Return whether FrameLowering should always set the "extended frame
|
|
/// present" bit in FP, or set it based on a symbol in the runtime.
|
|
bool swiftAsyncContextIsDynamicallySet() const {
|
|
// Older OS versions (particularly system unwinders) are confused by the
|
|
// Swift extended frame, so when building code that might be run on them we
|
|
// must dynamically query the concurrency library to determine whether
|
|
// extended frames should be flagged as present.
|
|
const Triple &TT = getTargetTriple();
|
|
|
|
unsigned Major = TT.getOSVersion().getMajor();
|
|
switch(TT.getOS()) {
|
|
default:
|
|
return false;
|
|
case Triple::IOS:
|
|
case Triple::TvOS:
|
|
return Major < 15;
|
|
case Triple::WatchOS:
|
|
return Major < 8;
|
|
case Triple::MacOSX:
|
|
case Triple::Darwin:
|
|
return Major < 12;
|
|
}
|
|
}
|
|
|
|
/// If we are using indirect thunks, we need to expand indirectbr to avoid it
|
|
/// lowering to an actual indirect jump.
|
|
bool enableIndirectBrExpand() const override {
|
|
return useIndirectThunkBranches();
|
|
}
|
|
|
|
/// Enable the MachineScheduler pass for all X86 subtargets.
|
|
bool enableMachineScheduler() const override { return true; }
|
|
|
|
bool enableEarlyIfConversion() const override;
|
|
|
|
void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
|
|
&Mutations) const override;
|
|
|
|
AntiDepBreakMode getAntiDepBreakMode() const override {
|
|
return TargetSubtargetInfo::ANTIDEP_CRITICAL;
|
|
}
|
|
};
|
|
|
|
} // end namespace llvm
|
|
|
|
#endif // LLVM_LIB_TARGET_X86_X86SUBTARGET_H
|