222 lines
11 KiB
Diff
222 lines
11 KiB
Diff
|
|
From f91b5ed322bbb6d793fca7005ac350d466fff232 Mon Sep 17 00:00:00 2001
|
||
|
|
From: "Wang, Lei" <lei4.wang@intel.com>
|
||
|
|
Date: Thu, 11 Aug 2022 22:57:51 -0700
|
||
|
|
Subject: [PATCH] i386: Add new CPU model SapphireRapids
|
||
|
|
|
||
|
|
The new CPU model mostly inherits features from Icelake-Server, while
|
||
|
|
adding new features:
|
||
|
|
- AMX (Advanced Matrix Extensions)
|
||
|
|
- Bus Lock Debug Exception
|
||
|
|
and new instructions:
|
||
|
|
- AVX VNNI (Vector Neural Network Instruction):
|
||
|
|
- VPDPBUSD: Multiply and Add Unsigned and Signed Bytes
|
||
|
|
- VPDPBUSDS: Multiply and Add Unsigned and Signed Bytes with Saturation
|
||
|
|
- VPDPWSSD: Multiply and Add Signed Word Integers
|
||
|
|
- VPDPWSSDS: Multiply and Add Signed Word Integers with Saturation
|
||
|
|
- FP16: Replicates existing AVX512 computational SP (FP32) instructions
|
||
|
|
using FP16 instead of FP32 for ~2X performance gain
|
||
|
|
- SERIALIZE: Provide software with a simple way to force the processor to
|
||
|
|
complete all modifications; it is faster, allowed at all privilege levels,
|
||
|
|
and does not cause an unconditional VM exit
|
||
|
|
- TSX Suspend Load Address Tracking: Allows programmers to choose which
|
||
|
|
memory accesses do not need to be tracked in the TSX read set
|
||
|
|
- AVX512_BF16: Vector Neural Network Instructions supporting BFLOAT16
|
||
|
|
inputs and conversion instructions from IEEE single precision
|
||
|
|
- fast zero-length REP MOVSB (not yet supported by KVM)
|
||
|
|
- fast short REP STOSB (not yet supported by KVM)
|
||
|
|
- fast short REP CMPSB, REP SCASB (not yet supported by KVM)
|
||
|
|
|
||
|
|
Features that may be added in future versions:
|
||
|
|
- CET (virtualization support hasn't been merged)
|
||
|
|
|
||
|
|
Signed-off-by: Wang, Lei <lei4.wang@intel.com>
|
||
|
|
Reviewed-by: Robert Hoo <robert.hu@linux.intel.com>
|
||
|
|
Message-Id: <20220812055751.14553-1-lei4.wang@intel.com>
|
||
|
|
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
|
||
|
|
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
||
|
|
---
|
||
|
|
target/i386/cpu.c | 133 +++++++++++++++++++++++++++++++++++++++++++++-
|
||
|
|
target/i386/cpu.h | 4 ++
|
||
|
|
2 files changed, 135 insertions(+), 2 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
|
||
|
|
index 7122af303d..61cd7abcaa 100644
|
||
|
|
--- a/target/i386/cpu.c
|
||
|
|
+++ b/target/i386/cpu.c
|
||
|
|
@@ -3529,6 +3529,135 @@ static const X86CPUDefinition builtin_x86_defs[] = {
|
||
|
|
{ /* end of list */ }
|
||
|
|
}
|
||
|
|
},
|
||
|
|
+ {
|
||
|
|
+ .name = "SapphireRapids",
|
||
|
|
+ .level = 0x20,
|
||
|
|
+ .vendor = CPUID_VENDOR_INTEL,
|
||
|
|
+ .family = 6,
|
||
|
|
+ .model = 143,
|
||
|
|
+ .stepping = 4,
|
||
|
|
+ /*
|
||
|
|
+ * please keep the ascending order so that we can have a clear view of
|
||
|
|
+ * bit position of each feature.
|
||
|
|
+ */
|
||
|
|
+ .features[FEAT_1_EDX] =
|
||
|
|
+ CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC |
|
||
|
|
+ CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC |
|
||
|
|
+ CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV |
|
||
|
|
+ CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR |
|
||
|
|
+ CPUID_SSE | CPUID_SSE2,
|
||
|
|
+ .features[FEAT_1_ECX] =
|
||
|
|
+ CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 |
|
||
|
|
+ CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 |
|
||
|
|
+ CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE |
|
||
|
|
+ CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES |
|
||
|
|
+ CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND,
|
||
|
|
+ .features[FEAT_8000_0001_EDX] =
|
||
|
|
+ CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB |
|
||
|
|
+ CPUID_EXT2_RDTSCP | CPUID_EXT2_LM,
|
||
|
|
+ .features[FEAT_8000_0001_ECX] =
|
||
|
|
+ CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH,
|
||
|
|
+ .features[FEAT_8000_0008_EBX] =
|
||
|
|
+ CPUID_8000_0008_EBX_WBNOINVD,
|
||
|
|
+ .features[FEAT_7_0_EBX] =
|
||
|
|
+ CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_HLE |
|
||
|
|
+ CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 |
|
||
|
|
+ CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM |
|
||
|
|
+ CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ |
|
||
|
|
+ CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP |
|
||
|
|
+ CPUID_7_0_EBX_AVX512IFMA | CPUID_7_0_EBX_CLFLUSHOPT |
|
||
|
|
+ CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI |
|
||
|
|
+ CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL,
|
||
|
|
+ .features[FEAT_7_0_ECX] =
|
||
|
|
+ CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU |
|
||
|
|
+ CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI |
|
||
|
|
+ CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ |
|
||
|
|
+ CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG |
|
||
|
|
+ CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 |
|
||
|
|
+ CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT,
|
||
|
|
+ .features[FEAT_7_0_EDX] =
|
||
|
|
+ CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE |
|
||
|
|
+ CPUID_7_0_EDX_TSX_LDTRK | CPUID_7_0_EDX_AMX_BF16 |
|
||
|
|
+ CPUID_7_0_EDX_AVX512_FP16 | CPUID_7_0_EDX_AMX_TILE |
|
||
|
|
+ CPUID_7_0_EDX_AMX_INT8 | CPUID_7_0_EDX_SPEC_CTRL |
|
||
|
|
+ CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL_SSBD,
|
||
|
|
+ .features[FEAT_ARCH_CAPABILITIES] =
|
||
|
|
+ MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL |
|
||
|
|
+ MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO |
|
||
|
|
+ MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO,
|
||
|
|
+ .features[FEAT_XSAVE] =
|
||
|
|
+ CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC |
|
||
|
|
+ CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES | CPUID_D_1_EAX_XFD,
|
||
|
|
+ .features[FEAT_6_EAX] =
|
||
|
|
+ CPUID_6_EAX_ARAT,
|
||
|
|
+ .features[FEAT_7_1_EAX] =
|
||
|
|
+ CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16 |
|
||
|
|
+ CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_FSRC,
|
||
|
|
+ .features[FEAT_VMX_BASIC] =
|
||
|
|
+ MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS,
|
||
|
|
+ .features[FEAT_VMX_ENTRY_CTLS] =
|
||
|
|
+ VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE |
|
||
|
|
+ VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
|
||
|
|
+ VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER,
|
||
|
|
+ .features[FEAT_VMX_EPT_VPID_CAPS] =
|
||
|
|
+ MSR_VMX_EPT_EXECONLY |
|
||
|
|
+ MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_PAGE_WALK_LENGTH_5 |
|
||
|
|
+ MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB |
|
||
|
|
+ MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS |
|
||
|
|
+ MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT |
|
||
|
|
+ MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR |
|
||
|
|
+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT |
|
||
|
|
+ MSR_VMX_EPT_INVVPID_ALL_CONTEXT |
|
||
|
|
+ MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS,
|
||
|
|
+ .features[FEAT_VMX_EXIT_CTLS] =
|
||
|
|
+ VMX_VM_EXIT_SAVE_DEBUG_CONTROLS |
|
||
|
|
+ VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
|
||
|
|
+ VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT |
|
||
|
|
+ VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER |
|
||
|
|
+ VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER,
|
||
|
|
+ .features[FEAT_VMX_MISC] =
|
||
|
|
+ MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT |
|
||
|
|
+ MSR_VMX_MISC_VMWRITE_VMEXIT,
|
||
|
|
+ .features[FEAT_VMX_PINBASED_CTLS] =
|
||
|
|
+ VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING |
|
||
|
|
+ VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER |
|
||
|
|
+ VMX_PIN_BASED_POSTED_INTR,
|
||
|
|
+ .features[FEAT_VMX_PROCBASED_CTLS] =
|
||
|
|
+ VMX_CPU_BASED_VIRTUAL_INTR_PENDING |
|
||
|
|
+ VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING |
|
||
|
|
+ VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING |
|
||
|
|
+ VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING |
|
||
|
|
+ VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING |
|
||
|
|
+ VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING |
|
||
|
|
+ VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING |
|
||
|
|
+ VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING |
|
||
|
|
+ VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG |
|
||
|
|
+ VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING |
|
||
|
|
+ VMX_CPU_BASED_PAUSE_EXITING |
|
||
|
|
+ VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS,
|
||
|
|
+ .features[FEAT_VMX_SECONDARY_CTLS] =
|
||
|
|
+ VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
|
||
|
|
+ VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC |
|
||
|
|
+ VMX_SECONDARY_EXEC_RDTSCP |
|
||
|
|
+ VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
|
||
|
|
+ VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING |
|
||
|
|
+ VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST |
|
||
|
|
+ VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT |
|
||
|
|
+ VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
|
||
|
|
+ VMX_SECONDARY_EXEC_RDRAND_EXITING |
|
||
|
|
+ VMX_SECONDARY_EXEC_ENABLE_INVPCID |
|
||
|
|
+ VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS |
|
||
|
|
+ VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML |
|
||
|
|
+ VMX_SECONDARY_EXEC_XSAVES,
|
||
|
|
+ .features[FEAT_VMX_VMFUNC] =
|
||
|
|
+ MSR_VMX_VMFUNC_EPT_SWITCHING,
|
||
|
|
+ .xlevel = 0x80000008,
|
||
|
|
+ .model_id = "Intel Xeon Processor (SapphireRapids)",
|
||
|
|
+ .versions = (X86CPUVersionDefinition[]) {
|
||
|
|
+ { .version = 1 },
|
||
|
|
+ { /* end of list */ },
|
||
|
|
+ },
|
||
|
|
+ },
|
||
|
|
{
|
||
|
|
.name = "Denverton",
|
||
|
|
.level = 21,
|
||
|
|
@@ -5619,7 +5748,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
case 0x1D: {
|
||
|
|
- /* AMX TILE */
|
||
|
|
+ /* AMX TILE, for now hardcoded for Sapphire Rapids */
|
||
|
|
*eax = 0;
|
||
|
|
*ebx = 0;
|
||
|
|
*ecx = 0;
|
||
|
|
@@ -5640,7 +5769,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
case 0x1E: {
|
||
|
|
- /* AMX TMUL */
|
||
|
|
+ /* AMX TMUL, for now hardcoded for Sapphire Rapids */
|
||
|
|
*eax = 0;
|
||
|
|
*ebx = 0;
|
||
|
|
*ecx = 0;
|
||
|
|
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
|
||
|
|
index 7a32dabf12..d0c7791a1e 100644
|
||
|
|
--- a/target/i386/cpu.h
|
||
|
|
+++ b/target/i386/cpu.h
|
||
|
|
@@ -857,10 +857,14 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
|
||
|
|
#define CPUID_7_0_EDX_SERIALIZE (1U << 14)
|
||
|
|
/* TSX Suspend Load Address Tracking instruction */
|
||
|
|
#define CPUID_7_0_EDX_TSX_LDTRK (1U << 16)
|
||
|
|
+/* AMX_BF16 instruction */
|
||
|
|
+#define CPUID_7_0_EDX_AMX_BF16 (1U << 22)
|
||
|
|
/* AVX512_FP16 instruction */
|
||
|
|
#define CPUID_7_0_EDX_AVX512_FP16 (1U << 23)
|
||
|
|
/* AMX tile (two-dimensional register) */
|
||
|
|
#define CPUID_7_0_EDX_AMX_TILE (1U << 24)
|
||
|
|
+/* AMX_INT8 instruction */
|
||
|
|
+#define CPUID_7_0_EDX_AMX_INT8 (1U << 25)
|
||
|
|
/* Speculation Control */
|
||
|
|
#define CPUID_7_0_EDX_SPEC_CTRL (1U << 26)
|
||
|
|
/* Single Thread Indirect Branch Predictors */
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|