From c362956eb88558991bee59e43d7db52c8bc7e5f5 Mon Sep 17 00:00:00 2001 From: Jiaxi Chen Date: Fri, 3 Mar 2023 14:59:09 +0800 Subject: [PATCH] target/i386: Add support for AMX-FP16 in CPUID enumeration commit 99ed8445ea27742a4df40f51a3a5fbd6f8e76fa5 upstream. Latest Intel platform Granite Rapids has introduced a new instruction - AMX-FP16, which performs dot-products of two FP16 tiles and accumulates the results into a packed single precision tile. AMX-FP16 adds FP16 capability and allows a FP16 GPU trained model to run faster without loss of accuracy or added SW overhead. The bit definition: CPUID.(EAX=7,ECX=1):EAX[bit 21] Add CPUID definition for AMX-FP16. Intel-SIG: commit 99ed8445ea27 target/i386: Add support for AMX-FP16 in CPUID enumeration. Backport GNR and SRF ISA into QEMU-6.2 Signed-off-by: Jiaxi Chen Signed-off-by: Tao Su Reviewed-by: Xiaoyao Li Message-Id: <20230303065913.1246327-3-tao1.su@linux.intel.com> Signed-off-by: Paolo Bonzini [ Quanxian Wang: amend commit log ] Signed-off-by: Quanxian Wang --- target/i386/cpu.c | 2 +- target/i386/cpu.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 47c2d9da80..3fc3b8041a 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -876,7 +876,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, NULL, "fzrm", "fsrs", "fsrc", NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + NULL, "amx-fp16", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 4a7362ee07..c747e68a7a 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -891,6 +891,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_7_1_EAX_FSRS (1U << 11) /* Fast Short REP CMPS/SCAS */ #define CPUID_7_1_EAX_FSRC (1U << 12) +/* Support Tile Computational Operations on FP16 Numbers */ +#define CPUID_7_1_EAX_AMX_FP16 (1U << 21) /* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */ #define CPUID_7_2_EDX_MCDT_NO (1U << 5) -- 2.27.0