glibc/0001-x86-Set-preferred-CPU-features-on-the-KH-40000-and-K.patch

From 5f5b877974cecf892346ae534edc4db9e8fbc75b Mon Sep 17 00:00:00 2001
From: May <mayshao-oc@zhaoxin.com>
Date: Wed, 15 Jan 2025 10:25:48 +0800
Subject: [PATCH 1/3] x86: Set preferred CPU features on the KH-40000 and
 KX-7000 Zhaoxin processors

Fix code formatting under the Zhaoxin branch and add comments for the
different Zhaoxin models.

Unaligned AVX loads are slower on the KH-40000 and KX-7000, so disable
AVX_Fast_Unaligned_Load for them.

Enable the Prefer_No_VZEROUPPER and Fast_Unaligned_Load features to use
the sse2_unaligned versions of memset, strcpy and strcat.

Signed-off-by: May <mayshao-oc@zhaoxin.com>
---
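Note (not part of the upstream commit): the new tuning below is selected by
the same CPUID family/model decoding that appears as context in the hunk.  A
minimal standalone C sketch of that decoding follows, assuming GCC's
<cpuid.h>; which of model 0x5b and 0x6b corresponds to the KH-40000 and which
to the KX-7000 is not spelled out by the patch, so the sketch only reports
whether the new case applies.

#include <cpuid.h>
#include <stdio.h>

int
main (void)
{
  unsigned int eax, ebx, ecx, edx;

  /* CPUID leaf 1: stepping/model/family live in EAX.  */
  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 1;

  unsigned int family = (eax >> 8) & 0x0f;
  unsigned int model = (eax >> 4) & 0x0f;
  /* Extended model (bits 19:16), already shifted left by 4 as in
     sysdeps/x86/cpu-features.c.  */
  unsigned int extended_model = (eax >> 12) & 0xf0;

  /* Matches the `model += extended_model;' context line in the hunk.  */
  model += extended_model;

  if (family == 0x7 && (model == 0x5b || model == 0x6b))
    printf ("family 0x%x, model 0x%x: covered by the new case 0x5b/0x6b tuning\n",
            family, model);
  else
    printf ("family 0x%x, model 0x%x: not affected by this patch\n",
            family, model);
  return 0;
}
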
 sysdeps/x86/cpu-features.c | 51 ++++++++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 16 deletions(-)

diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
index badf0888..43b5f562 100644
--- a/sysdeps/x86/cpu-features.c
+++ b/sysdeps/x86/cpu-features.c
@@ -907,39 +907,58 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
 
       model += extended_model;
       if (family == 0x6)
-        {
-          if (model == 0xf || model == 0x19)
-            {
+        {
+          /* Tuning for older Zhaoxin processors.  */
+          if (model == 0xf || model == 0x19)
+            {
               CPU_FEATURE_UNSET (cpu_features, AVX);
               CPU_FEATURE_UNSET (cpu_features, AVX2);
 
-              cpu_features->preferred[index_arch_Slow_SSE4_2]
-                |= bit_arch_Slow_SSE4_2;
+              cpu_features->preferred[index_arch_Slow_SSE4_2]
+                |= bit_arch_Slow_SSE4_2;
 
+              /* Unaligned AVX loads are slower.  */
               cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
-                &= ~bit_arch_AVX_Fast_Unaligned_Load;
-            }
-        }
+                &= ~bit_arch_AVX_Fast_Unaligned_Load;
+            }
+        }
       else if (family == 0x7)
-        {
-          if (model == 0x1b)
+        {
+          switch (model)
             {
+              /* Wudaokou microarch tuning.  */
+            case 0x1b:
               CPU_FEATURE_UNSET (cpu_features, AVX);
               CPU_FEATURE_UNSET (cpu_features, AVX2);
 
               cpu_features->preferred[index_arch_Slow_SSE4_2]
-                |= bit_arch_Slow_SSE4_2;
+                |= bit_arch_Slow_SSE4_2;
 
               cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
-                &= ~bit_arch_AVX_Fast_Unaligned_Load;
-            }
-          else if (model == 0x3b)
-            {
+                &= ~bit_arch_AVX_Fast_Unaligned_Load;
+              break;
+
+              /* Lujiazui microarch tuning.  */
+            case 0x3b:
               CPU_FEATURE_UNSET (cpu_features, AVX);
               CPU_FEATURE_UNSET (cpu_features, AVX2);
 
               cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
-                &= ~bit_arch_AVX_Fast_Unaligned_Load;
+                &= ~bit_arch_AVX_Fast_Unaligned_Load;
+              break;
+
+              /* Yongfeng and Shijidadao microarch tuning.  */
+            case 0x5b:
+            case 0x6b:
+              cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
+                &= ~bit_arch_AVX_Fast_Unaligned_Load;
+
+              /* To use sse2_unaligned versions of memset, strcpy and strcat.
+               */
+              cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
+                |= (bit_arch_Prefer_No_VZEROUPPER
+                    | bit_arch_Fast_Unaligned_Load);
+              break;
             }
         }
     }
--
2.27.0