Signed-off-by: May <mayshao-oc@zhaoxin.com> (cherry picked from commit c4f135bfbc5d7fc8b2471ce71997067e4441662e)
103 lines
3.1 KiB
Diff
103 lines
3.1 KiB
Diff
From 5f5b877974cecf892346ae534edc4db9e8fbc75b Mon Sep 17 00:00:00 2001
|
|
From: May <mayshao-oc@zhaoxin.com>
|
|
Date: Wed, 15 Jan 2025 10:25:48 +0800
|
|
Subject: [PATCH 1/3] x86: Set preferred CPU features on the KH-40000 and
|
|
KX-7000 Zhaoxin processors
|
|
|
|
Fix code formatting under the Zhaoxin branch and add comments for
|
|
different Zhaoxin models.
|
|
|
|
Unaligned AVX loads are slower on KH-40000 and KX-7000, so disable
|
|
the AVX_Fast_Unaligned_Load.
|
|
|
|
Enable Prefer_No_VZEROUPPER and Fast_Unaligned_Load features to
|
|
use the sse2_unaligned versions of memset, strcpy and strcat.
|
|
|
|
Signed-off-by: May <mayshao-oc@zhaoxin.com>
|
|
---
|
|
sysdeps/x86/cpu-features.c | 51 ++++++++++++++++++++++++++------------
|
|
1 file changed, 35 insertions(+), 16 deletions(-)
|
|
|
|
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
|
|
index badf0888..43b5f562 100644
|
|
--- a/sysdeps/x86/cpu-features.c
|
|
+++ b/sysdeps/x86/cpu-features.c
|
|
@@ -907,39 +907,58 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
|
|
|
|
model += extended_model;
|
|
if (family == 0x6)
|
|
- {
|
|
- if (model == 0xf || model == 0x19)
|
|
- {
|
|
+ {
|
|
+ /* Tuning for older Zhaoxin processors. */
|
|
+ if (model == 0xf || model == 0x19)
|
|
+ {
|
|
CPU_FEATURE_UNSET (cpu_features, AVX);
|
|
CPU_FEATURE_UNSET (cpu_features, AVX2);
|
|
|
|
- cpu_features->preferred[index_arch_Slow_SSE4_2]
|
|
- |= bit_arch_Slow_SSE4_2;
|
|
+ cpu_features->preferred[index_arch_Slow_SSE4_2]
|
|
+ |= bit_arch_Slow_SSE4_2;
|
|
|
|
+ /* Unaligned AVX loads are slower. */
|
|
cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
|
|
- &= ~bit_arch_AVX_Fast_Unaligned_Load;
|
|
- }
|
|
- }
|
|
+ &= ~bit_arch_AVX_Fast_Unaligned_Load;
|
|
+ }
|
|
+ }
|
|
else if (family == 0x7)
|
|
- {
|
|
- if (model == 0x1b)
|
|
+ {
|
|
+ switch (model)
|
|
{
|
|
+ /* Wudaokou microarch tuning. */
|
|
+ case 0x1b:
|
|
CPU_FEATURE_UNSET (cpu_features, AVX);
|
|
CPU_FEATURE_UNSET (cpu_features, AVX2);
|
|
|
|
cpu_features->preferred[index_arch_Slow_SSE4_2]
|
|
- |= bit_arch_Slow_SSE4_2;
|
|
+ |= bit_arch_Slow_SSE4_2;
|
|
|
|
cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
|
|
- &= ~bit_arch_AVX_Fast_Unaligned_Load;
|
|
- }
|
|
- else if (model == 0x3b)
|
|
- {
|
|
+ &= ~bit_arch_AVX_Fast_Unaligned_Load;
|
|
+ break;
|
|
+
|
|
+ /* Lujiazui microarch tuning. */
|
|
+ case 0x3b:
|
|
CPU_FEATURE_UNSET (cpu_features, AVX);
|
|
CPU_FEATURE_UNSET (cpu_features, AVX2);
|
|
|
|
cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
|
|
- &= ~bit_arch_AVX_Fast_Unaligned_Load;
|
|
+ &= ~bit_arch_AVX_Fast_Unaligned_Load;
|
|
+ break;
|
|
+
|
|
+ /* Yongfeng and Shijidadao microarch tuning. */
|
|
+ case 0x5b:
|
|
+ case 0x6b:
|
|
+ cpu_features->preferred[index_arch_AVX_Fast_Unaligned_Load]
|
|
+ &= ~bit_arch_AVX_Fast_Unaligned_Load;
|
|
+
|
|
+ /* To use sse2_unaligned versions of memset, strcpy and strcat.
|
|
+ */
|
|
+ cpu_features->preferred[index_arch_Prefer_No_VZEROUPPER]
|
|
+ |= (bit_arch_Prefer_No_VZEROUPPER
|
|
+ | bit_arch_Fast_Unaligned_Load);
|
|
+ break;
|
|
}
|
|
}
|
|
}
|
|
--
|
|
2.27.0
|
|
|