Signed-off-by: Xie jiamei <xiejiamei@hygon.cn> (cherry picked from commit 9cf451dd6fdd13ec64780b1f56c84778f99449fb)
93 lines
3.4 KiB
Diff
93 lines
3.4 KiB
Diff
From 1e57e1c6aa6ca5a476aba725271c1ace9be345d3 Mon Sep 17 00:00:00 2001
|
|
From: Feifei Wang <wangfeifei@hygon.cn>
|
|
Date: Mon, 19 Aug 2024 14:57:55 +0800
|
|
Subject: [PATCH 10/10] x86: Enable non-temporal memset for Hygon processors
|
|
|
|
This patch clears the 'Avoid_Non_Temporal_Memset' flag on Hygon processors
|
|
so that memset uses the non-temporal store implementation on them.
|
|
|
|
Test Results:
|
|
|
|
hygon1 arch
|
|
x86_memset_non_temporal_threshold = 8MB
|
|
size new performance time / old performance time
|
|
1MB 0.994
|
|
4MB 0.996
|
|
8MB 0.670
|
|
16MB 0.343
|
|
32MB 0.355
|
|
|
|
hygon2 arch
|
|
x86_memset_non_temporal_threshold = 8MB
|
|
size new performance time / old performance time
|
|
1MB 1
|
|
4MB 1
|
|
8MB 1.312
|
|
16MB 0.822
|
|
32MB 0.830
|
|
|
|
hygon3 arch
|
|
x86_memset_non_temporal_threshold = 8MB
|
|
size new performance time / old performance time
|
|
1MB 1
|
|
4MB 0.990
|
|
8MB 0.737
|
|
16MB 0.390
|
|
32MB 0.401
|
|
|
|
For Hygon architectures with this patch, non-temporal stores reduce memset
|
|
time by roughly 20% - 65% at 16MB and 32MB; at exactly 8MB, hygon2 is about 31% slower.
|
|
|
|
Signed-off-by: Feifei Wang <wangfeifei@hygon.cn>
|
|
Reviewed-by: Jing Li <lijing@hygon.cn>
|
|
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
|
|
---
|
|
sysdeps/x86/cpu-features.c | 9 +++++++--
|
|
sysdeps/x86/dl-cacheinfo.h | 2 +-
|
|
2 files changed, 8 insertions(+), 3 deletions(-)
|
|
|
|
diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c
|
|
index 287edc5b08..f5539aea6f 100644
|
|
--- a/sysdeps/x86/cpu-features.c
|
|
+++ b/sysdeps/x86/cpu-features.c
|
|
@@ -640,9 +640,9 @@ init_cpu_features (struct cpu_features *cpu_features)
|
|
unsigned int stepping = 0;
|
|
enum cpu_features_kind kind;
|
|
|
|
- /* Default is avoid non-temporal memset for non Intel/AMD hardware. This is,
|
|
+ /* Default is avoid non-temporal memset for non Intel/AMD/Hygon hardware. This is,
|
|
as of writing this, we only have benchmarks indicatings it profitability
|
|
- on Intel/AMD. */
|
|
+ on Intel/AMD/Hygon. */
|
|
cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
|
|
|= bit_arch_Avoid_Non_Temporal_Memset;
|
|
|
|
@@ -998,6 +998,11 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht
|
|
get_extended_indices (cpu_features);
|
|
|
|
update_active (cpu_features);
|
|
+
|
|
+ /* Benchmarks indicate non-temporal memset can be profitable on Hygon
|
|
+ hardware. */
|
|
+ cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset]
|
|
+ &= ~bit_arch_Avoid_Non_Temporal_Memset;
|
|
}
|
|
else
|
|
{
|
|
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
|
|
index 5803bfcea8..d4dad8df3b 100644
|
|
--- a/sysdeps/x86/dl-cacheinfo.h
|
|
+++ b/sysdeps/x86/dl-cacheinfo.h
|
|
@@ -1071,7 +1071,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
|
|
|
|
/* Non-temporal stores are more performant on some hardware above
|
|
non_temporal_threshold. Currently Prefer_Non_Temporal is set for for both
|
|
- Intel and AMD hardware. */
|
|
+ Intel, AMD and Hygon hardware. */
|
|
unsigned long int memset_non_temporal_threshold = SIZE_MAX;
|
|
if (!CPU_FEATURES_ARCH_P (cpu_features, Avoid_Non_Temporal_Memset))
|
|
memset_non_temporal_threshold = non_temporal_threshold;
|
|
--
|
|
2.17.1
|
|
|