48 lines
1.9 KiB
Diff
48 lines
1.9 KiB
Diff
|
|
From 54e99a96ec3b97f53ee018bfa8dbbef2dd13f1e8 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Joe Damato <jdamato@fastly.com>
|
||
|
|
Date: Fri, 7 Jun 2024 23:04:47 +0000
|
||
|
|
Subject: [PATCH 05/10] x86: Enable non-temporal memset tunable for AMD
|
||
|
|
|
||
|
|
In commit 46b5e98ef6f1 ("x86: Add seperate non-temporal tunable for
|
||
|
|
memset") a tunable threshold for enabling non-temporal memset was added,
|
||
|
|
but only for Intel hardware.
|
||
|
|
|
||
|
|
Since that commit, new benchmark results suggest that non-temporal
|
||
|
|
memset is beneficial on AMD as well, so allow this tunable to be set
|
||
|
|
for AMD.
|
||
|
|
|
||
|
|
See:
|
||
|
|
https://docs.google.com/spreadsheets/d/1opzukzvum4n6-RUVHTGddV6RjAEil4P2uMjjQGLbLcU/edit?usp=sharing
|
||
|
|
which has been updated to include data using different strategies for
|
||
|
|
large memset on AMD Zen2, Zen3, and Zen4.
|
||
|
|
|
||
|
|
Signed-off-by: Joe Damato <jdamato@fastly.com>
|
||
|
|
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
|
||
|
|
---
|
||
|
|
sysdeps/x86/dl-cacheinfo.h | 8 ++++----
|
||
|
|
1 file changed, 4 insertions(+), 4 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
|
||
|
|
index 9f27da21ce..dfdb4069c7 100644
|
||
|
|
--- a/sysdeps/x86/dl-cacheinfo.h
|
||
|
|
+++ b/sysdeps/x86/dl-cacheinfo.h
|
||
|
|
@@ -1048,11 +1048,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
|
||
|
|
if (CPU_FEATURE_USABLE_P (cpu_features, FSRM))
|
||
|
|
rep_movsb_threshold = 2112;
|
||
|
|
|
||
|
|
- /* Non-temporal stores in memset have only been tested on Intel hardware.
|
||
|
|
- Until we benchmark data on other x86 processor, disable non-temporal
|
||
|
|
- stores in memset. */
|
||
|
|
+ /* Non-temporal stores are more performant on Intel and AMD hardware above
|
||
|
|
+ non_temporal_threshold. Enable this for both Intel and AMD hardware. */
|
||
|
|
unsigned long int memset_non_temporal_threshold = SIZE_MAX;
|
||
|
|
- if (cpu_features->basic.kind == arch_kind_intel)
|
||
|
|
+ if (cpu_features->basic.kind == arch_kind_intel
|
||
|
|
+ || cpu_features->basic.kind == arch_kind_amd)
|
||
|
|
memset_non_temporal_threshold = non_temporal_threshold;
|
||
|
|
|
||
|
|
/* For AMD CPUs that support ERMS (Zen3+), REP MOVSB is in a lot of
|
||
|
|
--
|
||
|
|
2.17.1
|
||
|
|
|