From 1e57e1c6aa6ca5a476aba725271c1ace9be345d3 Mon Sep 17 00:00:00 2001 From: Feifei Wang Date: Mon, 19 Aug 2024 14:57:55 +0800 Subject: [PATCH 10/10] x86: Enable non-temporal memset for Hygon processors This patch clears the 'Avoid_Non_Temporal_Memset' flag on Hygon processors to enable the non-temporal memset implementation for them. Test Results: hygon1 arch x86_memset_non_temporal_threshold = 8MB size new performance time / old performance time 1MB 0.994 4MB 0.996 8MB 0.670 16MB 0.343 32MB 0.355 hygon2 arch x86_memset_non_temporal_threshold = 8MB size new performance time / old performance time 1MB 1 4MB 1 8MB 1.312 16MB 0.822 32MB 0.830 hygon3 arch x86_memset_non_temporal_threshold = 8MB size new performance time / old performance time 1MB 1 4MB 0.990 8MB 0.737 16MB 0.390 32MB 0.401 On Hygon architectures with this patch, non-temporal stores improve memset performance by roughly 20% - 65% for sizes at or above the 8MB threshold; the one exception is the 8MB case on hygon2, which regresses (ratio 1.312). Signed-off-by: Feifei Wang Reviewed-by: Jing Li Reviewed-by: H.J. Lu --- sysdeps/x86/cpu-features.c | 9 +++++++-- sysdeps/x86/dl-cacheinfo.h | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c index 287edc5b08..f5539aea6f 100644 --- a/sysdeps/x86/cpu-features.c +++ b/sysdeps/x86/cpu-features.c @@ -640,9 +640,9 @@ init_cpu_features (struct cpu_features *cpu_features) unsigned int stepping = 0; enum cpu_features_kind kind; - /* Default is avoid non-temporal memset for non Intel/AMD hardware. This is, + /* Default is avoid non-temporal memset for non Intel/AMD/Hygon hardware. This is, as of writing this, we only have benchmarks indicatings it profitability - on Intel/AMD. */ + on Intel/AMD/Hygon. 
*/ cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] |= bit_arch_Avoid_Non_Temporal_Memset; @@ -998,6 +998,11 @@ https://www.intel.com/content/www/us/en/support/articles/000059422/processors.ht get_extended_indices (cpu_features); update_active (cpu_features); + + /* Benchmarks indicate non-temporal memset can be profitable on Hygon + hardware. */ + cpu_features->preferred[index_arch_Avoid_Non_Temporal_Memset] + &= ~bit_arch_Avoid_Non_Temporal_Memset; } else { diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h index 5803bfcea8..d4dad8df3b 100644 --- a/sysdeps/x86/dl-cacheinfo.h +++ b/sysdeps/x86/dl-cacheinfo.h @@ -1071,7 +1071,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) /* Non-temporal stores are more performant on some hardware above non_temporal_threshold. Currently Prefer_Non_Temporal is set for for both - Intel and AMD hardware. */ + Intel, AMD and Hygon hardware. */ unsigned long int memset_non_temporal_threshold = SIZE_MAX; if (!CPU_FEATURES_ARCH_P (cpu_features, Avoid_Non_Temporal_Memset)) memset_non_temporal_threshold = non_temporal_threshold; -- 2.17.1