!279 调整memset的水线x86_rep_stosb_threshold,从2K改为1M
From: @yang_yanchao Reviewed-by: @liqingqing_1229 Signed-off-by: @liqingqing_1229
This commit is contained in:
commit
0ac59f5ae4
@ -65,7 +65,7 @@
|
||||
##############################################################################
|
||||
Name: glibc
|
||||
Version: 2.34
|
||||
Release: 29
|
||||
Release: 30
|
||||
Summary: The GNU libc libraries
|
||||
License: %{all_license}
|
||||
URL: http://www.gnu.org/software/glibc/
|
||||
@ -138,7 +138,7 @@ Patch51: linux-Simplify-get_nprocs.patch
|
||||
Patch52: misc-Add-__get_nprocs_sched.patch
|
||||
Patch53: linux-Revert-the-use-of-sched_getaffinity-on-get_npr.patch
|
||||
|
||||
#Patch9000: turn-REP_STOSB_THRESHOLD-from-2k-to-1M.patch
|
||||
Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch
|
||||
Patch9001: delete-no-hard-link-to-avoid-all_language-package-to.patch
|
||||
Patch9002: 0001-add-base-files-for-libphtread-condition-family.patch
|
||||
Patch9003: 0002-add-header-files-for-libphtread_2_17_so.patch
|
||||
@ -1334,6 +1334,9 @@ fi
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Fri Dec 3 2021 Yang Yanchao <yangyanchao6@huawei.com> - 2.34-30
|
||||
- turn the default value of x86_rep_stosb_threshold from 2k to 1M
|
||||
|
||||
* Thu Dec 2 2021 Qingqing Li <liqingqing3@huawei.com> - 2.34-29
|
||||
- revert the use of sched_getaffinity [BZ #28310]
|
||||
|
||||
|
||||
@ -1,45 +0,0 @@
|
||||
From dc8c5d3ba8ec3c2de8ca0898d682d89492d275b3 Mon Sep 17 00:00:00 2001
|
||||
From: Shuo Wang <wangshuo_1994@foxmail.com>
|
||||
Date: Tue, 2 Mar 2021 10:41:09 +0800
|
||||
Subject: [PATCH] turn REP_STOSB_THRESHOLD from 2k to 1M
|
||||
|
||||
REP_STOSB_THRESHOLD is designed to choose vec mov or stosb.
|
||||
The default threshold (2k) will lead to performance degradation if the
|
||||
memset size is between 2k and 1M.
|
||||
---
|
||||
sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S | 12 +++++++++++-
|
||||
1 file changed, 11 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
|
||||
index faa40856..76f84748 100644
|
||||
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
|
||||
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
|
||||
@@ -58,6 +58,16 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
+/* Threshold to use Enhanced REP STOSB. Since there is overhead to set
|
||||
+ up REP STOSB operation, REP STOSB isn't faster on short data. The
|
||||
+ memset micro benchmark in glibc shows that 2KB is the approximate
|
||||
+ value above which REP STOSB becomes faster on processors with
|
||||
+ Enhanced REP STOSB. Since the stored value is fixed, larger register
|
||||
+ size has minimal impact on threshold. */
|
||||
+#ifndef REP_STOSB_THRESHOLD
|
||||
+# define REP_STOSB_THRESHOLD 1048576
|
||||
+#endif
|
||||
+
|
||||
#ifndef SECTION
|
||||
# error SECTION is not defined!
|
||||
#endif
|
||||
@@ -171,7 +181,7 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
|
||||
ret
|
||||
|
||||
L(stosb_more_2x_vec):
|
||||
- cmp __x86_rep_stosb_threshold(%rip), %RDX_LP
|
||||
+ cmp $REP_STOSB_THRESHOLD, %RDX_LP
|
||||
ja L(stosb)
|
||||
#endif
|
||||
L(more_2x_vec):
|
||||
--
|
||||
2.23.0
|
||||
|
||||
@ -0,0 +1,29 @@
|
||||
From 4dee2794b8c78ccd540e3f72bc07585828e0143b Mon Sep 17 00:00:00 2001
|
||||
From: Yang Yanchao <yangyanchao6@huawei.com>
|
||||
Date: Thu, 2 Dec 2021 19:56:20 +0800
|
||||
Subject: [PATCH] turn the default value of x86_rep_stosb_threshold from 2k to 1M
|
||||
|
||||
x86_rep_stosb_threshold is designed to choose vec mov or stosb.
|
||||
In libMicro, after setting x86_rep_stosb_threshold to 1 MB,
|
||||
the performance of memset_256_u, memset_4k_uc, and memset_1m is improved.
|
||||
The performance deteriorates in the memset_4k and memset_10k scenarios.
|
||||
---
|
||||
sysdeps/x86/dl-tunables.list | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/sysdeps/x86/dl-tunables.list b/sysdeps/x86/dl-tunables.list
|
||||
index dd6e1d65..a4c3af69 100644
|
||||
--- a/sysdeps/x86/dl-tunables.list
|
||||
+++ b/sysdeps/x86/dl-tunables.list
|
||||
@@ -54,7 +54,7 @@ glibc {
|
||||
# stored value is fixed, larger register size has minimal impact
|
||||
# on threshold.
|
||||
minval: 1
|
||||
- default: 2048
|
||||
+ default: 1048576
|
||||
}
|
||||
x86_data_cache_size {
|
||||
type: SIZE_T
|
||||
--
|
||||
2.30.0
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user