turn the default value of x86_rep_stosb_threshold from 2k to 1M

Signed-off-by: Yang Yanchao <yangyanchao6@huawei.com>
This commit is contained in:
Yang Yanchao 2021-12-03 11:13:07 +08:00
parent 59a7c20682
commit 6495a142f4
3 changed files with 34 additions and 47 deletions

View File

@ -65,7 +65,7 @@
############################################################################## ##############################################################################
Name: glibc Name: glibc
Version: 2.34 Version: 2.34
Release: 29 Release: 30
Summary: The GNU libc libraries Summary: The GNU libc libraries
License: %{all_license} License: %{all_license}
URL: http://www.gnu.org/software/glibc/ URL: http://www.gnu.org/software/glibc/
@ -138,7 +138,7 @@ Patch51: linux-Simplify-get_nprocs.patch
Patch52: misc-Add-__get_nprocs_sched.patch Patch52: misc-Add-__get_nprocs_sched.patch
Patch53: linux-Revert-the-use-of-sched_getaffinity-on-get_npr.patch Patch53: linux-Revert-the-use-of-sched_getaffinity-on-get_npr.patch
#Patch9000: turn-REP_STOSB_THRESHOLD-from-2k-to-1M.patch Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch
Patch9001: delete-no-hard-link-to-avoid-all_language-package-to.patch Patch9001: delete-no-hard-link-to-avoid-all_language-package-to.patch
Patch9002: 0001-add-base-files-for-libphtread-condition-family.patch Patch9002: 0001-add-base-files-for-libphtread-condition-family.patch
Patch9003: 0002-add-header-files-for-libphtread_2_17_so.patch Patch9003: 0002-add-header-files-for-libphtread_2_17_so.patch
@ -1334,6 +1334,9 @@ fi
%endif %endif
%changelog %changelog
* Fri Dec 3 2021 Yang Yanchao <yangyanchao6@huawei.com> - 2.34-30
- turn the default value of x86_rep_stosb_threshold from 2k to 1M
* Thu Dec 2 2021 Qingqing Li <liqingqing3@huawei.com> - 2.34-29 * Thu Dec 2 2021 Qingqing Li <liqingqing3@huawei.com> - 2.34-29
- revert the use of sched_getaffinity [BZ #28310] - revert the use of sched_getaffinity [BZ #28310]

View File

@ -1,45 +0,0 @@
From dc8c5d3ba8ec3c2de8ca0898d682d89492d275b3 Mon Sep 17 00:00:00 2001
From: Shuo Wang <wangshuo_1994@foxmail.com>
Date: Tue, 2 Mar 2021 10:41:09 +0800
Subject: [PATCH] turn REP_STOSB_THRESHOLD from 2k to 1M
REP_STOSB_THRESHOLD is designed to choose vec mov or stosb.
The default threshold (2k) will lead to performance degradation if the
memset size is between 2k and 1M.
---
sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
index faa40856..76f84748 100644
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -58,6 +58,16 @@
# endif
#endif
+/* Threshold to use Enhanced REP STOSB. Since there is overhead to set
+ up REP STOSB operation, REP STOSB isn't faster on short data. The
+ memset micro benchmark in glibc shows that 2KB is the approximate
+ value above which REP STOSB becomes faster on processors with
+ Enhanced REP STOSB. Since the stored value is fixed, larger register
+ size has minimal impact on threshold. */
+#ifndef REP_STOSB_THRESHOLD
+# define REP_STOSB_THRESHOLD 1048576
+#endif
+
#ifndef SECTION
# error SECTION is not defined!
#endif
@@ -171,7 +181,7 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
ret
L(stosb_more_2x_vec):
- cmp __x86_rep_stosb_threshold(%rip), %RDX_LP
+ cmp $REP_STOSB_THRESHOLD, %RDX_LP
ja L(stosb)
#endif
L(more_2x_vec):
--
2.23.0

View File

@ -0,0 +1,29 @@
From 4dee2794b8c78ccd540e3f72bc07585828e0143b Mon Sep 17 00:00:00 2001
From: Yang Yanchao <yangyanchao6@huawei.com>
Date: Thu, 2 Dec 2021 19:56:20 +0800
Subject: [PATCH] turn the default value of x86_rep_stosb_threshold from 2k to 1M
x86_rep_stosb_threshold is designed to choose between vec mov and stosb.
In libMicro, after setting x86_rep_stosb_threshold to 1 MB,
the performance of memset_256_u, memset_4k_uc, and memset_1m improves,
while the performance of memset_4k and memset_10k deteriorates.
---
sysdeps/x86/dl-tunables.list | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sysdeps/x86/dl-tunables.list b/sysdeps/x86/dl-tunables.list
index dd6e1d65..a4c3af69 100644
--- a/sysdeps/x86/dl-tunables.list
+++ b/sysdeps/x86/dl-tunables.list
@@ -54,7 +54,7 @@ glibc {
# stored value is fixed, larger register size has minimal impact
# on threshold.
minval: 1
- default: 2048
+ default: 1048576
}
x86_data_cache_size {
type: SIZE_T
--
2.30.0