glibc/turn-REP_STOSB_THRESHOLD-from-2k-to-1M.patch
2021-03-08 17:25:09 +08:00

46 lines
1.6 KiB
Diff

From dc8c5d3ba8ec3c2de8ca0898d682d89492d275b3 Mon Sep 17 00:00:00 2001
From: Shuo Wang <wangshuo_1994@foxmail.com>
Date: Tue, 2 Mar 2021 10:41:09 +0800
Subject: [PATCH] turn REP_STOSB_THRESHOLD from 2k to 1M
REP_STOSB_THRESHOLD selects between vector stores and REP STOSB in memset.
The default threshold (2k) leads to performance degradation when the
memset size is between 2k and 1M, so raise the threshold to 1M.
---
sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
index faa40856..76f84748 100644
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -58,6 +58,16 @@
# endif
#endif
+/* Threshold to use Enhanced REP STOSB. Since there is overhead to set
+ up REP STOSB operation, REP STOSB isn't faster on short data. The
+ previous default of 2KB degraded memset performance for sizes between
+ 2KB and 1MB, so REP STOSB is now used only above 1MB (1048576
+ bytes). Since the stored value is fixed, larger register
+ size has minimal impact on threshold. */
+#ifndef REP_STOSB_THRESHOLD
+# define REP_STOSB_THRESHOLD 1048576
+#endif
+
#ifndef SECTION
# error SECTION is not defined!
#endif
@@ -171,7 +181,7 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
ret
L(stosb_more_2x_vec):
- cmp __x86_rep_stosb_threshold(%rip), %RDX_LP
+ cmp $REP_STOSB_THRESHOLD, %RDX_LP
ja L(stosb)
#endif
L(more_2x_vec):
--
2.23.0