35 lines
1.2 KiB
Diff
35 lines
1.2 KiB
Diff
|
|
From 5a64f933655384477d85122c6855dc6d84061810 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Noah Goldstein <goldstein.w.n@gmail.com>
|
||
|
|
Date: Wed, 1 Nov 2023 15:30:26 -0500
|
||
|
|
Subject: [PATCH] x86: Only align destination to 1x VEC_SIZE in memset 4x
|
||
|
|
loop
|
||
|
|
|
||
|
|
Current code aligns to 2x VEC_SIZE. Aligning to 2x has no effect on
|
||
|
|
performance other than potentially resulting in an additional
|
||
|
|
iteration of the loop.
|
||
|
|
1x maintains aligned stores (the only reason to align in this case)
|
||
|
|
and doesn't incur any unnecessary loop iterations.
|
||
|
|
Reviewed-by: Sunil K Pandey <skpgkp2@gmail.com>
|
||
|
|
|
||
|
|
(cherry picked from commit 9469261cf1924d350feeec64d2c80cafbbdcdd4d)
|
||
|
|
---
|
||
|
|
sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S | 2 +-
|
||
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||
|
|
|
||
|
|
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
|
||
|
|
index 3d9ad49cb9..0f0636b90f 100644
|
||
|
|
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
|
||
|
|
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
|
||
|
|
@@ -293,7 +293,7 @@ L(more_2x_vec):
|
||
|
|
leaq (VEC_SIZE * 4)(%rax), %LOOP_REG
|
||
|
|
#endif
|
||
|
|
/* Align dst for loop. */
|
||
|
|
- andq $(VEC_SIZE * -2), %LOOP_REG
|
||
|
|
+ andq $(VEC_SIZE * -1), %LOOP_REG
|
||
|
|
.p2align 4
|
||
|
|
L(loop):
|
||
|
|
VMOVA %VMM(0), LOOP_4X_OFFSET(%LOOP_REG)
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|