Here are the bugfixes:

- x86_64: Optimize ffsll function code size
- S390: Fix building with --disable-multi-arch (BZ 31196)
- sparc: Fix broken memset for sparc32 (BZ 31068)
- sparc: Remove unwind information from signal return
- sparc: Fix sparc64 memmove length comparison (BZ 31266)
- sparc: Remove unwind information from signal return stubs (BZ 31244)
From 30e546d76e756fe4d2d20a8b2286de4fbf30ceb5 Mon Sep 17 00:00:00 2001
From: Sunil K Pandey <skpgkp2@gmail.com>
Date: Wed, 26 Jul 2023 08:34:05 -0700
Subject: [PATCH 1/6] x86_64: Optimize ffsll function code size.
The ffsll function randomly regresses by ~20%, depending on how the code
gets aligned in memory.  The ffsll function code size is 17 bytes.  Since
the default function alignment is 16 bytes, it can be loaded at a 16-, 32-,
48- or 64-byte aligned address.  When the ffsll function is loaded at a
16-, 32- or 64-byte aligned address, the entire code fits in a single
64-byte cache line.  When the ffsll function is loaded at a 48-byte aligned
address, it is split across two cache lines, hence the random regression.

Reducing the ffsll function size from 17 bytes to 12 bytes ensures that it
will always fit in a single 64-byte cache line.

This patch fixes the random performance regression of the ffsll function.

Reviewed-by: Carlos O'Donell <carlos@redhat.com>
(cherry picked from commit 9d94997b5f9445afd4f2bccc5fa60ff7c4361ec1)
---
 sysdeps/x86_64/ffsll.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/sysdeps/x86_64/ffsll.c b/sysdeps/x86_64/ffsll.c
index a1c13d4906..0c6680735c 100644
--- a/sysdeps/x86_64/ffsll.c
+++ b/sysdeps/x86_64/ffsll.c
@@ -26,13 +26,13 @@ int
 ffsll (long long int x)
 {
   long long int cnt;
-  long long int tmp;
 
-  asm ("bsfq %2,%0\n"     /* Count low bits in X and store in %1.  */
-       "cmoveq %1,%0\n"   /* If number was zero, use -1 as result.  */
-       : "=&r" (cnt), "=r" (tmp) : "rm" (x), "1" (-1));
+  asm ("mov $-1,%k0\n"    /* Initialize cnt to -1.  */
+       "bsf %1,%0\n"      /* Count low bits in x and store in cnt.  */
+       "inc %k0\n"        /* Increment cnt by 1.  */
+       : "=&r" (cnt) : "r" (x));
 
-  return cnt + 1;
+  return cnt;
 }
 
 #ifndef __ILP32__
-- 
2.33.0
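
The cache-line arithmetic in the commit message can be checked with a short
calculation. Below is an illustrative sketch, not part of the patch: for each
16-byte-aligned start offset within a 64-byte cache line, it counts how many
cache lines a 17-byte function body (old size) and a 12-byte body (new size)
would touch. The helper name cache_lines_touched is made up for this example.

#include <stdio.h>

/* Number of 64-byte cache lines touched by an object of SIZE bytes
   starting at byte offset START.  */
static unsigned
cache_lines_touched (unsigned start, unsigned size)
{
  return (start + size - 1) / 64 - start / 64 + 1;
}

int
main (void)
{
  /* The default 16-byte function alignment allows start offsets of
     0, 16, 32 and 48 within a cache line.  */
  for (unsigned off = 0; off < 64; off += 16)
    printf ("offset %2u: 17-byte body -> %u line(s), 12-byte body -> %u line(s)\n",
            off, cache_lines_touched (off, 17), cache_lines_touched (off, 12));
  return 0;
}

Only the 48-byte offset makes the 17-byte body straddle two lines (its last
byte lands at offset 64, i.e. in the next line); the 12-byte body ends at
offset 59 and always stays within one line, which is why the smaller function
no longer shows the random regression.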
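For readers who do not follow x86 assembly, here is a hedged C sketch of what
the new three-instruction sequence computes. The helper name ffsll_reference
is hypothetical and the GCC/Clang builtin __builtin_ctzll stands in for the
bsf instruction; the real code keeps the result in a register and, as far as I
understand, relies on bsf leaving the destination unmodified when the source
is zero, so the preloaded -1 survives and the final increment yields 0.

#include <stdio.h>

/* Illustrative reference only, not glibc code: ffsll returns one plus the
   index of the least significant set bit, or 0 when no bit is set.  */
static int
ffsll_reference (long long int x)
{
  return x == 0 ? 0 : __builtin_ctzll ((unsigned long long int) x) + 1;
}

int
main (void)
{
  printf ("%d %d %d\n",
          ffsll_reference (0),     /* 0: no bit set  */
          ffsll_reference (1),     /* 1: bit 0 set   */
          ffsll_reference (0x10)); /* 5: bit 4 set   */
  return 0;
}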