diff --git a/glibc.spec b/glibc.spec index b59b2d3..67ec7b3 100644 --- a/glibc.spec +++ b/glibc.spec @@ -65,7 +65,7 @@ ############################################################################## Name: glibc Version: 2.36 -Release: 1 +Release: 2 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -99,6 +99,7 @@ Patch9001: locale-delete-no-hard-link-to-avoid-all_language-pac.patch Patch9011: use-region-to-instead-of-country-for-extract-timezon.patch Patch9012: malloc-use-__get_nprocs-replace-__get_nprocs_sched.patch Patch9013: x86-use-total-l3cache-for-non_temporal_threshold.patch +Patch9014: strcmp-delete-align-for-loop_aligned.patch Provides: ldconfig rtld(GNU_HASH) bundled(gnulib) @@ -1257,6 +1258,9 @@ fi %endif %changelog +* Wed Aug 10 2022 Qingqing Li - 2.36-2 +- aarch64: strcmp delete align for better unixbench performance + * Tue Aug 2 2022 Qingqing Li - 2.36-1 - upgrade to 2.36 diff --git a/strcmp-delete-align-for-loop_aligned.patch b/strcmp-delete-align-for-loop_aligned.patch new file mode 100644 index 0000000..cf5b15a --- /dev/null +++ b/strcmp-delete-align-for-loop_aligned.patch @@ -0,0 +1,32 @@ +From 9bbffed83b93f633b272368fc536a4f24e9942e6 Mon Sep 17 00:00:00 2001 +From: Yang Yanchao +Date: Mon, 21 Feb 2022 14:25:25 +0800 +Subject: [PATCH] strcmp: delete align for loop_aligned + +On Kunpeng-920, the performance of strcmp deteriorates only +when characters 16 to 23 are different, or when the string is +only 16-23 characters long. That shows 2 misses per iteration, which +means this is indeed a branch predictor issue. +In the preceding scenario, strcmp performance is 300% worse than expected. + +Fortunately, this problem can be solved by modifying the alignment of the functions. 
+--- + sysdeps/aarch64/strcmp.S | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/sysdeps/aarch64/strcmp.S b/sysdeps/aarch64/strcmp.S +index f225d718..7a048b66 100644 +--- a/sysdeps/aarch64/strcmp.S ++++ b/sysdeps/aarch64/strcmp.S +@@ -71,8 +71,6 @@ ENTRY(strcmp) + b.ne L(misaligned8) + cbnz tmp, L(mutual_align) + +- .p2align 4 +- + L(loop_aligned): + ldr data2, [src1, off2] + ldr data1, [src1], 8 +-- +2.33.0 +