aarch64: update a64fx memset not to degrade at 16KB
This commit is contained in:
parent
97485454ba
commit
653f3153b9
39
AArch64-Update-A64FX-memset-not-to-degrade-at-16KB.patch
Normal file
39
AArch64-Update-A64FX-memset-not-to-degrade-at-16KB.patch
Normal file
@ -0,0 +1,39 @@
|
||||
From 23777232c23f80809613bdfa329f63aadf992922 Mon Sep 17 00:00:00 2001
|
||||
From: Naohiro Tamura via Libc-alpha <libc-alpha@sourceware.org>
|
||||
Date: Fri, 27 Aug 2021 05:03:04 +0000
|
||||
Subject: [PATCH] AArch64: Update A64FX memset not to degrade at 16KB
|
||||
|
||||
This patch updates the unroll8 code so that performance does not degrade
|
||||
at the 16KB peak on both FX1000 and FX700.
|
||||
|
||||
The 2 instructions inserted at the beginning of the unroll8 loop,
|
||||
cmp and branch, are a workaround that was found heuristically.
|
||||
|
||||
Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
|
||||
---
|
||||
sysdeps/aarch64/multiarch/memset_a64fx.S | 9 ++++++++-
|
||||
1 file changed, 8 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/sysdeps/aarch64/multiarch/memset_a64fx.S b/sysdeps/aarch64/multiarch/memset_a64fx.S
|
||||
index 7bf759b..f7dfdaa 100644
|
||||
--- a/sysdeps/aarch64/multiarch/memset_a64fx.S
|
||||
+++ b/sysdeps/aarch64/multiarch/memset_a64fx.S
|
||||
@@ -96,7 +96,14 @@ L(vl_agnostic): // VL Agnostic
|
||||
L(unroll8):
|
||||
sub count, count, tmp1
|
||||
.p2align 4
|
||||
-1: st1b_unroll 0, 7
|
||||
+ // The 2 instructions at the beginning of the following loop,
|
||||
+ // cmp and branch, are a workaround so as not to degrade at
|
||||
+ // the peak performance 16KB.
|
||||
+ // It is found heuristically and the branch condition, b.ne,
|
||||
+ // is chosen intentionally never to jump.
|
||||
+1: cmp xzr, xzr
|
||||
+ b.ne 1b
|
||||
+ st1b_unroll 0, 7
|
||||
add dst, dst, tmp1
|
||||
subs count, count, tmp1
|
||||
b.hi 1b
|
||||
--
|
||||
1.8.3.1
|
||||
|
||||
@ -65,7 +65,7 @@
|
||||
##############################################################################
|
||||
Name: glibc
|
||||
Version: 2.34
|
||||
Release: 18
|
||||
Release: 19
|
||||
Summary: The GNU libc libraries
|
||||
License: %{all_license}
|
||||
URL: http://www.gnu.org/software/glibc/
|
||||
@ -118,6 +118,7 @@ Patch31: mtrace-Fix-output-with-PIE-and-ASLR-BZ-22716.patch
|
||||
Patch32: rtld-copy-terminating-null-in-tunables_strdup-bug-28.patch
|
||||
Patch33: Use-__executable_start-as-the-lowest-address-for-pro.patch
|
||||
Patch34: x86-64-Use-testl-to-check-__x86_string_control.patch
|
||||
Patch35: AArch64-Update-A64FX-memset-not-to-degrade-at-16KB.patch
|
||||
|
||||
#Patch9000: turn-REP_STOSB_THRESHOLD-from-2k-to-1M.patch
|
||||
Patch9001: delete-no-hard-link-to-avoid-all_language-package-to.patch
|
||||
@ -1307,6 +1308,9 @@ fi
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Fri Oct 29 2021 Qingqing Li <liqingqing3@huawei.com> - 2.34-19
|
||||
- aarch64: update a64fx memset not to degrade at 16KB
|
||||
|
||||
* Thu Oct 28 2021 Qingqing Li <liqingqing3@huawei.com> - 2.34-18
|
||||
- use testl instead of andl to check __x86_string_control to
|
||||
avoid updating __x86_string_control
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user