229 lines
7.7 KiB
Diff
229 lines
7.7 KiB
Diff
From 156e44845f4137d6d3ea6c2824dd459652a7efda Mon Sep 17 00:00:00 2001
|
|
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
|
|
Date: Thu, 26 Oct 2023 17:07:21 +0100
|
|
Subject: [PATCH 08/26] AArch64: Add memset_zva64
|
|
|
|
Add a specialized memset for the common ZVA size of 64 bytes to avoid the
|
|
overhead of reading the ZVA size from dczid_el0. Since the code is identical to
|
|
__memset_falkor, remove the latter.
|
|
|
|
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
|
(cherry picked from commit 3d7090f14b13312320e425b27dcf0fe72de026fd)
|
|
---
|
|
sysdeps/aarch64/memset.S | 10 ++--
|
|
sysdeps/aarch64/multiarch/Makefile | 2 +-
|
|
sysdeps/aarch64/multiarch/ifunc-impl-list.c | 4 +-
|
|
sysdeps/aarch64/multiarch/memset.c | 9 ++--
|
|
sysdeps/aarch64/multiarch/memset_falkor.S | 54 ---------------------
|
|
sysdeps/aarch64/multiarch/memset_zva64.S | 27 +++++++++++
|
|
6 files changed, 38 insertions(+), 68 deletions(-)
|
|
delete mode 100644 sysdeps/aarch64/multiarch/memset_falkor.S
|
|
create mode 100644 sysdeps/aarch64/multiarch/memset_zva64.S
|
|
|
|
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
|
|
index bf3cf85c8a..bbfb7184c3 100644
|
|
--- a/sysdeps/aarch64/memset.S
|
|
+++ b/sysdeps/aarch64/memset.S
|
|
@@ -101,19 +101,19 @@ L(tail64):
|
|
ret
|
|
|
|
L(try_zva):
|
|
-#ifdef ZVA_MACRO
|
|
- zva_macro
|
|
-#else
|
|
+#ifndef ZVA64_ONLY
|
|
.p2align 3
|
|
mrs tmp1, dczid_el0
|
|
tbnz tmp1w, 4, L(no_zva)
|
|
and tmp1w, tmp1w, 15
|
|
cmp tmp1w, 4 /* ZVA size is 64 bytes. */
|
|
b.ne L(zva_128)
|
|
-
|
|
+ nop
|
|
+#endif
|
|
/* Write the first and last 64 byte aligned block using stp rather
|
|
than using DC ZVA. This is faster on some cores.
|
|
*/
|
|
+ .p2align 4
|
|
L(zva_64):
|
|
str q0, [dst, 16]
|
|
stp q0, q0, [dst, 32]
|
|
@@ -123,7 +123,6 @@ L(zva_64):
|
|
sub count, dstend, dst /* Count is now 128 too large. */
|
|
sub count, count, 128+64+64 /* Adjust count and bias for loop. */
|
|
add dst, dst, 128
|
|
- nop
|
|
1: dc zva, dst
|
|
add dst, dst, 64
|
|
subs count, count, 64
|
|
@@ -134,6 +133,7 @@ L(zva_64):
|
|
stp q0, q0, [dstend, -32]
|
|
ret
|
|
|
|
+#ifndef ZVA64_ONLY
|
|
.p2align 3
|
|
L(zva_128):
|
|
cmp tmp1w, 5 /* ZVA size is 128 bytes. */
|
|
diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
|
|
index a1a4de3cd9..171ca5e4cf 100644
|
|
--- a/sysdeps/aarch64/multiarch/Makefile
|
|
+++ b/sysdeps/aarch64/multiarch/Makefile
|
|
@@ -12,10 +12,10 @@ sysdep_routines += \
|
|
memmove_mops \
|
|
memset_a64fx \
|
|
memset_emag \
|
|
- memset_falkor \
|
|
memset_generic \
|
|
memset_kunpeng \
|
|
memset_mops \
|
|
+ memset_zva64 \
|
|
strlen_asimd \
|
|
strlen_generic \
|
|
# sysdep_routines
|
|
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
|
|
index 3596d3c8d3..fdd9ea9246 100644
|
|
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
|
|
+++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
|
|
@@ -54,9 +54,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|
IFUNC_IMPL_ADD (array, i, memmove, mops, __memmove_mops)
|
|
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_generic))
|
|
IFUNC_IMPL (i, name, memset,
|
|
- /* Enable this on non-falkor processors too so that other cores
|
|
- can do a comparative analysis with __memset_generic. */
|
|
- IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_falkor)
|
|
+ IFUNC_IMPL_ADD (array, i, memset, (zva_size == 64), __memset_zva64)
|
|
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_emag)
|
|
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng)
|
|
#if HAVE_AARCH64_SVE_ASM
|
|
diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c
|
|
index 9193b197dd..6deb6865e5 100644
|
|
--- a/sysdeps/aarch64/multiarch/memset.c
|
|
+++ b/sysdeps/aarch64/multiarch/memset.c
|
|
@@ -28,7 +28,7 @@
|
|
|
|
extern __typeof (__redirect_memset) __libc_memset;
|
|
|
|
-extern __typeof (__redirect_memset) __memset_falkor attribute_hidden;
|
|
+extern __typeof (__redirect_memset) __memset_zva64 attribute_hidden;
|
|
extern __typeof (__redirect_memset) __memset_emag attribute_hidden;
|
|
extern __typeof (__redirect_memset) __memset_kunpeng attribute_hidden;
|
|
extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden;
|
|
@@ -47,18 +47,17 @@ select_memset_ifunc (void)
|
|
{
|
|
if (IS_A64FX (midr) && zva_size == 256)
|
|
return __memset_a64fx;
|
|
- return __memset_generic;
|
|
}
|
|
|
|
if (IS_KUNPENG920 (midr))
|
|
return __memset_kunpeng;
|
|
|
|
- if ((IS_FALKOR (midr) || IS_PHECDA (midr)) && zva_size == 64)
|
|
- return __memset_falkor;
|
|
-
|
|
if (IS_EMAG (midr))
|
|
return __memset_emag;
|
|
|
|
+ if (zva_size == 64)
|
|
+ return __memset_zva64;
|
|
+
|
|
return __memset_generic;
|
|
}
|
|
|
|
diff --git a/sysdeps/aarch64/multiarch/memset_falkor.S b/sysdeps/aarch64/multiarch/memset_falkor.S
|
|
deleted file mode 100644
|
|
index c6946a8072..0000000000
|
|
--- a/sysdeps/aarch64/multiarch/memset_falkor.S
|
|
+++ /dev/null
|
|
@@ -1,54 +0,0 @@
|
|
-/* Memset for falkor.
|
|
- Copyright (C) 2017-2023 Free Software Foundation, Inc.
|
|
-
|
|
- This file is part of the GNU C Library.
|
|
-
|
|
- The GNU C Library is free software; you can redistribute it and/or
|
|
- modify it under the terms of the GNU Lesser General Public
|
|
- License as published by the Free Software Foundation; either
|
|
- version 2.1 of the License, or (at your option) any later version.
|
|
-
|
|
- The GNU C Library is distributed in the hope that it will be useful,
|
|
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
- Lesser General Public License for more details.
|
|
-
|
|
- You should have received a copy of the GNU Lesser General Public
|
|
- License along with the GNU C Library. If not, see
|
|
- <https://www.gnu.org/licenses/>. */
|
|
-
|
|
-#include <sysdep.h>
|
|
-#include <memset-reg.h>
|
|
-
|
|
-/* Reading dczid_el0 is expensive on falkor so move it into the ifunc
|
|
- resolver and assume ZVA size of 64 bytes. The IFUNC resolver takes care to
|
|
- use this function only when ZVA is enabled. */
|
|
-
|
|
-#if IS_IN (libc)
|
|
-.macro zva_macro
|
|
- .p2align 4
|
|
- /* Write the first and last 64 byte aligned block using stp rather
|
|
- than using DC ZVA. This is faster on some cores. */
|
|
- str q0, [dst, 16]
|
|
- stp q0, q0, [dst, 32]
|
|
- bic dst, dst, 63
|
|
- stp q0, q0, [dst, 64]
|
|
- stp q0, q0, [dst, 96]
|
|
- sub count, dstend, dst /* Count is now 128 too large. */
|
|
- sub count, count, 128+64+64 /* Adjust count and bias for loop. */
|
|
- add dst, dst, 128
|
|
-1: dc zva, dst
|
|
- add dst, dst, 64
|
|
- subs count, count, 64
|
|
- b.hi 1b
|
|
- stp q0, q0, [dst, 0]
|
|
- stp q0, q0, [dst, 32]
|
|
- stp q0, q0, [dstend, -64]
|
|
- stp q0, q0, [dstend, -32]
|
|
- ret
|
|
-.endm
|
|
-
|
|
-# define ZVA_MACRO zva_macro
|
|
-# define MEMSET __memset_falkor
|
|
-# include <sysdeps/aarch64/memset.S>
|
|
-#endif
|
|
diff --git a/sysdeps/aarch64/multiarch/memset_zva64.S b/sysdeps/aarch64/multiarch/memset_zva64.S
|
|
new file mode 100644
|
|
index 0000000000..13f45fd3d8
|
|
--- /dev/null
|
|
+++ b/sysdeps/aarch64/multiarch/memset_zva64.S
|
|
@@ -0,0 +1,27 @@
|
|
+/* Optimized memset for zva size = 64.
|
|
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
|
+
|
|
+ This file is part of the GNU C Library.
|
|
+
|
|
+ The GNU C Library is free software; you can redistribute it and/or
|
|
+ modify it under the terms of the GNU Lesser General Public
|
|
+ License as published by the Free Software Foundation; either
|
|
+ version 2.1 of the License, or (at your option) any later version.
|
|
+
|
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ Lesser General Public License for more details.
|
|
+
|
|
+ You should have received a copy of the GNU Lesser General Public
|
|
+ License along with the GNU C Library. If not, see
|
|
+ <https://www.gnu.org/licenses/>. */
|
|
+
|
|
+#include <sysdep.h>
|
|
+
|
|
+#define ZVA64_ONLY 1
|
|
+#define MEMSET __memset_zva64
|
|
+#undef libc_hidden_builtin_def
|
|
+#define libc_hidden_builtin_def(X)
|
|
+
|
|
+#include "../memset.S"
|
|
--
|
|
2.33.0
|
|
|