!1022 [sync] PR-1021: sync from glibc upstream 2.38 branch
From: @openeuler-sync-bot Reviewed-by: @liqingqing_1229 Signed-off-by: @liqingqing_1229
This commit is contained in:
commit
5538373d14
200
AArch64-Add-SVE-memset.patch
Normal file
200
AArch64-Add-SVE-memset.patch
Normal file
@ -0,0 +1,200 @@
|
|||||||
|
From 52c2b1556f773d9a75d030160e0e273a5ea84502 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
|
||||||
|
Date: Tue, 24 Dec 2024 18:01:59 +0000
|
||||||
|
Subject: [PATCH] AArch64: Add SVE memset
|
||||||
|
|
||||||
|
Add SVE memset based on the generic memset with predicated store for sizes < 16.
|
||||||
|
Unaligned memsets of 128-1024 are improved by ~20% on average by using aligned
|
||||||
|
stores for the last 64 bytes. Performance of random memset benchmark improves
|
||||||
|
by ~2% on Neoverse V1.
|
||||||
|
|
||||||
|
Reviewed-by: Yury Khrustalev <yury.khrustalev@arm.com>
|
||||||
|
(cherry picked from commit 163b1bbb76caba4d9673c07940c5930a1afa7548)
|
||||||
|
---
|
||||||
|
sysdeps/aarch64/multiarch/Makefile | 1 +
|
||||||
|
sysdeps/aarch64/multiarch/ifunc-impl-list.c | 3 +-
|
||||||
|
sysdeps/aarch64/multiarch/memset.c | 4 +
|
||||||
|
sysdeps/aarch64/multiarch/memset_sve_zva64.S | 123 +++++++++++++++++++
|
||||||
|
4 files changed, 130 insertions(+), 1 deletion(-)
|
||||||
|
create mode 100644 sysdeps/aarch64/multiarch/memset_sve_zva64.S
|
||||||
|
|
||||||
|
diff --git a/sysdeps/aarch64/multiarch/Makefile b/sysdeps/aarch64/multiarch/Makefile
|
||||||
|
index e4720b7468..214b6137b0 100644
|
||||||
|
--- a/sysdeps/aarch64/multiarch/Makefile
|
||||||
|
+++ b/sysdeps/aarch64/multiarch/Makefile
|
||||||
|
@@ -14,6 +14,7 @@ sysdep_routines += \
|
||||||
|
memset_generic \
|
||||||
|
memset_kunpeng \
|
||||||
|
memset_mops \
|
||||||
|
+ memset_sve_zva64 \
|
||||||
|
memset_zva64 \
|
||||||
|
strlen_asimd \
|
||||||
|
strlen_generic \
|
||||||
|
diff --git a/sysdeps/aarch64/multiarch/ifunc-impl-list.c b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
|
||||||
|
index 73038ac810..2fa6baa319 100644
|
||||||
|
--- a/sysdeps/aarch64/multiarch/ifunc-impl-list.c
|
||||||
|
+++ b/sysdeps/aarch64/multiarch/ifunc-impl-list.c
|
||||||
|
@@ -56,7 +56,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||||
|
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_emag)
|
||||||
|
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_kunpeng)
|
||||||
|
#if HAVE_AARCH64_SVE_ASM
|
||||||
|
- IFUNC_IMPL_ADD (array, i, memset, sve && zva_size == 256, __memset_a64fx)
|
||||||
|
+ IFUNC_IMPL_ADD (array, i, memset, sve && !bti && zva_size == 256, __memset_a64fx)
|
||||||
|
+ IFUNC_IMPL_ADD (array, i, memset, sve && zva_size == 64, __memset_sve_zva64)
|
||||||
|
#endif
|
||||||
|
IFUNC_IMPL_ADD (array, i, memset, mops, __memset_mops)
|
||||||
|
IFUNC_IMPL_ADD (array, i, memset, 1, __memset_generic))
|
||||||
|
diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c
|
||||||
|
index 6deb6865e5..89fde57f42 100644
|
||||||
|
--- a/sysdeps/aarch64/multiarch/memset.c
|
||||||
|
+++ b/sysdeps/aarch64/multiarch/memset.c
|
||||||
|
@@ -34,6 +34,7 @@ extern __typeof (__redirect_memset) __memset_kunpeng attribute_hidden;
|
||||||
|
extern __typeof (__redirect_memset) __memset_a64fx attribute_hidden;
|
||||||
|
extern __typeof (__redirect_memset) __memset_generic attribute_hidden;
|
||||||
|
extern __typeof (__redirect_memset) __memset_mops attribute_hidden;
|
||||||
|
+extern __typeof (__redirect_memset) __memset_sve_zva64 attribute_hidden;
|
||||||
|
|
||||||
|
static inline __typeof (__redirect_memset) *
|
||||||
|
select_memset_ifunc (void)
|
||||||
|
@@ -47,6 +48,9 @@ select_memset_ifunc (void)
|
||||||
|
{
|
||||||
|
if (IS_A64FX (midr) && zva_size == 256)
|
||||||
|
return __memset_a64fx;
|
||||||
|
+
|
||||||
|
+ if (zva_size == 64)
|
||||||
|
+ return __memset_sve_zva64;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (IS_KUNPENG920 (midr))
|
||||||
|
diff --git a/sysdeps/aarch64/multiarch/memset_sve_zva64.S b/sysdeps/aarch64/multiarch/memset_sve_zva64.S
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..7fb40fdd9e
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/sysdeps/aarch64/multiarch/memset_sve_zva64.S
|
||||||
|
@@ -0,0 +1,123 @@
|
||||||
|
+/* Optimized memset for SVE.
|
||||||
|
+ Copyright (C) 2025 Free Software Foundation, Inc.
|
||||||
|
+
|
||||||
|
+ This file is part of the GNU C Library.
|
||||||
|
+
|
||||||
|
+ The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
+ modify it under the terms of the GNU Lesser General Public
|
||||||
|
+ License as published by the Free Software Foundation; either
|
||||||
|
+ version 2.1 of the License, or (at your option) any later version.
|
||||||
|
+
|
||||||
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
+ Lesser General Public License for more details.
|
||||||
|
+
|
||||||
|
+ You should have received a copy of the GNU Lesser General Public
|
||||||
|
+ License along with the GNU C Library. If not, see
|
||||||
|
+ <https://www.gnu.org/licenses/>. */
|
||||||
|
+
|
||||||
|
+#include <sysdep.h>
|
||||||
|
+
|
||||||
|
+/* Assumptions:
|
||||||
|
+ *
|
||||||
|
+ * ARMv8-a, AArch64, Advanced SIMD, SVE, unaligned accesses.
|
||||||
|
+ * ZVA size is 64.
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#if HAVE_AARCH64_SVE_ASM
|
||||||
|
+
|
||||||
|
+.arch armv8.2-a+sve
|
||||||
|
+
|
||||||
|
+#define dstin x0
|
||||||
|
+#define val x1
|
||||||
|
+#define valw w1
|
||||||
|
+#define count x2
|
||||||
|
+#define dst x3
|
||||||
|
+#define dstend x4
|
||||||
|
+#define zva_val x5
|
||||||
|
+#define vlen x5
|
||||||
|
+#define off x3
|
||||||
|
+#define dstend2 x5
|
||||||
|
+
|
||||||
|
+ENTRY (__memset_sve_zva64)
|
||||||
|
+ dup v0.16B, valw
|
||||||
|
+ cmp count, 16
|
||||||
|
+ b.lo L(set_16)
|
||||||
|
+
|
||||||
|
+ add dstend, dstin, count
|
||||||
|
+ cmp count, 64
|
||||||
|
+ b.hs L(set_128)
|
||||||
|
+
|
||||||
|
+ /* Set 16..63 bytes. */
|
||||||
|
+ mov off, 16
|
||||||
|
+ and off, off, count, lsr 1
|
||||||
|
+ sub dstend2, dstend, off
|
||||||
|
+ str q0, [dstin]
|
||||||
|
+ str q0, [dstin, off]
|
||||||
|
+ str q0, [dstend2, -16]
|
||||||
|
+ str q0, [dstend, -16]
|
||||||
|
+ ret
|
||||||
|
+
|
||||||
|
+ .p2align 4
|
||||||
|
+L(set_16):
|
||||||
|
+ whilelo p0.b, xzr, count
|
||||||
|
+ st1b z0.b, p0, [dstin]
|
||||||
|
+ ret
|
||||||
|
+
|
||||||
|
+ .p2align 4
|
||||||
|
+L(set_128):
|
||||||
|
+ bic dst, dstin, 15
|
||||||
|
+ cmp count, 128
|
||||||
|
+ b.hi L(set_long)
|
||||||
|
+ stp q0, q0, [dstin]
|
||||||
|
+ stp q0, q0, [dstin, 32]
|
||||||
|
+ stp q0, q0, [dstend, -64]
|
||||||
|
+ stp q0, q0, [dstend, -32]
|
||||||
|
+ ret
|
||||||
|
+
|
||||||
|
+ .p2align 4
|
||||||
|
+L(set_long):
|
||||||
|
+ cmp count, 256
|
||||||
|
+ b.lo L(no_zva)
|
||||||
|
+ tst valw, 255
|
||||||
|
+ b.ne L(no_zva)
|
||||||
|
+
|
||||||
|
+ str q0, [dstin]
|
||||||
|
+ str q0, [dst, 16]
|
||||||
|
+ bic dst, dstin, 31
|
||||||
|
+ stp q0, q0, [dst, 32]
|
||||||
|
+ bic dst, dstin, 63
|
||||||
|
+ sub count, dstend, dst /* Count is now 64 too large. */
|
||||||
|
+ sub count, count, 128 /* Adjust count and bias for loop. */
|
||||||
|
+
|
||||||
|
+ sub x8, dstend, 1 /* Write last bytes before ZVA loop. */
|
||||||
|
+ bic x8, x8, 15
|
||||||
|
+ stp q0, q0, [x8, -48]
|
||||||
|
+ str q0, [x8, -16]
|
||||||
|
+ str q0, [dstend, -16]
|
||||||
|
+
|
||||||
|
+ .p2align 4
|
||||||
|
+L(zva64_loop):
|
||||||
|
+ add dst, dst, 64
|
||||||
|
+ dc zva, dst
|
||||||
|
+ subs count, count, 64
|
||||||
|
+ b.hi L(zva64_loop)
|
||||||
|
+ ret
|
||||||
|
+
|
||||||
|
+L(no_zva):
|
||||||
|
+ str q0, [dstin]
|
||||||
|
+ sub count, dstend, dst /* Count is 16 too large. */
|
||||||
|
+ sub count, count, 64 + 16 /* Adjust count and bias for loop. */
|
||||||
|
+L(no_zva_loop):
|
||||||
|
+ stp q0, q0, [dst, 16]
|
||||||
|
+ stp q0, q0, [dst, 48]
|
||||||
|
+ add dst, dst, 64
|
||||||
|
+ subs count, count, 64
|
||||||
|
+ b.hi L(no_zva_loop)
|
||||||
|
+ stp q0, q0, [dstend, -64]
|
||||||
|
+ stp q0, q0, [dstend, -32]
|
||||||
|
+ ret
|
||||||
|
+
|
||||||
|
+END (__memset_sve_zva64)
|
||||||
|
+#endif
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
92
AArch64-Improve-generic-strlen.patch
Normal file
92
AArch64-Improve-generic-strlen.patch
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
From 9ca74b8ad1968d935815bdc2f1f1c7e9f2e32f70 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
|
||||||
|
Date: Wed, 7 Aug 2024 14:43:47 +0100
|
||||||
|
Subject: [PATCH] AArch64: Improve generic strlen
|
||||||
|
|
||||||
|
Improve performance by handling another 16 bytes before entering the loop.
|
||||||
|
Use ADDHN in the loop to avoid SHRN+FMOV when it terminates. Change final
|
||||||
|
size computation to avoid increasing latency. On Neoverse V1 performance
|
||||||
|
of the random strlen benchmark improves by 4.6%.
|
||||||
|
|
||||||
|
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||||
|
(cherry picked from commit 3dc426b642dcafdbc11a99f2767e081d086f5fc7)
|
||||||
|
---
|
||||||
|
sysdeps/aarch64/strlen.S | 39 +++++++++++++++++++++++++++------------
|
||||||
|
1 file changed, 27 insertions(+), 12 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/sysdeps/aarch64/strlen.S b/sysdeps/aarch64/strlen.S
|
||||||
|
index 133ef93342..352fb40d3a 100644
|
||||||
|
--- a/sysdeps/aarch64/strlen.S
|
||||||
|
+++ b/sysdeps/aarch64/strlen.S
|
||||||
|
@@ -1,4 +1,5 @@
|
||||||
|
-/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
|
||||||
|
+/* Generic optimized strlen using SIMD.
|
||||||
|
+ Copyright (C) 2012-2024 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
@@ -56,36 +57,50 @@ ENTRY (STRLEN)
|
||||||
|
shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
|
||||||
|
fmov synd, dend
|
||||||
|
lsr synd, synd, shift
|
||||||
|
- cbz synd, L(loop)
|
||||||
|
+ cbz synd, L(next16)
|
||||||
|
|
||||||
|
rbit synd, synd
|
||||||
|
clz result, synd
|
||||||
|
lsr result, result, 2
|
||||||
|
ret
|
||||||
|
|
||||||
|
+L(next16):
|
||||||
|
+ ldr data, [src, 16]
|
||||||
|
+ cmeq vhas_nul.16b, vdata.16b, 0
|
||||||
|
+ shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
|
||||||
|
+ fmov synd, dend
|
||||||
|
+ cbz synd, L(loop)
|
||||||
|
+ add src, src, 16
|
||||||
|
+#ifndef __AARCH64EB__
|
||||||
|
+ rbit synd, synd
|
||||||
|
+#endif
|
||||||
|
+ sub result, src, srcin
|
||||||
|
+ clz tmp, synd
|
||||||
|
+ add result, result, tmp, lsr 2
|
||||||
|
+ ret
|
||||||
|
+
|
||||||
|
.p2align 5
|
||||||
|
L(loop):
|
||||||
|
- ldr data, [src, 16]
|
||||||
|
+ ldr data, [src, 32]!
|
||||||
|
cmeq vhas_nul.16b, vdata.16b, 0
|
||||||
|
- umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
|
||||||
|
+ addhn vend.8b, vhas_nul.8h, vhas_nul.8h
|
||||||
|
fmov synd, dend
|
||||||
|
cbnz synd, L(loop_end)
|
||||||
|
- ldr data, [src, 32]!
|
||||||
|
+ ldr data, [src, 16]
|
||||||
|
cmeq vhas_nul.16b, vdata.16b, 0
|
||||||
|
- umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
|
||||||
|
+ addhn vend.8b, vhas_nul.8h, vhas_nul.8h
|
||||||
|
fmov synd, dend
|
||||||
|
cbz synd, L(loop)
|
||||||
|
- sub src, src, 16
|
||||||
|
+ add src, src, 16
|
||||||
|
L(loop_end):
|
||||||
|
- shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
|
||||||
|
- sub result, src, srcin
|
||||||
|
- fmov synd, dend
|
||||||
|
+ sub result, shift, src, lsl 2 /* (srcin - src) << 2. */
|
||||||
|
#ifndef __AARCH64EB__
|
||||||
|
rbit synd, synd
|
||||||
|
+ sub result, result, 3
|
||||||
|
#endif
|
||||||
|
- add result, result, 16
|
||||||
|
clz tmp, synd
|
||||||
|
- add result, result, tmp, lsr 2
|
||||||
|
+ sub result, tmp, result
|
||||||
|
+ lsr result, result, 2
|
||||||
|
ret
|
||||||
|
|
||||||
|
END (STRLEN)
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
287
AArch64-Optimize-memset.patch
Normal file
287
AArch64-Optimize-memset.patch
Normal file
@ -0,0 +1,287 @@
|
|||||||
|
From 95aa21432ccbf77225abd485d98df36ba760ff80 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
|
||||||
|
Date: Mon, 9 Sep 2024 15:26:47 +0100
|
||||||
|
Subject: [PATCH] AArch64: Optimize memset
|
||||||
|
|
||||||
|
Improve small memsets by avoiding branches and use overlapping stores.
|
||||||
|
Use DC ZVA for zeroing memsets over 128 bytes.  Remove unnecessary code for ZVA sizes
|
||||||
|
other than 64 and 128. Performance of random memset benchmark improves by 24%
|
||||||
|
on Neoverse N1.
|
||||||
|
|
||||||
|
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||||
|
(cherry picked from commit cec3aef32412779e207f825db0d057ebb4628ae8)
|
||||||
|
---
|
||||||
|
sysdeps/aarch64/memset.S | 195 +++++++++++++++++----------------------
|
||||||
|
1 file changed, 84 insertions(+), 111 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
|
||||||
|
index bbfb7184c3..caafb019e2 100644
|
||||||
|
--- a/sysdeps/aarch64/memset.S
|
||||||
|
+++ b/sysdeps/aarch64/memset.S
|
||||||
|
@@ -1,4 +1,5 @@
|
||||||
|
-/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
|
||||||
|
+/* Generic optimized memset using SIMD.
|
||||||
|
+ Copyright (C) 2012-2024 Free Software Foundation, Inc.
|
||||||
|
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
@@ -17,7 +18,6 @@
|
||||||
|
<https://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
#include <sysdep.h>
|
||||||
|
-#include "memset-reg.h"
|
||||||
|
|
||||||
|
#ifndef MEMSET
|
||||||
|
# define MEMSET memset
|
||||||
|
@@ -25,130 +25,132 @@
|
||||||
|
|
||||||
|
/* Assumptions:
|
||||||
|
*
|
||||||
|
- * ARMv8-a, AArch64, unaligned accesses
|
||||||
|
+ * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
-ENTRY (MEMSET)
|
||||||
|
+#define dstin x0
|
||||||
|
+#define val x1
|
||||||
|
+#define valw w1
|
||||||
|
+#define count x2
|
||||||
|
+#define dst x3
|
||||||
|
+#define dstend x4
|
||||||
|
+#define zva_val x5
|
||||||
|
+#define off x3
|
||||||
|
+#define dstend2 x5
|
||||||
|
|
||||||
|
+ENTRY (MEMSET)
|
||||||
|
PTR_ARG (0)
|
||||||
|
SIZE_ARG (2)
|
||||||
|
|
||||||
|
dup v0.16B, valw
|
||||||
|
+ cmp count, 16
|
||||||
|
+ b.lo L(set_small)
|
||||||
|
+
|
||||||
|
add dstend, dstin, count
|
||||||
|
+ cmp count, 64
|
||||||
|
+ b.hs L(set_128)
|
||||||
|
|
||||||
|
- cmp count, 96
|
||||||
|
- b.hi L(set_long)
|
||||||
|
- cmp count, 16
|
||||||
|
- b.hs L(set_medium)
|
||||||
|
- mov val, v0.D[0]
|
||||||
|
+ /* Set 16..63 bytes. */
|
||||||
|
+ mov off, 16
|
||||||
|
+ and off, off, count, lsr 1
|
||||||
|
+ sub dstend2, dstend, off
|
||||||
|
+ str q0, [dstin]
|
||||||
|
+ str q0, [dstin, off]
|
||||||
|
+ str q0, [dstend2, -16]
|
||||||
|
+ str q0, [dstend, -16]
|
||||||
|
+ ret
|
||||||
|
|
||||||
|
+ .p2align 4
|
||||||
|
/* Set 0..15 bytes. */
|
||||||
|
- tbz count, 3, 1f
|
||||||
|
- str val, [dstin]
|
||||||
|
- str val, [dstend, -8]
|
||||||
|
- ret
|
||||||
|
- nop
|
||||||
|
-1: tbz count, 2, 2f
|
||||||
|
- str valw, [dstin]
|
||||||
|
- str valw, [dstend, -4]
|
||||||
|
+L(set_small):
|
||||||
|
+ add dstend, dstin, count
|
||||||
|
+ cmp count, 4
|
||||||
|
+ b.lo 2f
|
||||||
|
+ lsr off, count, 3
|
||||||
|
+ sub dstend2, dstend, off, lsl 2
|
||||||
|
+ str s0, [dstin]
|
||||||
|
+ str s0, [dstin, off, lsl 2]
|
||||||
|
+ str s0, [dstend2, -4]
|
||||||
|
+ str s0, [dstend, -4]
|
||||||
|
ret
|
||||||
|
+
|
||||||
|
+ /* Set 0..3 bytes. */
|
||||||
|
2: cbz count, 3f
|
||||||
|
+ lsr off, count, 1
|
||||||
|
strb valw, [dstin]
|
||||||
|
- tbz count, 1, 3f
|
||||||
|
- strh valw, [dstend, -2]
|
||||||
|
+ strb valw, [dstin, off]
|
||||||
|
+ strb valw, [dstend, -1]
|
||||||
|
3: ret
|
||||||
|
|
||||||
|
- /* Set 17..96 bytes. */
|
||||||
|
-L(set_medium):
|
||||||
|
- str q0, [dstin]
|
||||||
|
- tbnz count, 6, L(set96)
|
||||||
|
- str q0, [dstend, -16]
|
||||||
|
- tbz count, 5, 1f
|
||||||
|
- str q0, [dstin, 16]
|
||||||
|
- str q0, [dstend, -32]
|
||||||
|
-1: ret
|
||||||
|
-
|
||||||
|
.p2align 4
|
||||||
|
- /* Set 64..96 bytes. Write 64 bytes from the start and
|
||||||
|
- 32 bytes from the end. */
|
||||||
|
-L(set96):
|
||||||
|
- str q0, [dstin, 16]
|
||||||
|
+L(set_128):
|
||||||
|
+ bic dst, dstin, 15
|
||||||
|
+ cmp count, 128
|
||||||
|
+ b.hi L(set_long)
|
||||||
|
+ stp q0, q0, [dstin]
|
||||||
|
stp q0, q0, [dstin, 32]
|
||||||
|
+ stp q0, q0, [dstend, -64]
|
||||||
|
stp q0, q0, [dstend, -32]
|
||||||
|
ret
|
||||||
|
|
||||||
|
- .p2align 3
|
||||||
|
- nop
|
||||||
|
+ .p2align 4
|
||||||
|
L(set_long):
|
||||||
|
- and valw, valw, 255
|
||||||
|
- bic dst, dstin, 15
|
||||||
|
str q0, [dstin]
|
||||||
|
- cmp count, 256
|
||||||
|
- ccmp valw, 0, 0, cs
|
||||||
|
- b.eq L(try_zva)
|
||||||
|
-L(no_zva):
|
||||||
|
- sub count, dstend, dst /* Count is 16 too large. */
|
||||||
|
- sub dst, dst, 16 /* Dst is biased by -32. */
|
||||||
|
- sub count, count, 64 + 16 /* Adjust count and bias for loop. */
|
||||||
|
-1: stp q0, q0, [dst, 32]
|
||||||
|
- stp q0, q0, [dst, 64]!
|
||||||
|
-L(tail64):
|
||||||
|
- subs count, count, 64
|
||||||
|
- b.hi 1b
|
||||||
|
-2: stp q0, q0, [dstend, -64]
|
||||||
|
+ str q0, [dst, 16]
|
||||||
|
+ tst valw, 255
|
||||||
|
+ b.ne L(no_zva)
|
||||||
|
+#ifndef ZVA64_ONLY
|
||||||
|
+ mrs zva_val, dczid_el0
|
||||||
|
+ and zva_val, zva_val, 31
|
||||||
|
+ cmp zva_val, 4 /* ZVA size is 64 bytes. */
|
||||||
|
+ b.ne L(zva_128)
|
||||||
|
+#endif
|
||||||
|
+ stp q0, q0, [dst, 32]
|
||||||
|
+ bic dst, dstin, 63
|
||||||
|
+ sub count, dstend, dst /* Count is now 64 too large. */
|
||||||
|
+ sub count, count, 64 + 64 /* Adjust count and bias for loop. */
|
||||||
|
+
|
||||||
|
+ /* Write last bytes before ZVA loop. */
|
||||||
|
+ stp q0, q0, [dstend, -64]
|
||||||
|
stp q0, q0, [dstend, -32]
|
||||||
|
+
|
||||||
|
+ .p2align 4
|
||||||
|
+L(zva64_loop):
|
||||||
|
+ add dst, dst, 64
|
||||||
|
+ dc zva, dst
|
||||||
|
+ subs count, count, 64
|
||||||
|
+ b.hi L(zva64_loop)
|
||||||
|
ret
|
||||||
|
|
||||||
|
-L(try_zva):
|
||||||
|
-#ifndef ZVA64_ONLY
|
||||||
|
.p2align 3
|
||||||
|
- mrs tmp1, dczid_el0
|
||||||
|
- tbnz tmp1w, 4, L(no_zva)
|
||||||
|
- and tmp1w, tmp1w, 15
|
||||||
|
- cmp tmp1w, 4 /* ZVA size is 64 bytes. */
|
||||||
|
- b.ne L(zva_128)
|
||||||
|
- nop
|
||||||
|
-#endif
|
||||||
|
- /* Write the first and last 64 byte aligned block using stp rather
|
||||||
|
- than using DC ZVA. This is faster on some cores.
|
||||||
|
- */
|
||||||
|
- .p2align 4
|
||||||
|
-L(zva_64):
|
||||||
|
- str q0, [dst, 16]
|
||||||
|
+L(no_zva):
|
||||||
|
+ sub count, dstend, dst /* Count is 32 too large. */
|
||||||
|
+ sub count, count, 64 + 32 /* Adjust count and bias for loop. */
|
||||||
|
+L(no_zva_loop):
|
||||||
|
stp q0, q0, [dst, 32]
|
||||||
|
- bic dst, dst, 63
|
||||||
|
stp q0, q0, [dst, 64]
|
||||||
|
- stp q0, q0, [dst, 96]
|
||||||
|
- sub count, dstend, dst /* Count is now 128 too large. */
|
||||||
|
- sub count, count, 128+64+64 /* Adjust count and bias for loop. */
|
||||||
|
- add dst, dst, 128
|
||||||
|
-1: dc zva, dst
|
||||||
|
add dst, dst, 64
|
||||||
|
subs count, count, 64
|
||||||
|
- b.hi 1b
|
||||||
|
- stp q0, q0, [dst, 0]
|
||||||
|
- stp q0, q0, [dst, 32]
|
||||||
|
+ b.hi L(no_zva_loop)
|
||||||
|
stp q0, q0, [dstend, -64]
|
||||||
|
stp q0, q0, [dstend, -32]
|
||||||
|
ret
|
||||||
|
|
||||||
|
#ifndef ZVA64_ONLY
|
||||||
|
- .p2align 3
|
||||||
|
+ .p2align 4
|
||||||
|
L(zva_128):
|
||||||
|
- cmp tmp1w, 5 /* ZVA size is 128 bytes. */
|
||||||
|
- b.ne L(zva_other)
|
||||||
|
+ cmp zva_val, 5 /* ZVA size is 128 bytes. */
|
||||||
|
+ b.ne L(no_zva)
|
||||||
|
|
||||||
|
- str q0, [dst, 16]
|
||||||
|
stp q0, q0, [dst, 32]
|
||||||
|
stp q0, q0, [dst, 64]
|
||||||
|
stp q0, q0, [dst, 96]
|
||||||
|
bic dst, dst, 127
|
||||||
|
sub count, dstend, dst /* Count is now 128 too large. */
|
||||||
|
- sub count, count, 128+128 /* Adjust count and bias for loop. */
|
||||||
|
- add dst, dst, 128
|
||||||
|
-1: dc zva, dst
|
||||||
|
- add dst, dst, 128
|
||||||
|
+ sub count, count, 128 + 128 /* Adjust count and bias for loop. */
|
||||||
|
+1: add dst, dst, 128
|
||||||
|
+ dc zva, dst
|
||||||
|
subs count, count, 128
|
||||||
|
b.hi 1b
|
||||||
|
stp q0, q0, [dstend, -128]
|
||||||
|
@@ -156,35 +158,6 @@ L(zva_128):
|
||||||
|
stp q0, q0, [dstend, -64]
|
||||||
|
stp q0, q0, [dstend, -32]
|
||||||
|
ret
|
||||||
|
-
|
||||||
|
-L(zva_other):
|
||||||
|
- mov tmp2w, 4
|
||||||
|
- lsl zva_lenw, tmp2w, tmp1w
|
||||||
|
- add tmp1, zva_len, 64 /* Max alignment bytes written. */
|
||||||
|
- cmp count, tmp1
|
||||||
|
- blo L(no_zva)
|
||||||
|
-
|
||||||
|
- sub tmp2, zva_len, 1
|
||||||
|
- add tmp1, dst, zva_len
|
||||||
|
- add dst, dst, 16
|
||||||
|
- subs count, tmp1, dst /* Actual alignment bytes to write. */
|
||||||
|
- bic tmp1, tmp1, tmp2 /* Aligned dc zva start address. */
|
||||||
|
- beq 2f
|
||||||
|
-1: stp q0, q0, [dst], 64
|
||||||
|
- stp q0, q0, [dst, -32]
|
||||||
|
- subs count, count, 64
|
||||||
|
- b.hi 1b
|
||||||
|
-2: mov dst, tmp1
|
||||||
|
- sub count, dstend, tmp1 /* Remaining bytes to write. */
|
||||||
|
- subs count, count, zva_len
|
||||||
|
- b.lo 4f
|
||||||
|
-3: dc zva, dst
|
||||||
|
- add dst, dst, zva_len
|
||||||
|
- subs count, count, zva_len
|
||||||
|
- b.hs 3b
|
||||||
|
-4: add count, count, zva_len
|
||||||
|
- sub dst, dst, 32 /* Bias dst for tail loop. */
|
||||||
|
- b L(tail64)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
END (MEMSET)
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
65
AArch64-Remove-zva_128-from-memset.patch
Normal file
65
AArch64-Remove-zva_128-from-memset.patch
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
From 5fe151d86a19bc3dc791fd2d92efeb6c6e11cf64 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
|
||||||
|
Date: Mon, 25 Nov 2024 18:43:08 +0000
|
||||||
|
Subject: [PATCH] AArch64: Remove zva_128 from memset
|
||||||
|
|
||||||
|
Remove ZVA 128 support from memset - the new memset no longer
|
||||||
|
guarantees count >= 256, which can result in underflow and a
|
||||||
|
crash if ZVA size is 128 ([1]). Since only one CPU uses a ZVA
|
||||||
|
size of 128 and its memcpy implementation was removed in commit
|
||||||
|
e162ab2bf1b82c40f29e1925986582fa07568ce8, remove this special
|
||||||
|
case too.
|
||||||
|
|
||||||
|
[1] https://sourceware.org/pipermail/libc-alpha/2024-November/161626.html
|
||||||
|
|
||||||
|
Reviewed-by: Andrew Pinski <quic_apinski@quicinc.com>
|
||||||
|
(cherry picked from commit a08d9a52f967531a77e1824c23b5368c6434a72d)
|
||||||
|
---
|
||||||
|
sysdeps/aarch64/memset.S | 25 +------------------------
|
||||||
|
1 file changed, 1 insertion(+), 24 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/sysdeps/aarch64/memset.S b/sysdeps/aarch64/memset.S
|
||||||
|
index caafb019e2..71814d0b2f 100644
|
||||||
|
--- a/sysdeps/aarch64/memset.S
|
||||||
|
+++ b/sysdeps/aarch64/memset.S
|
||||||
|
@@ -104,7 +104,7 @@ L(set_long):
|
||||||
|
mrs zva_val, dczid_el0
|
||||||
|
and zva_val, zva_val, 31
|
||||||
|
cmp zva_val, 4 /* ZVA size is 64 bytes. */
|
||||||
|
- b.ne L(zva_128)
|
||||||
|
+ b.ne L(no_zva)
|
||||||
|
#endif
|
||||||
|
stp q0, q0, [dst, 32]
|
||||||
|
bic dst, dstin, 63
|
||||||
|
@@ -137,28 +137,5 @@ L(no_zva_loop):
|
||||||
|
stp q0, q0, [dstend, -32]
|
||||||
|
ret
|
||||||
|
|
||||||
|
-#ifndef ZVA64_ONLY
|
||||||
|
- .p2align 4
|
||||||
|
-L(zva_128):
|
||||||
|
- cmp zva_val, 5 /* ZVA size is 128 bytes. */
|
||||||
|
- b.ne L(no_zva)
|
||||||
|
-
|
||||||
|
- stp q0, q0, [dst, 32]
|
||||||
|
- stp q0, q0, [dst, 64]
|
||||||
|
- stp q0, q0, [dst, 96]
|
||||||
|
- bic dst, dst, 127
|
||||||
|
- sub count, dstend, dst /* Count is now 128 too large. */
|
||||||
|
- sub count, count, 128 + 128 /* Adjust count and bias for loop. */
|
||||||
|
-1: add dst, dst, 128
|
||||||
|
- dc zva, dst
|
||||||
|
- subs count, count, 128
|
||||||
|
- b.hi 1b
|
||||||
|
- stp q0, q0, [dstend, -128]
|
||||||
|
- stp q0, q0, [dstend, -96]
|
||||||
|
- stp q0, q0, [dstend, -64]
|
||||||
|
- stp q0, q0, [dstend, -32]
|
||||||
|
- ret
|
||||||
|
-#endif
|
||||||
|
-
|
||||||
|
END (MEMSET)
|
||||||
|
libc_hidden_builtin_def (MEMSET)
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
29
AArch64-Use-prefer_sve_ifuncs-for-SVE-memset.patch
Normal file
29
AArch64-Use-prefer_sve_ifuncs-for-SVE-memset.patch
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
From 097299ffa904b327fce83770fa6a522e4393ddb3 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
|
||||||
|
Date: Thu, 27 Feb 2025 16:28:52 +0000
|
||||||
|
Subject: [PATCH] AArch64: Use prefer_sve_ifuncs for SVE memset
|
||||||
|
|
||||||
|
Use prefer_sve_ifuncs for SVE memset just like memcpy.
|
||||||
|
|
||||||
|
Reviewed-by: Yury Khrustalev <yury.khrustalev@arm.com>
|
||||||
|
(cherry picked from commit 0f044be1dae5169d0e57f8d487b427863aeadab4)
|
||||||
|
---
|
||||||
|
sysdeps/aarch64/multiarch/memset.c | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/sysdeps/aarch64/multiarch/memset.c b/sysdeps/aarch64/multiarch/memset.c
|
||||||
|
index 89fde57f42..ce5d35a20e 100644
|
||||||
|
--- a/sysdeps/aarch64/multiarch/memset.c
|
||||||
|
+++ b/sysdeps/aarch64/multiarch/memset.c
|
||||||
|
@@ -49,7 +49,7 @@ select_memset_ifunc (void)
|
||||||
|
if (IS_A64FX (midr) && zva_size == 256)
|
||||||
|
return __memset_a64fx;
|
||||||
|
|
||||||
|
- if (zva_size == 64)
|
||||||
|
+ if (prefer_sve_ifuncs && zva_size == 64)
|
||||||
|
return __memset_sve_zva64;
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
132
assert-Add-test-for-CVE-2025-0395.patch
Normal file
132
assert-Add-test-for-CVE-2025-0395.patch
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
From f984e2d7e8299726891a1a497a3c36cd5542a0bf Mon Sep 17 00:00:00 2001
|
||||||
|
From: Siddhesh Poyarekar <siddhesh@sourceware.org>
|
||||||
|
Date: Fri, 31 Jan 2025 12:16:30 -0500
|
||||||
|
Subject: [PATCH] assert: Add test for CVE-2025-0395
|
||||||
|
|
||||||
|
Use the __progname symbol to override the program name to induce the
|
||||||
|
failure that CVE-2025-0395 describes.
|
||||||
|
|
||||||
|
This is related to BZ #32582
|
||||||
|
|
||||||
|
Signed-off-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
|
||||||
|
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||||
|
(cherry picked from commit cdb9ba84191ce72e86346fb8b1d906e7cd930ea2)
|
||||||
|
---
|
||||||
|
assert/Makefile | 1 +
|
||||||
|
assert/tst-assert-sa-2025-0001.c | 92 ++++++++++++++++++++++++++++++++
|
||||||
|
2 files changed, 93 insertions(+)
|
||||||
|
create mode 100644 assert/tst-assert-sa-2025-0001.c
|
||||||
|
|
||||||
|
diff --git a/assert/Makefile b/assert/Makefile
|
||||||
|
index 67f4e6a570..b0fc9fc4d2 100644
|
||||||
|
--- a/assert/Makefile
|
||||||
|
+++ b/assert/Makefile
|
||||||
|
@@ -38,6 +38,7 @@ tests := \
|
||||||
|
test-assert-perr \
|
||||||
|
tst-assert-c++ \
|
||||||
|
tst-assert-g++ \
|
||||||
|
+ tst-assert-sa-2025-0001 \
|
||||||
|
# tests
|
||||||
|
|
||||||
|
ifeq ($(have-cxx-thread_local),yes)
|
||||||
|
diff --git a/assert/tst-assert-sa-2025-0001.c b/assert/tst-assert-sa-2025-0001.c
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000000..102cb0078d
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/assert/tst-assert-sa-2025-0001.c
|
||||||
|
@@ -0,0 +1,92 @@
|
||||||
|
+/* Test for CVE-2025-0395.
|
||||||
|
+ Copyright The GNU Toolchain Authors.
|
||||||
|
+ This file is part of the GNU C Library.
|
||||||
|
+
|
||||||
|
+ The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
+ modify it under the terms of the GNU Lesser General Public
|
||||||
|
+ License as published by the Free Software Foundation; either
|
||||||
|
+ version 2.1 of the License, or (at your option) any later version.
|
||||||
|
+
|
||||||
|
+ The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
+ Lesser General Public License for more details.
|
||||||
|
+
|
||||||
|
+ You should have received a copy of the GNU Lesser General Public
|
||||||
|
+ License along with the GNU C Library; if not, see
|
||||||
|
+ <https://www.gnu.org/licenses/>. */
|
||||||
|
+
|
||||||
|
+/* Test that a large enough __progname does not result in a buffer overflow
|
||||||
|
+ when printing an assertion failure. This was CVE-2025-0395. */
|
||||||
|
+#include <assert.h>
|
||||||
|
+#include <inttypes.h>
|
||||||
|
+#include <signal.h>
|
||||||
|
+#include <stdbool.h>
|
||||||
|
+#include <string.h>
|
||||||
|
+#include <sys/mman.h>
|
||||||
|
+#include <support/check.h>
|
||||||
|
+#include <support/support.h>
|
||||||
|
+#include <support/xstdio.h>
|
||||||
|
+#include <support/xunistd.h>
|
||||||
|
+
|
||||||
|
+extern const char *__progname;
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+do_test (int argc, char **argv)
|
||||||
|
+{
|
||||||
|
+
|
||||||
|
+ support_need_proc ("Reads /proc/self/maps to add guards to writable maps.");
|
||||||
|
+ ignore_stderr ();
|
||||||
|
+
|
||||||
|
+ /* XXX assumes that the assert is on a 2 digit line number. */
|
||||||
|
+ const char *prompt = ": %s:99: do_test: Assertion `argc < 1' failed.\n";
|
||||||
|
+
|
||||||
|
+ int ret = fprintf (stderr, prompt, __FILE__);
|
||||||
|
+ if (ret < 0)
|
||||||
|
+ FAIL_EXIT1 ("fprintf failed: %m\n");
|
||||||
|
+
|
||||||
|
+ size_t pagesize = getpagesize ();
|
||||||
|
+ size_t namesize = pagesize - 1 - ret;
|
||||||
|
+
|
||||||
|
+ /* Alter the progname so that the assert message fills the entire page. */
|
||||||
|
+ char progname[namesize];
|
||||||
|
+ memset (progname, 'A', namesize - 1);
|
||||||
|
+ progname[namesize - 1] = '\0';
|
||||||
|
+ __progname = progname;
|
||||||
|
+
|
||||||
|
+ FILE *f = xfopen ("/proc/self/maps", "r");
|
||||||
|
+ char *line = NULL;
|
||||||
|
+ size_t len = 0;
|
||||||
|
+ uintptr_t prev_to = 0;
|
||||||
|
+
|
||||||
|
+ /* Pad the beginning of every writable mapping with a PROT_NONE map. This
|
||||||
|
+ ensures that the mmap in the assert_fail path never ends up below a
|
||||||
|
+ writable map and will terminate immediately in case of a buffer
|
||||||
|
+ overflow. */
|
||||||
|
+ while (xgetline (&line, &len, f))
|
||||||
|
+ {
|
||||||
|
+ uintptr_t from, to;
|
||||||
|
+ char perm[4];
|
||||||
|
+
|
||||||
|
+ sscanf (line, "%" SCNxPTR "-%" SCNxPTR " %c%c%c%c ",
|
||||||
|
+ &from, &to,
|
||||||
|
+ &perm[0], &perm[1], &perm[2], &perm[3]);
|
||||||
|
+
|
||||||
|
+ bool writable = (memchr (perm, 'w', 4) != NULL);
|
||||||
|
+
|
||||||
|
+ if (prev_to != 0 && from - prev_to > pagesize && writable)
|
||||||
|
+ xmmap ((void *) from - pagesize, pagesize, PROT_NONE,
|
||||||
|
+ MAP_ANONYMOUS | MAP_PRIVATE, 0);
|
||||||
|
+
|
||||||
|
+ prev_to = to;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ xfclose (f);
|
||||||
|
+
|
||||||
|
+ assert (argc < 1);
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+#define EXPECTED_SIGNAL SIGABRT
|
||||||
|
+#define TEST_FUNCTION_ARGV do_test
|
||||||
|
+#include <support/test-driver.c>
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
20
glibc.spec
20
glibc.spec
@ -67,7 +67,7 @@
|
|||||||
##############################################################################
|
##############################################################################
|
||||||
Name: glibc
|
Name: glibc
|
||||||
Version: 2.38
|
Version: 2.38
|
||||||
Release: 56
|
Release: 57
|
||||||
Summary: The GNU libc libraries
|
Summary: The GNU libc libraries
|
||||||
License: %{all_license}
|
License: %{all_license}
|
||||||
URL: http://www.gnu.org/software/glibc/
|
URL: http://www.gnu.org/software/glibc/
|
||||||
@ -278,6 +278,14 @@ Patch188: backport-x86-Disable-non-temporal-memset-on-Skylake-Server.patch
|
|||||||
Patch189: backport-Use-Avoid_Non_Temporal_Memset-to-control-non-tem.patch
|
Patch189: backport-Use-Avoid_Non_Temporal_Memset-to-control-non-tem.patch
|
||||||
Patch190: backport-Add-Avoid_STOSB-tunable-to-allow-NT-memset-witho.patch
|
Patch190: backport-Add-Avoid_STOSB-tunable-to-allow-NT-memset-witho.patch
|
||||||
Patch191: backport-x86-Enable-non-temporal-memset-for-Hygon-processors.patch
|
Patch191: backport-x86-Enable-non-temporal-memset-for-Hygon-processors.patch
|
||||||
|
Patch192: assert-Add-test-for-CVE-2025-0395.patch
|
||||||
|
Patch193: AArch64-Improve-generic-strlen.patch
|
||||||
|
Patch194: AArch64-Optimize-memset.patch
|
||||||
|
Patch195: AArch64-Remove-zva_128-from-memset.patch
|
||||||
|
Patch196: math-Improve-layout-of-expf-data.patch
|
||||||
|
Patch197: AArch64-Add-SVE-memset.patch
|
||||||
|
Patch198: AArch64-Use-prefer_sve_ifuncs-for-SVE-memset.patch
|
||||||
|
Patch199: math-Improve-layout-of-exp-exp10-data.patch
|
||||||
|
|
||||||
#openEuler patch list
|
#openEuler patch list
|
||||||
Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch
|
Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch
|
||||||
@ -1502,6 +1510,16 @@ fi
|
|||||||
%endif
|
%endif
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Fri Mar 28 2025 Qingqing Li <liqingqing3@huawei.com> - 2.38-57
|
||||||
|
- math: Improve layout of exp/exp10 data
|
||||||
|
- AArch64: Use prefer_sve_ifuncs for SVE memset
|
||||||
|
- AArch64: Add SVE memset
|
||||||
|
- math: Improve layout of expf data
|
||||||
|
- AArch64: Remove zva_128 from memset
|
||||||
|
- AArch64: Optimize memset
|
||||||
|
- AArch64: Improve generic strlen
|
||||||
|
- assert: Add test for CVE-2025-0395
|
||||||
|
|
||||||
* Wed Mar 12 2025 xiajimei <xiejiamei@hygon.cn> - 2.38-56
|
* Wed Mar 12 2025 xiajimei <xiejiamei@hygon.cn> - 2.38-56
|
||||||
- x86: Enable non-temporal memset for Hygon processors
|
- x86: Enable non-temporal memset for Hygon processors
|
||||||
- x86: Add `Avoid_STOSB` tunable to allow NT memset without ERMS
|
- x86: Add `Avoid_STOSB` tunable to allow NT memset without ERMS
|
||||||
|
|||||||
39
math-Improve-layout-of-exp-exp10-data.patch
Normal file
39
math-Improve-layout-of-exp-exp10-data.patch
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
From 5a08d049dc5037e89eb95bb1506652f0043fa39e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
|
||||||
|
Date: Fri, 13 Dec 2024 15:43:07 +0000
|
||||||
|
Subject: [PATCH] math: Improve layout of exp/exp10 data
|
||||||
|
|
||||||
|
GCC aligns global data to 16 bytes if their size is >= 16 bytes. This patch
|
||||||
|
changes the exp_data struct slightly so that the fields are better aligned
|
||||||
|
and without gaps. As a result on targets that support them, more load-pair
|
||||||
|
instructions are used in exp.
|
||||||
|
|
||||||
|
The exp benchmark improves 2.5%, "144bits" by 7.2%, "768bits" by 12.7% on
|
||||||
|
Neoverse V2.
|
||||||
|
|
||||||
|
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||||
|
(cherry picked from commit 5afaf99edb326fd9f36eb306a828d129a3a1d7f7)
|
||||||
|
---
|
||||||
|
sysdeps/ieee754/dbl-64/math_config.h | 3 ++-
|
||||||
|
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/sysdeps/ieee754/dbl-64/math_config.h b/sysdeps/ieee754/dbl-64/math_config.h
|
||||||
|
index 19af33fd86..52b720ecd1 100644
|
||||||
|
--- a/sysdeps/ieee754/dbl-64/math_config.h
|
||||||
|
+++ b/sysdeps/ieee754/dbl-64/math_config.h
|
||||||
|
@@ -195,10 +195,11 @@ check_uflow (double x)
|
||||||
|
extern const struct exp_data
|
||||||
|
{
|
||||||
|
double invln2N;
|
||||||
|
- double shift;
|
||||||
|
double negln2hiN;
|
||||||
|
double negln2loN;
|
||||||
|
double poly[4]; /* Last four coefficients. */
|
||||||
|
+ double shift;
|
||||||
|
+
|
||||||
|
double exp2_shift;
|
||||||
|
double exp2_poly[EXP2_POLY_ORDER];
|
||||||
|
uint64_t tab[2*(1 << EXP_TABLE_BITS)];
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
34
math-Improve-layout-of-expf-data.patch
Normal file
34
math-Improve-layout-of-expf-data.patch
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
From 3de5112326a4274c97f154f3d335c11965ee960c Mon Sep 17 00:00:00 2001
|
||||||
|
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
|
||||||
|
Date: Wed, 24 Jul 2024 15:17:47 +0100
|
||||||
|
Subject: [PATCH] math: Improve layout of expf data
|
||||||
|
|
||||||
|
GCC aligns global data to 16 bytes if their size is >= 16 bytes. This patch
|
||||||
|
changes the exp2f_data struct slightly so that the fields are better aligned.
|
||||||
|
As a result on targets that support them, load-pair instructions accessing
|
||||||
|
poly_scaled and invln2_scaled are now 16-byte aligned.
|
||||||
|
|
||||||
|
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
||||||
|
(cherry picked from commit 44fa9c1080fe6a9539f0d2345b9d2ae37b8ee57a)
|
||||||
|
---
|
||||||
|
sysdeps/ieee754/flt-32/math_config.h | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/sysdeps/ieee754/flt-32/math_config.h b/sysdeps/ieee754/flt-32/math_config.h
|
||||||
|
index d1b06a1a90..5904eb9bac 100644
|
||||||
|
--- a/sysdeps/ieee754/flt-32/math_config.h
|
||||||
|
+++ b/sysdeps/ieee754/flt-32/math_config.h
|
||||||
|
@@ -166,9 +166,9 @@ extern const struct exp2f_data
|
||||||
|
uint64_t tab[1 << EXP2F_TABLE_BITS];
|
||||||
|
double shift_scaled;
|
||||||
|
double poly[EXP2F_POLY_ORDER];
|
||||||
|
- double shift;
|
||||||
|
double invln2_scaled;
|
||||||
|
double poly_scaled[EXP2F_POLY_ORDER];
|
||||||
|
+ double shift;
|
||||||
|
} __exp2f_data attribute_hidden;
|
||||||
|
|
||||||
|
#define LOGF_TABLE_BITS 4
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user