diff --git a/0001-x86-Fix-for-cache-computation-on-AMD-legacy-cpus.patch b/0001-x86-Fix-for-cache-computation-on-AMD-legacy-cpus.patch new file mode 100644 index 0000000..cb4f03e --- /dev/null +++ b/0001-x86-Fix-for-cache-computation-on-AMD-legacy-cpus.patch @@ -0,0 +1,286 @@ +From ced101ed9d3b7cfd12d97ef24940cb00b8658c81 Mon Sep 17 00:00:00 2001 +From: Sajan Karumanchi +Date: Tue, 1 Aug 2023 15:20:55 +0000 +Subject: [PATCH 01/12] x86: Fix for cache computation on AMD legacy cpus. + +Some legacy AMD CPUs and hypervisors have the _cpuid_ '0x8000_001D' +set to Zero, thus resulting in zeroed-out computed cache values. +This patch reintroduces the old way of cache computation as a +fail-safe option to handle these exceptions. +Fixed 'level4_cache_size' value through handle_amd(). + +Reviewed-by: Premachandra Mallappa +Tested-by: Florian Weimer +--- + sysdeps/x86/dl-cacheinfo.h | 226 ++++++++++++++++++++++++++++++++----- + 1 file changed, 199 insertions(+), 27 deletions(-) + +diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h +index cd4d0351ae..285773039f 100644 +--- a/sysdeps/x86/dl-cacheinfo.h ++++ b/sysdeps/x86/dl-cacheinfo.h +@@ -315,40 +315,206 @@ handle_amd (int name) + { + unsigned int eax; + unsigned int ebx; +- unsigned int ecx; ++ unsigned int ecx = 0; + unsigned int edx; +- unsigned int count = 0x1; ++ unsigned int max_cpuid = 0; ++ unsigned int fn = 0; + + /* No level 4 cache (yet). */ + if (name > _SC_LEVEL3_CACHE_LINESIZE) + return 0; + +- if (name >= _SC_LEVEL3_CACHE_SIZE) +- count = 0x3; +- else if (name >= _SC_LEVEL2_CACHE_SIZE) +- count = 0x2; +- else if (name >= _SC_LEVEL1_DCACHE_SIZE) +- count = 0x0; ++ __cpuid (0x80000000, max_cpuid, ebx, ecx, edx); ++ ++ if (max_cpuid >= 0x8000001D) ++ /* Use __cpuid__ '0x8000_001D' to compute cache details. */ ++ { ++ unsigned int count = 0x1; ++ ++ if (name >= _SC_LEVEL3_CACHE_SIZE) ++ count = 0x3; ++ else if (name >= _SC_LEVEL2_CACHE_SIZE) ++ count = 0x2; ++ else if (name >= _SC_LEVEL1_DCACHE_SIZE) ++ count = 0x0; ++ ++ __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx); ++ ++ if (ecx != 0) ++ { ++ switch (name) ++ { ++ case _SC_LEVEL1_ICACHE_ASSOC: ++ case _SC_LEVEL1_DCACHE_ASSOC: ++ case _SC_LEVEL2_CACHE_ASSOC: ++ case _SC_LEVEL3_CACHE_ASSOC: ++ return ((ebx >> 22) & 0x3ff) + 1; ++ case _SC_LEVEL1_ICACHE_LINESIZE: ++ case _SC_LEVEL1_DCACHE_LINESIZE: ++ case _SC_LEVEL2_CACHE_LINESIZE: ++ case _SC_LEVEL3_CACHE_LINESIZE: ++ return (ebx & 0xfff) + 1; ++ case _SC_LEVEL1_ICACHE_SIZE: ++ case _SC_LEVEL1_DCACHE_SIZE: ++ case _SC_LEVEL2_CACHE_SIZE: ++ case _SC_LEVEL3_CACHE_SIZE: ++ return (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1); ++ default: ++ __builtin_unreachable (); ++ } ++ return -1; ++ } ++ } ++ ++ /* Legacy cache computation for CPUs prior to Bulldozer family. ++ This is also a fail-safe mechanism for some hypervisors that ++ accidentally configure __cpuid__ '0x8000_001D' to Zero. */ + +- __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx); ++ fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE); ++ ++ if (max_cpuid < fn) ++ return 0; ++ ++ __cpuid (fn, eax, ebx, ecx, edx); ++ ++ if (name < _SC_LEVEL1_DCACHE_SIZE) ++ { ++ name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE; ++ ecx = edx; ++ } + + switch (name) + { +- case _SC_LEVEL1_ICACHE_ASSOC: +- case _SC_LEVEL1_DCACHE_ASSOC: +- case _SC_LEVEL2_CACHE_ASSOC: ++ case _SC_LEVEL1_DCACHE_SIZE: ++ return (ecx >> 14) & 0x3fc00; ++ ++ case _SC_LEVEL1_DCACHE_ASSOC: ++ ecx >>= 16; ++ if ((ecx & 0xff) == 0xff) ++ { ++ /* Fully associative. */ ++ return (ecx << 2) & 0x3fc00; ++ } ++ return ecx & 0xff; ++ ++ case _SC_LEVEL1_DCACHE_LINESIZE: ++ return ecx & 0xff; ++ ++ case _SC_LEVEL2_CACHE_SIZE: ++ return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00; ++ ++ case _SC_LEVEL2_CACHE_ASSOC: ++ switch ((ecx >> 12) & 0xf) ++ { ++ case 0: ++ case 1: ++ case 2: ++ case 4: ++ return (ecx >> 12) & 0xf; ++ case 6: ++ return 8; ++ case 8: ++ return 16; ++ case 10: ++ return 32; ++ case 11: ++ return 48; ++ case 12: ++ return 64; ++ case 13: ++ return 96; ++ case 14: ++ return 128; ++ case 15: ++ return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff); ++ default: ++ return 0; ++ } ++ ++ case _SC_LEVEL2_CACHE_LINESIZE: ++ return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff; ++ ++ case _SC_LEVEL3_CACHE_SIZE: ++ { ++ long int total_l3_cache = 0, l3_cache_per_thread = 0; ++ unsigned int threads = 0; ++ const struct cpu_features *cpu_features; ++ ++ if ((edx & 0xf000) == 0) ++ return 0; ++ ++ total_l3_cache = (edx & 0x3ffc0000) << 1; ++ cpu_features = __get_cpu_features (); ++ ++ /* Figure out the number of logical threads that share L3. */ ++ if (max_cpuid >= 0x80000008) ++ { ++ /* Get width of APIC ID. */ ++ __cpuid (0x80000008, eax, ebx, ecx, edx); ++ threads = (ecx & 0xff) + 1; ++ } ++ ++ if (threads == 0) ++ { ++ /* If APIC ID width is not available, use logical ++ processor count. */ ++ __cpuid (0x00000001, eax, ebx, ecx, edx); ++ if ((edx & (1 << 28)) != 0) ++ threads = (ebx >> 16) & 0xff; ++ } ++ ++ /* Cap usage of highest cache level to the number of ++ supported threads. */ ++ if (threads > 0) ++ l3_cache_per_thread = total_l3_cache/threads; ++ ++ /* Get shared cache per ccx for Zen architectures. */ ++ if (cpu_features->basic.family >= 0x17) ++ { ++ long int l3_cache_per_ccx = 0; ++ /* Get number of threads share the L3 cache in CCX. */ ++ __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx); ++ unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1; ++ l3_cache_per_ccx = l3_cache_per_thread * threads_per_ccx; ++ return l3_cache_per_ccx; ++ } ++ else ++ { ++ return l3_cache_per_thread; ++ } ++ } ++ + case _SC_LEVEL3_CACHE_ASSOC: +- return ecx ? ((ebx >> 22) & 0x3ff) + 1 : 0; +- case _SC_LEVEL1_ICACHE_LINESIZE: +- case _SC_LEVEL1_DCACHE_LINESIZE: +- case _SC_LEVEL2_CACHE_LINESIZE: ++ switch ((edx >> 12) & 0xf) ++ { ++ case 0: ++ case 1: ++ case 2: ++ case 4: ++ return (edx >> 12) & 0xf; ++ case 6: ++ return 8; ++ case 8: ++ return 16; ++ case 10: ++ return 32; ++ case 11: ++ return 48; ++ case 12: ++ return 64; ++ case 13: ++ return 96; ++ case 14: ++ return 128; ++ case 15: ++ return ((edx & 0x3ffc0000) << 1) / (edx & 0xff); ++ default: ++ return 0; ++ } ++ + case _SC_LEVEL3_CACHE_LINESIZE: +- return ecx ? (ebx & 0xfff) + 1 : 0; +- case _SC_LEVEL1_ICACHE_SIZE: +- case _SC_LEVEL1_DCACHE_SIZE: +- case _SC_LEVEL2_CACHE_SIZE: +- case _SC_LEVEL3_CACHE_SIZE: +- return ecx ? (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1): 0; ++ return (edx & 0xf000) == 0 ? 0 : edx & 0xff; ++ + default: + __builtin_unreachable (); + } +@@ -703,7 +869,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + data = handle_amd (_SC_LEVEL1_DCACHE_SIZE); + core = handle_amd (_SC_LEVEL2_CACHE_SIZE); + shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); +- shared_per_thread = shared; + + level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE); + level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE); +@@ -716,13 +881,20 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + level3_cache_size = shared; + level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC); + level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE); ++ level4_cache_size = handle_amd (_SC_LEVEL4_CACHE_SIZE); + + if (shared <= 0) +- /* No shared L3 cache. All we have is the L2 cache. */ +- shared = core; ++ { ++ /* No shared L3 cache. All we have is the L2 cache. */ ++ shared = core; ++ } ++ else if (cpu_features->basic.family < 0x17) ++ { ++ /* Account for exclusive L2 and L3 caches. */ ++ shared += core; ++ } + +- if (shared_per_thread <= 0) +- shared_per_thread = shared; ++ shared_per_thread = shared; + } + + cpu_features->level1_icache_size = level1_icache_size; +-- +2.33.0 + diff --git a/0002-nscd-Do-not-rebuild-getaddrinfo-bug-30709.patch b/0002-nscd-Do-not-rebuild-getaddrinfo-bug-30709.patch new file mode 100644 index 0000000..a0aed1a --- /dev/null +++ b/0002-nscd-Do-not-rebuild-getaddrinfo-bug-30709.patch @@ -0,0 +1,185 @@ +From 6b99458d197ab779ebb6ff632c168e2cbfa4f543 Mon Sep 17 00:00:00 2001 +From: Florian Weimer +Date: Fri, 11 Aug 2023 10:10:16 +0200 +Subject: [PATCH 02/12] nscd: Do not rebuild getaddrinfo (bug 30709) + +The nscd daemon caches hosts data from NSS modules verbatim, without +filtering protocol families or sorting them (otherwise separate caches +would be needed for certain ai_flags combinations). The cache +implementation is complete separate from the getaddrinfo code. This +means that rebuilding getaddrinfo is not needed. The only function +actually used is __bump_nl_timestamp from check_pf.c, and this change +moves it into nscd/connections.c. + +Tested on x86_64-linux-gnu with -fexceptions, built with +build-many-glibcs.py. I also backported this patch into a distribution +that still supports nscd and verified manually that caching still works. + +Reviewed-by: Siddhesh Poyarekar +(cherry picked from commit 039ff51ac7e02db1cfc0c23e38ac7bfbb00221d1) +--- + include/ifaddrs.h | 4 --- + inet/check_pf.c | 9 ------ + nscd/Makefile | 2 +- + nscd/connections.c | 11 +++++++ + nscd/gai.c | 50 ------------------------------ + sysdeps/unix/sysv/linux/check_pf.c | 17 +--------- + 6 files changed, 13 insertions(+), 80 deletions(-) + delete mode 100644 nscd/gai.c + +diff --git a/include/ifaddrs.h b/include/ifaddrs.h +index 416118f1b3..19a3afb19f 100644 +--- a/include/ifaddrs.h ++++ b/include/ifaddrs.h +@@ -34,9 +34,5 @@ extern void __check_native (uint32_t a1_index, int *a1_native, + uint32_t a2_index, int *a2_native) + attribute_hidden; + +-#if IS_IN (nscd) +-extern uint32_t __bump_nl_timestamp (void) attribute_hidden; +-#endif +- + # endif /* !_ISOMAC */ + #endif /* ifaddrs.h */ +diff --git a/inet/check_pf.c b/inet/check_pf.c +index 5310c99121..6d1475920f 100644 +--- a/inet/check_pf.c ++++ b/inet/check_pf.c +@@ -60,12 +60,3 @@ __free_in6ai (struct in6addrinfo *in6ai) + { + /* Nothing to do. */ + } +- +- +-#if IS_IN (nscd) +-uint32_t +-__bump_nl_timestamp (void) +-{ +- return 0; +-} +-#endif +diff --git a/nscd/Makefile b/nscd/Makefile +index 2a0489f4cf..16b6460ee9 100644 +--- a/nscd/Makefile ++++ b/nscd/Makefile +@@ -35,7 +35,7 @@ nscd-modules := nscd connections pwdcache getpwnam_r getpwuid_r grpcache \ + getgrnam_r getgrgid_r hstcache gethstbyad_r gethstbynm3_r \ + getsrvbynm_r getsrvbypt_r servicescache \ + dbg_log nscd_conf nscd_stat cache mem nscd_setup_thread \ +- xmalloc xstrdup aicache initgrcache gai res_hconf \ ++ xmalloc xstrdup aicache initgrcache res_hconf \ + netgroupcache cachedumper + + ifeq ($(build-nscd)$(have-thread-library),yesyes) +diff --git a/nscd/connections.c b/nscd/connections.c +index a405a44a9b..15693e5090 100644 +--- a/nscd/connections.c ++++ b/nscd/connections.c +@@ -256,6 +256,17 @@ int inotify_fd = -1; + #ifdef HAVE_NETLINK + /* Descriptor for netlink status updates. */ + static int nl_status_fd = -1; ++ ++static uint32_t ++__bump_nl_timestamp (void) ++{ ++ static uint32_t nl_timestamp; ++ ++ if (atomic_fetch_add_relaxed (&nl_timestamp, 1) + 1 == 0) ++ atomic_fetch_add_relaxed (&nl_timestamp, 1); ++ ++ return nl_timestamp; ++} + #endif + + /* Number of times clients had to wait. */ +diff --git a/nscd/gai.c b/nscd/gai.c +deleted file mode 100644 +index e29f3fe583..0000000000 +--- a/nscd/gai.c ++++ /dev/null +@@ -1,50 +0,0 @@ +-/* Copyright (C) 2004-2023 Free Software Foundation, Inc. +- This file is part of the GNU C Library. +- +- This program is free software; you can redistribute it and/or modify +- it under the terms of the GNU General Public License as published +- by the Free Software Foundation; version 2 of the License, or +- (at your option) any later version. +- +- This program is distributed in the hope that it will be useful, +- but WITHOUT ANY WARRANTY; without even the implied warranty of +- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- GNU General Public License for more details. +- +- You should have received a copy of the GNU General Public License +- along with this program; if not, see . */ +- +-#include +-#include +- +-/* This file uses the getaddrinfo code but it compiles it without NSCD +- support. We just need a few symbol renames. */ +-#define __ioctl ioctl +-#define __getsockname getsockname +-#define __socket socket +-#define __recvmsg recvmsg +-#define __bind bind +-#define __sendto sendto +-#define __strchrnul strchrnul +-#define __getline getline +-#define __qsort_r qsort_r +-/* nscd uses 1MB or 2MB thread stacks. */ +-#define __libc_use_alloca(size) (size <= __MAX_ALLOCA_CUTOFF) +-#define __getifaddrs getifaddrs +-#define __freeifaddrs freeifaddrs +-#undef __fstat64 +-#define __fstat64 fstat64 +-#undef __stat64 +-#define __stat64 stat64 +- +-/* We are nscd, so we don't want to be talking to ourselves. */ +-#undef USE_NSCD +- +-#include +- +-/* Support code. */ +-#include +-#include +- +-/* Some variables normally defined in libc. */ +-nss_action_list __nss_hosts_database attribute_hidden; +diff --git a/sysdeps/unix/sysv/linux/check_pf.c b/sysdeps/unix/sysv/linux/check_pf.c +index 2b0b8b6368..3aa6a00348 100644 +--- a/sysdeps/unix/sysv/linux/check_pf.c ++++ b/sysdeps/unix/sysv/linux/check_pf.c +@@ -66,25 +66,10 @@ static struct cached_data *cache; + __libc_lock_define_initialized (static, lock); + + +-#if IS_IN (nscd) +-static uint32_t nl_timestamp; +- +-uint32_t +-__bump_nl_timestamp (void) +-{ +- if (atomic_fetch_add_relaxed (&nl_timestamp, 1) + 1 == 0) +- atomic_fetch_add_relaxed (&nl_timestamp, 1); +- +- return nl_timestamp; +-} +-#endif +- + static inline uint32_t + get_nl_timestamp (void) + { +-#if IS_IN (nscd) +- return nl_timestamp; +-#elif defined USE_NSCD ++#if defined USE_NSCD + return __nscd_get_nl_timestamp (); + #else + return 0; +-- +2.33.0 + diff --git a/0003-x86-Fix-incorrect-scope-of-setting-shared_per_thread.patch b/0003-x86-Fix-incorrect-scope-of-setting-shared_per_thread.patch new file mode 100644 index 0000000..cb8390d --- /dev/null +++ b/0003-x86-Fix-incorrect-scope-of-setting-shared_per_thread.patch @@ -0,0 +1,45 @@ +From 5ea70cc02626d9b85f1570153873d8648a47bf95 Mon Sep 17 00:00:00 2001 +From: Noah Goldstein +Date: Thu, 10 Aug 2023 19:28:24 -0500 +Subject: [PATCH 03/12] x86: Fix incorrect scope of setting `shared_per_thread` + [BZ# 30745] + +The: + +``` + if (shared_per_thread > 0 && threads > 0) + shared_per_thread /= threads; +``` + +Code was accidentally moved to inside the else scope. This doesn't +match how it was previously (before af992e7abd). + +This patch fixes that by putting the division after the `else` block. + +(cherry picked from commit 084fb31bc2c5f95ae0b9e6df4d3cf0ff43471ede) +--- + sysdeps/x86/dl-cacheinfo.h | 7 +++---- + 1 file changed, 3 insertions(+), 4 deletions(-) + +diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h +index 285773039f..5ddb35c9d9 100644 +--- a/sysdeps/x86/dl-cacheinfo.h ++++ b/sysdeps/x86/dl-cacheinfo.h +@@ -770,11 +770,10 @@ get_common_cache_info (long int *shared_ptr, long int * shared_per_thread_ptr, u + level. */ + threads = ((cpu_features->features[CPUID_INDEX_1].cpuid.ebx >> 16) + & 0xff); +- +- /* Get per-thread size of highest level cache. */ +- if (shared_per_thread > 0 && threads > 0) +- shared_per_thread /= threads; + } ++ /* Get per-thread size of highest level cache. */ ++ if (shared_per_thread > 0 && threads > 0) ++ shared_per_thread /= threads; + } + + /* Account for non-inclusive L2 and L3 caches. */ +-- +2.33.0 + diff --git a/0004-x86_64-Fix-build-with-disable-multiarch-BZ-30721.patch b/0004-x86_64-Fix-build-with-disable-multiarch-BZ-30721.patch new file mode 100644 index 0000000..5eacc8d --- /dev/null +++ b/0004-x86_64-Fix-build-with-disable-multiarch-BZ-30721.patch @@ -0,0 +1,60 @@ +From 6135d50e44233d8c89ca788f78c669941ad09fb9 Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Tue, 8 Aug 2023 09:27:54 -0300 +Subject: [PATCH 04/12] x86_64: Fix build with --disable-multiarch (BZ 30721) + +With multiarch disabled, the default memmove implementation provides +the fortify routines for memcpy, mempcpy, and memmove. However, it +does not provide the internal hidden definitions used when building +with fortify enabled. The memset has a similar issue. + +Checked on x86_64-linux-gnu building with different options: +default and --disable-multi-arch plus default, --disable-default-pie, +--enable-fortify-source={2,3}, and --enable-fortify-source={2,3} +with --disable-default-pie. +Tested-by: Andreas K. Huettel +Reviewed-by: Siddhesh Poyarekar + +(cherry picked from commit 51cb52214fcd72849c640b12f5099ed3ac776181) +--- + sysdeps/x86_64/memcpy.S | 2 +- + sysdeps/x86_64/memmove.S | 3 +++ + sysdeps/x86_64/memset.S | 1 + + 3 files changed, 5 insertions(+), 1 deletion(-) + +diff --git a/sysdeps/x86_64/memcpy.S b/sysdeps/x86_64/memcpy.S +index d98500a78a..4922cba657 100644 +--- a/sysdeps/x86_64/memcpy.S ++++ b/sysdeps/x86_64/memcpy.S +@@ -1 +1 @@ +-/* Implemented in memcpy.S. */ ++/* Implemented in memmove.S. */ +diff --git a/sysdeps/x86_64/memmove.S b/sysdeps/x86_64/memmove.S +index f0b84e3b52..c3c08165e1 100644 +--- a/sysdeps/x86_64/memmove.S ++++ b/sysdeps/x86_64/memmove.S +@@ -46,6 +46,9 @@ weak_alias (__mempcpy, mempcpy) + + #ifndef USE_MULTIARCH + libc_hidden_builtin_def (memmove) ++libc_hidden_builtin_def (__memmove_chk) ++libc_hidden_builtin_def (__memcpy_chk) ++libc_hidden_builtin_def (__mempcpy_chk) + # if defined SHARED && IS_IN (libc) + strong_alias (memmove, __memcpy) + libc_hidden_ver (memmove, memcpy) +diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S +index 7c99df36db..c6df24e8de 100644 +--- a/sysdeps/x86_64/memset.S ++++ b/sysdeps/x86_64/memset.S +@@ -32,6 +32,7 @@ + #include "isa-default-impl.h" + + libc_hidden_builtin_def (memset) ++libc_hidden_builtin_def (__memset_chk) + + #if IS_IN (libc) + libc_hidden_def (__wmemset) +-- +2.33.0 + diff --git a/0005-i686-Fix-build-with-disable-multiarch.patch b/0005-i686-Fix-build-with-disable-multiarch.patch new file mode 100644 index 0000000..a2b7349 --- /dev/null +++ b/0005-i686-Fix-build-with-disable-multiarch.patch @@ -0,0 +1,100 @@ +From 7ac405a74c6069b0627dc2d8449a82a621f8ff06 Mon Sep 17 00:00:00 2001 +From: Adhemerval Zanella +Date: Tue, 8 Aug 2023 09:27:55 -0300 +Subject: [PATCH 05/12] i686: Fix build with --disable-multiarch + +Since i686 provides the fortified wrappers for memcpy, mempcpy, +memmove, and memset on the same string implementation, the static +build tries to optimized it by not tying the fortified wrappers +to string routine (to avoid pulling the fortify function if +they are not required). + +Checked on i686-linux-gnu building with different option: +default and --disable-multi-arch plus default, --disable-default-pie, +--enable-fortify-source={2,3}, and --enable-fortify-source={2,3} +with --disable-default-pie. +Reviewed-by: Siddhesh Poyarekar + +(cherry picked from commit c73c96a4a1af1326df7f96eec58209e1e04066d8) +--- + sysdeps/i386/i686/memcpy.S | 2 +- + sysdeps/i386/i686/mempcpy.S | 2 +- + sysdeps/i386/i686/multiarch/memcpy_chk.c | 2 ++ + sysdeps/i386/i686/multiarch/memmove_chk.c | 2 ++ + sysdeps/i386/i686/multiarch/mempcpy_chk.c | 2 ++ + sysdeps/i386/i686/multiarch/memset_chk.c | 2 ++ + 6 files changed, 10 insertions(+), 2 deletions(-) + +diff --git a/sysdeps/i386/i686/memcpy.S b/sysdeps/i386/i686/memcpy.S +index 9b48ec0ea1..b86af4aac9 100644 +--- a/sysdeps/i386/i686/memcpy.S ++++ b/sysdeps/i386/i686/memcpy.S +@@ -27,7 +27,7 @@ + #define LEN SRC+4 + + .text +-#if defined PIC && IS_IN (libc) ++#if defined SHARED && IS_IN (libc) + ENTRY_CHK (__memcpy_chk) + movl 12(%esp), %eax + cmpl %eax, 16(%esp) +diff --git a/sysdeps/i386/i686/mempcpy.S b/sysdeps/i386/i686/mempcpy.S +index 26f8501e7d..14d9dd681a 100644 +--- a/sysdeps/i386/i686/mempcpy.S ++++ b/sysdeps/i386/i686/mempcpy.S +@@ -27,7 +27,7 @@ + #define LEN SRC+4 + + .text +-#if defined PIC && IS_IN (libc) ++#if defined SHARED && IS_IN (libc) + ENTRY_CHK (__mempcpy_chk) + movl 12(%esp), %eax + cmpl %eax, 16(%esp) +diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.c b/sysdeps/i386/i686/multiarch/memcpy_chk.c +index ec945dc91f..c3a8aeaf18 100644 +--- a/sysdeps/i386/i686/multiarch/memcpy_chk.c ++++ b/sysdeps/i386/i686/multiarch/memcpy_chk.c +@@ -32,4 +32,6 @@ libc_ifunc_redirected (__redirect_memcpy_chk, __memcpy_chk, + __hidden_ver1 (__memcpy_chk, __GI___memcpy_chk, __redirect_memcpy_chk) + __attribute__ ((visibility ("hidden"))) __attribute_copy__ (__memcpy_chk); + # endif ++#else ++# include + #endif +diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.c b/sysdeps/i386/i686/multiarch/memmove_chk.c +index 55c7601d5d..070dde083a 100644 +--- a/sysdeps/i386/i686/multiarch/memmove_chk.c ++++ b/sysdeps/i386/i686/multiarch/memmove_chk.c +@@ -32,4 +32,6 @@ libc_ifunc_redirected (__redirect_memmove_chk, __memmove_chk, + __hidden_ver1 (__memmove_chk, __GI___memmove_chk, __redirect_memmove_chk) + __attribute__ ((visibility ("hidden"))) __attribute_copy__ (__memmove_chk); + # endif ++#else ++# include + #endif +diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.c b/sysdeps/i386/i686/multiarch/mempcpy_chk.c +index 83569cf9d9..14360f1828 100644 +--- a/sysdeps/i386/i686/multiarch/mempcpy_chk.c ++++ b/sysdeps/i386/i686/multiarch/mempcpy_chk.c +@@ -32,4 +32,6 @@ libc_ifunc_redirected (__redirect_mempcpy_chk, __mempcpy_chk, + __hidden_ver1 (__mempcpy_chk, __GI___mempcpy_chk, __redirect_mempcpy_chk) + __attribute__ ((visibility ("hidden"))) __attribute_copy__ (__mempcpy_chk); + # endif ++#else ++# include + #endif +diff --git a/sysdeps/i386/i686/multiarch/memset_chk.c b/sysdeps/i386/i686/multiarch/memset_chk.c +index 1a7503858d..8179ef7c0b 100644 +--- a/sysdeps/i386/i686/multiarch/memset_chk.c ++++ b/sysdeps/i386/i686/multiarch/memset_chk.c +@@ -32,4 +32,6 @@ libc_ifunc_redirected (__redirect_memset_chk, __memset_chk, + __hidden_ver1 (__memset_chk, __GI___memset_chk, __redirect_memset_chk) + __attribute__ ((visibility ("hidden"))) __attribute_copy__ (__memset_chk); + # endif ++#else ++# include + #endif +-- +2.33.0 + diff --git a/0006-malloc-Enable-merging-of-remainders-in-memalign-bug-.patch b/0006-malloc-Enable-merging-of-remainders-in-memalign-bug-.patch new file mode 100644 index 0000000..38f39e1 --- /dev/null +++ b/0006-malloc-Enable-merging-of-remainders-in-memalign-bug-.patch @@ -0,0 +1,301 @@ +From 98c293c61f770b6b7a22f89a6ea81b711ecb1952 Mon Sep 17 00:00:00 2001 +From: Florian Weimer +Date: Fri, 11 Aug 2023 11:18:17 +0200 +Subject: [PATCH 06/12] malloc: Enable merging of remainders in memalign (bug + 30723) + +Previously, calling _int_free from _int_memalign could put remainders +into the tcache or into fastbins, where they are invisible to the +low-level allocator. This results in missed merge opportunities +because once these freed chunks become available to the low-level +allocator, further memalign allocations (even of the same size are) +likely obstructing merges. + +Furthermore, during forwards merging in _int_memalign, do not +completely give up when the remainder is too small to serve as a +chunk on its own. We can still give it back if it can be merged +with the following unused chunk. This makes it more likely that +memalign calls in a loop achieve a compact memory layout, +independently of initial heap layout. + +Drop some useless (unsigned long) casts along the way, and tweak +the style to more closely match GNU on changed lines. + +Reviewed-by: DJ Delorie +(cherry picked from commit 542b1105852568c3ebc712225ae78b8c8ba31a78) +--- + malloc/malloc.c | 197 +++++++++++++++++++++++++++++------------------- + 1 file changed, 121 insertions(+), 76 deletions(-) + +diff --git a/malloc/malloc.c b/malloc/malloc.c +index e2f1a615a4..948f9759af 100644 +--- a/malloc/malloc.c ++++ b/malloc/malloc.c +@@ -1086,6 +1086,11 @@ typedef struct malloc_chunk* mchunkptr; + + static void* _int_malloc(mstate, size_t); + static void _int_free(mstate, mchunkptr, int); ++static void _int_free_merge_chunk (mstate, mchunkptr, INTERNAL_SIZE_T); ++static INTERNAL_SIZE_T _int_free_create_chunk (mstate, ++ mchunkptr, INTERNAL_SIZE_T, ++ mchunkptr, INTERNAL_SIZE_T); ++static void _int_free_maybe_consolidate (mstate, INTERNAL_SIZE_T); + static void* _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T, + INTERNAL_SIZE_T); + static void* _int_memalign(mstate, size_t, size_t); +@@ -4637,31 +4642,52 @@ _int_free (mstate av, mchunkptr p, int have_lock) + if (!have_lock) + __libc_lock_lock (av->mutex); + +- nextchunk = chunk_at_offset(p, size); +- +- /* Lightweight tests: check whether the block is already the +- top block. */ +- if (__glibc_unlikely (p == av->top)) +- malloc_printerr ("double free or corruption (top)"); +- /* Or whether the next chunk is beyond the boundaries of the arena. */ +- if (__builtin_expect (contiguous (av) +- && (char *) nextchunk +- >= ((char *) av->top + chunksize(av->top)), 0)) +- malloc_printerr ("double free or corruption (out)"); +- /* Or whether the block is actually not marked used. */ +- if (__glibc_unlikely (!prev_inuse(nextchunk))) +- malloc_printerr ("double free or corruption (!prev)"); +- +- nextsize = chunksize(nextchunk); +- if (__builtin_expect (chunksize_nomask (nextchunk) <= CHUNK_HDR_SZ, 0) +- || __builtin_expect (nextsize >= av->system_mem, 0)) +- malloc_printerr ("free(): invalid next size (normal)"); ++ _int_free_merge_chunk (av, p, size); + +- free_perturb (chunk2mem(p), size - CHUNK_HDR_SZ); ++ if (!have_lock) ++ __libc_lock_unlock (av->mutex); ++ } ++ /* ++ If the chunk was allocated via mmap, release via munmap(). ++ */ ++ ++ else { ++ munmap_chunk (p); ++ } ++} ++ ++/* Try to merge chunk P of SIZE bytes with its neighbors. Put the ++ resulting chunk on the appropriate bin list. P must not be on a ++ bin list yet, and it can be in use. */ ++static void ++_int_free_merge_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size) ++{ ++ mchunkptr nextchunk = chunk_at_offset(p, size); ++ ++ /* Lightweight tests: check whether the block is already the ++ top block. */ ++ if (__glibc_unlikely (p == av->top)) ++ malloc_printerr ("double free or corruption (top)"); ++ /* Or whether the next chunk is beyond the boundaries of the arena. */ ++ if (__builtin_expect (contiguous (av) ++ && (char *) nextchunk ++ >= ((char *) av->top + chunksize(av->top)), 0)) ++ malloc_printerr ("double free or corruption (out)"); ++ /* Or whether the block is actually not marked used. */ ++ if (__glibc_unlikely (!prev_inuse(nextchunk))) ++ malloc_printerr ("double free or corruption (!prev)"); ++ ++ INTERNAL_SIZE_T nextsize = chunksize(nextchunk); ++ if (__builtin_expect (chunksize_nomask (nextchunk) <= CHUNK_HDR_SZ, 0) ++ || __builtin_expect (nextsize >= av->system_mem, 0)) ++ malloc_printerr ("free(): invalid next size (normal)"); ++ ++ free_perturb (chunk2mem(p), size - CHUNK_HDR_SZ); + +- /* consolidate backward */ +- if (!prev_inuse(p)) { +- prevsize = prev_size (p); ++ /* Consolidate backward. */ ++ if (!prev_inuse(p)) ++ { ++ INTERNAL_SIZE_T prevsize = prev_size (p); + size += prevsize; + p = chunk_at_offset(p, -((long) prevsize)); + if (__glibc_unlikely (chunksize(p) != prevsize)) +@@ -4669,9 +4695,25 @@ _int_free (mstate av, mchunkptr p, int have_lock) + unlink_chunk (av, p); + } + +- if (nextchunk != av->top) { ++ /* Write the chunk header, maybe after merging with the following chunk. */ ++ size = _int_free_create_chunk (av, p, size, nextchunk, nextsize); ++ _int_free_maybe_consolidate (av, size); ++} ++ ++/* Create a chunk at P of SIZE bytes, with SIZE potentially increased ++ to cover the immediately following chunk NEXTCHUNK of NEXTSIZE ++ bytes (if NEXTCHUNK is unused). The chunk at P is not actually ++ read and does not have to be initialized. After creation, it is ++ placed on the appropriate bin list. The function returns the size ++ of the new chunk. */ ++static INTERNAL_SIZE_T ++_int_free_create_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size, ++ mchunkptr nextchunk, INTERNAL_SIZE_T nextsize) ++{ ++ if (nextchunk != av->top) ++ { + /* get and clear inuse bit */ +- nextinuse = inuse_bit_at_offset(nextchunk, nextsize); ++ bool nextinuse = inuse_bit_at_offset (nextchunk, nextsize); + + /* consolidate forward */ + if (!nextinuse) { +@@ -4686,8 +4728,8 @@ _int_free (mstate av, mchunkptr p, int have_lock) + been given one chance to be used in malloc. + */ + +- bck = unsorted_chunks(av); +- fwd = bck->fd; ++ mchunkptr bck = unsorted_chunks (av); ++ mchunkptr fwd = bck->fd; + if (__glibc_unlikely (fwd->bk != bck)) + malloc_printerr ("free(): corrupted unsorted chunks"); + p->fd = fwd; +@@ -4706,61 +4748,52 @@ _int_free (mstate av, mchunkptr p, int have_lock) + check_free_chunk(av, p); + } + +- /* +- If the chunk borders the current high end of memory, +- consolidate into top +- */ +- +- else { ++ else ++ { ++ /* If the chunk borders the current high end of memory, ++ consolidate into top. */ + size += nextsize; + set_head(p, size | PREV_INUSE); + av->top = p; + check_chunk(av, p); + } + +- /* +- If freeing a large space, consolidate possibly-surrounding +- chunks. Then, if the total unused topmost memory exceeds trim +- threshold, ask malloc_trim to reduce top. +- +- Unless max_fast is 0, we don't know if there are fastbins +- bordering top, so we cannot tell for sure whether threshold +- has been reached unless fastbins are consolidated. But we +- don't want to consolidate on each free. As a compromise, +- consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD +- is reached. +- */ ++ return size; ++} + +- if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) { ++/* If freeing a large space, consolidate possibly-surrounding ++ chunks. Then, if the total unused topmost memory exceeds trim ++ threshold, ask malloc_trim to reduce top. */ ++static void ++_int_free_maybe_consolidate (mstate av, INTERNAL_SIZE_T size) ++{ ++ /* Unless max_fast is 0, we don't know if there are fastbins ++ bordering top, so we cannot tell for sure whether threshold has ++ been reached unless fastbins are consolidated. But we don't want ++ to consolidate on each free. As a compromise, consolidation is ++ performed if FASTBIN_CONSOLIDATION_THRESHOLD is reached. */ ++ if (size >= FASTBIN_CONSOLIDATION_THRESHOLD) ++ { + if (atomic_load_relaxed (&av->have_fastchunks)) + malloc_consolidate(av); + +- if (av == &main_arena) { ++ if (av == &main_arena) ++ { + #ifndef MORECORE_CANNOT_TRIM +- if ((unsigned long)(chunksize(av->top)) >= +- (unsigned long)(mp_.trim_threshold)) +- systrim(mp_.top_pad, av); ++ if (chunksize (av->top) >= mp_.trim_threshold) ++ systrim (mp_.top_pad, av); + #endif +- } else { +- /* Always try heap_trim(), even if the top chunk is not +- large, because the corresponding heap might go away. */ +- heap_info *heap = heap_for_ptr(top(av)); ++ } ++ else ++ { ++ /* Always try heap_trim, even if the top chunk is not large, ++ because the corresponding heap might go away. */ ++ heap_info *heap = heap_for_ptr (top (av)); + +- assert(heap->ar_ptr == av); +- heap_trim(heap, mp_.top_pad); +- } ++ assert (heap->ar_ptr == av); ++ heap_trim (heap, mp_.top_pad); ++ } + } +- +- if (!have_lock) +- __libc_lock_unlock (av->mutex); +- } +- /* +- If the chunk was allocated via mmap, release via munmap(). +- */ +- +- else { +- munmap_chunk (p); +- } + } + + /* +@@ -5221,7 +5254,7 @@ _int_memalign (mstate av, size_t alignment, size_t bytes) + (av != &main_arena ? NON_MAIN_ARENA : 0)); + set_inuse_bit_at_offset (newp, newsize); + set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0)); +- _int_free (av, p, 1); ++ _int_free_merge_chunk (av, p, leadsize); + p = newp; + + assert (newsize >= nb && +@@ -5232,15 +5265,27 @@ _int_memalign (mstate av, size_t alignment, size_t bytes) + if (!chunk_is_mmapped (p)) + { + size = chunksize (p); +- if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE)) ++ mchunkptr nextchunk = chunk_at_offset(p, size); ++ INTERNAL_SIZE_T nextsize = chunksize(nextchunk); ++ if (size > nb) + { + remainder_size = size - nb; +- remainder = chunk_at_offset (p, nb); +- set_head (remainder, remainder_size | PREV_INUSE | +- (av != &main_arena ? NON_MAIN_ARENA : 0)); +- set_head_size (p, nb); +- _int_free (av, remainder, 1); +- } ++ if (remainder_size >= MINSIZE ++ || nextchunk == av->top ++ || !inuse_bit_at_offset (nextchunk, nextsize)) ++ { ++ /* We can only give back the tail if it is larger than ++ MINSIZE, or if the following chunk is unused (top ++ chunk or unused in-heap chunk). Otherwise we would ++ create a chunk that is smaller than MINSIZE. */ ++ remainder = chunk_at_offset (p, nb); ++ set_head_size (p, nb); ++ remainder_size = _int_free_create_chunk (av, remainder, ++ remainder_size, ++ nextchunk, nextsize); ++ _int_free_maybe_consolidate (av, remainder_size); ++ } ++ } + } + + check_inuse_chunk (av, p); +-- +2.33.0 + diff --git a/0007-malloc-Remove-bin-scanning-from-memalign-bug-30723.patch b/0007-malloc-Remove-bin-scanning-from-memalign-bug-30723.patch new file mode 100644 index 0000000..87db13c --- /dev/null +++ b/0007-malloc-Remove-bin-scanning-from-memalign-bug-30723.patch @@ -0,0 +1,269 @@ +From 2af141bda3cd407abd4bedf615f9e45fe79518e2 Mon Sep 17 00:00:00 2001 +From: Florian Weimer +Date: Thu, 10 Aug 2023 19:36:56 +0200 +Subject: [PATCH 07/12] malloc: Remove bin scanning from memalign (bug 30723) + +On the test workload (mpv --cache=yes with VP9 video decoding), the +bin scanning has a very poor success rate (less than 2%). The tcache +scanning has about 50% success rate, so keep that. + +Update comments in malloc/tst-memalign-2 to indicate the purpose +of the tests. Even with the scanning removed, the additional +merging opportunities since commit 542b1105852568c3ebc712225ae78b +("malloc: Enable merging of remainders in memalign (bug 30723)") +are sufficient to pass the existing large bins test. + +Remove leftover variables from _int_free from refactoring in the +same commit. + +Reviewed-by: DJ Delorie +(cherry picked from commit 0dc7fc1cf094406a138e4d1bcf9553e59edcf89d) +--- + NEWS | 1 + + malloc/malloc.c | 169 ++-------------------------------------- + malloc/tst-memalign-2.c | 7 +- + 3 files changed, 11 insertions(+), 166 deletions(-) + +diff --git a/NEWS b/NEWS +index 872bc8907b..c339cb444e 100644 +--- a/NEWS ++++ b/NEWS +@@ -132,6 +132,7 @@ The following bugs are resolved with this release: + [30555] string: strerror can incorrectly return NULL + [30579] malloc: trim_threshold in realloc lead to high memory usage + [30662] nscd: Group and password cache use errno in place of errval ++ [30723] posix_memalign repeatedly scans long bin lists + + Version 2.37 + +diff --git a/malloc/malloc.c b/malloc/malloc.c +index 948f9759af..d0bbbf3710 100644 +--- a/malloc/malloc.c ++++ b/malloc/malloc.c +@@ -4488,12 +4488,6 @@ _int_free (mstate av, mchunkptr p, int have_lock) + { + INTERNAL_SIZE_T size; /* its size */ + mfastbinptr *fb; /* associated fastbin */ +- mchunkptr nextchunk; /* next contiguous chunk */ +- INTERNAL_SIZE_T nextsize; /* its size */ +- int nextinuse; /* true if nextchunk is used */ +- INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */ +- mchunkptr bck; /* misc temp for linking */ +- mchunkptr fwd; /* misc temp for linking */ + + size = chunksize (p); + +@@ -5032,42 +5026,6 @@ _int_realloc (mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize, + ------------------------------ memalign ------------------------------ + */ + +-/* Returns 0 if the chunk is not and does not contain the requested +- aligned sub-chunk, else returns the amount of "waste" from +- trimming. NB is the *chunk* byte size, not the user byte +- size. */ +-static size_t +-chunk_ok_for_memalign (mchunkptr p, size_t alignment, size_t nb) +-{ +- void *m = chunk2mem (p); +- INTERNAL_SIZE_T size = chunksize (p); +- void *aligned_m = m; +- +- if (__glibc_unlikely (misaligned_chunk (p))) +- malloc_printerr ("_int_memalign(): unaligned chunk detected"); +- +- aligned_m = PTR_ALIGN_UP (m, alignment); +- +- INTERNAL_SIZE_T front_extra = (intptr_t) aligned_m - (intptr_t) m; +- +- /* We can't trim off the front as it's too small. */ +- if (front_extra > 0 && front_extra < MINSIZE) +- return 0; +- +- /* If it's a perfect fit, it's an exception to the return value rule +- (we would return zero waste, which looks like "not usable"), so +- handle it here by returning a small non-zero value instead. */ +- if (size == nb && front_extra == 0) +- return 1; +- +- /* If the block we need fits in the chunk, calculate total waste. */ +- if (size > nb + front_extra) +- return size - nb; +- +- /* Can't use this chunk. */ +- return 0; +-} +- + /* BYTES is user requested bytes, not requested chunksize bytes. */ + static void * + _int_memalign (mstate av, size_t alignment, size_t bytes) +@@ -5082,7 +5040,6 @@ _int_memalign (mstate av, size_t alignment, size_t bytes) + mchunkptr remainder; /* spare room at end to split off */ + unsigned long remainder_size; /* its size */ + INTERNAL_SIZE_T size; +- mchunkptr victim; + + nb = checked_request2size (bytes); + if (nb == 0) +@@ -5101,129 +5058,13 @@ _int_memalign (mstate av, size_t alignment, size_t bytes) + we don't find anything in those bins, the common malloc code will + scan starting at 2x. */ + +- /* This will be set if we found a candidate chunk. */ +- victim = NULL; +- +- /* Fast bins are singly-linked, hard to remove a chunk from the middle +- and unlikely to meet our alignment requirements. We have not done +- any experimentation with searching for aligned fastbins. */ +- +- if (av != NULL) +- { +- int first_bin_index; +- int first_largebin_index; +- int last_bin_index; +- +- if (in_smallbin_range (nb)) +- first_bin_index = smallbin_index (nb); +- else +- first_bin_index = largebin_index (nb); +- +- if (in_smallbin_range (nb * 2)) +- last_bin_index = smallbin_index (nb * 2); +- else +- last_bin_index = largebin_index (nb * 2); +- +- first_largebin_index = largebin_index (MIN_LARGE_SIZE); +- +- int victim_index; /* its bin index */ +- +- for (victim_index = first_bin_index; +- victim_index < last_bin_index; +- victim_index ++) +- { +- victim = NULL; +- +- if (victim_index < first_largebin_index) +- { +- /* Check small bins. Small bin chunks are doubly-linked despite +- being the same size. */ +- +- mchunkptr fwd; /* misc temp for linking */ +- mchunkptr bck; /* misc temp for linking */ +- +- bck = bin_at (av, victim_index); +- fwd = bck->fd; +- while (fwd != bck) +- { +- if (chunk_ok_for_memalign (fwd, alignment, nb) > 0) +- { +- victim = fwd; +- +- /* Unlink it */ +- victim->fd->bk = victim->bk; +- victim->bk->fd = victim->fd; +- break; +- } +- +- fwd = fwd->fd; +- } +- } +- else +- { +- /* Check large bins. */ +- mchunkptr fwd; /* misc temp for linking */ +- mchunkptr bck; /* misc temp for linking */ +- mchunkptr best = NULL; +- size_t best_size = 0; +- +- bck = bin_at (av, victim_index); +- fwd = bck->fd; ++ /* Call malloc with worst case padding to hit alignment. */ ++ m = (char *) (_int_malloc (av, nb + alignment + MINSIZE)); + +- while (fwd != bck) +- { +- int extra; +- +- if (chunksize (fwd) < nb) +- break; +- extra = chunk_ok_for_memalign (fwd, alignment, nb); +- if (extra > 0 +- && (extra <= best_size || best == NULL)) +- { +- best = fwd; +- best_size = extra; +- } ++ if (m == 0) ++ return 0; /* propagate failure */ + +- fwd = fwd->fd; +- } +- victim = best; +- +- if (victim != NULL) +- { +- unlink_chunk (av, victim); +- break; +- } +- } +- +- if (victim != NULL) +- break; +- } +- } +- +- /* Strategy: find a spot within that chunk that meets the alignment +- request, and then possibly free the leading and trailing space. +- This strategy is incredibly costly and can lead to external +- fragmentation if header and footer chunks are unused. */ +- +- if (victim != NULL) +- { +- p = victim; +- m = chunk2mem (p); +- set_inuse (p); +- if (av != &main_arena) +- set_non_main_arena (p); +- } +- else +- { +- /* Call malloc with worst case padding to hit alignment. */ +- +- m = (char *) (_int_malloc (av, nb + alignment + MINSIZE)); +- +- if (m == 0) +- return 0; /* propagate failure */ +- +- p = mem2chunk (m); +- } ++ p = mem2chunk (m); + + if ((((unsigned long) (m)) % alignment) != 0) /* misaligned */ + { +diff --git a/malloc/tst-memalign-2.c b/malloc/tst-memalign-2.c +index f229283dbf..ecd6fa249e 100644 +--- a/malloc/tst-memalign-2.c ++++ b/malloc/tst-memalign-2.c +@@ -86,7 +86,8 @@ do_test (void) + TEST_VERIFY (tcache_allocs[i].ptr1 == tcache_allocs[i].ptr2); + } + +- /* Test for non-head tcache hits. */ ++ /* Test for non-head tcache hits. This exercises the memalign ++ scanning code to find matching allocations. */ + for (i = 0; i < array_length (ptr); ++ i) + { + if (i == 4) +@@ -113,7 +114,9 @@ do_test (void) + free (p); + TEST_VERIFY (count > 0); + +- /* Large bins test. */ ++ /* Large bins test. This verifies that the over-allocated parts ++ that memalign releases for future allocations can be reused by ++ memalign itself at least in some cases. */ + + for (i = 0; i < LN; ++ i) + { +-- +2.33.0 + diff --git a/0008-sysdeps-tst-bz21269-fix-test-parameter.patch b/0008-sysdeps-tst-bz21269-fix-test-parameter.patch new file mode 100644 index 0000000..b7c1eee --- /dev/null +++ b/0008-sysdeps-tst-bz21269-fix-test-parameter.patch @@ -0,0 +1,31 @@ +From c8ecda6251dd4a0dfe074e0a6011211cadeef742 Mon Sep 17 00:00:00 2001 +From: Sam James +Date: Fri, 4 Aug 2023 23:58:27 +0100 +Subject: [PATCH 08/12] sysdeps: tst-bz21269: fix test parameter + +All callers pass 1 or 0x11 anyway (same meaning according to man page), +but still. + +Reviewed-by: DJ Delorie +Signed-off-by: Sam James +(cherry picked from commit e0b712dd9183d527aae4506cd39564c14af3bb28) +--- + sysdeps/unix/sysv/linux/i386/tst-bz21269.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/sysdeps/unix/sysv/linux/i386/tst-bz21269.c b/sysdeps/unix/sysv/linux/i386/tst-bz21269.c +index 51d4a1b082..f508ef8f16 100644 +--- a/sysdeps/unix/sysv/linux/i386/tst-bz21269.c ++++ b/sysdeps/unix/sysv/linux/i386/tst-bz21269.c +@@ -52,7 +52,7 @@ xset_thread_area (struct user_desc *u_info) + static void + xmodify_ldt (int func, const void *ptr, unsigned long bytecount) + { +- TEST_VERIFY_EXIT (syscall (SYS_modify_ldt, 1, ptr, bytecount) == 0); ++ TEST_VERIFY_EXIT (syscall (SYS_modify_ldt, func, ptr, bytecount) == 0); + } + + static int +-- +2.33.0 + diff --git a/0009-sysdeps-tst-bz21269-handle-ENOSYS-skip-appropriately.patch b/0009-sysdeps-tst-bz21269-handle-ENOSYS-skip-appropriately.patch new file mode 100644 index 0000000..986eb98 --- /dev/null +++ b/0009-sysdeps-tst-bz21269-handle-ENOSYS-skip-appropriately.patch @@ -0,0 +1,42 @@ +From ad9b8399537670a990572c4b0c4da5411e3b68cf Mon Sep 17 00:00:00 2001 +From: Sam James +Date: Sat, 5 Aug 2023 00:04:33 +0100 +Subject: [PATCH 09/12] sysdeps: tst-bz21269: handle ENOSYS & skip + appropriately + +SYS_modify_ldt requires CONFIG_MODIFY_LDT_SYSCALL to be set in the kernel, which +some distributions may disable for hardening. Check if that's the case (unset) +and mark the test as UNSUPPORTED if so. + +Reviewed-by: DJ Delorie +Signed-off-by: Sam James +(cherry picked from commit 652b9fdb77d9fd056d4dd26dad2c14142768ab49) +--- + sysdeps/unix/sysv/linux/i386/tst-bz21269.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/sysdeps/unix/sysv/linux/i386/tst-bz21269.c b/sysdeps/unix/sysv/linux/i386/tst-bz21269.c +index f508ef8f16..28f5359bea 100644 +--- a/sysdeps/unix/sysv/linux/i386/tst-bz21269.c ++++ b/sysdeps/unix/sysv/linux/i386/tst-bz21269.c +@@ -52,7 +52,16 @@ xset_thread_area (struct user_desc *u_info) + static void + xmodify_ldt (int func, const void *ptr, unsigned long bytecount) + { +- TEST_VERIFY_EXIT (syscall (SYS_modify_ldt, func, ptr, bytecount) == 0); ++ long ret = syscall (SYS_modify_ldt, func, ptr, bytecount); ++ ++ if (ret == -1) ++ { ++ if (errno == ENOSYS) ++ FAIL_UNSUPPORTED ("modify_ldt not supported"); ++ FAIL_EXIT1 ("modify_ldt failed (errno=%d)", errno); ++ } ++ ++ return 0; + } + + static int +-- +2.33.0 + diff --git a/0010-sysdeps-tst-bz21269-fix-Wreturn-type.patch b/0010-sysdeps-tst-bz21269-fix-Wreturn-type.patch new file mode 100644 index 0000000..e329830 --- /dev/null +++ b/0010-sysdeps-tst-bz21269-fix-Wreturn-type.patch @@ -0,0 +1,30 @@ +From 1aed90c9c8f8be9f68b58e96b6e4cd0fc08eb2b1 Mon Sep 17 00:00:00 2001 +From: Sam James +Date: Thu, 17 Aug 2023 09:30:29 +0100 +Subject: [PATCH 10/12] sysdeps: tst-bz21269: fix -Wreturn-type + +Thanks to Andreas Schwab for reporting. + +Fixes: 652b9fdb77d9fd056d4dd26dad2c14142768ab49 +Signed-off-by: Sam James +(cherry picked from commit 369f373057073c307938da91af16922bda3dff6a) +--- + sysdeps/unix/sysv/linux/i386/tst-bz21269.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/sysdeps/unix/sysv/linux/i386/tst-bz21269.c b/sysdeps/unix/sysv/linux/i386/tst-bz21269.c +index 28f5359bea..822c41fceb 100644 +--- a/sysdeps/unix/sysv/linux/i386/tst-bz21269.c ++++ b/sysdeps/unix/sysv/linux/i386/tst-bz21269.c +@@ -60,8 +60,6 @@ xmodify_ldt (int func, const void *ptr, unsigned long bytecount) + FAIL_UNSUPPORTED ("modify_ldt not supported"); + FAIL_EXIT1 ("modify_ldt failed (errno=%d)", errno); + } +- +- return 0; + } + + static int +-- +2.33.0 + diff --git a/0011-io-Fix-record-locking-contants-for-powerpc64-with-__.patch b/0011-io-Fix-record-locking-contants-for-powerpc64-with-__.patch new file mode 100644 index 0000000..1e9a5c3 --- /dev/null +++ b/0011-io-Fix-record-locking-contants-for-powerpc64-with-__.patch @@ -0,0 +1,91 @@ +From 5bdef6f27c91f45505ed5444147be4ed0e9bc3c7 Mon Sep 17 00:00:00 2001 +From: Aurelien Jarno +Date: Mon, 28 Aug 2023 23:30:37 +0200 +Subject: [PATCH 11/12] io: Fix record locking contants for powerpc64 with + __USE_FILE_OFFSET64 + +Commit 5f828ff824e3b7cd1 ("io: Fix F_GETLK, F_SETLK, and F_SETLKW for +powerpc64") fixed an issue with the value of the lock constants on +powerpc64 when not using __USE_FILE_OFFSET64, but it ended-up also +changing the value when using __USE_FILE_OFFSET64 causing an API change. + +Fix that by also checking that define, restoring the pre +4d0fe291aed3a476a commit values: + +Default values: +- F_GETLK: 5 +- F_SETLK: 6 +- F_SETLKW: 7 + +With -D_FILE_OFFSET_BITS=64: +- F_GETLK: 12 +- F_SETLK: 13 +- F_SETLKW: 14 + +At the same time, it has been noticed that there was no test for io lock +with __USE_FILE_OFFSET64, so just add one. + +Tested on x86_64-linux-gnu, i686-linux-gnu and +powerpc64le-unknown-linux-gnu. + +Resolves: BZ #30804. +Co-authored-by: Adhemerval Zanella +Signed-off-by: Aurelien Jarno +(cherry picked from commit 434bf72a94de68f0cc7fbf3c44bf38c1911b70cb) +--- + NEWS | 2 ++ + io/Makefile | 1 + + io/tst-fcntl-lock-lfs.c | 2 ++ + sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h | 2 +- + 4 files changed, 6 insertions(+), 1 deletion(-) + create mode 100644 io/tst-fcntl-lock-lfs.c + +diff --git a/NEWS b/NEWS +index c339cb444e..8156572cdf 100644 +--- a/NEWS ++++ b/NEWS +@@ -133,6 +133,8 @@ The following bugs are resolved with this release: + [30579] malloc: trim_threshold in realloc lead to high memory usage + [30662] nscd: Group and password cache use errno in place of errval + [30723] posix_memalign repeatedly scans long bin lists ++ [30804] F_GETLK, F_SETLK, and F_SETLKW value change for powerpc64 with ++ -D_FILE_OFFSET_BITS=64 + + Version 2.37 + +diff --git a/io/Makefile b/io/Makefile +index 6ccc0e8691..8a3c83a3bb 100644 +--- a/io/Makefile ++++ b/io/Makefile +@@ -192,6 +192,7 @@ tests := \ + tst-fchownat \ + tst-fcntl \ + tst-fcntl-lock \ ++ tst-fcntl-lock-lfs \ + tst-fstatat \ + tst-fts \ + tst-fts-lfs \ +diff --git a/io/tst-fcntl-lock-lfs.c b/io/tst-fcntl-lock-lfs.c +new file mode 100644 +index 0000000000..f2a909fb02 +--- /dev/null ++++ b/io/tst-fcntl-lock-lfs.c +@@ -0,0 +1,2 @@ ++#define _FILE_OFFSET_BITS 64 ++#include +diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h b/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h +index f7615a447e..d8a291a331 100644 +--- a/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h ++++ b/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h +@@ -33,7 +33,7 @@ + # define __O_LARGEFILE 0200000 + #endif + +-#if __WORDSIZE == 64 ++#if __WORDSIZE == 64 && !defined __USE_FILE_OFFSET64 + # define F_GETLK 5 + # define F_SETLK 6 + # define F_SETLKW 7 +-- +2.33.0 + diff --git a/0012-libio-Fix-oversized-__io_vtables.patch b/0012-libio-Fix-oversized-__io_vtables.patch new file mode 100644 index 0000000..6f8a158 --- /dev/null +++ b/0012-libio-Fix-oversized-__io_vtables.patch @@ -0,0 +1,51 @@ +From 92201f16cbcfd9eafe314ef6654be2ea7ba25675 Mon Sep 17 00:00:00 2001 +From: Adam Jackson +Date: Fri, 8 Sep 2023 15:55:19 -0400 +Subject: [PATCH 12/12] libio: Fix oversized __io_vtables + +IO_VTABLES_LEN is the size of the struct array in bytes, not the number +of __IO_jump_t's in the array. Drops just under 384kb from .rodata on +LP64 machines. + +Fixes: 3020f72618e ("libio: Remove the usage of __libc_IO_vtables") +Signed-off-by: Adam Jackson +Reviewed-by: Florian Weimer +Tested-by: Florian Weimer +(cherry picked from commit 8cb69e054386f980f9ff4d93b157861d72b2019e) +--- + libio/vtables.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/libio/vtables.c b/libio/vtables.c +index 1d8ad612e9..34f7e15f1c 100644 +--- a/libio/vtables.c ++++ b/libio/vtables.c +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -88,7 +89,7 @@ + # pragma weak __wprintf_buffer_as_file_xsputn + #endif + +-const struct _IO_jump_t __io_vtables[IO_VTABLES_LEN] attribute_relro = ++const struct _IO_jump_t __io_vtables[] attribute_relro = + { + /* _IO_str_jumps */ + [IO_STR_JUMPS] = +@@ -485,6 +486,8 @@ const struct _IO_jump_t __io_vtables[IO_VTABLES_LEN] attribute_relro = + }, + #endif + }; ++_Static_assert (array_length (__io_vtables) == IO_VTABLES_NUM, ++ "initializer count"); + + #ifdef SHARED + +-- +2.33.0 + diff --git a/glibc.spec b/glibc.spec index a8014d2..df56576 100644 --- a/glibc.spec +++ b/glibc.spec @@ -67,7 +67,7 @@ ############################################################################## Name: glibc Version: 2.38 -Release: 6 +Release: 7 Summary: The GNU libc libraries License: %{all_license} URL: http://www.gnu.org/software/glibc/ @@ -87,6 +87,18 @@ Source8: testsuite_whitelist Patch0: glibc-1070416.patch Patch1: stdlib-Improve-tst-realpath-compatibility-with-sourc.patch +Patch2: 0001-x86-Fix-for-cache-computation-on-AMD-legacy-cpus.patch +Patch3: 0002-nscd-Do-not-rebuild-getaddrinfo-bug-30709.patch +Patch4: 0003-x86-Fix-incorrect-scope-of-setting-shared_per_thread.patch +Patch5: 0004-x86_64-Fix-build-with-disable-multiarch-BZ-30721.patch +Patch6: 0005-i686-Fix-build-with-disable-multiarch.patch +Patch7: 0006-malloc-Enable-merging-of-remainders-in-memalign-bug-.patch +Patch8: 0007-malloc-Remove-bin-scanning-from-memalign-bug-30723.patch +Patch9: 0008-sysdeps-tst-bz21269-fix-test-parameter.patch +Patch10: 0009-sysdeps-tst-bz21269-handle-ENOSYS-skip-appropriately.patch +Patch11: 0010-sysdeps-tst-bz21269-fix-Wreturn-type.patch +Patch12: 0011-io-Fix-record-locking-contants-for-powerpc64-with-__.patch +Patch13: 0012-libio-Fix-oversized-__io_vtables.patch Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch Patch9001: locale-delete-no-hard-link-to-avoid-all_language-pac.patch @@ -1301,6 +1313,9 @@ fi %endif %changelog +* Mon Sep 11 2023 Qingqing Li - 2.38-7 +- backport patches from glibc upstream 2.38 branch + * Mon Sep 11 2023 Qingqing Li - 2.38-6 - stdlib: Improve tst-realpath compatibility with source fortification