backport patches from glibc upstream 2.38 branch

Author: liqingqing_1229
Date:   2023-09-11 11:14:53 +08:00
Parent: 11e462757d
Commit: 969c290221

13 changed files with 1507 additions and 1 deletions

0001-x86-Fix-for-cache-computation-on-AMD-legacy-cpus.patch
@@ -0,0 +1,286 @@
From ced101ed9d3b7cfd12d97ef24940cb00b8658c81 Mon Sep 17 00:00:00 2001
From: Sajan Karumanchi <sajan.karumanchi@amd.com>
Date: Tue, 1 Aug 2023 15:20:55 +0000
Subject: [PATCH 01/12] x86: Fix for cache computation on AMD legacy cpus.
Some legacy AMD CPUs and hypervisors return zero for the _cpuid_
leaf '0x8000_001D', resulting in zeroed-out computed cache values.
This patch reintroduces the old way of computing the cache values as a
fail-safe path to handle these cases, and fixes the 'level4_cache_size'
value returned through handle_amd().
Reviewed-by: Premachandra Mallappa <premachandra.mallappa@amd.com>
Tested-by: Florian Weimer <fweimer@redhat.com>
---
sysdeps/x86/dl-cacheinfo.h | 226 ++++++++++++++++++++++++++++++++-----
1 file changed, 199 insertions(+), 27 deletions(-)
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index cd4d0351ae..285773039f 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -315,40 +315,206 @@ handle_amd (int name)
{
unsigned int eax;
unsigned int ebx;
- unsigned int ecx;
+ unsigned int ecx = 0;
unsigned int edx;
- unsigned int count = 0x1;
+ unsigned int max_cpuid = 0;
+ unsigned int fn = 0;
/* No level 4 cache (yet). */
if (name > _SC_LEVEL3_CACHE_LINESIZE)
return 0;
- if (name >= _SC_LEVEL3_CACHE_SIZE)
- count = 0x3;
- else if (name >= _SC_LEVEL2_CACHE_SIZE)
- count = 0x2;
- else if (name >= _SC_LEVEL1_DCACHE_SIZE)
- count = 0x0;
+ __cpuid (0x80000000, max_cpuid, ebx, ecx, edx);
+
+ if (max_cpuid >= 0x8000001D)
+ /* Use __cpuid__ '0x8000_001D' to compute cache details. */
+ {
+ unsigned int count = 0x1;
+
+ if (name >= _SC_LEVEL3_CACHE_SIZE)
+ count = 0x3;
+ else if (name >= _SC_LEVEL2_CACHE_SIZE)
+ count = 0x2;
+ else if (name >= _SC_LEVEL1_DCACHE_SIZE)
+ count = 0x0;
+
+ __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx);
+
+ if (ecx != 0)
+ {
+ switch (name)
+ {
+ case _SC_LEVEL1_ICACHE_ASSOC:
+ case _SC_LEVEL1_DCACHE_ASSOC:
+ case _SC_LEVEL2_CACHE_ASSOC:
+ case _SC_LEVEL3_CACHE_ASSOC:
+ return ((ebx >> 22) & 0x3ff) + 1;
+ case _SC_LEVEL1_ICACHE_LINESIZE:
+ case _SC_LEVEL1_DCACHE_LINESIZE:
+ case _SC_LEVEL2_CACHE_LINESIZE:
+ case _SC_LEVEL3_CACHE_LINESIZE:
+ return (ebx & 0xfff) + 1;
+ case _SC_LEVEL1_ICACHE_SIZE:
+ case _SC_LEVEL1_DCACHE_SIZE:
+ case _SC_LEVEL2_CACHE_SIZE:
+ case _SC_LEVEL3_CACHE_SIZE:
+ return (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1);
+ default:
+ __builtin_unreachable ();
+ }
+ return -1;
+ }
+ }
+
+ /* Legacy cache computation for CPUs prior to Bulldozer family.
+ This is also a fail-safe mechanism for some hypervisors that
+ accidentally configure __cpuid__ '0x8000_001D' to Zero. */
- __cpuid_count (0x8000001D, count, eax, ebx, ecx, edx);
+ fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
+
+ if (max_cpuid < fn)
+ return 0;
+
+ __cpuid (fn, eax, ebx, ecx, edx);
+
+ if (name < _SC_LEVEL1_DCACHE_SIZE)
+ {
+ name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
+ ecx = edx;
+ }
switch (name)
{
- case _SC_LEVEL1_ICACHE_ASSOC:
- case _SC_LEVEL1_DCACHE_ASSOC:
- case _SC_LEVEL2_CACHE_ASSOC:
+ case _SC_LEVEL1_DCACHE_SIZE:
+ return (ecx >> 14) & 0x3fc00;
+
+ case _SC_LEVEL1_DCACHE_ASSOC:
+ ecx >>= 16;
+ if ((ecx & 0xff) == 0xff)
+ {
+ /* Fully associative. */
+ return (ecx << 2) & 0x3fc00;
+ }
+ return ecx & 0xff;
+
+ case _SC_LEVEL1_DCACHE_LINESIZE:
+ return ecx & 0xff;
+
+ case _SC_LEVEL2_CACHE_SIZE:
+ return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;
+
+ case _SC_LEVEL2_CACHE_ASSOC:
+ switch ((ecx >> 12) & 0xf)
+ {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ return (ecx >> 12) & 0xf;
+ case 6:
+ return 8;
+ case 8:
+ return 16;
+ case 10:
+ return 32;
+ case 11:
+ return 48;
+ case 12:
+ return 64;
+ case 13:
+ return 96;
+ case 14:
+ return 128;
+ case 15:
+ return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
+ default:
+ return 0;
+ }
+
+ case _SC_LEVEL2_CACHE_LINESIZE:
+ return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;
+
+ case _SC_LEVEL3_CACHE_SIZE:
+ {
+ long int total_l3_cache = 0, l3_cache_per_thread = 0;
+ unsigned int threads = 0;
+ const struct cpu_features *cpu_features;
+
+ if ((edx & 0xf000) == 0)
+ return 0;
+
+ total_l3_cache = (edx & 0x3ffc0000) << 1;
+ cpu_features = __get_cpu_features ();
+
+ /* Figure out the number of logical threads that share L3. */
+ if (max_cpuid >= 0x80000008)
+ {
+ /* Get width of APIC ID. */
+ __cpuid (0x80000008, eax, ebx, ecx, edx);
+ threads = (ecx & 0xff) + 1;
+ }
+
+ if (threads == 0)
+ {
+ /* If APIC ID width is not available, use logical
+ processor count. */
+ __cpuid (0x00000001, eax, ebx, ecx, edx);
+ if ((edx & (1 << 28)) != 0)
+ threads = (ebx >> 16) & 0xff;
+ }
+
+ /* Cap usage of highest cache level to the number of
+ supported threads. */
+ if (threads > 0)
+ l3_cache_per_thread = total_l3_cache/threads;
+
+ /* Get shared cache per ccx for Zen architectures. */
+ if (cpu_features->basic.family >= 0x17)
+ {
+ long int l3_cache_per_ccx = 0;
+ /* Get number of threads share the L3 cache in CCX. */
+ __cpuid_count (0x8000001D, 0x3, eax, ebx, ecx, edx);
+ unsigned int threads_per_ccx = ((eax >> 14) & 0xfff) + 1;
+ l3_cache_per_ccx = l3_cache_per_thread * threads_per_ccx;
+ return l3_cache_per_ccx;
+ }
+ else
+ {
+ return l3_cache_per_thread;
+ }
+ }
+
case _SC_LEVEL3_CACHE_ASSOC:
- return ecx ? ((ebx >> 22) & 0x3ff) + 1 : 0;
- case _SC_LEVEL1_ICACHE_LINESIZE:
- case _SC_LEVEL1_DCACHE_LINESIZE:
- case _SC_LEVEL2_CACHE_LINESIZE:
+ switch ((edx >> 12) & 0xf)
+ {
+ case 0:
+ case 1:
+ case 2:
+ case 4:
+ return (edx >> 12) & 0xf;
+ case 6:
+ return 8;
+ case 8:
+ return 16;
+ case 10:
+ return 32;
+ case 11:
+ return 48;
+ case 12:
+ return 64;
+ case 13:
+ return 96;
+ case 14:
+ return 128;
+ case 15:
+ return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
+ default:
+ return 0;
+ }
+
case _SC_LEVEL3_CACHE_LINESIZE:
- return ecx ? (ebx & 0xfff) + 1 : 0;
- case _SC_LEVEL1_ICACHE_SIZE:
- case _SC_LEVEL1_DCACHE_SIZE:
- case _SC_LEVEL2_CACHE_SIZE:
- case _SC_LEVEL3_CACHE_SIZE:
- return ecx ? (((ebx >> 22) & 0x3ff) + 1) * ((ebx & 0xfff) + 1) * (ecx + 1): 0;
+ return (edx & 0xf000) == 0 ? 0 : edx & 0xff;
+
default:
__builtin_unreachable ();
}
@@ -703,7 +869,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
- shared_per_thread = shared;
level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE);
level1_icache_linesize = handle_amd (_SC_LEVEL1_ICACHE_LINESIZE);
@@ -716,13 +881,20 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
level3_cache_size = shared;
level3_cache_assoc = handle_amd (_SC_LEVEL3_CACHE_ASSOC);
level3_cache_linesize = handle_amd (_SC_LEVEL3_CACHE_LINESIZE);
+ level4_cache_size = handle_amd (_SC_LEVEL4_CACHE_SIZE);
if (shared <= 0)
- /* No shared L3 cache. All we have is the L2 cache. */
- shared = core;
+ {
+ /* No shared L3 cache. All we have is the L2 cache. */
+ shared = core;
+ }
+ else if (cpu_features->basic.family < 0x17)
+ {
+ /* Account for exclusive L2 and L3 caches. */
+ shared += core;
+ }
- if (shared_per_thread <= 0)
- shared_per_thread = shared;
+ shared_per_thread = shared;
}
cpu_features->level1_icache_size = level1_icache_size;
--
2.33.0
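
As a standalone illustration (my sketch, not part of the patch), the program below mirrors the formula the new code uses to decode CPUID leaf 0x8000001D: ways times line size times sets, with each field stored minus one, and a zeroed leaf signalling that the legacy path is needed. It assumes a GCC or clang toolchain for <cpuid.h>, and uses subleaf 0x3 for L3 to match the patch's count mapping; the per-CCX adjustment the patch applies for Zen is omitted.

```
#include <cpuid.h>
#include <stdio.h>

int
main (void)
{
  unsigned int eax, ebx, ecx, edx;
  unsigned int max_cpuid = 0;

  if (!__get_cpuid (0x80000000, &max_cpuid, &ebx, &ecx, &edx))
    return 1;

  if (max_cpuid >= 0x8000001D
      && __get_cpuid_count (0x8000001D, 0x3, &eax, &ebx, &ecx, &edx)
      && ecx != 0)
    {
      /* Fields are stored minus one, exactly as the patch decodes them.  */
      unsigned int assoc = ((ebx >> 22) & 0x3ff) + 1;   /* ways       */
      unsigned int line  = (ebx & 0xfff) + 1;           /* line size  */
      unsigned long size = (unsigned long) assoc * line * (ecx + 1);
      printf ("raw L3 size: %lu bytes (%u-way, %u-byte lines)\n",
              size, assoc, line);
    }
  else
    /* Zeroed leaf: this is where the patch's legacy path takes over.  */
    puts ("leaf 0x8000001D unusable; legacy computation needed");
  return 0;
}
```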

0002-nscd-Do-not-rebuild-getaddrinfo-bug-30709.patch
@@ -0,0 +1,185 @@
From 6b99458d197ab779ebb6ff632c168e2cbfa4f543 Mon Sep 17 00:00:00 2001
From: Florian Weimer <fweimer@redhat.com>
Date: Fri, 11 Aug 2023 10:10:16 +0200
Subject: [PATCH 02/12] nscd: Do not rebuild getaddrinfo (bug 30709)
The nscd daemon caches hosts data from NSS modules verbatim, without
filtering protocol families or sorting them (otherwise separate caches
would be needed for certain ai_flags combinations). The cache
implementation is completely separate from the getaddrinfo code. This
means that rebuilding getaddrinfo is not needed. The only function
actually used is __bump_nl_timestamp from check_pf.c, and this change
moves it into nscd/connections.c.
Tested on x86_64-linux-gnu with -fexceptions, built with
build-many-glibcs.py. I also backported this patch into a distribution
that still supports nscd and verified manually that caching still works.
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
(cherry picked from commit 039ff51ac7e02db1cfc0c23e38ac7bfbb00221d1)
---
include/ifaddrs.h | 4 ---
inet/check_pf.c | 9 ------
nscd/Makefile | 2 +-
nscd/connections.c | 11 +++++++
nscd/gai.c | 50 ------------------------------
sysdeps/unix/sysv/linux/check_pf.c | 17 +---------
6 files changed, 13 insertions(+), 80 deletions(-)
delete mode 100644 nscd/gai.c
diff --git a/include/ifaddrs.h b/include/ifaddrs.h
index 416118f1b3..19a3afb19f 100644
--- a/include/ifaddrs.h
+++ b/include/ifaddrs.h
@@ -34,9 +34,5 @@ extern void __check_native (uint32_t a1_index, int *a1_native,
uint32_t a2_index, int *a2_native)
attribute_hidden;
-#if IS_IN (nscd)
-extern uint32_t __bump_nl_timestamp (void) attribute_hidden;
-#endif
-
# endif /* !_ISOMAC */
#endif /* ifaddrs.h */
diff --git a/inet/check_pf.c b/inet/check_pf.c
index 5310c99121..6d1475920f 100644
--- a/inet/check_pf.c
+++ b/inet/check_pf.c
@@ -60,12 +60,3 @@ __free_in6ai (struct in6addrinfo *in6ai)
{
/* Nothing to do. */
}
-
-
-#if IS_IN (nscd)
-uint32_t
-__bump_nl_timestamp (void)
-{
- return 0;
-}
-#endif
diff --git a/nscd/Makefile b/nscd/Makefile
index 2a0489f4cf..16b6460ee9 100644
--- a/nscd/Makefile
+++ b/nscd/Makefile
@@ -35,7 +35,7 @@ nscd-modules := nscd connections pwdcache getpwnam_r getpwuid_r grpcache \
getgrnam_r getgrgid_r hstcache gethstbyad_r gethstbynm3_r \
getsrvbynm_r getsrvbypt_r servicescache \
dbg_log nscd_conf nscd_stat cache mem nscd_setup_thread \
- xmalloc xstrdup aicache initgrcache gai res_hconf \
+ xmalloc xstrdup aicache initgrcache res_hconf \
netgroupcache cachedumper
ifeq ($(build-nscd)$(have-thread-library),yesyes)
diff --git a/nscd/connections.c b/nscd/connections.c
index a405a44a9b..15693e5090 100644
--- a/nscd/connections.c
+++ b/nscd/connections.c
@@ -256,6 +256,17 @@ int inotify_fd = -1;
#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates. */
static int nl_status_fd = -1;
+
+static uint32_t
+__bump_nl_timestamp (void)
+{
+ static uint32_t nl_timestamp;
+
+ if (atomic_fetch_add_relaxed (&nl_timestamp, 1) + 1 == 0)
+ atomic_fetch_add_relaxed (&nl_timestamp, 1);
+
+ return nl_timestamp;
+}
#endif
/* Number of times clients had to wait. */
diff --git a/nscd/gai.c b/nscd/gai.c
deleted file mode 100644
index e29f3fe583..0000000000
--- a/nscd/gai.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/* Copyright (C) 2004-2023 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, see <https://www.gnu.org/licenses/>. */
-
-#include <alloca.h>
-#include <sys/stat.h>
-
-/* This file uses the getaddrinfo code but it compiles it without NSCD
- support. We just need a few symbol renames. */
-#define __ioctl ioctl
-#define __getsockname getsockname
-#define __socket socket
-#define __recvmsg recvmsg
-#define __bind bind
-#define __sendto sendto
-#define __strchrnul strchrnul
-#define __getline getline
-#define __qsort_r qsort_r
-/* nscd uses 1MB or 2MB thread stacks. */
-#define __libc_use_alloca(size) (size <= __MAX_ALLOCA_CUTOFF)
-#define __getifaddrs getifaddrs
-#define __freeifaddrs freeifaddrs
-#undef __fstat64
-#define __fstat64 fstat64
-#undef __stat64
-#define __stat64 stat64
-
-/* We are nscd, so we don't want to be talking to ourselves. */
-#undef USE_NSCD
-
-#include <getaddrinfo.c>
-
-/* Support code. */
-#include <check_pf.c>
-#include <check_native.c>
-
-/* Some variables normally defined in libc. */
-nss_action_list __nss_hosts_database attribute_hidden;
diff --git a/sysdeps/unix/sysv/linux/check_pf.c b/sysdeps/unix/sysv/linux/check_pf.c
index 2b0b8b6368..3aa6a00348 100644
--- a/sysdeps/unix/sysv/linux/check_pf.c
+++ b/sysdeps/unix/sysv/linux/check_pf.c
@@ -66,25 +66,10 @@ static struct cached_data *cache;
__libc_lock_define_initialized (static, lock);
-#if IS_IN (nscd)
-static uint32_t nl_timestamp;
-
-uint32_t
-__bump_nl_timestamp (void)
-{
- if (atomic_fetch_add_relaxed (&nl_timestamp, 1) + 1 == 0)
- atomic_fetch_add_relaxed (&nl_timestamp, 1);
-
- return nl_timestamp;
-}
-#endif
-
static inline uint32_t
get_nl_timestamp (void)
{
-#if IS_IN (nscd)
- return nl_timestamp;
-#elif defined USE_NSCD
+#if defined USE_NSCD
return __nscd_get_nl_timestamp ();
#else
return 0;
--
2.33.0
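
The moved __bump_nl_timestamp keeps a relaxed atomic counter that deliberately skips zero when it wraps, presumably so that 0 can keep meaning "no update seen yet" to readers. A self-contained sketch of that counter pattern (illustrative, not the nscd code):

```
#include <inttypes.h>
#include <stdatomic.h>
#include <stdio.h>

static _Atomic uint32_t nl_timestamp;

static uint32_t
bump_nl_timestamp (void)
{
  /* If the increment just wrapped to 0, bump once more to skip it.  */
  if (atomic_fetch_add_explicit (&nl_timestamp, 1, memory_order_relaxed)
      + 1 == 0)
    atomic_fetch_add_explicit (&nl_timestamp, 1, memory_order_relaxed);

  return nl_timestamp;
}

int
main (void)
{
  nl_timestamp = UINT32_MAX - 1;   /* force a wrap for the demo */
  for (int i = 0; i < 3; i++)
    printf ("%" PRIu32 "\n", bump_nl_timestamp ());  /* 4294967295, 1, 2 */
  return 0;
}
```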

0003-x86-Fix-incorrect-scope-of-setting-shared_per_thread.patch
@@ -0,0 +1,45 @@
From 5ea70cc02626d9b85f1570153873d8648a47bf95 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n@gmail.com>
Date: Thu, 10 Aug 2023 19:28:24 -0500
Subject: [PATCH 03/12] x86: Fix incorrect scope of setting `shared_per_thread`
[BZ# 30745]
The code:
```
if (shared_per_thread > 0 && threads > 0)
  shared_per_thread /= threads;
```
was accidentally moved inside the `else` scope. This does not
match the previous behavior (before commit af992e7abd).
This patch fixes that by putting the division after the `else` block.
(cherry picked from commit 084fb31bc2c5f95ae0b9e6df4d3cf0ff43471ede)
---
sysdeps/x86/dl-cacheinfo.h | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h
index 285773039f..5ddb35c9d9 100644
--- a/sysdeps/x86/dl-cacheinfo.h
+++ b/sysdeps/x86/dl-cacheinfo.h
@@ -770,11 +770,10 @@ get_common_cache_info (long int *shared_ptr, long int * shared_per_thread_ptr, u
level. */
threads = ((cpu_features->features[CPUID_INDEX_1].cpuid.ebx >> 16)
& 0xff);
-
- /* Get per-thread size of highest level cache. */
- if (shared_per_thread > 0 && threads > 0)
- shared_per_thread /= threads;
}
+ /* Get per-thread size of highest level cache. */
+ if (shared_per_thread > 0 && threads > 0)
+ shared_per_thread /= threads;
}
/* Account for non-inclusive L2 and L3 caches. */
--
2.33.0
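
A toy model of the corrected control flow (hypothetical names, not the glibc code): `threads` may come from either detection path, so the per-thread division has to sit after the if/else rather than inside the else branch.

```
#include <stdio.h>

static long
per_thread_share (long shared, unsigned int threads_from_leaf,
                  unsigned int threads_from_cpuid1)
{
  unsigned int threads;

  if (threads_from_leaf != 0)
    threads = threads_from_leaf;      /* modern cache-topology leaf */
  else
    threads = threads_from_cpuid1;    /* legacy CPUID.1 EBX[23:16] path */

  /* The fix: divide after the if/else so both paths get a per-thread
     size, not only the legacy path.  */
  if (shared > 0 && threads > 0)
    shared /= threads;
  return shared;
}

int
main (void)
{
  /* 32 MiB L3 shared by 16 threads -> 2 MiB per thread.  */
  printf ("%ld\n", per_thread_share (32L << 20, 16, 0));
  return 0;
}
```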

0004-x86_64-Fix-build-with-disable-multiarch-BZ-30721.patch
@@ -0,0 +1,60 @@
From 6135d50e44233d8c89ca788f78c669941ad09fb9 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Tue, 8 Aug 2023 09:27:54 -0300
Subject: [PATCH 04/12] x86_64: Fix build with --disable-multiarch (BZ 30721)
With multiarch disabled, the default memmove implementation provides
the fortify routines for memcpy, mempcpy, and memmove. However, it
does not provide the internal hidden definitions used when building
with fortify enabled. The memset implementation has a similar issue.
Checked on x86_64-linux-gnu building with different options:
default and --disable-multi-arch plus default, --disable-default-pie,
--enable-fortify-source={2,3}, and --enable-fortify-source={2,3}
with --disable-default-pie.
Tested-by: Andreas K. Huettel <dilfridge@gentoo.org>
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
(cherry picked from commit 51cb52214fcd72849c640b12f5099ed3ac776181)
---
sysdeps/x86_64/memcpy.S | 2 +-
sysdeps/x86_64/memmove.S | 3 +++
sysdeps/x86_64/memset.S | 1 +
3 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/sysdeps/x86_64/memcpy.S b/sysdeps/x86_64/memcpy.S
index d98500a78a..4922cba657 100644
--- a/sysdeps/x86_64/memcpy.S
+++ b/sysdeps/x86_64/memcpy.S
@@ -1 +1 @@
-/* Implemented in memcpy.S. */
+/* Implemented in memmove.S. */
diff --git a/sysdeps/x86_64/memmove.S b/sysdeps/x86_64/memmove.S
index f0b84e3b52..c3c08165e1 100644
--- a/sysdeps/x86_64/memmove.S
+++ b/sysdeps/x86_64/memmove.S
@@ -46,6 +46,9 @@ weak_alias (__mempcpy, mempcpy)
#ifndef USE_MULTIARCH
libc_hidden_builtin_def (memmove)
+libc_hidden_builtin_def (__memmove_chk)
+libc_hidden_builtin_def (__memcpy_chk)
+libc_hidden_builtin_def (__mempcpy_chk)
# if defined SHARED && IS_IN (libc)
strong_alias (memmove, __memcpy)
libc_hidden_ver (memmove, memcpy)
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index 7c99df36db..c6df24e8de 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -32,6 +32,7 @@
#include "isa-default-impl.h"
libc_hidden_builtin_def (memset)
+libc_hidden_builtin_def (__memset_chk)
#if IS_IN (libc)
libc_hidden_def (__wmemset)
--
2.33.0
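
For context, the _chk symbols are what fortified user code actually calls. A minimal sketch (my example, not from the patch): built with fortification, the memcpy below typically lowers to __memcpy_chk when the compiler cannot prove the copy fits the destination, which is why libc must provide those symbols internally too.

```
/* Build: gcc -O2 -D_FORTIFY_SOURCE=2 demo.c
   Inspect with `objdump -d`; depending on what the compiler can prove
   about `n`, the call lowers to __memcpy_chk or to plain memcpy.  */
#include <stdio.h>
#include <string.h>

int
main (int argc, char **argv)
{
  char buf[16];
  size_t n = argc > 1 ? strlen (argv[1]) : 5;

  if (n >= sizeof buf)
    n = sizeof buf - 1;
  memcpy (buf, "0123456789abcdef", n);  /* fortified: checked against 16 */
  buf[n] = '\0';
  puts (buf);
  return 0;
}
```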

0005-i686-Fix-build-with-disable-multiarch.patch
@@ -0,0 +1,100 @@
From 7ac405a74c6069b0627dc2d8449a82a621f8ff06 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Tue, 8 Aug 2023 09:27:55 -0300
Subject: [PATCH 05/12] i686: Fix build with --disable-multiarch
Since i686 provides the fortified wrappers for memcpy, mempcpy,
memmove, and memset in the same string implementation, the static
build tries to optimize it by not tying the fortified wrappers
to the string routines (to avoid pulling in the fortify functions if
they are not required).
Checked on i686-linux-gnu building with different options:
default and --disable-multi-arch plus default, --disable-default-pie,
--enable-fortify-source={2,3}, and --enable-fortify-source={2,3}
with --disable-default-pie.
Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
(cherry picked from commit c73c96a4a1af1326df7f96eec58209e1e04066d8)
---
sysdeps/i386/i686/memcpy.S | 2 +-
sysdeps/i386/i686/mempcpy.S | 2 +-
sysdeps/i386/i686/multiarch/memcpy_chk.c | 2 ++
sysdeps/i386/i686/multiarch/memmove_chk.c | 2 ++
sysdeps/i386/i686/multiarch/mempcpy_chk.c | 2 ++
sysdeps/i386/i686/multiarch/memset_chk.c | 2 ++
6 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/sysdeps/i386/i686/memcpy.S b/sysdeps/i386/i686/memcpy.S
index 9b48ec0ea1..b86af4aac9 100644
--- a/sysdeps/i386/i686/memcpy.S
+++ b/sysdeps/i386/i686/memcpy.S
@@ -27,7 +27,7 @@
#define LEN SRC+4
.text
-#if defined PIC && IS_IN (libc)
+#if defined SHARED && IS_IN (libc)
ENTRY_CHK (__memcpy_chk)
movl 12(%esp), %eax
cmpl %eax, 16(%esp)
diff --git a/sysdeps/i386/i686/mempcpy.S b/sysdeps/i386/i686/mempcpy.S
index 26f8501e7d..14d9dd681a 100644
--- a/sysdeps/i386/i686/mempcpy.S
+++ b/sysdeps/i386/i686/mempcpy.S
@@ -27,7 +27,7 @@
#define LEN SRC+4
.text
-#if defined PIC && IS_IN (libc)
+#if defined SHARED && IS_IN (libc)
ENTRY_CHK (__mempcpy_chk)
movl 12(%esp), %eax
cmpl %eax, 16(%esp)
diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.c b/sysdeps/i386/i686/multiarch/memcpy_chk.c
index ec945dc91f..c3a8aeaf18 100644
--- a/sysdeps/i386/i686/multiarch/memcpy_chk.c
+++ b/sysdeps/i386/i686/multiarch/memcpy_chk.c
@@ -32,4 +32,6 @@ libc_ifunc_redirected (__redirect_memcpy_chk, __memcpy_chk,
__hidden_ver1 (__memcpy_chk, __GI___memcpy_chk, __redirect_memcpy_chk)
__attribute__ ((visibility ("hidden"))) __attribute_copy__ (__memcpy_chk);
# endif
+#else
+# include <debug/memcpy_chk.c>
#endif
diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.c b/sysdeps/i386/i686/multiarch/memmove_chk.c
index 55c7601d5d..070dde083a 100644
--- a/sysdeps/i386/i686/multiarch/memmove_chk.c
+++ b/sysdeps/i386/i686/multiarch/memmove_chk.c
@@ -32,4 +32,6 @@ libc_ifunc_redirected (__redirect_memmove_chk, __memmove_chk,
__hidden_ver1 (__memmove_chk, __GI___memmove_chk, __redirect_memmove_chk)
__attribute__ ((visibility ("hidden"))) __attribute_copy__ (__memmove_chk);
# endif
+#else
+# include <debug/memmove_chk.c>
#endif
diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.c b/sysdeps/i386/i686/multiarch/mempcpy_chk.c
index 83569cf9d9..14360f1828 100644
--- a/sysdeps/i386/i686/multiarch/mempcpy_chk.c
+++ b/sysdeps/i386/i686/multiarch/mempcpy_chk.c
@@ -32,4 +32,6 @@ libc_ifunc_redirected (__redirect_mempcpy_chk, __mempcpy_chk,
__hidden_ver1 (__mempcpy_chk, __GI___mempcpy_chk, __redirect_mempcpy_chk)
__attribute__ ((visibility ("hidden"))) __attribute_copy__ (__mempcpy_chk);
# endif
+#else
+# include <debug/mempcpy_chk.c>
#endif
diff --git a/sysdeps/i386/i686/multiarch/memset_chk.c b/sysdeps/i386/i686/multiarch/memset_chk.c
index 1a7503858d..8179ef7c0b 100644
--- a/sysdeps/i386/i686/multiarch/memset_chk.c
+++ b/sysdeps/i386/i686/multiarch/memset_chk.c
@@ -32,4 +32,6 @@ libc_ifunc_redirected (__redirect_memset_chk, __memset_chk,
__hidden_ver1 (__memset_chk, __GI___memset_chk, __redirect_memset_chk)
__attribute__ ((visibility ("hidden"))) __attribute_copy__ (__memset_chk);
# endif
+#else
+# include <debug/memset_chk.c>
#endif
--
2.33.0

0006-malloc-Enable-merging-of-remainders-in-memalign-bug-.patch
@@ -0,0 +1,301 @@
From 98c293c61f770b6b7a22f89a6ea81b711ecb1952 Mon Sep 17 00:00:00 2001
From: Florian Weimer <fweimer@redhat.com>
Date: Fri, 11 Aug 2023 11:18:17 +0200
Subject: [PATCH 06/12] malloc: Enable merging of remainders in memalign (bug
30723)
Previously, calling _int_free from _int_memalign could put remainders
into the tcache or into fastbins, where they are invisible to the
low-level allocator. This results in missed merge opportunities
because once these freed chunks become available to the low-level
allocator, further memalign allocations (even of the same size are)
likely obstructing merges.
Furthermore, during forwards merging in _int_memalign, do not
completely give up when the remainder is too small to serve as a
chunk on its own. We can still give it back if it can be merged
with the following unused chunk. This makes it more likely that
memalign calls in a loop achieve a compact memory layout,
independently of initial heap layout.
Drop some useless (unsigned long) casts along the way, and tweak
the style to more closely match GNU on changed lines.
Reviewed-by: DJ Delorie <dj@redhat.com>
(cherry picked from commit 542b1105852568c3ebc712225ae78b8c8ba31a78)
---
malloc/malloc.c | 197 +++++++++++++++++++++++++++++-------------------
1 file changed, 121 insertions(+), 76 deletions(-)
diff --git a/malloc/malloc.c b/malloc/malloc.c
index e2f1a615a4..948f9759af 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1086,6 +1086,11 @@ typedef struct malloc_chunk* mchunkptr;
static void* _int_malloc(mstate, size_t);
static void _int_free(mstate, mchunkptr, int);
+static void _int_free_merge_chunk (mstate, mchunkptr, INTERNAL_SIZE_T);
+static INTERNAL_SIZE_T _int_free_create_chunk (mstate,
+ mchunkptr, INTERNAL_SIZE_T,
+ mchunkptr, INTERNAL_SIZE_T);
+static void _int_free_maybe_consolidate (mstate, INTERNAL_SIZE_T);
static void* _int_realloc(mstate, mchunkptr, INTERNAL_SIZE_T,
INTERNAL_SIZE_T);
static void* _int_memalign(mstate, size_t, size_t);
@@ -4637,31 +4642,52 @@ _int_free (mstate av, mchunkptr p, int have_lock)
if (!have_lock)
__libc_lock_lock (av->mutex);
- nextchunk = chunk_at_offset(p, size);
-
- /* Lightweight tests: check whether the block is already the
- top block. */
- if (__glibc_unlikely (p == av->top))
- malloc_printerr ("double free or corruption (top)");
- /* Or whether the next chunk is beyond the boundaries of the arena. */
- if (__builtin_expect (contiguous (av)
- && (char *) nextchunk
- >= ((char *) av->top + chunksize(av->top)), 0))
- malloc_printerr ("double free or corruption (out)");
- /* Or whether the block is actually not marked used. */
- if (__glibc_unlikely (!prev_inuse(nextchunk)))
- malloc_printerr ("double free or corruption (!prev)");
-
- nextsize = chunksize(nextchunk);
- if (__builtin_expect (chunksize_nomask (nextchunk) <= CHUNK_HDR_SZ, 0)
- || __builtin_expect (nextsize >= av->system_mem, 0))
- malloc_printerr ("free(): invalid next size (normal)");
+ _int_free_merge_chunk (av, p, size);
- free_perturb (chunk2mem(p), size - CHUNK_HDR_SZ);
+ if (!have_lock)
+ __libc_lock_unlock (av->mutex);
+ }
+ /*
+ If the chunk was allocated via mmap, release via munmap().
+ */
+
+ else {
+ munmap_chunk (p);
+ }
+}
+
+/* Try to merge chunk P of SIZE bytes with its neighbors. Put the
+ resulting chunk on the appropriate bin list. P must not be on a
+ bin list yet, and it can be in use. */
+static void
+_int_free_merge_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size)
+{
+ mchunkptr nextchunk = chunk_at_offset(p, size);
+
+ /* Lightweight tests: check whether the block is already the
+ top block. */
+ if (__glibc_unlikely (p == av->top))
+ malloc_printerr ("double free or corruption (top)");
+ /* Or whether the next chunk is beyond the boundaries of the arena. */
+ if (__builtin_expect (contiguous (av)
+ && (char *) nextchunk
+ >= ((char *) av->top + chunksize(av->top)), 0))
+ malloc_printerr ("double free or corruption (out)");
+ /* Or whether the block is actually not marked used. */
+ if (__glibc_unlikely (!prev_inuse(nextchunk)))
+ malloc_printerr ("double free or corruption (!prev)");
+
+ INTERNAL_SIZE_T nextsize = chunksize(nextchunk);
+ if (__builtin_expect (chunksize_nomask (nextchunk) <= CHUNK_HDR_SZ, 0)
+ || __builtin_expect (nextsize >= av->system_mem, 0))
+ malloc_printerr ("free(): invalid next size (normal)");
+
+ free_perturb (chunk2mem(p), size - CHUNK_HDR_SZ);
- /* consolidate backward */
- if (!prev_inuse(p)) {
- prevsize = prev_size (p);
+ /* Consolidate backward. */
+ if (!prev_inuse(p))
+ {
+ INTERNAL_SIZE_T prevsize = prev_size (p);
size += prevsize;
p = chunk_at_offset(p, -((long) prevsize));
if (__glibc_unlikely (chunksize(p) != prevsize))
@@ -4669,9 +4695,25 @@ _int_free (mstate av, mchunkptr p, int have_lock)
unlink_chunk (av, p);
}
- if (nextchunk != av->top) {
+ /* Write the chunk header, maybe after merging with the following chunk. */
+ size = _int_free_create_chunk (av, p, size, nextchunk, nextsize);
+ _int_free_maybe_consolidate (av, size);
+}
+
+/* Create a chunk at P of SIZE bytes, with SIZE potentially increased
+ to cover the immediately following chunk NEXTCHUNK of NEXTSIZE
+ bytes (if NEXTCHUNK is unused). The chunk at P is not actually
+ read and does not have to be initialized. After creation, it is
+ placed on the appropriate bin list. The function returns the size
+ of the new chunk. */
+static INTERNAL_SIZE_T
+_int_free_create_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T size,
+ mchunkptr nextchunk, INTERNAL_SIZE_T nextsize)
+{
+ if (nextchunk != av->top)
+ {
/* get and clear inuse bit */
- nextinuse = inuse_bit_at_offset(nextchunk, nextsize);
+ bool nextinuse = inuse_bit_at_offset (nextchunk, nextsize);
/* consolidate forward */
if (!nextinuse) {
@@ -4686,8 +4728,8 @@ _int_free (mstate av, mchunkptr p, int have_lock)
been given one chance to be used in malloc.
*/
- bck = unsorted_chunks(av);
- fwd = bck->fd;
+ mchunkptr bck = unsorted_chunks (av);
+ mchunkptr fwd = bck->fd;
if (__glibc_unlikely (fwd->bk != bck))
malloc_printerr ("free(): corrupted unsorted chunks");
p->fd = fwd;
@@ -4706,61 +4748,52 @@ _int_free (mstate av, mchunkptr p, int have_lock)
check_free_chunk(av, p);
}
- /*
- If the chunk borders the current high end of memory,
- consolidate into top
- */
-
- else {
+ else
+ {
+ /* If the chunk borders the current high end of memory,
+ consolidate into top. */
size += nextsize;
set_head(p, size | PREV_INUSE);
av->top = p;
check_chunk(av, p);
}
- /*
- If freeing a large space, consolidate possibly-surrounding
- chunks. Then, if the total unused topmost memory exceeds trim
- threshold, ask malloc_trim to reduce top.
-
- Unless max_fast is 0, we don't know if there are fastbins
- bordering top, so we cannot tell for sure whether threshold
- has been reached unless fastbins are consolidated. But we
- don't want to consolidate on each free. As a compromise,
- consolidation is performed if FASTBIN_CONSOLIDATION_THRESHOLD
- is reached.
- */
+ return size;
+}
- if ((unsigned long)(size) >= FASTBIN_CONSOLIDATION_THRESHOLD) {
+/* If freeing a large space, consolidate possibly-surrounding
+ chunks. Then, if the total unused topmost memory exceeds trim
+ threshold, ask malloc_trim to reduce top. */
+static void
+_int_free_maybe_consolidate (mstate av, INTERNAL_SIZE_T size)
+{
+ /* Unless max_fast is 0, we don't know if there are fastbins
+ bordering top, so we cannot tell for sure whether threshold has
+ been reached unless fastbins are consolidated. But we don't want
+ to consolidate on each free. As a compromise, consolidation is
+ performed if FASTBIN_CONSOLIDATION_THRESHOLD is reached. */
+ if (size >= FASTBIN_CONSOLIDATION_THRESHOLD)
+ {
if (atomic_load_relaxed (&av->have_fastchunks))
malloc_consolidate(av);
- if (av == &main_arena) {
+ if (av == &main_arena)
+ {
#ifndef MORECORE_CANNOT_TRIM
- if ((unsigned long)(chunksize(av->top)) >=
- (unsigned long)(mp_.trim_threshold))
- systrim(mp_.top_pad, av);
+ if (chunksize (av->top) >= mp_.trim_threshold)
+ systrim (mp_.top_pad, av);
#endif
- } else {
- /* Always try heap_trim(), even if the top chunk is not
- large, because the corresponding heap might go away. */
- heap_info *heap = heap_for_ptr(top(av));
+ }
+ else
+ {
+ /* Always try heap_trim, even if the top chunk is not large,
+ because the corresponding heap might go away. */
+ heap_info *heap = heap_for_ptr (top (av));
- assert(heap->ar_ptr == av);
- heap_trim(heap, mp_.top_pad);
- }
+ assert (heap->ar_ptr == av);
+ heap_trim (heap, mp_.top_pad);
+ }
}
-
- if (!have_lock)
- __libc_lock_unlock (av->mutex);
- }
- /*
- If the chunk was allocated via mmap, release via munmap().
- */
-
- else {
- munmap_chunk (p);
- }
}
/*
@@ -5221,7 +5254,7 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
(av != &main_arena ? NON_MAIN_ARENA : 0));
set_inuse_bit_at_offset (newp, newsize);
set_head_size (p, leadsize | (av != &main_arena ? NON_MAIN_ARENA : 0));
- _int_free (av, p, 1);
+ _int_free_merge_chunk (av, p, leadsize);
p = newp;
assert (newsize >= nb &&
@@ -5232,15 +5265,27 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
if (!chunk_is_mmapped (p))
{
size = chunksize (p);
- if ((unsigned long) (size) > (unsigned long) (nb + MINSIZE))
+ mchunkptr nextchunk = chunk_at_offset(p, size);
+ INTERNAL_SIZE_T nextsize = chunksize(nextchunk);
+ if (size > nb)
{
remainder_size = size - nb;
- remainder = chunk_at_offset (p, nb);
- set_head (remainder, remainder_size | PREV_INUSE |
- (av != &main_arena ? NON_MAIN_ARENA : 0));
- set_head_size (p, nb);
- _int_free (av, remainder, 1);
- }
+ if (remainder_size >= MINSIZE
+ || nextchunk == av->top
+ || !inuse_bit_at_offset (nextchunk, nextsize))
+ {
+ /* We can only give back the tail if it is larger than
+ MINSIZE, or if the following chunk is unused (top
+ chunk or unused in-heap chunk). Otherwise we would
+ create a chunk that is smaller than MINSIZE. */
+ remainder = chunk_at_offset (p, nb);
+ set_head_size (p, nb);
+ remainder_size = _int_free_create_chunk (av, remainder,
+ remainder_size,
+ nextchunk, nextsize);
+ _int_free_maybe_consolidate (av, remainder_size);
+ }
+ }
}
check_inuse_chunk (av, p);
--
2.33.0
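
A usage-level illustration of the pattern this improves (my sketch, not a glibc test): aligned allocations in a loop, as in the video-decoding workload described in the next patch. With remainder merging, the lead and tail slivers trimmed off each allocation can coalesce with neighboring free chunks instead of lingering in tcache or fastbins.

```
#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  enum { N = 8 };
  void *blocks[N];

  for (int i = 0; i < N; i++)
    {
      /* Page-aligned medium buffers, allocated in a loop.  */
      if (posix_memalign (&blocks[i], 4096, 64 * 1024) != 0)
        return 1;
      printf ("block %d at %p\n", i, blocks[i]);
    }

  /* Freeing and reallocating now has a better chance of producing a
     compact layout, since trimmed remainders can merge immediately.  */
  for (int i = 0; i < N; i++)
    free (blocks[i]);
  return 0;
}
```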

0007-malloc-Remove-bin-scanning-from-memalign-bug-30723.patch
@@ -0,0 +1,269 @@
From 2af141bda3cd407abd4bedf615f9e45fe79518e2 Mon Sep 17 00:00:00 2001
From: Florian Weimer <fweimer@redhat.com>
Date: Thu, 10 Aug 2023 19:36:56 +0200
Subject: [PATCH 07/12] malloc: Remove bin scanning from memalign (bug 30723)
On the test workload (mpv --cache=yes with VP9 video decoding), the
bin scanning has a very poor success rate (less than 2%). The tcache
scanning has about 50% success rate, so keep that.
Update comments in malloc/tst-memalign-2 to indicate the purpose
of the tests. Even with the scanning removed, the additional
merging opportunities since commit 542b1105852568c3ebc712225ae78b
("malloc: Enable merging of remainders in memalign (bug 30723)")
are sufficient to pass the existing large bins test.
Remove variables from _int_free that were left over from the
refactoring in the same commit.
Reviewed-by: DJ Delorie <dj@redhat.com>
(cherry picked from commit 0dc7fc1cf094406a138e4d1bcf9553e59edcf89d)
---
NEWS | 1 +
malloc/malloc.c | 169 ++--------------------------------------
malloc/tst-memalign-2.c | 7 +-
3 files changed, 11 insertions(+), 166 deletions(-)
diff --git a/NEWS b/NEWS
index 872bc8907b..c339cb444e 100644
--- a/NEWS
+++ b/NEWS
@@ -132,6 +132,7 @@ The following bugs are resolved with this release:
[30555] string: strerror can incorrectly return NULL
[30579] malloc: trim_threshold in realloc lead to high memory usage
[30662] nscd: Group and password cache use errno in place of errval
+ [30723] posix_memalign repeatedly scans long bin lists
Version 2.37
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 948f9759af..d0bbbf3710 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -4488,12 +4488,6 @@ _int_free (mstate av, mchunkptr p, int have_lock)
{
INTERNAL_SIZE_T size; /* its size */
mfastbinptr *fb; /* associated fastbin */
- mchunkptr nextchunk; /* next contiguous chunk */
- INTERNAL_SIZE_T nextsize; /* its size */
- int nextinuse; /* true if nextchunk is used */
- INTERNAL_SIZE_T prevsize; /* size of previous contiguous chunk */
- mchunkptr bck; /* misc temp for linking */
- mchunkptr fwd; /* misc temp for linking */
size = chunksize (p);
@@ -5032,42 +5026,6 @@ _int_realloc (mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
------------------------------ memalign ------------------------------
*/
-/* Returns 0 if the chunk is not and does not contain the requested
- aligned sub-chunk, else returns the amount of "waste" from
- trimming. NB is the *chunk* byte size, not the user byte
- size. */
-static size_t
-chunk_ok_for_memalign (mchunkptr p, size_t alignment, size_t nb)
-{
- void *m = chunk2mem (p);
- INTERNAL_SIZE_T size = chunksize (p);
- void *aligned_m = m;
-
- if (__glibc_unlikely (misaligned_chunk (p)))
- malloc_printerr ("_int_memalign(): unaligned chunk detected");
-
- aligned_m = PTR_ALIGN_UP (m, alignment);
-
- INTERNAL_SIZE_T front_extra = (intptr_t) aligned_m - (intptr_t) m;
-
- /* We can't trim off the front as it's too small. */
- if (front_extra > 0 && front_extra < MINSIZE)
- return 0;
-
- /* If it's a perfect fit, it's an exception to the return value rule
- (we would return zero waste, which looks like "not usable"), so
- handle it here by returning a small non-zero value instead. */
- if (size == nb && front_extra == 0)
- return 1;
-
- /* If the block we need fits in the chunk, calculate total waste. */
- if (size > nb + front_extra)
- return size - nb;
-
- /* Can't use this chunk. */
- return 0;
-}
-
/* BYTES is user requested bytes, not requested chunksize bytes. */
static void *
_int_memalign (mstate av, size_t alignment, size_t bytes)
@@ -5082,7 +5040,6 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
mchunkptr remainder; /* spare room at end to split off */
unsigned long remainder_size; /* its size */
INTERNAL_SIZE_T size;
- mchunkptr victim;
nb = checked_request2size (bytes);
if (nb == 0)
@@ -5101,129 +5058,13 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
we don't find anything in those bins, the common malloc code will
scan starting at 2x. */
- /* This will be set if we found a candidate chunk. */
- victim = NULL;
-
- /* Fast bins are singly-linked, hard to remove a chunk from the middle
- and unlikely to meet our alignment requirements. We have not done
- any experimentation with searching for aligned fastbins. */
-
- if (av != NULL)
- {
- int first_bin_index;
- int first_largebin_index;
- int last_bin_index;
-
- if (in_smallbin_range (nb))
- first_bin_index = smallbin_index (nb);
- else
- first_bin_index = largebin_index (nb);
-
- if (in_smallbin_range (nb * 2))
- last_bin_index = smallbin_index (nb * 2);
- else
- last_bin_index = largebin_index (nb * 2);
-
- first_largebin_index = largebin_index (MIN_LARGE_SIZE);
-
- int victim_index; /* its bin index */
-
- for (victim_index = first_bin_index;
- victim_index < last_bin_index;
- victim_index ++)
- {
- victim = NULL;
-
- if (victim_index < first_largebin_index)
- {
- /* Check small bins. Small bin chunks are doubly-linked despite
- being the same size. */
-
- mchunkptr fwd; /* misc temp for linking */
- mchunkptr bck; /* misc temp for linking */
-
- bck = bin_at (av, victim_index);
- fwd = bck->fd;
- while (fwd != bck)
- {
- if (chunk_ok_for_memalign (fwd, alignment, nb) > 0)
- {
- victim = fwd;
-
- /* Unlink it */
- victim->fd->bk = victim->bk;
- victim->bk->fd = victim->fd;
- break;
- }
-
- fwd = fwd->fd;
- }
- }
- else
- {
- /* Check large bins. */
- mchunkptr fwd; /* misc temp for linking */
- mchunkptr bck; /* misc temp for linking */
- mchunkptr best = NULL;
- size_t best_size = 0;
-
- bck = bin_at (av, victim_index);
- fwd = bck->fd;
+ /* Call malloc with worst case padding to hit alignment. */
+ m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
- while (fwd != bck)
- {
- int extra;
-
- if (chunksize (fwd) < nb)
- break;
- extra = chunk_ok_for_memalign (fwd, alignment, nb);
- if (extra > 0
- && (extra <= best_size || best == NULL))
- {
- best = fwd;
- best_size = extra;
- }
+ if (m == 0)
+ return 0; /* propagate failure */
- fwd = fwd->fd;
- }
- victim = best;
-
- if (victim != NULL)
- {
- unlink_chunk (av, victim);
- break;
- }
- }
-
- if (victim != NULL)
- break;
- }
- }
-
- /* Strategy: find a spot within that chunk that meets the alignment
- request, and then possibly free the leading and trailing space.
- This strategy is incredibly costly and can lead to external
- fragmentation if header and footer chunks are unused. */
-
- if (victim != NULL)
- {
- p = victim;
- m = chunk2mem (p);
- set_inuse (p);
- if (av != &main_arena)
- set_non_main_arena (p);
- }
- else
- {
- /* Call malloc with worst case padding to hit alignment. */
-
- m = (char *) (_int_malloc (av, nb + alignment + MINSIZE));
-
- if (m == 0)
- return 0; /* propagate failure */
-
- p = mem2chunk (m);
- }
+ p = mem2chunk (m);
if ((((unsigned long) (m)) % alignment) != 0) /* misaligned */
{
diff --git a/malloc/tst-memalign-2.c b/malloc/tst-memalign-2.c
index f229283dbf..ecd6fa249e 100644
--- a/malloc/tst-memalign-2.c
+++ b/malloc/tst-memalign-2.c
@@ -86,7 +86,8 @@ do_test (void)
TEST_VERIFY (tcache_allocs[i].ptr1 == tcache_allocs[i].ptr2);
}
- /* Test for non-head tcache hits. */
+ /* Test for non-head tcache hits. This exercises the memalign
+ scanning code to find matching allocations. */
for (i = 0; i < array_length (ptr); ++ i)
{
if (i == 4)
@@ -113,7 +114,9 @@ do_test (void)
free (p);
TEST_VERIFY (count > 0);
- /* Large bins test. */
+ /* Large bins test. This verifies that the over-allocated parts
+ that memalign releases for future allocations can be reused by
+ memalign itself at least in some cases. */
for (i = 0; i < LN; ++ i)
{
--
2.33.0
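
With the bin scanning gone, _int_memalign always takes the over-allocate-and-trim path visible in the diff. The generic technique, reduced to a user-level sketch (simplified: real malloc gives the spare room back as heap chunks rather than wasting it; assumes a power-of-two alignment):

```
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Over-allocate by alignment-1 bytes, then round the pointer up.
   The caller frees *raw_out, not the aligned pointer.  */
static void *
align_within (size_t alignment, size_t size, void **raw_out)
{
  void *raw = malloc (size + alignment - 1);
  if (raw == NULL)
    return NULL;
  *raw_out = raw;
  return (void *) (((uintptr_t) raw + alignment - 1)
                   & ~(uintptr_t) (alignment - 1));
}

int
main (void)
{
  void *raw;
  void *p = align_within (64, 1000, &raw);
  if (p == NULL)
    return 1;
  printf ("aligned pointer %p, offset into raw block %td\n",
          p, (char *) p - (char *) raw);
  free (raw);
  return 0;
}
```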

0008-sysdeps-tst-bz21269-fix-test-parameter.patch
@@ -0,0 +1,31 @@
From c8ecda6251dd4a0dfe074e0a6011211cadeef742 Mon Sep 17 00:00:00 2001
From: Sam James <sam@gentoo.org>
Date: Fri, 4 Aug 2023 23:58:27 +0100
Subject: [PATCH 08/12] sysdeps: tst-bz21269: fix test parameter
All callers pass 1 or 0x11 anyway (same meaning according to man page),
but still.
Reviewed-by: DJ Delorie <dj@redhat.com>
Signed-off-by: Sam James <sam@gentoo.org>
(cherry picked from commit e0b712dd9183d527aae4506cd39564c14af3bb28)
---
sysdeps/unix/sysv/linux/i386/tst-bz21269.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sysdeps/unix/sysv/linux/i386/tst-bz21269.c b/sysdeps/unix/sysv/linux/i386/tst-bz21269.c
index 51d4a1b082..f508ef8f16 100644
--- a/sysdeps/unix/sysv/linux/i386/tst-bz21269.c
+++ b/sysdeps/unix/sysv/linux/i386/tst-bz21269.c
@@ -52,7 +52,7 @@ xset_thread_area (struct user_desc *u_info)
static void
xmodify_ldt (int func, const void *ptr, unsigned long bytecount)
{
- TEST_VERIFY_EXIT (syscall (SYS_modify_ldt, 1, ptr, bytecount) == 0);
+ TEST_VERIFY_EXIT (syscall (SYS_modify_ldt, func, ptr, bytecount) == 0);
}
static int
--
2.33.0

0009-sysdeps-tst-bz21269-handle-ENOSYS-skip-appropriately.patch
@@ -0,0 +1,42 @@
From ad9b8399537670a990572c4b0c4da5411e3b68cf Mon Sep 17 00:00:00 2001
From: Sam James <sam@gentoo.org>
Date: Sat, 5 Aug 2023 00:04:33 +0100
Subject: [PATCH 09/12] sysdeps: tst-bz21269: handle ENOSYS & skip
appropriately
SYS_modify_ldt requires CONFIG_MODIFY_LDT_SYSCALL to be set in the kernel, which
some distributions may disable for hardening. Check if that's the case (unset)
and mark the test as UNSUPPORTED if so.
Reviewed-by: DJ Delorie <dj@redhat.com>
Signed-off-by: Sam James <sam@gentoo.org>
(cherry picked from commit 652b9fdb77d9fd056d4dd26dad2c14142768ab49)
---
sysdeps/unix/sysv/linux/i386/tst-bz21269.c | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/sysdeps/unix/sysv/linux/i386/tst-bz21269.c b/sysdeps/unix/sysv/linux/i386/tst-bz21269.c
index f508ef8f16..28f5359bea 100644
--- a/sysdeps/unix/sysv/linux/i386/tst-bz21269.c
+++ b/sysdeps/unix/sysv/linux/i386/tst-bz21269.c
@@ -52,7 +52,16 @@ xset_thread_area (struct user_desc *u_info)
static void
xmodify_ldt (int func, const void *ptr, unsigned long bytecount)
{
- TEST_VERIFY_EXIT (syscall (SYS_modify_ldt, func, ptr, bytecount) == 0);
+ long ret = syscall (SYS_modify_ldt, func, ptr, bytecount);
+
+ if (ret == -1)
+ {
+ if (errno == ENOSYS)
+ FAIL_UNSUPPORTED ("modify_ldt not supported");
+ FAIL_EXIT1 ("modify_ldt failed (errno=%d)", errno);
+ }
+
+ return 0;
}
static int
--
2.33.0
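
The same probe-and-skip pattern, pulled out into a standalone sketch (my demo, not the test itself; x86 Linux only): read the LDT with func=0 via syscall(2), and treat ENOSYS as a kernel built without CONFIG_MODIFY_LDT_SYSCALL.

```
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int
main (void)
{
#ifdef SYS_modify_ldt
  char buf[64];
  /* func = 0 reads the LDT; an empty LDT reads back as 0 bytes.  */
  long ret = syscall (SYS_modify_ldt, 0, buf, sizeof buf);

  if (ret == -1 && errno == ENOSYS)
    puts ("UNSUPPORTED: modify_ldt disabled in this kernel");
  else if (ret == -1)
    printf ("modify_ldt failed: %s\n", strerror (errno));
  else
    printf ("modify_ldt read %ld bytes\n", ret);
#else
  puts ("UNSUPPORTED: no modify_ldt on this architecture");
#endif
  return 0;
}
```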

0010-sysdeps-tst-bz21269-fix-Wreturn-type.patch
@@ -0,0 +1,30 @@
From 1aed90c9c8f8be9f68b58e96b6e4cd0fc08eb2b1 Mon Sep 17 00:00:00 2001
From: Sam James <sam@gentoo.org>
Date: Thu, 17 Aug 2023 09:30:29 +0100
Subject: [PATCH 10/12] sysdeps: tst-bz21269: fix -Wreturn-type
Thanks to Andreas Schwab for reporting.
Fixes: 652b9fdb77d9fd056d4dd26dad2c14142768ab49
Signed-off-by: Sam James <sam@gentoo.org>
(cherry picked from commit 369f373057073c307938da91af16922bda3dff6a)
---
sysdeps/unix/sysv/linux/i386/tst-bz21269.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/sysdeps/unix/sysv/linux/i386/tst-bz21269.c b/sysdeps/unix/sysv/linux/i386/tst-bz21269.c
index 28f5359bea..822c41fceb 100644
--- a/sysdeps/unix/sysv/linux/i386/tst-bz21269.c
+++ b/sysdeps/unix/sysv/linux/i386/tst-bz21269.c
@@ -60,8 +60,6 @@ xmodify_ldt (int func, const void *ptr, unsigned long bytecount)
FAIL_UNSUPPORTED ("modify_ldt not supported");
FAIL_EXIT1 ("modify_ldt failed (errno=%d)", errno);
}
-
- return 0;
}
static int
--
2.33.0

0011-io-Fix-record-locking-contants-for-powerpc64-with-__.patch
@@ -0,0 +1,91 @@
From 5bdef6f27c91f45505ed5444147be4ed0e9bc3c7 Mon Sep 17 00:00:00 2001
From: Aurelien Jarno <aurelien@aurel32.net>
Date: Mon, 28 Aug 2023 23:30:37 +0200
Subject: [PATCH 11/12] io: Fix record locking constants for powerpc64 with
__USE_FILE_OFFSET64
Commit 5f828ff824e3b7cd1 ("io: Fix F_GETLK, F_SETLK, and F_SETLKW for
powerpc64") fixed an issue with the value of the lock constants on
powerpc64 when not using __USE_FILE_OFFSET64, but it ended up also
changing the value when using __USE_FILE_OFFSET64, causing an API change.
Fix that by also checking that define, restoring the values from before
commit 4d0fe291aed3a476a:
Default values:
- F_GETLK: 5
- F_SETLK: 6
- F_SETLKW: 7
With -D_FILE_OFFSET_BITS=64:
- F_GETLK: 12
- F_SETLK: 13
- F_SETLKW: 14
At the same time, it has been noticed that there was no test for io lock
with __USE_FILE_OFFSET64, so just add one.
Tested on x86_64-linux-gnu, i686-linux-gnu and
powerpc64le-unknown-linux-gnu.
Resolves: BZ #30804.
Co-authored-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
(cherry picked from commit 434bf72a94de68f0cc7fbf3c44bf38c1911b70cb)
---
NEWS | 2 ++
io/Makefile | 1 +
io/tst-fcntl-lock-lfs.c | 2 ++
sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h | 2 +-
4 files changed, 6 insertions(+), 1 deletion(-)
create mode 100644 io/tst-fcntl-lock-lfs.c
diff --git a/NEWS b/NEWS
index c339cb444e..8156572cdf 100644
--- a/NEWS
+++ b/NEWS
@@ -133,6 +133,8 @@ The following bugs are resolved with this release:
[30579] malloc: trim_threshold in realloc lead to high memory usage
[30662] nscd: Group and password cache use errno in place of errval
[30723] posix_memalign repeatedly scans long bin lists
+ [30804] F_GETLK, F_SETLK, and F_SETLKW value change for powerpc64 with
+ -D_FILE_OFFSET_BITS=64
Version 2.37
diff --git a/io/Makefile b/io/Makefile
index 6ccc0e8691..8a3c83a3bb 100644
--- a/io/Makefile
+++ b/io/Makefile
@@ -192,6 +192,7 @@ tests := \
tst-fchownat \
tst-fcntl \
tst-fcntl-lock \
+ tst-fcntl-lock-lfs \
tst-fstatat \
tst-fts \
tst-fts-lfs \
diff --git a/io/tst-fcntl-lock-lfs.c b/io/tst-fcntl-lock-lfs.c
new file mode 100644
index 0000000000..f2a909fb02
--- /dev/null
+++ b/io/tst-fcntl-lock-lfs.c
@@ -0,0 +1,2 @@
+#define _FILE_OFFSET_BITS 64
+#include <io/tst-fcntl-lock.c>
diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h b/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h
index f7615a447e..d8a291a331 100644
--- a/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h
+++ b/sysdeps/unix/sysv/linux/powerpc/bits/fcntl.h
@@ -33,7 +33,7 @@
# define __O_LARGEFILE 0200000
#endif
-#if __WORDSIZE == 64
+#if __WORDSIZE == 64 && !defined __USE_FILE_OFFSET64
# define F_GETLK 5
# define F_SETLK 6
# define F_SETLKW 7
--
2.33.0
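
A quick way to observe the difference (my sketch): compile the program below twice, with and without -D_FILE_OFFSET_BITS=64, and compare the output. Per the commit message, the fixed powerpc64 headers print 5/6/7 and 12/13/14 respectively.

```
#include <fcntl.h>
#include <stdio.h>

int
main (void)
{
  printf ("F_GETLK=%d F_SETLK=%d F_SETLKW=%d\n",
          F_GETLK, F_SETLK, F_SETLKW);
  return 0;
}
```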

0012-libio-Fix-oversized-__io_vtables.patch
@@ -0,0 +1,51 @@
From 92201f16cbcfd9eafe314ef6654be2ea7ba25675 Mon Sep 17 00:00:00 2001
From: Adam Jackson <ajax@redhat.com>
Date: Fri, 8 Sep 2023 15:55:19 -0400
Subject: [PATCH 12/12] libio: Fix oversized __io_vtables
IO_VTABLES_LEN is the size of the struct array in bytes, not the number
of __IO_jump_t's in the array. Drops just under 384kb from .rodata on
LP64 machines.
Fixes: 3020f72618e ("libio: Remove the usage of __libc_IO_vtables")
Signed-off-by: Adam Jackson <ajax@redhat.com>
Reviewed-by: Florian Weimer <fweimer@redhat.com>
Tested-by: Florian Weimer <fweimer@redhat.com>
(cherry picked from commit 8cb69e054386f980f9ff4d93b157861d72b2019e)
---
libio/vtables.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/libio/vtables.c b/libio/vtables.c
index 1d8ad612e9..34f7e15f1c 100644
--- a/libio/vtables.c
+++ b/libio/vtables.c
@@ -20,6 +20,7 @@
#include <libioP.h>
#include <stdio.h>
#include <ldsodefs.h>
+#include <array_length.h>
#include <pointer_guard.h>
#include <libio-macros.h>
@@ -88,7 +89,7 @@
# pragma weak __wprintf_buffer_as_file_xsputn
#endif
-const struct _IO_jump_t __io_vtables[IO_VTABLES_LEN] attribute_relro =
+const struct _IO_jump_t __io_vtables[] attribute_relro =
{
/* _IO_str_jumps */
[IO_STR_JUMPS] =
@@ -485,6 +486,8 @@ const struct _IO_jump_t __io_vtables[IO_VTABLES_LEN] attribute_relro =
},
#endif
};
+_Static_assert (array_length (__io_vtables) == IO_VTABLES_NUM,
+ "initializer count");
#ifdef SHARED
--
2.33.0
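
The bug class, reduced to a generic sketch (stand-in types and sizes, not the libio definitions): a byte count used where an element count belongs inflates the array by a factor of the element size. Letting the initializer size the array and asserting the element count, as the patch does, prevents a recurrence.

```
#include <assert.h>
#include <stdio.h>

struct jump_table { void *slots[21]; };   /* stand-in for _IO_jump_t */

#define TABLES_NUM 12                                        /* elements */
#define TABLES_LEN (TABLES_NUM * sizeof (struct jump_table)) /* bytes!   */

/* Buggy shape: `struct jump_table tables[TABLES_LEN]` declares TABLES_LEN
   *elements*, i.e. sizeof (struct jump_table) times too many.  The fix:
   let the initializer determine the size and assert the count.  */
static const struct jump_table tables[] =
{
  [TABLES_NUM - 1] = { { 0 } },
};
static_assert (sizeof tables / sizeof tables[0] == TABLES_NUM,
               "initializer count");

int
main (void)
{
  printf ("correct:   %zu bytes\n", sizeof tables);
  printf ("oversized: %zu bytes if declared [TABLES_LEN]\n",
          TABLES_LEN * sizeof (struct jump_table));
  return 0;
}
```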

glibc.spec
@@ -67,7 +67,7 @@
##############################################################################
Name: glibc
Version: 2.38
-Release: 6
+Release: 7
Summary: The GNU libc libraries
License: %{all_license}
URL: http://www.gnu.org/software/glibc/
@@ -87,6 +87,18 @@ Source8: testsuite_whitelist
Patch0: glibc-1070416.patch
Patch1: stdlib-Improve-tst-realpath-compatibility-with-sourc.patch
+Patch2: 0001-x86-Fix-for-cache-computation-on-AMD-legacy-cpus.patch
+Patch3: 0002-nscd-Do-not-rebuild-getaddrinfo-bug-30709.patch
+Patch4: 0003-x86-Fix-incorrect-scope-of-setting-shared_per_thread.patch
+Patch5: 0004-x86_64-Fix-build-with-disable-multiarch-BZ-30721.patch
+Patch6: 0005-i686-Fix-build-with-disable-multiarch.patch
+Patch7: 0006-malloc-Enable-merging-of-remainders-in-memalign-bug-.patch
+Patch8: 0007-malloc-Remove-bin-scanning-from-memalign-bug-30723.patch
+Patch9: 0008-sysdeps-tst-bz21269-fix-test-parameter.patch
+Patch10: 0009-sysdeps-tst-bz21269-handle-ENOSYS-skip-appropriately.patch
+Patch11: 0010-sysdeps-tst-bz21269-fix-Wreturn-type.patch
+Patch12: 0011-io-Fix-record-locking-contants-for-powerpc64-with-__.patch
+Patch13: 0012-libio-Fix-oversized-__io_vtables.patch
Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch
Patch9001: locale-delete-no-hard-link-to-avoid-all_language-pac.patch
@@ -1301,6 +1313,9 @@ fi
%endif
%changelog
+* Mon Sep 11 2023 Qingqing Li <liqingqing3@huawei.com> - 2.38-7
+- backport patches from glibc upstream 2.38 branch
+
* Mon Sep 11 2023 Qingqing Li <liqingqing3@huawei.com> - 2.38-6
- stdlib: Improve tst-realpath compatibility with source fortification