LoongArch: Add tlsdesc and tunable support

(cherry picked from commit 09da963a2510672ba92a60667ac840badffceaf2)
2024-11-06 09:47:51 +08:00 · 2024-11-06 09:47:51 +08:00 · 501a0422cf
commit 501a0422cf
parent 51475e137c
16 changed files with 3777 additions and 1 deletions
--- a/0001-LoongArch-Use-builtins-for-ffs-and-ffsll.patch
+++ b/0001-LoongArch-Use-builtins-for-ffs-and-ffsll.patch
@ -0,0 +1,30 @@
 From 288d144301d20104e1b79fe5695f09af336574eb Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sun, 4 Feb 2024 08:27:50 +0800
 Subject: [PATCH 01/15] LoongArch: Use builtins for ffs and ffsll
 On LoongArch GCC compiles __builtin_ffs{,ll} to basically
 `(x ? __builtin_ctz (x) : -1) + 1`.  Since a hardware ctz instruction is
 available, this is much better than the table-driven generic
 implementation.
 Tested on loongarch64.
 Signed-off-by: Xi Ruoyao <xry111@xry111.site>
 Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
 ---
 sysdeps/loongarch/math-use-builtins-ffs.h | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 sysdeps/loongarch/math-use-builtins-ffs.h
 diff --git a/sysdeps/loongarch/math-use-builtins-ffs.h b/sysdeps/loongarch/math-use-builtins-ffs.h
 new file mode 100644
 index 00000000..a83bb154
 --- /dev/null
 +++ b/sysdeps/loongarch/math-use-builtins-ffs.h
@@ -0,0 +1,2 @@
 +#define USE_FFS_BUILTIN   1
 +#define USE_FFSLL_BUILTIN 1
 -- 
 2.43.0
--- a/0002-elf-Add-new-LoongArch-reloc-types-110-to-126-into-el.patch
+++ b/0002-elf-Add-new-LoongArch-reloc-types-110-to-126-into-el.patch
@ -0,0 +1,45 @@
 From ba560bc2785afa80bc1ad512dbab95936cddeea5 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Thu, 22 Feb 2024 18:57:43 +0800
 Subject: [PATCH 02/15] elf: Add new LoongArch reloc types (110 to 126) into
 elf.h
 These reloc types have been added in LoongArch psABI v2.30.
 Link: https://github.com/loongson/la-abi-specs/blob/v2.30/laelf.adoc#relocation-types
 Signed-off-by: Xi Ruoyao <xry111@xry111.site>
 ---
 elf/elf.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 diff --git a/elf/elf.h b/elf/elf.h
 index 51633079..3dbc7c0f 100644
 --- a/elf/elf.h
 +++ b/elf/elf.h
@@ -4219,6 +4219,23 @@ enum
 #define R_LARCH_ADD_ULEB128 107
 #define R_LARCH_SUB_ULEB128 108
 #define R_LARCH_64_PCREL 109
 +#define R_LARCH_CALL36 110
 +#define R_LARCH_TLS_DESC_PC_HI20 111
 +#define R_LARCH_TLS_DESC_PC_LO12 112
 +#define R_LARCH_TLS_DESC64_PC_LO20 113
 +#define R_LARCH_TLS_DESC64_PC_HI12 114
 +#define R_LARCH_TLS_DESC_HI20 115
 +#define R_LARCH_TLS_DESC_LO12 116
 +#define R_LARCH_TLS_DESC64_LO20 117
 +#define R_LARCH_TLS_DESC64_HI12 118
 +#define R_LARCH_TLS_DESC_LD 119
 +#define R_LARCH_TLS_DESC_CALL 120
 +#define R_LARCH_TLS_LE_HI20_R 121
 +#define R_LARCH_TLS_LE_ADD_R 122
 +#define R_LARCH_TLS_LE_LO12_R 123
 +#define R_LARCH_TLS_LD_PCREL20_S2 124
 +#define R_LARCH_TLS_GD_PCREL20_S2 125
 +#define R_LARCH_TLS_DESC_PCREL20_S2 126
 /* ARC specific declarations.  */
 -- 
 2.43.0
--- a/0003-LoongArch-Add-glibc.cpu.hwcap-support.patch
+++ b/0003-LoongArch-Add-glibc.cpu.hwcap-support.patch
@ -0,0 +1,663 @@
 From 3842a3428f45f00b989c6fa37dc1bb1c2e91335d Mon Sep 17 00:00:00 2001
 From: caiyinyu <caiyinyu@loongson.cn>
 Date: Fri, 15 Sep 2023 17:35:19 +0800
 Subject: [PATCH 03/15] LoongArch: Add glibc.cpu.hwcap support.
 The current IFUNC selection is always using the most recent
 features which are available via AT_HWCAP.  But in
 some scenarios it is useful to adjust this selection.
 The environment variable:
 GLIBC_TUNABLES=glibc.cpu.hwcaps=-xxx,yyy,zzz,....
 can be used to enable HWCAP feature yyy, disable HWCAP feature xxx,
 where the feature name is case-sensitive and has to match the ones
 used in sysdeps/loongarch/cpu-tunables.c.
 Signed-off-by: caiyinyu <caiyinyu@loongson.cn>
 ---
 manual/tunables.texi                          |   5 +-
 sysdeps/loongarch/Makefile                    |  12 ++
 sysdeps/loongarch/cpu-tunables.c              |  87 +++++++++++
 sysdeps/loongarch/dl-get-cpu-features.c       |  25 ++++
 sysdeps/loongarch/dl-machine.h                |  28 +++-
 sysdeps/loongarch/dl-tunables.list            |  25 ++++
 .../lp64/multiarch/dl-symbol-redir-ifunc.h    |   5 +-
 sysdeps/loongarch/tst-hwcap-tunables.c        | 136 ++++++++++++++++++
 .../unix/sysv/linux/loongarch/cpu-features.c  |  30 ++++
 .../unix/sysv/linux/loongarch/cpu-features.h  |  17 ++-
 .../unix/sysv/linux/loongarch/dl-procinfo.c   |  60 ++++++++
 sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c |  21 +++
 .../unix/sysv/linux/loongarch/libc-start.c    |  34 +++++
 13 files changed, 475 insertions(+), 10 deletions(-)
 create mode 100644 sysdeps/loongarch/cpu-tunables.c
 create mode 100644 sysdeps/loongarch/dl-get-cpu-features.c
 create mode 100644 sysdeps/loongarch/dl-tunables.list
 create mode 100644 sysdeps/loongarch/tst-hwcap-tunables.c
 create mode 100644 sysdeps/unix/sysv/linux/loongarch/cpu-features.c
 create mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
 create mode 100644 sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
 create mode 100644 sysdeps/unix/sysv/linux/loongarch/libc-start.c
 diff --git a/manual/tunables.texi b/manual/tunables.texi
 index bb17fef5..6493904b 100644
 --- a/manual/tunables.texi
 +++ b/manual/tunables.texi
@@ -513,7 +513,10 @@ On s390x, the supported HWCAP and STFLE features can be found in
 @code{sysdeps/s390/cpu-features.c}.  In addition the user can also set
 a CPU arch-level like @code{z13} instead of single HWCAP and STFLE features.
 -This tunable is specific to i386, x86-64 and s390x.
 +On loongarch, the supported HWCAP features can be found in
 +@code{sysdeps/loongarch/cpu-tunables.c}.
 +
 +This tunable is specific to i386, x86-64, s390x, powerpc and loongarch.
 @end deftp
 @deftp Tunable glibc.cpu.cached_memopt
 diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
 index 43d2f583..446bda65 100644
 --- a/sysdeps/loongarch/Makefile
 +++ b/sysdeps/loongarch/Makefile
@@ -1,11 +1,23 @@
 ifeq ($(subdir),misc)
 sysdep_headers += sys/asm.h
 +
 +tests += \
 +  tst-hwcap-tunables \
 +  # tests
 +
 +tst-hwcap-tunables-ARGS = -- $(host-test-program-cmd)
 endif
 ifeq ($(subdir),elf)
 gen-as-const-headers += dl-link.sym
 endif
 +ifeq ($(subdir),elf)
 +sysdep-dl-routines += \
 +  dl-get-cpu-features \
 +  # sysdep-dl-routines
 +endif
 +
 # LoongArch's assembler also needs to know about PIC as it changes the
 # definition of some assembler macros.
 ASFLAGS-.os += $(pic-ccflag)
 diff --git a/sysdeps/loongarch/cpu-tunables.c b/sysdeps/loongarch/cpu-tunables.c
 new file mode 100644
 index 00000000..e274e993
 --- /dev/null
 +++ b/sysdeps/loongarch/cpu-tunables.c
@@ -0,0 +1,87 @@
 +/* LoongArch CPU feature tuning.
 +   This file is part of the GNU C Library.
 +   Copyright (C) 2024 Free Software Foundation, Inc.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, see
 +   <http://www.gnu.org/licenses/>.  */
 +
 +#include <stdbool.h>
 +#include <stdint.h>
 +#include <unistd.h>		/* Get STDOUT_FILENO for _dl_printf.  */
 +#include <elf/dl-tunables.h>
 +#include <string.h>
 +#include <cpu-features.h>
 +#include <ldsodefs.h>
 +#include <sys/auxv.h>
 +#include <dl-tunables-parse.h>
 +#include <dl-symbol-redir-ifunc.h>
 +
 +#define CHECK_GLIBC_IFUNC_CPU(f, name, len)			\
 +  _Static_assert (sizeof (#name) - 1 == len, #name " != " #len);	\
 +  if (tunable_str_comma_strcmp_cte (&f, #name))				\
 +    {									\
 +      if (f.disable)							\
 +	GLRO(dl_larch_cpu_features).hwcap &= (~HWCAP_LOONGARCH_##name);	\
 +      else								\
 +	GLRO(dl_larch_cpu_features).hwcap |= HWCAP_LOONGARCH_##name;	\
 +      break;								\
 +    }
 +
 +attribute_hidden void
 +TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
 +{
 +  /* The current IFUNC selection is based on microbenchmarks in glibc.
 +     It should give the best performance for most workloads.  But other
 +     choices may have better performance for a particular workload or on
 +     the hardware which wasn't available when the selection was made.
 +     The environment variable:
 +
 +     GLIBC_TUNABLES=glibc.cpu.hwcaps=-xxx,yyy,-zzz,....
 +
 +     can be used to enable CPU/ARCH feature yyy, disable CPU/ARCH feature
 +     xxx and zzz, where the feature name is case-sensitive and has to
 +     match the ones in cpu-features.h.  It can be used by glibc developers
 +     to tune for a new processor or override the IFUNC selection to
 +     improve performance for a particular workload.
 +
 +     NOTE: the IFUNC selection may change over time.  Please check all
 +     multiarch implementations when experimenting.  */
 +
 +  struct tunable_str_comma_state_t ts;
 +  tunable_str_comma_init (&ts, valp);
 +
 +  struct tunable_str_comma_t n;
 +  while (tunable_str_comma_next (&ts, &n))
 +    {
 +      switch (n.len)
 +	{
 +	default:
 +	  break;
 +	case 3:
 +	  {
 +	    CHECK_GLIBC_IFUNC_CPU (n, LSX, 3);
 +	    CHECK_GLIBC_IFUNC_CPU (n, UAL, 3);
 +	  }
 +	  break;
 +	case 4:
 +	  {
 +	    CHECK_GLIBC_IFUNC_CPU (n, LASX, 4);
 +	  }
 +	  break;
 +	}
 +    }
 +
 +  /* Ensure that the user has not enabled any unsupported features.  */
 +  GLRO(dl_larch_cpu_features).hwcap &= GLRO(dl_hwcap);
 +}
 diff --git a/sysdeps/loongarch/dl-get-cpu-features.c b/sysdeps/loongarch/dl-get-cpu-features.c
 new file mode 100644
 index 00000000..3dcecefb
 --- /dev/null
 +++ b/sysdeps/loongarch/dl-get-cpu-features.c
@@ -0,0 +1,25 @@
 +/* Define _dl_larch_get_cpu_features.
 +   Copyright (C) 2024 Free Software Foundation, Inc.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, see
 +   <https://www.gnu.org/licenses/>.  */
 +
 +
 +#include <ldsodefs.h>
 +
 +const struct cpu_features *
 +_dl_larch_get_cpu_features (void)
 +{
 +  return &GLRO(dl_larch_cpu_features);
 +}
 diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
 index 57913cef..befb29a3 100644
 --- a/sysdeps/loongarch/dl-machine.h
 +++ b/sysdeps/loongarch/dl-machine.h
@@ -29,6 +29,8 @@
 #include <dl-static-tls.h>
 #include <dl-machine-rel.h>
 +#include <cpu-features.c>
 +
 #ifndef _RTLD_PROLOGUE
 # define _RTLD_PROLOGUE(entry)					\
 	".globl\t" __STRING (entry) "\n\t"			\
@@ -53,7 +55,23 @@
 #define ELF_MACHINE_NO_REL 1
 #define ELF_MACHINE_NO_RELA 0
 -/* Return nonzero iff ELF header is compatible with the running host.  */
 +#define DL_PLATFORM_INIT dl_platform_init ()
 +
 +static inline void __attribute__ ((unused))
 +dl_platform_init (void)
 +{
 +  if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
 +    /* Avoid an empty string which would disturb us.  */
 +    GLRO(dl_platform) = NULL;
 +
 +#ifdef SHARED
 +  /* init_cpu_features has been called early from __libc_start_main in
 +     static executable.  */
 +  init_cpu_features (&GLRO(dl_larch_cpu_features));
 +#endif
 +}
 +
 +/* Return nonzero if ELF header is compatible with the running host.  */
 static inline int
 elf_machine_matches_host (const ElfW (Ehdr) *ehdr)
 {
@@ -290,9 +308,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
       if (profile != 0)
 	{
 #if !defined __loongarch_soft_float
 -	  if (SUPPORT_LASX)
 +	  if (RTLD_SUPPORT_LASX)
 	    gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lasx;
 -	  else if (SUPPORT_LSX)
 +	  else if (RTLD_SUPPORT_LSX)
 	    gotplt[0] = (ElfW(Addr)) &_dl_runtime_profile_lsx;
 	  else
 #endif
@@ -310,9 +328,9 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	     indicated by the offset on the stack, and then jump to
 	     the resolved address.  */
 #if !defined __loongarch_soft_float
 -	  if (SUPPORT_LASX)
 +	  if (RTLD_SUPPORT_LASX)
 	    gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lasx;
 -	  else if (SUPPORT_LSX)
 +	  else if (RTLD_SUPPORT_LSX)
 	    gotplt[0] = (ElfW(Addr)) &_dl_runtime_resolve_lsx;
 	  else
 #endif
 diff --git a/sysdeps/loongarch/dl-tunables.list b/sysdeps/loongarch/dl-tunables.list
 new file mode 100644
 index 00000000..00869a9f
 --- /dev/null
 +++ b/sysdeps/loongarch/dl-tunables.list
@@ -0,0 +1,25 @@
 +# LoongArch specific tunables.
 +# Copyright (C) 2024 Free Software Foundation, Inc.
 +# This file is part of the GNU C Library.
 +
 +# The GNU C Library is free software; you can redistribute it and/or
 +# modify it under the terms of the GNU Lesser General Public
 +# License as published by the Free Software Foundation; either
 +# version 2.1 of the License, or (at your option) any later version.
 +
 +# The GNU C Library is distributed in the hope that it will be useful,
 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +# Lesser General Public License for more details.
 +
 +# You should have received a copy of the GNU Lesser General Public
 +# License along with the GNU C Library; if not, see
 +# <http://www.gnu.org/licenses/>.
 +
 +glibc {
 +  cpu {
 +    hwcaps {
 +      type: STRING
 +    }
 +  }
 +}
 diff --git a/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h
 index e2723873..603d9ec2 100644
 --- a/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h
 +++ b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h
@@ -19,6 +19,9 @@
 #ifndef _DL_IFUNC_GENERIC_H
 #define _DL_IFUNC_GENERIC_H
 -asm ("memset = __memset_aligned");
 +#ifndef SHARED
 +  asm ("memset = __memset_aligned");
 +  asm ("memcmp = __memcmp_aligned");
 +#endif
 #endif
 diff --git a/sysdeps/loongarch/tst-hwcap-tunables.c b/sysdeps/loongarch/tst-hwcap-tunables.c
 new file mode 100644
 index 00000000..fe1b95a8
 --- /dev/null
 +++ b/sysdeps/loongarch/tst-hwcap-tunables.c
@@ -0,0 +1,136 @@
 +/* Tests for LoongArch GLIBC_TUNABLES=glibc.cpu.hwcaps filter.
 +   Copyright (C) 2024 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, see
 +   <http://www.gnu.org/licenses/>.  */
 +
 +#include <array_length.h>
 +#include <getopt.h>
 +#include <ifunc-impl-list.h>
 +#include <spawn.h>
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <string.h>
 +#include <support/check.h>
 +#include <support/support.h>
 +#include <support/xunistd.h>
 +#include <sys/auxv.h>
 +#include <sys/wait.h>
 +
 +/* Nonzero if the program gets called via `exec'.  */
 +#define CMDLINE_OPTIONS \
 +  { "restart", no_argument, &restart, 1 }, \
 +  { "enable", no_argument, &enable, 1 },
 +static int restart;
 +static int enable;
 +
 +/* Hold the four initial arguments used to respawn the process, plus the extra
 +   '--direct', '--restart', optional '--enable', and the function to check.  */
 +static char *spargs[9];
 +static int fc;
 +
 +/* Called on process re-execution.  */
 +_Noreturn static void
 +handle_restart (int argc, char *argv[])
 +{
 +  TEST_VERIFY_EXIT (argc == 1);
 +  const char *funcname = argv[0];
 +
 +  struct libc_ifunc_impl impls[32];
 +  int cnt = __libc_ifunc_impl_list ("memcpy", impls, array_length (impls));
 +  if (cnt == 0)
 +    _exit (EXIT_SUCCESS);
 +  TEST_VERIFY_EXIT (cnt >= 1);
 +  for (int i = 0; i < cnt; i++)
 +  {
 +    if (strcmp (impls[i].name, funcname) == 0)
 +      {
 +	if (enable && impls[i].usable != true)
 +	  FAIL_EXIT1 ("FAIL: %s ifunc selection is not enabled.\n", funcname);
 +	else if (!enable && impls[i].usable != false)
 +	  FAIL_EXIT1 ("FAIL: %s ifunc selection is not disabled.\n", funcname);
 +	break;
 +      }
 +  }
 +
 +  _exit (EXIT_SUCCESS);
 +}
 +
 +static void
 +run_test (const char *filter, const char *funcname, int disable)
 +{
 +  if (disable)
 +    printf ("info: checking filter %s (expect %s ifunc "
 +	    "selection to be disabled)\n", filter, funcname);
 +  else
 +    {
 +      printf ("info: checking filter %s (expect %s ifunc "
 +	      "selection to be enabled)\n", filter, funcname);
 +      spargs[fc++] = (char *) "--enable";
 +    }
 +
 +  char *tunable = xasprintf ("GLIBC_TUNABLES=glibc.cpu.hwcaps=%s", filter);
 +  char *const newenvs[] = { (char*) tunable, NULL };
 +  spargs[fc] = (char *) funcname;
 +
 +  pid_t pid;
 +  TEST_COMPARE (posix_spawn (&pid, spargs[0], NULL, NULL, spargs, newenvs), 0);
 +  int status;
 +  TEST_COMPARE (xwaitpid (pid, &status, 0), pid);
 +  TEST_VERIFY (WIFEXITED (status));
 +  TEST_VERIFY (!WIFSIGNALED (status));
 +  TEST_COMPARE (WEXITSTATUS (status), 0);
 +
 +  if (!disable)
 +    fc--;
 +  free (tunable);
 +}
 +
 +static int
 +do_test (int argc, char *argv[])
 +{
 +  if (restart)
 +    handle_restart (argc - 1, &argv[1]);
 +
 +  TEST_VERIFY_EXIT (argc == 2 || argc == 5);
 +
 +  int i;
 +  for (i = 0; i < argc - 1; i++)
 +    spargs[i] = argv[i + 1];
 +  spargs[i++] = (char *) "--direct";
 +  spargs[i++] = (char *) "--restart";
 +  fc = i++;
 +  spargs[i] = NULL;
 +
 +  unsigned long int hwcap = getauxval (AT_HWCAP);
 +
 +  if (hwcap & HWCAP_LOONGARCH_LASX)
 +    run_test ("-LASX", "__memcpy_lasx", 1);
 +  if (hwcap & HWCAP_LOONGARCH_LSX)
 +    run_test ("-LSX", "__memcpy_lsx", 1);
 +  if (hwcap & HWCAP_LOONGARCH_UAL)
 +    run_test ("-UAL", "__memcpy_unaligned", 1);
 +
 +  /* __memcpy_aligned is the default ifunc selection and will be
 +   * always enabled.  */
 +  run_test ("-LASX,-LSX,-UAL", "__memcpy_aligned", 0);
 +  run_test ("-LASX,-LSX", "__memcpy_aligned", 0);
 +  run_test ("-LASX", "__memcpy_aligned", 0);
 +
 +  return 0;
 +}
 +
 +#define TEST_FUNCTION_ARGV do_test
 +#include <support/test-driver.c>
 diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.c b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
 new file mode 100644
 index 00000000..ba6201ad
 --- /dev/null
 +++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.c
@@ -0,0 +1,30 @@
 +/* Initialize CPU feature data.  LoongArch64 version.
 +   This file is part of the GNU C Library.
 +   Copyright (C) 2024 Free Software Foundation, Inc.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, see
 +   <http://www.gnu.org/licenses/>.  */
 +
 +#include <cpu-features.h>
 +#include <elf/dl-hwcaps.h>
 +#include <elf/dl-tunables.h>
 +extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) attribute_hidden;
 +
 +static inline void
 +init_cpu_features (struct cpu_features *cpu_features)
 +{
 +  GLRO(dl_larch_cpu_features).hwcap = GLRO(dl_hwcap);
 +  TUNABLE_GET (glibc, cpu, hwcaps, tunable_val_t *,
 +	       TUNABLE_CALLBACK (set_hwcaps));
 +}
 diff --git a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
 index d1a280a5..b1fa4b7b 100644
 --- a/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
 +++ b/sysdeps/unix/sysv/linux/loongarch/cpu-features.h
@@ -19,12 +19,23 @@
 #ifndef _CPU_FEATURES_LOONGARCH64_H
 #define _CPU_FEATURES_LOONGARCH64_H
 +#include <stdint.h>
 #include <sys/auxv.h>
 -#define SUPPORT_UAL (GLRO (dl_hwcap) & HWCAP_LOONGARCH_UAL)
 -#define SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
 -#define SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
 +struct cpu_features
 +{
 +  uint64_t hwcap;
 +};
 +/* Get a pointer to the CPU features structure.  */
 +extern const struct cpu_features *
 +_dl_larch_get_cpu_features (void) __attribute__ ((pure));
 +
 +#define SUPPORT_UAL (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_UAL)
 +#define SUPPORT_LSX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LSX)
 +#define SUPPORT_LASX (GLRO (dl_larch_cpu_features).hwcap & HWCAP_LOONGARCH_LASX)
 +#define RTLD_SUPPORT_LSX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LSX)
 +#define RTLD_SUPPORT_LASX (GLRO (dl_hwcap) & HWCAP_LOONGARCH_LASX)
 #define INIT_ARCH()
 #endif /* _CPU_FEATURES_LOONGARCH64_H  */
 diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
 new file mode 100644
 index 00000000..5e056a19
 --- /dev/null
 +++ b/sysdeps/unix/sysv/linux/loongarch/dl-procinfo.c
@@ -0,0 +1,60 @@
 +/* Data for LoongArch64 version of processor capability information.
 +   Linux version.
 +   Copyright (C) 2024 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, see
 +   <http://www.gnu.org/licenses/>.  */
 +
 +/* If anything should be added here check whether the size of each string
 +   is still ok with the given array size.
 +
 +   All the #ifdefs in the definitions are quite irritating but
 +   necessary if we want to avoid duplicating the information.  There
 +   are three different modes:
 +
 +   - PROCINFO_DECL is defined.  This means we are only interested in
 +     declarations.
 +
 +   - PROCINFO_DECL is not defined:
 +
 +     + if SHARED is defined the file is included in an array
 +       initializer.  The .element = { ... } syntax is needed.
 +
 +     + if SHARED is not defined a normal array initialization is
 +       needed.
 +  */
 +
 +#ifndef PROCINFO_CLASS
 +# define PROCINFO_CLASS
 +#endif
 +
 +#if !IS_IN (ldconfig)
 +# if !defined PROCINFO_DECL && defined SHARED
 +  ._dl_larch_cpu_features
 +# else
 +PROCINFO_CLASS struct cpu_features _dl_larch_cpu_features
 +# endif
 +# ifndef PROCINFO_DECL
 += { }
 +# endif
 +# if !defined SHARED || defined PROCINFO_DECL
 +;
 +# else
 +,
 +# endif
 +#endif
 +
 +#undef PROCINFO_DECL
 +#undef PROCINFO_CLASS
 diff --git a/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
 new file mode 100644
 index 00000000..30b84f19
 --- /dev/null
 +++ b/sysdeps/unix/sysv/linux/loongarch/dl-sysdep.c
@@ -0,0 +1,21 @@
 +/* Operating system support for run-time dynamic linker.  LoongArch version.
 +   Copyright (C) 2024 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, see
 +   <http://www.gnu.org/licenses/>.  */
 +
 +#include <config.h>
 +#include <sysdeps/loongarch/cpu-tunables.c>
 +#include <sysdeps/unix/sysv/linux/dl-sysdep.c>
 diff --git a/sysdeps/unix/sysv/linux/loongarch/libc-start.c b/sysdeps/unix/sysv/linux/loongarch/libc-start.c
 new file mode 100644
 index 00000000..e545f7f1
 --- /dev/null
 +++ b/sysdeps/unix/sysv/linux/loongarch/libc-start.c
@@ -0,0 +1,34 @@
 +/* Override csu/libc-start.c on LoongArch64.
 +   Copyright (C) 2024 Free Software Foundation, Inc.
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, see
 +   <http://www.gnu.org/licenses/>.  */
 +
 +#ifndef SHARED
 +
 +/* Mark symbols hidden in static PIE for early self relocation to work.  */
 +#if BUILD_PIE_DEFAULT
 +# pragma GCC visibility push (hidden)
 +#endif
 +
 +#include <ldsodefs.h>
 +#include <cpu-features.c>
 +
 +extern struct cpu_features _dl_larch_cpu_features;
 +
 +#define ARCH_INIT_CPU_FEATURES() init_cpu_features (&_dl_larch_cpu_features)
 +
 +#endif
 +#include <csu/libc-start.c>
 -- 
 2.43.0
--- a/0004-LoongArch-Add-support-for-TLS-Descriptors.patch
+++ b/0004-LoongArch-Add-support-for-TLS-Descriptors.patch
--- a/0005-LoongArch-Fix-tst-gnu2-tls2-compiler-error.patch
+++ b/0005-LoongArch-Fix-tst-gnu2-tls2-compiler-error.patch
@ -0,0 +1,64 @@
 From 479cf47724e06c1020d5806c90abfdb353e832c8 Mon Sep 17 00:00:00 2001
 From: mengqinggang <mengqinggang@loongson.cn>
 Date: Mon, 20 May 2024 17:05:12 +0800
 Subject: [PATCH 05/15] LoongArch: Fix tst-gnu2-tls2 compiler error
 Add -mno-lsx to tst-gnu2-tls2mod*.c if GCC supports -mno-lsx.
 Add escape character '\' in vector support test function.
 ---
 sysdeps/loongarch/Makefile     | 2 ++
 sysdeps/loongarch/configure    | 3 +++
 sysdeps/loongarch/configure.ac | 5 +++--
 3 files changed, 8 insertions(+), 2 deletions(-)
 diff --git a/sysdeps/loongarch/Makefile b/sysdeps/loongarch/Makefile
 index a4ee915e..b00c090f 100644
 --- a/sysdeps/loongarch/Makefile
 +++ b/sysdeps/loongarch/Makefile
@@ -35,10 +35,12 @@ sysdep-dl-routines += \
 # Disable the compiler from using LSX for TLS descriptor tests, or storing into
 # 16B TLS variable may clobber FP/vector registers and prevent us from checking
 # their contents.
 +ifeq (yes,$(loongarch-vec-com))
 CFLAGS-tst-gnu2-tls2mod0.c += -mno-lsx
 CFLAGS-tst-gnu2-tls2mod1.c += -mno-lsx
 CFLAGS-tst-gnu2-tls2mod2.c += -mno-lsx
 endif
 +endif
 # LoongArch's assembler also needs to know about PIC as it changes the
 # definition of some assembler macros.
 diff --git a/sysdeps/loongarch/configure b/sysdeps/loongarch/configure
 index a133821f..73cf3b95 100644
 --- a/sysdeps/loongarch/configure
 +++ b/sysdeps/loongarch/configure
@@ -165,3 +165,6 @@ then
   printf "%s\n" "#define HAVE_LOONGARCH_VEC_COM 1" >>confdefs.h
 fi
 +config_vars="$config_vars
 +loongarch-vec-com = $libc_cv_loongarch_vec_com"
 +
 diff --git a/sysdeps/loongarch/configure.ac b/sysdeps/loongarch/configure.ac
 index 9b1cf26e..878c5d64 100644
 --- a/sysdeps/loongarch/configure.ac
 +++ b/sysdeps/loongarch/configure.ac
@@ -83,8 +83,8 @@ AC_CACHE_CHECK([for vector support in compiler],
 AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
 void foo (void)
 {
 -  asm volatile ("vldi $vr0, 1" ::: "$vr0");
 -  asm volatile ("xvldi $xr0, 1" ::: "$xr0");
 +  asm volatile ("vldi \$vr0, 1" ::: "\$vr0");
 +  asm volatile ("xvldi \$xr0, 1" ::: "\$xr0");
 }
 ]])],
                 [libc_cv_loongarch_vec_com=yes],
@@ -93,3 +93,4 @@ if test "$libc_cv_loongarch_vec_com" = yes ;
 then
   AC_DEFINE(HAVE_LOONGARCH_VEC_COM)
 fi
 +LIBC_CONFIG_VAR([loongarch-vec-com], [$libc_cv_loongarch_vec_com])
 -- 
 2.43.0
--- a/0006-LoongArch-Use-fcsr0-instead-of-r0-in-_FPU_-GET-SET-C.patch
+++ b/0006-LoongArch-Use-fcsr0-instead-of-r0-in-_FPU_-GET-SET-C.patch
@ -0,0 +1,37 @@
 From 3a6a1dc7b501846856faeed6b83af2f12bda782f Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Mon, 29 Apr 2024 15:26:24 +0800
 Subject: [PATCH 06/15] LoongArch: Use "$fcsr0" instead of "$r0" in
 _FPU_{GET,SET}CW
 Clang inline-asm parser does not allow using "$r0" in
 movfcsr2gr/movgr2fcsr, so everything using _FPU_{GET,SET}CW is now
 failing to build with Clang on LoongArch.  As we now require Binutils
 >= 2.41 which supports using "$fcsr0" here, use it instead of "$r0" to
 fix the issue.
 Link: https://github.com/loongson-community/discussions/issues/53#issuecomment-2081507390
 Link: https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=4142b2368353
 Signed-off-by: Xi Ruoyao <xry111@xry111.site>
 ---
 sysdeps/loongarch/fpu_control.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
 diff --git a/sysdeps/loongarch/fpu_control.h b/sysdeps/loongarch/fpu_control.h
 index ffb01ca5..3c8ae8b5 100644
 --- a/sysdeps/loongarch/fpu_control.h
 +++ b/sysdeps/loongarch/fpu_control.h
@@ -91,8 +91,8 @@ typedef unsigned int fpu_control_t __attribute__ ((__mode__ (__SI__)));
 /* Macros for accessing the hardware control word.  */
 extern fpu_control_t __loongarch_fpu_getcw (void) __THROW;
 extern void __loongarch_fpu_setcw (fpu_control_t) __THROW;
 -#define _FPU_GETCW(cw) __asm__ volatile ("movfcsr2gr %0,$r0" : "=r"(cw))
 -#define _FPU_SETCW(cw) __asm__ volatile ("movgr2fcsr $r0,%0" : : "r"(cw))
 +#define _FPU_GETCW(cw) __asm__ volatile ("movfcsr2gr %0,$fcsr0" : "=r"(cw))
 +#define _FPU_SETCW(cw) __asm__ volatile ("movgr2fcsr $fcsr0,%0" : : "r"(cw))
 /* Default control word set at startup.  */
 extern fpu_control_t __fpu_control;
 -- 
 2.43.0
--- a/0007-LoongArch-Ensure-sp-16-byte-aligned-for-tlsdesc.patch
+++ b/0007-LoongArch-Ensure-sp-16-byte-aligned-for-tlsdesc.patch
@ -0,0 +1,76 @@
 From 8fac691bbaaf60690902e585290bb88e060cc85a Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Thu, 13 Jun 2024 19:04:05 +0800
 Subject: [PATCH 07/15] LoongArch: Ensure sp 16-byte aligned for tlsdesc
 "ADDI sp, sp, 24" and "ADDI sp, sp, SZFCSREG" (SZFCSREG = 4) are
 misaligning the stack: the ABI mandates a 16-byte alignment.  Fix it
 by changing the first one to "ADDI sp, sp, 32", and reuse the spare 4th
 slot for saving fcsr.
 Reported-by: Jinyang He <hejinyang@loongson.cn>
 Signed-off-by: Xi Ruoyao <xry111@xry111.site>
 ---
 sysdeps/loongarch/dl-tlsdesc.S | 10 ++++------
 sysdeps/loongarch/sys/asm.h    |  1 -
 2 files changed, 4 insertions(+), 7 deletions(-)
 diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
 index 15d5fa1c..346b80f2 100644
 --- a/sysdeps/loongarch/dl-tlsdesc.S
 +++ b/sysdeps/loongarch/dl-tlsdesc.S
@@ -100,7 +100,7 @@ _dl_tlsdesc_undefweak:
 _dl_tlsdesc_dynamic:
 	/* Save just enough registers to support fast path, if we fall
 	   into slow path we will save additional registers.  */
 -	ADDI	sp, sp, -24
 +	ADDI	sp, sp, -32
 	REG_S	t0, sp, 0
 	REG_S	t1, sp, 8
 	REG_S	t2, sp, 16
@@ -141,7 +141,7 @@ Hign address	dynamic_block1 <----- dtv5  */
 	REG_L	t0, sp, 0
 	REG_L	t1, sp, 8
 	REG_L	t2, sp, 16
 -	ADDI	sp, sp, 24
 +	ADDI	sp, sp, 32
 	RET
 .Lslow:
@@ -171,9 +171,8 @@ Hign address	dynamic_block1 <----- dtv5  */
 	/* Save fcsr0 register.
 	   Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
 	   of some fields in fcsr0.  */
 -	ADDI	sp, sp, -SZFCSREG
 	movfcsr2gr  t0, fcsr0
 -	st.w	t0, sp, 0
 +	st.w	t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2 */
 	/* Whether support LASX.  */
 	la.global   t0, _rtld_global_ro
@@ -406,9 +405,8 @@ Hign address	dynamic_block1 <----- dtv5  */
 .Lfcsr:
 	/* Restore fcsr0 register.  */
 -	ld.w	t0, sp, 0
 +	ld.w	t0, sp, FRAME_SIZE + 24
 	movgr2fcsr  fcsr0, t0
 -	ADDI	sp, sp, SZFCSREG
 #endif /* #ifndef __loongarch_soft_float */
 diff --git a/sysdeps/loongarch/sys/asm.h b/sysdeps/loongarch/sys/asm.h
 index dbee93ee..c5eb8afa 100644
 --- a/sysdeps/loongarch/sys/asm.h
 +++ b/sysdeps/loongarch/sys/asm.h
@@ -25,7 +25,6 @@
 /* Macros to handle different pointer/register sizes for 32/64-bit code.  */
 #define SZREG 8
 #define SZFREG 8
 -#define SZFCSREG 4
 #define SZVREG 16
 #define SZXREG 32
 #define REG_L ld.d
 -- 
 2.43.0
--- a/0008-LoongArch-Fix-_dl_tlsdesc_dynamic-in-LSX-case.patch
+++ b/0008-LoongArch-Fix-_dl_tlsdesc_dynamic-in-LSX-case.patch
@ -0,0 +1,73 @@
 From ff46cceb89bb4aab7e510c2d3dc02855500617a0 Mon Sep 17 00:00:00 2001
 From: mengqinggang <mengqinggang@loongson.cn>
 Date: Fri, 14 Jun 2024 11:58:30 +0800
 Subject: [PATCH 08/15] LoongArch: Fix _dl_tlsdesc_dynamic in LSX case
 The HWCAP value is overwritten by the first comparison in the LASX case,
 so the second comparison in the LSX case gets an incorrect result.
 Change to use t0 to hold the HWCAP value, and use t1 to hold the
 comparison result.
 ---
 sysdeps/loongarch/dl-tlsdesc.S | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)
 diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
 index 346b80f2..a6627cc7 100644
 --- a/sysdeps/loongarch/dl-tlsdesc.S
 +++ b/sysdeps/loongarch/dl-tlsdesc.S
@@ -81,7 +81,7 @@ _dl_tlsdesc_undefweak:
 	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
 	   {
 	     struct tlsdesc_dynamic_arg *td = tdp->arg;
 -	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - TCBHEAD_DTV);
 +	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB);
 	     if (__glibc_likely (td->gen_count <= dtv[0].counter
 		&& (dtv[td->tlsinfo.ti_module].pointer.val
 		    != TLS_DTV_UNALLOCATED),
@@ -177,8 +177,8 @@ Hign address	dynamic_block1 <----- dtv5  */
 	/* Whether support LASX.  */
 	la.global   t0, _rtld_global_ro
 	REG_L	t0, t0, GLRO_DL_HWCAP_OFFSET
 -	andi	t0, t0, HWCAP_LOONGARCH_LASX
 -	beqz	t0, .Llsx
 +	andi	t1, t0, HWCAP_LOONGARCH_LASX
 +	beqz	t1, .Llsx
 	/* Save 256-bit vector registers.
 	   FIXME: Without vector ABI, save all vector registers.  */
@@ -219,8 +219,8 @@ Hign address	dynamic_block1 <----- dtv5  */
 .Llsx:
 	/* Whether support LSX.  */
 -	andi	t0, t0, HWCAP_LOONGARCH_LSX
 -	beqz	t0, .Lfloat
 +	andi	t1, t0, HWCAP_LOONGARCH_LSX
 +	beqz	t1, .Lfloat
 	/* Save 128-bit vector registers.  */
 	ADDI	sp, sp, -FRAME_SIZE_LSX
@@ -296,8 +296,8 @@ Hign address	dynamic_block1 <----- dtv5  */
 	la.global   t0, _rtld_global_ro
 	REG_L	t0, t0, GLRO_DL_HWCAP_OFFSET
 -	andi	t0, t0, HWCAP_LOONGARCH_LASX
 -	beqz	t0, .Llsx1
 +	andi	t1, t0, HWCAP_LOONGARCH_LASX
 +	beqz	t1, .Llsx1
 	/* Restore 256-bit vector registers.  */
 	xvld	xr0, sp, 0*SZXREG
@@ -336,8 +336,8 @@ Hign address	dynamic_block1 <----- dtv5  */
 	b .Lfcsr
 .Llsx1:
 -	andi	t0, s0, HWCAP_LOONGARCH_LSX
 -	beqz	t0, .Lfloat1
 +	andi	t1, t0, HWCAP_LOONGARCH_LSX
 +	beqz	t1, .Lfloat1
 	/* Restore 128-bit vector registers.  */
 	vld	vr0, sp, 0*SZVREG
 -- 
 2.43.0
--- a/0009-LoongArch-Fix-tst-gnu2-tls2-test-case.patch
+++ b/0009-LoongArch-Fix-tst-gnu2-tls2-test-case.patch
@ -0,0 +1,420 @@
 From b230d00bc0f6ab5cd7b017b7d4307ea8e55c261a Mon Sep 17 00:00:00 2001
 From: mengqinggang <mengqinggang@loongson.cn>
 Date: Fri, 21 Jun 2024 16:08:53 +0800
 Subject: [PATCH 09/15] LoongArch: Fix tst-gnu2-tls2 test case
 asm volatile ("movfcsr2gr $t0, $fcsr0" ::: "$t0");
 asm volatile ("st.d $t0, %0" :"=m"(restore_fcsr));
 are compiled to the following instructions with the -Og flag:
 movfcsr2gr      $t0, $zero
 addi.d          $t0, $sp, 2047(0x7ff)
 addi.d          $t0, $t0, 77(0x4d)
 st.w            $t0, $t0, 0
 The fcsr0 value and the restore_fcsr address are both placed in register t0.
 Change to:
 asm volatile ("movfcsr2gr %0, $fcsr0" :"=r"(restore_fcsr));
 to avoid placing the address of restore_fcsr in t0.
 Compare the float values using memcmp because float values cannot be
 directly compared for equality.
 Put LOAD_REGISTER_FCSR and SAVE_REGISTER_FCC after LOAD_REGISTER_FLOAT,
 because some float instructions may change the fcsr register.
 ---
 sysdeps/loongarch/tst-gnu2-tls2.h | 296 +++++++++++++++---------------
 1 file changed, 153 insertions(+), 143 deletions(-)
 diff --git a/sysdeps/loongarch/tst-gnu2-tls2.h b/sysdeps/loongarch/tst-gnu2-tls2.h
 index 8e421678..863abe59 100644
 --- a/sysdeps/loongarch/tst-gnu2-tls2.h
 +++ b/sysdeps/loongarch/tst-gnu2-tls2.h
@@ -16,6 +16,7 @@
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 +#include <stdio.h>
 #include <config.h>
 #include <string.h>
 #include <stdlib.h>
@@ -42,35 +43,35 @@
 #else /* hard float */
 #define SAVE_REGISTER_FCC(src)				\
 -  asm volatile ("movcf2gr $t0, $fcc0" ::: "$t0");	\
 -  asm volatile ("st.d $t0, %0" :"=m"(src[0]) :);	\
 -  asm volatile ("movcf2gr $t0, $fcc1" ::: "$t0");	\
 -  asm volatile ("st.d $t0, %0" :"=m"(src[1]) :);	\
 -  asm volatile ("movcf2gr $t0, $fcc2" ::: "$t0");	\
 -  asm volatile ("st.d $t0, %0" :"=m"(src[2]) :);	\
 -  asm volatile ("movcf2gr $t0, $fcc3" ::: "$t0");	\
 -  asm volatile ("st.d $t0, %0" :"=m"(src[3]) :);	\
 -  asm volatile ("movcf2gr $t0, $fcc4" ::: "$t0");	\
 -  asm volatile ("st.d $t0, %0" :"=m"(src[4]) :);	\
 -  asm volatile ("movcf2gr $t0, $fcc5" ::: "$t0");	\
 -  asm volatile ("st.d $t0, %0" :"=m"(src[5]) :);	\
 -  asm volatile ("movcf2gr $t0, $fcc6" ::: "$t0");	\
 -  asm volatile ("st.d $t0, %0" :"=m"(src[6]) :);	\
 -  asm volatile ("movcf2gr $t0, $fcc7" ::: "$t0");	\
 -  asm volatile ("st.d $t0, %0" :"=m"(src[7]) :);
 +  asm volatile ("movcf2gr %0, $fcc0" :"=r"(src[0]));	\
 +  asm volatile ("movcf2gr %0, $fcc1" :"=r"(src[1]));	\
 +  asm volatile ("movcf2gr %0, $fcc2" :"=r"(src[2]));	\
 +  asm volatile ("movcf2gr %0, $fcc3" :"=r"(src[3]));	\
 +  asm volatile ("movcf2gr %0, $fcc4" :"=r"(src[4]));	\
 +  asm volatile ("movcf2gr %0, $fcc5" :"=r"(src[5]));	\
 +  asm volatile ("movcf2gr %0, $fcc6" :"=r"(src[6]));	\
 +  asm volatile ("movcf2gr %0, $fcc7" :"=r"(src[7]));	\
 #define LOAD_REGISTER_FCSR()				\
 +  uint64_t src_fcsr = 0x01010101;			\
   asm volatile ("li.d $t0, 0x01010101" ::: "$t0");	\
   asm volatile ("movgr2fcsr $fcsr0, $t0" :::);
 -#define SAVE_REGISTER_FCSR()				\
 -  asm volatile ("movfcsr2gr $t0, $fcsr0" ::: "$t0");    \
 -  asm volatile ("st.d $t0, %0" :"=m"(restore_fcsr) :);
 +#define SAVE_REGISTER_FCSR()						\
 +  uint64_t restore_fcsr;						\
 +  asm volatile ("movfcsr2gr %0, $fcsr0" :"=r"(restore_fcsr));		\
 +  if (src_fcsr != restore_fcsr)						\
 +    {									\
 +      printf ("FCSR registers compare failed!\n");			\
 +      abort ();								\
 +    }									\
 -# define INIT_TLSDESC_CALL()						\
 +#define INIT_TLSDESC_CALL()						\
   unsigned long hwcap = getauxval (AT_HWCAP);
 #define	LOAD_REGISTER_FLOAT()						\
 +  for (int i = 0; i < 32; i++)						\
 +    src_float[i] = i + 1;						\
   asm volatile ("fld.d $f0, %0" ::"m"(src_float[0]) :"$f0");		\
   asm volatile ("fld.d $f1, %0" ::"m"(src_float[1]) :"$f1"); 		\
   asm volatile ("fld.d $f2, %0" ::"m"(src_float[2]) :"$f2"); 		\
@@ -105,38 +106,44 @@
   asm volatile ("fld.d $f31, %0" ::"m"(src_float[31]) :"$f31");
 #define	SAVE_REGISTER_FLOAT()						\
 -  asm volatile ("fst.d $f0, %0" :"=m"(restore_float[0]) :);		\
 -  asm volatile ("fst.d $f1, %0" :"=m"(restore_float[1]) :); 		\
 -  asm volatile ("fst.d $f2, %0" :"=m"(restore_float[2]) :); 		\
 -  asm volatile ("fst.d $f3, %0" :"=m"(restore_float[3]) :); 		\
 -  asm volatile ("fst.d $f4, %0" :"=m"(restore_float[4]) :); 		\
 -  asm volatile ("fst.d $f5, %0" :"=m"(restore_float[5]) :); 		\
 -  asm volatile ("fst.d $f6, %0" :"=m"(restore_float[6]) :); 		\
 -  asm volatile ("fst.d $f7, %0" :"=m"(restore_float[7]) :); 		\
 -  asm volatile ("fst.d $f8, %0" :"=m"(restore_float[8]) :); 		\
 -  asm volatile ("fst.d $f9, %0" :"=m"(restore_float[9]) :); 		\
 -  asm volatile ("fst.d $f10, %0" :"=m"(restore_float[10]) :);		\
 -  asm volatile ("fst.d $f11, %0" :"=m"(restore_float[11]) :);		\
 -  asm volatile ("fst.d $f12, %0" :"=m"(restore_float[12]) :);		\
 -  asm volatile ("fst.d $f13, %0" :"=m"(restore_float[13]) :);		\
 -  asm volatile ("fst.d $f14, %0" :"=m"(restore_float[14]) :);		\
 -  asm volatile ("fst.d $f15, %0" :"=m"(restore_float[15]) :);		\
 -  asm volatile ("fst.d $f16, %0" :"=m"(restore_float[16]) :);		\
 -  asm volatile ("fst.d $f17, %0" :"=m"(restore_float[17]) :);		\
 -  asm volatile ("fst.d $f18, %0" :"=m"(restore_float[18]) :);		\
 -  asm volatile ("fst.d $f19, %0" :"=m"(restore_float[19]) :);		\
 -  asm volatile ("fst.d $f20, %0" :"=m"(restore_float[20]) :);		\
 -  asm volatile ("fst.d $f21, %0" :"=m"(restore_float[21]) :);		\
 -  asm volatile ("fst.d $f22, %0" :"=m"(restore_float[22]) :);		\
 -  asm volatile ("fst.d $f23, %0" :"=m"(restore_float[23]) :);		\
 -  asm volatile ("fst.d $f24, %0" :"=m"(restore_float[24]) :);		\
 -  asm volatile ("fst.d $f25, %0" :"=m"(restore_float[25]) :);		\
 -  asm volatile ("fst.d $f26, %0" :"=m"(restore_float[26]) :);		\
 -  asm volatile ("fst.d $f27, %0" :"=m"(restore_float[27]) :);		\
 -  asm volatile ("fst.d $f28, %0" :"=m"(restore_float[28]) :);		\
 -  asm volatile ("fst.d $f29, %0" :"=m"(restore_float[29]) :);		\
 -  asm volatile ("fst.d $f30, %0" :"=m"(restore_float[30]) :);		\
 -  asm volatile ("fst.d $f31, %0" :"=m"(restore_float[31]) :);
 +  double restore_float[32];						\
 +  asm volatile ("fst.d $f0, %0" :"=m"(restore_float[0]));		\
 +  asm volatile ("fst.d $f1, %0" :"=m"(restore_float[1])); 		\
 +  asm volatile ("fst.d $f2, %0" :"=m"(restore_float[2])); 		\
 +  asm volatile ("fst.d $f3, %0" :"=m"(restore_float[3])); 		\
 +  asm volatile ("fst.d $f4, %0" :"=m"(restore_float[4])); 		\
 +  asm volatile ("fst.d $f5, %0" :"=m"(restore_float[5])); 		\
 +  asm volatile ("fst.d $f6, %0" :"=m"(restore_float[6])); 		\
 +  asm volatile ("fst.d $f7, %0" :"=m"(restore_float[7])); 		\
 +  asm volatile ("fst.d $f8, %0" :"=m"(restore_float[8])); 		\
 +  asm volatile ("fst.d $f9, %0" :"=m"(restore_float[9])); 		\
 +  asm volatile ("fst.d $f10, %0" :"=m"(restore_float[10]));		\
 +  asm volatile ("fst.d $f11, %0" :"=m"(restore_float[11]));		\
 +  asm volatile ("fst.d $f12, %0" :"=m"(restore_float[12]));		\
 +  asm volatile ("fst.d $f13, %0" :"=m"(restore_float[13]));		\
 +  asm volatile ("fst.d $f14, %0" :"=m"(restore_float[14]));		\
 +  asm volatile ("fst.d $f15, %0" :"=m"(restore_float[15]));		\
 +  asm volatile ("fst.d $f16, %0" :"=m"(restore_float[16]));		\
 +  asm volatile ("fst.d $f17, %0" :"=m"(restore_float[17]));		\
 +  asm volatile ("fst.d $f18, %0" :"=m"(restore_float[18]));		\
 +  asm volatile ("fst.d $f19, %0" :"=m"(restore_float[19]));		\
 +  asm volatile ("fst.d $f20, %0" :"=m"(restore_float[20]));		\
 +  asm volatile ("fst.d $f21, %0" :"=m"(restore_float[21]));		\
 +  asm volatile ("fst.d $f22, %0" :"=m"(restore_float[22]));		\
 +  asm volatile ("fst.d $f23, %0" :"=m"(restore_float[23]));		\
 +  asm volatile ("fst.d $f24, %0" :"=m"(restore_float[24]));		\
 +  asm volatile ("fst.d $f25, %0" :"=m"(restore_float[25]));		\
 +  asm volatile ("fst.d $f26, %0" :"=m"(restore_float[26]));		\
 +  asm volatile ("fst.d $f27, %0" :"=m"(restore_float[27]));		\
 +  asm volatile ("fst.d $f28, %0" :"=m"(restore_float[28]));		\
 +  asm volatile ("fst.d $f29, %0" :"=m"(restore_float[29]));		\
 +  asm volatile ("fst.d $f30, %0" :"=m"(restore_float[30]));		\
 +  asm volatile ("fst.d $f31, %0" :"=m"(restore_float[31]));		\
 +  if (memcmp (src_float, restore_float, sizeof (src_float)) != 0)	\
 +    {									\
 +      printf ("Float registers compare failed!\n");			\
 +      abort ();								\
 +    }
 #ifdef HAVE_LOONGARCH_VEC_COM
   #define	LOAD_REGISTER_LSX()					\
@@ -181,44 +188,47 @@
   #define	SAVE_REGISTER_LSX()					\
     int src_lsx[32][4];							\
     int restore_lsx[32][4];						\
 -    asm volatile ("vst $vr0, %0" :"=m"(restore_lsx[0]) :);		\
 -    asm volatile ("vst $vr1, %0" :"=m"(restore_lsx[1]) :); 		\
 -    asm volatile ("vst $vr2, %0" :"=m"(restore_lsx[2]) :); 		\
 -    asm volatile ("vst $vr3, %0" :"=m"(restore_lsx[3]) :); 		\
 -    asm volatile ("vst $vr4, %0" :"=m"(restore_lsx[4]) :); 		\
 -    asm volatile ("vst $vr5, %0" :"=m"(restore_lsx[5]) :); 		\
 -    asm volatile ("vst $vr6, %0" :"=m"(restore_lsx[6]) :); 		\
 -    asm volatile ("vst $vr7, %0" :"=m"(restore_lsx[7]) :); 		\
 -    asm volatile ("vst $vr8, %0" :"=m"(restore_lsx[8]) :); 		\
 -    asm volatile ("vst $vr9, %0" :"=m"(restore_lsx[9]) :); 		\
 -    asm volatile ("vst $vr10, %0" :"=m"(restore_lsx[10]) :);		\
 -    asm volatile ("vst $vr11, %0" :"=m"(restore_lsx[11]) :);		\
 -    asm volatile ("vst $vr12, %0" :"=m"(restore_lsx[12]) :);		\
 -    asm volatile ("vst $vr13, %0" :"=m"(restore_lsx[13]) :);		\
 -    asm volatile ("vst $vr14, %0" :"=m"(restore_lsx[14]) :);		\
 -    asm volatile ("vst $vr15, %0" :"=m"(restore_lsx[15]) :);		\
 -    asm volatile ("vst $vr16, %0" :"=m"(restore_lsx[16]) :);		\
 -    asm volatile ("vst $vr17, %0" :"=m"(restore_lsx[17]) :);		\
 -    asm volatile ("vst $vr18, %0" :"=m"(restore_lsx[18]) :);		\
 -    asm volatile ("vst $vr19, %0" :"=m"(restore_lsx[19]) :);		\
 -    asm volatile ("vst $vr20, %0" :"=m"(restore_lsx[20]) :);		\
 -    asm volatile ("vst $vr21, %0" :"=m"(restore_lsx[21]) :);		\
 -    asm volatile ("vst $vr22, %0" :"=m"(restore_lsx[22]) :);		\
 -    asm volatile ("vst $vr23, %0" :"=m"(restore_lsx[23]) :);		\
 -    asm volatile ("vst $vr24, %0" :"=m"(restore_lsx[24]) :);		\
 -    asm volatile ("vst $vr25, %0" :"=m"(restore_lsx[25]) :);		\
 -    asm volatile ("vst $vr26, %0" :"=m"(restore_lsx[26]) :);		\
 -    asm volatile ("vst $vr27, %0" :"=m"(restore_lsx[27]) :);		\
 -    asm volatile ("vst $vr28, %0" :"=m"(restore_lsx[28]) :);		\
 -    asm volatile ("vst $vr29, %0" :"=m"(restore_lsx[29]) :);		\
 -    asm volatile ("vst $vr30, %0" :"=m"(restore_lsx[30]) :);		\
 -    asm volatile ("vst $vr31, %0" :"=m"(restore_lsx[31]) :);		\
 +    asm volatile ("vst $vr0, %0" :"=m"(restore_lsx[0]));		\
 +    asm volatile ("vst $vr1, %0" :"=m"(restore_lsx[1])); 		\
 +    asm volatile ("vst $vr2, %0" :"=m"(restore_lsx[2])); 		\
 +    asm volatile ("vst $vr3, %0" :"=m"(restore_lsx[3])); 		\
 +    asm volatile ("vst $vr4, %0" :"=m"(restore_lsx[4])); 		\
 +    asm volatile ("vst $vr5, %0" :"=m"(restore_lsx[5])); 		\
 +    asm volatile ("vst $vr6, %0" :"=m"(restore_lsx[6])); 		\
 +    asm volatile ("vst $vr7, %0" :"=m"(restore_lsx[7])); 		\
 +    asm volatile ("vst $vr8, %0" :"=m"(restore_lsx[8])); 		\
 +    asm volatile ("vst $vr9, %0" :"=m"(restore_lsx[9])); 		\
 +    asm volatile ("vst $vr10, %0" :"=m"(restore_lsx[10]));		\
 +    asm volatile ("vst $vr11, %0" :"=m"(restore_lsx[11]));		\
 +    asm volatile ("vst $vr12, %0" :"=m"(restore_lsx[12]));		\
 +    asm volatile ("vst $vr13, %0" :"=m"(restore_lsx[13]));		\
 +    asm volatile ("vst $vr14, %0" :"=m"(restore_lsx[14]));		\
 +    asm volatile ("vst $vr15, %0" :"=m"(restore_lsx[15]));		\
 +    asm volatile ("vst $vr16, %0" :"=m"(restore_lsx[16]));		\
 +    asm volatile ("vst $vr17, %0" :"=m"(restore_lsx[17]));		\
 +    asm volatile ("vst $vr18, %0" :"=m"(restore_lsx[18]));		\
 +    asm volatile ("vst $vr19, %0" :"=m"(restore_lsx[19]));		\
 +    asm volatile ("vst $vr20, %0" :"=m"(restore_lsx[20]));		\
 +    asm volatile ("vst $vr21, %0" :"=m"(restore_lsx[21]));		\
 +    asm volatile ("vst $vr22, %0" :"=m"(restore_lsx[22]));		\
 +    asm volatile ("vst $vr23, %0" :"=m"(restore_lsx[23]));		\
 +    asm volatile ("vst $vr24, %0" :"=m"(restore_lsx[24]));		\
 +    asm volatile ("vst $vr25, %0" :"=m"(restore_lsx[25]));		\
 +    asm volatile ("vst $vr26, %0" :"=m"(restore_lsx[26]));		\
 +    asm volatile ("vst $vr27, %0" :"=m"(restore_lsx[27]));		\
 +    asm volatile ("vst $vr28, %0" :"=m"(restore_lsx[28]));		\
 +    asm volatile ("vst $vr29, %0" :"=m"(restore_lsx[29]));		\
 +    asm volatile ("vst $vr30, %0" :"=m"(restore_lsx[30]));		\
 +    asm volatile ("vst $vr31, %0" :"=m"(restore_lsx[31]));		\
     for (int i = 0; i < 32; i++)					\
       for (int j = 0; j < 4; j++)					\
 	{								\
 	  src_lsx[i][j] = 0x01010101 * (i + 1);				\
 	  if (src_lsx[i][j] != restore_lsx[i][j])			\
 -	    abort ();							\
 +	    {								\
 +	      printf ("LSX registers compare failed!\n");		\
 +	      abort ();							\
 +	    }								\
 	}
 #else
   #define	SAVE_REGISTER_LSX()
@@ -267,44 +277,48 @@
   #define	SAVE_REGISTER_LASX()					\
     int src_lasx[32][8];						\
     int restore_lasx[32][8];						\
 -    asm volatile ("xvst $xr0, %0" :"=m"(restore_lasx[0]) :);		\
 -    asm volatile ("xvst $xr1, %0" :"=m"(restore_lasx[1]) :); 		\
 -    asm volatile ("xvst $xr2, %0" :"=m"(restore_lasx[2]) :); 		\
 -    asm volatile ("xvst $xr3, %0" :"=m"(restore_lasx[3]) :); 		\
 -    asm volatile ("xvst $xr4, %0" :"=m"(restore_lasx[4]) :); 		\
 -    asm volatile ("xvst $xr5, %0" :"=m"(restore_lasx[5]) :); 		\
 -    asm volatile ("xvst $xr6, %0" :"=m"(restore_lasx[6]) :); 		\
 -    asm volatile ("xvst $xr7, %0" :"=m"(restore_lasx[7]) :); 		\
 -    asm volatile ("xvst $xr8, %0" :"=m"(restore_lasx[8]) :); 		\
 -    asm volatile ("xvst $xr9, %0" :"=m"(restore_lasx[9]) :); 		\
 -    asm volatile ("xvst $xr10, %0" :"=m"(restore_lasx[10]) :);		\
 -    asm volatile ("xvst $xr11, %0" :"=m"(restore_lasx[11]) :);		\
 -    asm volatile ("xvst $xr12, %0" :"=m"(restore_lasx[12]) :);		\
 -    asm volatile ("xvst $xr13, %0" :"=m"(restore_lasx[13]) :);		\
 -    asm volatile ("xvst $xr14, %0" :"=m"(restore_lasx[14]) :);		\
 -    asm volatile ("xvst $xr15, %0" :"=m"(restore_lasx[15]) :);		\
 -    asm volatile ("xvst $xr16, %0" :"=m"(restore_lasx[16]) :);		\
 -    asm volatile ("xvst $xr17, %0" :"=m"(restore_lasx[17]) :);		\
 -    asm volatile ("xvst $xr18, %0" :"=m"(restore_lasx[18]) :);		\
 -    asm volatile ("xvst $xr19, %0" :"=m"(restore_lasx[19]) :);		\
 -    asm volatile ("xvst $xr20, %0" :"=m"(restore_lasx[20]) :);		\
 -    asm volatile ("xvst $xr21, %0" :"=m"(restore_lasx[21]) :);		\
 -    asm volatile ("xvst $xr22, %0" :"=m"(restore_lasx[22]) :);		\
 -    asm volatile ("xvst $xr23, %0" :"=m"(restore_lasx[23]) :);		\
 -    asm volatile ("xvst $xr24, %0" :"=m"(restore_lasx[24]) :);		\
 -    asm volatile ("xvst $xr25, %0" :"=m"(restore_lasx[25]) :);		\
 -    asm volatile ("xvst $xr26, %0" :"=m"(restore_lasx[26]) :);		\
 -    asm volatile ("xvst $xr27, %0" :"=m"(restore_lasx[27]) :);		\
 -    asm volatile ("xvst $xr28, %0" :"=m"(restore_lasx[28]) :);		\
 -    asm volatile ("xvst $xr29, %0" :"=m"(restore_lasx[29]) :);		\
 -    asm volatile ("xvst $xr30, %0" :"=m"(restore_lasx[30]) :);		\
 -    asm volatile ("xvst $xr31, %0" :"=m"(restore_lasx[31]) :);		\
 +    asm volatile ("xvst $xr0, %0" :"=m"(restore_lasx[0]));		\
 +    asm volatile ("xvst $xr1, %0" :"=m"(restore_lasx[1])); 		\
 +    asm volatile ("xvst $xr2, %0" :"=m"(restore_lasx[2])); 		\
 +    asm volatile ("xvst $xr3, %0" :"=m"(restore_lasx[3])); 		\
 +    asm volatile ("xvst $xr4, %0" :"=m"(restore_lasx[4])); 		\
 +    asm volatile ("xvst $xr5, %0" :"=m"(restore_lasx[5])); 		\
 +    asm volatile ("xvst $xr6, %0" :"=m"(restore_lasx[6])); 		\
 +    asm volatile ("xvst $xr7, %0" :"=m"(restore_lasx[7])); 		\
 +    asm volatile ("xvst $xr8, %0" :"=m"(restore_lasx[8])); 		\
 +    asm volatile ("xvst $xr9, %0" :"=m"(restore_lasx[9])); 		\
 +    asm volatile ("xvst $xr10, %0" :"=m"(restore_lasx[10]));		\
 +    asm volatile ("xvst $xr11, %0" :"=m"(restore_lasx[11]));		\
 +    asm volatile ("xvst $xr12, %0" :"=m"(restore_lasx[12]));		\
 +    asm volatile ("xvst $xr13, %0" :"=m"(restore_lasx[13]));		\
 +    asm volatile ("xvst $xr14, %0" :"=m"(restore_lasx[14]));		\
 +    asm volatile ("xvst $xr15, %0" :"=m"(restore_lasx[15]));		\
 +    asm volatile ("xvst $xr16, %0" :"=m"(restore_lasx[16]));		\
 +    asm volatile ("xvst $xr17, %0" :"=m"(restore_lasx[17]));		\
 +    asm volatile ("xvst $xr18, %0" :"=m"(restore_lasx[18]));		\
 +    asm volatile ("xvst $xr19, %0" :"=m"(restore_lasx[19]));		\
 +    asm volatile ("xvst $xr20, %0" :"=m"(restore_lasx[20]));		\
 +    asm volatile ("xvst $xr21, %0" :"=m"(restore_lasx[21]));		\
 +    asm volatile ("xvst $xr22, %0" :"=m"(restore_lasx[22]));		\
 +    asm volatile ("xvst $xr23, %0" :"=m"(restore_lasx[23]));		\
 +    asm volatile ("xvst $xr24, %0" :"=m"(restore_lasx[24]));		\
 +    asm volatile ("xvst $xr25, %0" :"=m"(restore_lasx[25]));		\
 +    asm volatile ("xvst $xr26, %0" :"=m"(restore_lasx[26]));		\
 +    asm volatile ("xvst $xr27, %0" :"=m"(restore_lasx[27]));		\
 +    asm volatile ("xvst $xr28, %0" :"=m"(restore_lasx[28]));		\
 +    asm volatile ("xvst $xr29, %0" :"=m"(restore_lasx[29]));		\
 +    asm volatile ("xvst $xr30, %0" :"=m"(restore_lasx[30]));		\
 +    asm volatile ("xvst $xr31, %0" :"=m"(restore_lasx[31]));		\
 +    /* memcmp_lasx/strlen_lasx corrupts LSX/LASX registers, */		\
     for (int i = 0; i < 32; i++)					\
       for (int j = 0; j < 8; j++)					\
 	{								\
 	  src_lasx[i][j] = 0x01010101 * (i + 1);			\
 	  if (src_lasx[i][j] != restore_lasx[i][j])			\
 -	    abort ();							\
 +	    {								\
 +	      printf ("LASX registers compare failed!\n");		\
 +	      abort ();							\
 +	    }								\
 	}
 #else
   #define	SAVE_REGISTER_LASX()
@@ -314,12 +328,7 @@
   uint64_t src;								\
   double src_float[32];							\
   uint64_t src_fcc[8];							\
 -  for (int i = 0; i < 32; i++)						\
 -    src_float[i] = i + 1;						\
 -									\
   SAVE_REGISTER (src);							\
 -  LOAD_REGISTER_FCSR ();						\
 -  SAVE_REGISTER_FCC(src_fcc)						\
 									\
   if (hwcap & HWCAP_LOONGARCH_LASX)					\
     {									\
@@ -332,19 +341,34 @@
   else									\
     {									\
       LOAD_REGISTER_FLOAT ();						\
 -    }
 +    }									\
 +									\
 +  /* LOAD_REGISTER_FLOAT convert int double may change fcsr.  */	\
 +  LOAD_REGISTER_FCSR ();						\
 +  SAVE_REGISTER_FCC (src_fcc)
 +
 #define AFTER_TLSDESC_CALL()						\
   uint64_t restore;							\
 -  uint64_t src_fcsr = 0x01010101;					\
 -  uint64_t restore_fcsr;						\
   uint64_t restore_fcc[8];						\
 +									\
   SAVE_REGISTER (restore);						\
 +  if (src != restore)							\
 +    {									\
 +      printf ("General registers compare failed!\n");			\
 +      abort ();								\
 +    }									\
 +									\
   SAVE_REGISTER_FCSR ();						\
 -  SAVE_REGISTER_FCC(restore_fcc)					\
 									\
 -  /* memcmp_lasx/strlen_lasx corrupts LSX/LASX registers, */		\
 -  /* compare LSX/LASX registers first.  */				\
 +  SAVE_REGISTER_FCC (restore_fcc)					\
 +  for (int i = 0; i < 8; i++)						\
 +    if (src_fcc[i] != restore_fcc[i])					\
 +      {									\
 +	printf ("FCC registers compare failed!\n");			\
 +	abort ();							\
 +      }									\
 +									\
   if (hwcap & HWCAP_LOONGARCH_LASX)					\
     {									\
       SAVE_REGISTER_LASX ();						\
@@ -355,22 +379,8 @@
     }									\
   else									\
     {									\
 -      double restore_float[32];						\
       SAVE_REGISTER_FLOAT ();						\
 -									\
 -      for (int i = 0; i < 32; i++)					\
 -       if (src_float[i] != restore_float[i])				\
 -	abort ();							\
     }									\
 -									\
 -  if (src_fcsr != restore_fcsr)						\
 -    abort ();								\
 -									\
 -  if (memcmp (src_fcc, restore_fcc, sizeof (src_fcc)) != 0)		\
 -    abort ();								\
 -									\
 -  if (src != restore)							\
 -    abort ();
 #endif /* #ifdef __loongarch_soft_float */
 -- 
 2.43.0
--- a/0010-LoongArch-Add-cfi-instructions-for-_dl_tlsdesc_dynam.patch
+++ b/0010-LoongArch-Add-cfi-instructions-for-_dl_tlsdesc_dynam.patch
@ -0,0 +1,719 @@
 From 4c3e5be27dd56bb67f7e259f6f24dcb392b3aaa2 Mon Sep 17 00:00:00 2001
 From: mengqinggang <mengqinggang@loongson.cn>
 Date: Fri, 5 Jul 2024 10:40:33 +0800
 Subject: [PATCH 10/15] LoongArch: Add cfi instructions for _dl_tlsdesc_dynamic
 In _dl_tlsdesc_dynamic, there are three 'addi.d sp, sp, -size'
 instructions to allocate stack space for Float/LSX/LASX registers.
 Every 'addi.d sp, sp, -size' needs a cfi_adjust_cfa_offset because
 sp is used to compute the CFA.  But only one 'addi.d sp, sp, -size'
 is executed, depending on the HWCAP value, while every
 cfi_adjust_cfa_offset is applied when unwinding, giving a wrong CFA.
 Split _dl_tlsdesc_dynamic into _dl_tlsdesc_dynamic,
 _dl_tlsdesc_dynamic_lsx and _dl_tlsdesc_dynamic_lasx.
 The conflicting cfi directives are then distributed among the three
 functions, each matching its own stack adjustment instruction.
 ---
 sysdeps/loongarch/dl-machine.h         |   7 +
 sysdeps/loongarch/dl-tlsdesc-dynamic.h | 225 ++++++++++++++
 sysdeps/loongarch/dl-tlsdesc.S         | 386 ++-----------------------
 sysdeps/loongarch/dl-tlsdesc.h         |   4 +
 sysdeps/loongarch/tlsdesc.sym          |   9 -
 5 files changed, 258 insertions(+), 373 deletions(-)
 create mode 100644 sysdeps/loongarch/dl-tlsdesc-dynamic.h
 diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
 index 222d51a8..d934c77f 100644
 --- a/sysdeps/loongarch/dl-machine.h
 +++ b/sysdeps/loongarch/dl-machine.h
@@ -222,6 +222,13 @@ elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
 	      {
 		td->arg = _dl_make_tlsdesc_dynamic (sym_map,
 			      sym->st_value + reloc->r_addend);
 +# ifndef __loongarch_soft_float
 +		if (RTLD_SUPPORT_LASX)
 +		  td->entry = _dl_tlsdesc_dynamic_lasx;
 +		else if (RTLD_SUPPORT_LSX)
 +		  td->entry = _dl_tlsdesc_dynamic_lsx;
 +		else
 +# endif
 		td->entry = _dl_tlsdesc_dynamic;
 	      }
 	    else
 diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
 new file mode 100644
 index 00000000..d10f4a88
 --- /dev/null
 +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
@@ -0,0 +1,225 @@
 +/* Thread-local storage handling in the ELF dynamic linker.
 +   LoongArch version.
 +   Copyright (C) 2024 Free Software Foundation, Inc.
 +
 +   This file is part of the GNU C Library.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, see
 +   <https://www.gnu.org/licenses/>.  */
 +
 +#define FRAME_SIZE	  (-((-14 * SZREG) & ALMASK))
 +#define FRAME_SIZE_LSX	  (-((-32 * SZVREG) & ALMASK))
 +#define FRAME_SIZE_LASX	  (-((-32 * SZXREG) & ALMASK))
 +#define FRAME_SIZE_FLOAT  (-((-24 * SZFREG) & ALMASK))
 +
 +	/* Handler for dynamic TLS symbols.
 +	   Prototype:
 +	   _dl_tlsdesc_dynamic (tlsdesc *) ;
 +
 +	   The second word of the descriptor points to a
 +	   tlsdesc_dynamic_arg structure.
 +
 +	   Returns the offset between the thread pointer and the
 +	   object referenced by the argument.
 +
 +	   ptrdiff_t
 +	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
 +	   {
 +	     struct tlsdesc_dynamic_arg *td = tdp->arg;
 +	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB);
 +	     if (__glibc_likely (td->gen_count <= dtv[0].counter
 +		&& (dtv[td->tlsinfo.ti_module].pointer.val
 +		    != TLS_DTV_UNALLOCATED),
 +		1))
 +	       return dtv[td->tlsinfo.ti_module].pointer.val
 +		+ td->tlsinfo.ti_offset
 +		- __thread_pointer;
 +
 +	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
 +	   }  */
 +	.hidden _dl_tlsdesc_dynamic
 +	.global	_dl_tlsdesc_dynamic
 +	.type	_dl_tlsdesc_dynamic,%function
 +	cfi_startproc
 +	.align 2
 +_dl_tlsdesc_dynamic:
 +	/* Save just enough registers to support fast path, if we fall
 +	   into slow path we will save additional registers.  */
 +	ADDI	sp, sp, -32
 +	cfi_adjust_cfa_offset (32)
 +	REG_S	t0, sp, 0
 +	REG_S	t1, sp, 8
 +	REG_S	t2, sp, 16
 +	cfi_rel_offset (12, 0)
 +	cfi_rel_offset (13, 8)
 +	cfi_rel_offset (14, 16)
 +
 +/* Runtime Storage Layout of Thread-Local Storage
 +   TP point to the start of TLS block.
 +
 +				      dtv
 +Low address	TCB ----------------> dtv0(counter)
 +	 TP -->	static_block0  <----- dtv1
 +		static_block1  <----- dtv2
 +		static_block2  <----- dtv3
 +		dynamic_block0 <----- dtv4
 +Hign address	dynamic_block1 <----- dtv5  */
 +
 +	REG_L	t0, tp, -SIZE_OF_TCB	  /* t0 = dtv */
 +	REG_L	a0, a0, TLSDESC_ARG	  /* a0(td) = tdp->arg */
 +	REG_L	t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
 +	REG_L	t2, t0, DTV_COUNTER	  /* t2 = dtv[0].counter */
 +	/* If dtv[0].counter < td->gen_count, goto slow path.  */
 +	bltu	t2, t1, .Lslow
 +
 +	REG_L	t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
 +	/* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
 +	slli.d	t1, t1, 4
 +	add.d	t1, t1, t0  /* t1 = dtv[td->tlsinfo.ti_module] */
 +	REG_L	t1, t1, 0   /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */
 +	li.d	t2, TLS_DTV_UNALLOCATED
 +	/* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED,
 +	   goto slow path.  */
 +	beq	t1, t2, .Lslow
 +
 +	cfi_remember_state
 +	REG_L	t2, a0, TLSDESC_MODOFF	/* t2 = td->tlsinfo.ti_offset */
 +	/* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
 +	add.d	a0, t1, t2
 +.Lret:
 +	sub.d	a0, a0, tp
 +	REG_L	t0, sp, 0
 +	REG_L	t1, sp, 8
 +	REG_L	t2, sp, 16
 +	ADDI	sp, sp, 32
 +	cfi_adjust_cfa_offset (-32)
 +	RET
 +
 +.Lslow:
 +	/* This is the slow path.  We need to call __tls_get_addr() which
 +	   means we need to save and restore all the register that the
 +	   callee will trash.  */
 +
 +	/* Save the remaining registers that we must treat as caller save.  */
 +	cfi_restore_state
 +	ADDI	sp, sp, -FRAME_SIZE
 +	cfi_adjust_cfa_offset (FRAME_SIZE)
 +	REG_S	ra, sp, 0 * SZREG
 +	REG_S	a1, sp, 1 * SZREG
 +	REG_S	a2, sp, 2 * SZREG
 +	REG_S	a3, sp, 3 * SZREG
 +	REG_S	a4, sp, 4 * SZREG
 +	REG_S	a5, sp, 5 * SZREG
 +	REG_S	a6, sp, 6 * SZREG
 +	REG_S	a7, sp, 7 * SZREG
 +	REG_S	t3, sp, 8 * SZREG
 +	REG_S	t4, sp, 9 * SZREG
 +	REG_S	t5, sp, 10 * SZREG
 +	REG_S	t6, sp, 11 * SZREG
 +	REG_S	t7, sp, 12 * SZREG
 +	REG_S	t8, sp, 13 * SZREG
 +	cfi_rel_offset (1, 0 * SZREG)
 +	cfi_rel_offset (5, 1 * SZREG)
 +	cfi_rel_offset (6, 2 * SZREG)
 +	cfi_rel_offset (7, 3 * SZREG)
 +	cfi_rel_offset (8, 4 * SZREG)
 +	cfi_rel_offset (9, 5 * SZREG)
 +	cfi_rel_offset (10, 6 * SZREG)
 +	cfi_rel_offset (11, 7 * SZREG)
 +	cfi_rel_offset (15, 8 * SZREG)
 +	cfi_rel_offset (16, 9 * SZREG)
 +	cfi_rel_offset (17, 10 * SZREG)
 +	cfi_rel_offset (18, 11 * SZREG)
 +	cfi_rel_offset (19, 12 * SZREG)
 +	cfi_rel_offset (20, 13 * SZREG)
 +
 +#ifndef __loongarch_soft_float
 +
 +	/* Save fcsr0 register.
 +	   Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
 +	   of some fields in fcsr0.  */
 +	movfcsr2gr  t0, fcsr0
 +	st.w	t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2.  */
 +
 +#ifdef USE_LASX
 +  #define V_REG_S xvst
 +  #define V_REG_L xvld
 +  #define V_SPACE FRAME_SIZE_LASX
 +  #define V_REG(n) $xr##n
 +  #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,  \
 +		 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
 +  #define V_REGSZ SZXREG
 +#elif defined USE_LSX
 +  #define V_REG_S vst
 +  #define V_REG_L vld
 +  #define V_SPACE FRAME_SIZE_LSX
 +  #define V_REG(n) $vr##n
 +  #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,  \
 +		 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
 +  #define V_REGSZ SZVREG
 +#else
 +  #define V_REG_S fst.d
 +  #define V_REG_L fld.d
 +  #define V_SPACE FRAME_SIZE_FLOAT
 +  #define V_REG(n) $f##n
 +  #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
 +  #define V_REGSZ SZFREG
 +#endif
 +
 +	ADDI	sp, sp, -V_SPACE
 +	cfi_adjust_cfa_offset (V_SPACE)
 +	.irp	i,V_REGS
 +        V_REG_S	V_REG(\i), sp, \i * V_REGSZ
 +	.endr
 +
 +#endif /* #ifndef __loongarch_soft_float */
 +
 +	bl	HIDDEN_JUMPTARGET(__tls_get_addr)
 +	ADDI	a0, a0, -TLS_DTV_OFFSET
 +
 +#ifndef __loongarch_soft_float
 +
 +	.irp	i,V_REGS
 +	V_REG_L	V_REG(\i), sp, \i * V_REGSZ
 +	.endr
 +	ADDI	sp, sp, V_SPACE
 +	cfi_adjust_cfa_offset (-V_SPACE)
 +
 +	/* Restore fcsr0 register.  */
 +	ld.w	t0, sp, FRAME_SIZE + 24
 +	movgr2fcsr  fcsr0, t0
 +
 +#endif /* #ifndef __loongarch_soft_float */
 +
 +	REG_L	ra, sp, 0 * SZREG
 +	REG_L	a1, sp, 1 * SZREG
 +	REG_L	a2, sp, 2 * SZREG
 +	REG_L	a3, sp, 3 * SZREG
 +	REG_L	a4, sp, 4 * SZREG
 +	REG_L	a5, sp, 5 * SZREG
 +	REG_L	a6, sp, 6 * SZREG
 +	REG_L	a7, sp, 7 * SZREG
 +	REG_L	t3, sp, 8 * SZREG
 +	REG_L	t4, sp, 9 * SZREG
 +	REG_L	t5, sp, 10 * SZREG
 +	REG_L	t6, sp, 11 * SZREG
 +	REG_L	t7, sp, 12 * SZREG
 +	REG_L	t8, sp, 13 * SZREG
 +	ADDI	sp, sp, FRAME_SIZE
 +	cfi_adjust_cfa_offset (-FRAME_SIZE)
 +
 +	b	.Lret
 +	cfi_endproc
 +	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
 +	.hidden HIDDEN_JUMPTARGET(__tls_get_addr)
 diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
 index a6627cc7..b6cfd612 100644
 --- a/sysdeps/loongarch/dl-tlsdesc.S
 +++ b/sysdeps/loongarch/dl-tlsdesc.S
@@ -59,376 +59,34 @@ _dl_tlsdesc_undefweak:
 	cfi_endproc
 	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
 -
 #ifdef SHARED
 -#define FRAME_SIZE	  (-((-14 * SZREG) & ALMASK))
 -#define FRAME_SIZE_LSX	  (-((-32 * SZVREG) & ALMASK))
 -#define FRAME_SIZE_LASX	  (-((-32 * SZXREG) & ALMASK))
 -#define FRAME_SIZE_FLOAT  (-((-24 * SZFREG) & ALMASK))
 -
 -	/* Handler for dynamic TLS symbols.
 -	   Prototype:
 -	   _dl_tlsdesc_dynamic (tlsdesc *) ;
 -
 -	   The second word of the descriptor points to a
 -	   tlsdesc_dynamic_arg structure.
 -
 -	   Returns the offset between the thread pointer and the
 -	   object referenced by the argument.
 -
 -	   ptrdiff_t
 -	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp)
 -	   {
 -	     struct tlsdesc_dynamic_arg *td = tdp->arg;
 -	     dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer - SIZE_OF_TCB);
 -	     if (__glibc_likely (td->gen_count <= dtv[0].counter
 -		&& (dtv[td->tlsinfo.ti_module].pointer.val
 -		    != TLS_DTV_UNALLOCATED),
 -		1))
 -	       return dtv[td->tlsinfo.ti_module].pointer.val
 -		+ td->tlsinfo.ti_offset
 -		- __thread_pointer;
 -
 -	     return ___tls_get_addr (&td->tlsinfo) - __thread_pointer;
 -	   }  */
 -	.hidden _dl_tlsdesc_dynamic
 -	.global	_dl_tlsdesc_dynamic
 -	.type	_dl_tlsdesc_dynamic,%function
 -	cfi_startproc
 -	.align 2
 -_dl_tlsdesc_dynamic:
 -	/* Save just enough registers to support fast path, if we fall
 -	   into slow path we will save additional registers.  */
 -	ADDI	sp, sp, -32
 -	REG_S	t0, sp, 0
 -	REG_S	t1, sp, 8
 -	REG_S	t2, sp, 16
 -
 -/* Runtime Storage Layout of Thread-Local Storage
 -   TP point to the start of TLS block.
 -
 -				      dtv
 -Low address	TCB ----------------> dtv0(counter)
 -	 TP -->	static_block0  <----- dtv1
 -		static_block1  <----- dtv2
 -		static_block2  <----- dtv3
 -		dynamic_block0 <----- dtv4
 -Hign address	dynamic_block1 <----- dtv5  */
 -
 -	REG_L	t0, tp, -SIZE_OF_TCB	  /* t0 = dtv */
 -	REG_L	a0, a0, TLSDESC_ARG	  /* a0(td) = tdp->arg */
 -	REG_L	t1, a0, TLSDESC_GEN_COUNT /* t1 = td->gen_count */
 -	REG_L	t2, t0, DTV_COUNTER	  /* t2 = dtv[0].counter */
 -	/* If dtv[0].counter < td->gen_count, goto slow path.  */
 -	bltu	t2, t1, .Lslow
 -
 -	REG_L	t1, a0, TLSDESC_MODID /* t1 = td->tlsinfo.ti_module */
 -	/* t1 = t1 * sizeof(dtv_t) = t1 * (2 * sizeof(void*)) */
 -	slli.d	t1, t1, 4
 -	add.d	t1, t1, t0  /* t1 = dtv[td->tlsinfo.ti_module] */
 -	REG_L	t1, t1, 0   /* t1 = dtv[td->tlsinfo.ti_module].pointer.val */
 -	li.d	t2, TLS_DTV_UNALLOCATED
 -	/* If dtv[td->tlsinfo.ti_module].pointer.val is TLS_DTV_UNALLOCATED,
 -	   goto slow path.  */
 -	beq	t1, t2, .Lslow
 -
 -	REG_L	t2, a0, TLSDESC_MODOFF	/* t2 = td->tlsinfo.ti_offset */
 -	/* dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset */
 -	add.d	a0, t1, t2
 -.Lret:
 -	sub.d	a0, a0, tp
 -	REG_L	t0, sp, 0
 -	REG_L	t1, sp, 8
 -	REG_L	t2, sp, 16
 -	ADDI	sp, sp, 32
 -	RET
 -
 -.Lslow:
 -	/* This is the slow path. We need to call __tls_get_addr() which
 -	   means we need to save and restore all the register that the
 -	   callee will trash.  */
 -
 -	/* Save the remaining registers that we must treat as caller save.  */
 -	ADDI	sp, sp, -FRAME_SIZE
 -	REG_S	ra, sp, 0 * SZREG
 -	REG_S	a1, sp, 1 * SZREG
 -	REG_S	a2, sp, 2 * SZREG
 -	REG_S	a3, sp, 3 * SZREG
 -	REG_S	a4, sp, 4 * SZREG
 -	REG_S	a5, sp, 5 * SZREG
 -	REG_S	a6, sp, 6 * SZREG
 -	REG_S	a7, sp, 7 * SZREG
 -	REG_S	t3, sp, 8 * SZREG
 -	REG_S	t4, sp, 9 * SZREG
 -	REG_S	t5, sp, 10 * SZREG
 -	REG_S	t6, sp, 11 * SZREG
 -	REG_S	t7, sp, 12 * SZREG
 -	REG_S	t8, sp, 13 * SZREG
 -
 #ifndef __loongarch_soft_float
 -	/* Save fcsr0 register.
 -	   Only one physical fcsr0 register, fcsr1-fcsr3 are aliases
 -	   of some fields in fcsr0.  */
 -	movfcsr2gr  t0, fcsr0
 -	st.w	t0, sp, FRAME_SIZE + 24 /* Use the spare slot above t2 */
 -
 -	/* Whether support LASX.  */
 -	la.global   t0, _rtld_global_ro
 -	REG_L	t0, t0, GLRO_DL_HWCAP_OFFSET
 -	andi	t1, t0, HWCAP_LOONGARCH_LASX
 -	beqz	t1, .Llsx
 -
 -	/* Save 256-bit vector registers.
 -	   FIXME: Without vector ABI, save all vector registers.  */
 -	ADDI	sp, sp, -FRAME_SIZE_LASX
 -	xvst	xr0, sp, 0*SZXREG
 -	xvst	xr1, sp, 1*SZXREG
 -	xvst	xr2, sp, 2*SZXREG
 -	xvst	xr3, sp, 3*SZXREG
 -	xvst	xr4, sp, 4*SZXREG
 -	xvst	xr5, sp, 5*SZXREG
 -	xvst	xr6, sp, 6*SZXREG
 -	xvst	xr7, sp, 7*SZXREG
 -	xvst	xr8, sp, 8*SZXREG
 -	xvst	xr9, sp, 9*SZXREG
 -	xvst	xr10, sp, 10*SZXREG
 -	xvst	xr11, sp, 11*SZXREG
 -	xvst	xr12, sp, 12*SZXREG
 -	xvst	xr13, sp, 13*SZXREG
 -	xvst	xr14, sp, 14*SZXREG
 -	xvst	xr15, sp, 15*SZXREG
 -	xvst	xr16, sp, 16*SZXREG
 -	xvst	xr17, sp, 17*SZXREG
 -	xvst	xr18, sp, 18*SZXREG
 -	xvst	xr19, sp, 19*SZXREG
 -	xvst	xr20, sp, 20*SZXREG
 -	xvst	xr21, sp, 21*SZXREG
 -	xvst	xr22, sp, 22*SZXREG
 -	xvst	xr23, sp, 23*SZXREG
 -	xvst	xr24, sp, 24*SZXREG
 -	xvst	xr25, sp, 25*SZXREG
 -	xvst	xr26, sp, 26*SZXREG
 -	xvst	xr27, sp, 27*SZXREG
 -	xvst	xr28, sp, 28*SZXREG
 -	xvst	xr29, sp, 29*SZXREG
 -	xvst	xr30, sp, 30*SZXREG
 -	xvst	xr31, sp, 31*SZXREG
 -	b	    .Ltga
 -
 -.Llsx:
 -	/* Whether support LSX.  */
 -	andi	t1, t0, HWCAP_LOONGARCH_LSX
 -	beqz	t1, .Lfloat
 -
 -	/* Save 128-bit vector registers.  */
 -	ADDI	sp, sp, -FRAME_SIZE_LSX
 -	vst	vr0, sp, 0*SZVREG
 -	vst	vr1, sp, 1*SZVREG
 -	vst	vr2, sp, 2*SZVREG
 -	vst	vr3, sp, 3*SZVREG
 -	vst	vr4, sp, 4*SZVREG
 -	vst	vr5, sp, 5*SZVREG
 -	vst	vr6, sp, 6*SZVREG
 -	vst	vr7, sp, 7*SZVREG
 -	vst	vr8, sp, 8*SZVREG
 -	vst	vr9, sp, 9*SZVREG
 -	vst	vr10, sp, 10*SZVREG
 -	vst	vr11, sp, 11*SZVREG
 -	vst	vr12, sp, 12*SZVREG
 -	vst	vr13, sp, 13*SZVREG
 -	vst	vr14, sp, 14*SZVREG
 -	vst	vr15, sp, 15*SZVREG
 -	vst	vr16, sp, 16*SZVREG
 -	vst	vr17, sp, 17*SZVREG
 -	vst	vr18, sp, 18*SZVREG
 -	vst	vr19, sp, 19*SZVREG
 -	vst	vr20, sp, 20*SZVREG
 -	vst	vr21, sp, 21*SZVREG
 -	vst	vr22, sp, 22*SZVREG
 -	vst	vr23, sp, 23*SZVREG
 -	vst	vr24, sp, 24*SZVREG
 -	vst	vr25, sp, 25*SZVREG
 -	vst	vr26, sp, 26*SZVREG
 -	vst	vr27, sp, 27*SZVREG
 -	vst	vr28, sp, 28*SZVREG
 -	vst	vr29, sp, 29*SZVREG
 -	vst	vr30, sp, 30*SZVREG
 -	vst	vr31, sp, 31*SZVREG
 -	b	    .Ltga
 -
 -.Lfloat:
 -	/* Save float registers.  */
 -	ADDI	sp, sp, -FRAME_SIZE_FLOAT
 -	FREG_S	fa0, sp, 0*SZFREG
 -	FREG_S	fa1, sp, 1*SZFREG
 -	FREG_S	fa2, sp, 2*SZFREG
 -	FREG_S	fa3, sp, 3*SZFREG
 -	FREG_S	fa4, sp, 4*SZFREG
 -	FREG_S	fa5, sp, 5*SZFREG
 -	FREG_S	fa6, sp, 6*SZFREG
 -	FREG_S	fa7, sp, 7*SZFREG
 -	FREG_S	ft0, sp, 8*SZFREG
 -	FREG_S	ft1, sp, 9*SZFREG
 -	FREG_S	ft2, sp, 10*SZFREG
 -	FREG_S	ft3, sp, 11*SZFREG
 -	FREG_S	ft4, sp, 12*SZFREG
 -	FREG_S	ft5, sp, 13*SZFREG
 -	FREG_S	ft6, sp, 14*SZFREG
 -	FREG_S	ft7, sp, 15*SZFREG
 -	FREG_S	ft8, sp, 16*SZFREG
 -	FREG_S	ft9, sp, 17*SZFREG
 -	FREG_S	ft10, sp, 18*SZFREG
 -	FREG_S	ft11, sp, 19*SZFREG
 -	FREG_S	ft12, sp, 20*SZFREG
 -	FREG_S	ft13, sp, 21*SZFREG
 -	FREG_S	ft14, sp, 22*SZFREG
 -	FREG_S	ft15, sp, 23*SZFREG
 -
 -#endif /* #ifndef __loongarch_soft_float */
 -
 -.Ltga:
 -	bl	HIDDEN_JUMPTARGET(__tls_get_addr)
 -	ADDI	a0, a0, -TLS_DTV_OFFSET
 -
 -#ifndef __loongarch_soft_float
 -
 -	la.global   t0, _rtld_global_ro
 -	REG_L	t0, t0, GLRO_DL_HWCAP_OFFSET
 -	andi	t1, t0, HWCAP_LOONGARCH_LASX
 -	beqz	t1, .Llsx1
 -
 -	/* Restore 256-bit vector registers.  */
 -	xvld	xr0, sp, 0*SZXREG
 -	xvld	xr1, sp, 1*SZXREG
 -	xvld	xr2, sp, 2*SZXREG
 -	xvld	xr3, sp, 3*SZXREG
 -	xvld	xr4, sp, 4*SZXREG
 -	xvld	xr5, sp, 5*SZXREG
 -	xvld	xr6, sp, 6*SZXREG
 -	xvld	xr7, sp, 7*SZXREG
 -	xvld	xr8, sp, 8*SZXREG
 -	xvld	xr9, sp, 9*SZXREG
 -	xvld	xr10, sp, 10*SZXREG
 -	xvld	xr11, sp, 11*SZXREG
 -	xvld	xr12, sp, 12*SZXREG
 -	xvld	xr13, sp, 13*SZXREG
 -	xvld	xr14, sp, 14*SZXREG
 -	xvld	xr15, sp, 15*SZXREG
 -	xvld	xr16, sp, 16*SZXREG
 -	xvld	xr17, sp, 17*SZXREG
 -	xvld	xr18, sp, 18*SZXREG
 -	xvld	xr19, sp, 19*SZXREG
 -	xvld	xr20, sp, 20*SZXREG
 -	xvld	xr21, sp, 21*SZXREG
 -	xvld	xr22, sp, 22*SZXREG
 -	xvld	xr23, sp, 23*SZXREG
 -	xvld	xr24, sp, 24*SZXREG
 -	xvld	xr25, sp, 25*SZXREG
 -	xvld	xr26, sp, 26*SZXREG
 -	xvld	xr27, sp, 27*SZXREG
 -	xvld	xr28, sp, 28*SZXREG
 -	xvld	xr29, sp, 29*SZXREG
 -	xvld	xr30, sp, 30*SZXREG
 -	xvld	xr31, sp, 31*SZXREG
 -	ADDI	sp, sp, FRAME_SIZE_LASX
 -	b .Lfcsr
 -
 -.Llsx1:
 -	andi	t1, t0, HWCAP_LOONGARCH_LSX
 -	beqz	t1, .Lfloat1
 -
 -	/* Restore 128-bit vector registers.  */
 -	vld	vr0, sp, 0*SZVREG
 -	vld	vr1, sp, 1*SZVREG
 -	vld	vr2, sp, 2*SZVREG
 -	vld	vr3, sp, 3*SZVREG
 -	vld	vr4, sp, 4*SZVREG
 -	vld	vr5, sp, 5*SZVREG
 -	vld	vr6, sp, 6*SZVREG
 -	vld	vr7, sp, 7*SZVREG
 -	vld	vr8, sp, 8*SZVREG
 -	vld	vr9, sp, 9*SZVREG
 -	vld	vr10, sp, 10*SZVREG
 -	vld	vr11, sp, 11*SZVREG
 -	vld	vr12, sp, 12*SZVREG
 -	vld	vr13, sp, 13*SZVREG
 -	vld	vr14, sp, 14*SZVREG
 -	vld	vr15, sp, 15*SZVREG
 -	vld	vr16, sp, 16*SZVREG
 -	vld	vr17, sp, 17*SZVREG
 -	vld	vr18, sp, 18*SZVREG
 -	vld	vr19, sp, 19*SZVREG
 -	vld	vr20, sp, 20*SZVREG
 -	vld	vr21, sp, 21*SZVREG
 -	vld	vr22, sp, 22*SZVREG
 -	vld	vr23, sp, 23*SZVREG
 -	vld	vr24, sp, 24*SZVREG
 -	vld	vr25, sp, 25*SZVREG
 -	vld	vr26, sp, 26*SZVREG
 -	vld	vr27, sp, 27*SZVREG
 -	vld	vr28, sp, 28*SZVREG
 -	vld	vr29, sp, 29*SZVREG
 -	vld	vr30, sp, 30*SZVREG
 -	vld	vr31, sp, 31*SZVREG
 -	ADDI	sp, sp, FRAME_SIZE_LSX
 -	b	    .Lfcsr
 -
 -.Lfloat1:
 -	/* Restore float registers.  */
 -	FREG_L	fa0, sp, 0*SZFREG
 -	FREG_L	fa1, sp, 1*SZFREG
 -	FREG_L	fa2, sp, 2*SZFREG
 -	FREG_L	fa3, sp, 3*SZFREG
 -	FREG_L	fa4, sp, 4*SZFREG
 -	FREG_L	fa5, sp, 5*SZFREG
 -	FREG_L	fa6, sp, 6*SZFREG
 -	FREG_L	fa7, sp, 7*SZFREG
 -	FREG_L	ft0, sp, 8*SZFREG
 -	FREG_L	ft1, sp, 9*SZFREG
 -	FREG_L	ft2, sp, 10*SZFREG
 -	FREG_L	ft3, sp, 11*SZFREG
 -	FREG_L	ft4, sp, 12*SZFREG
 -	FREG_L	ft5, sp, 13*SZFREG
 -	FREG_L	ft6, sp, 14*SZFREG
 -	FREG_L	ft7, sp, 15*SZFREG
 -	FREG_L	ft8, sp, 16*SZFREG
 -	FREG_L	ft9, sp, 17*SZFREG
 -	FREG_L	ft10, sp, 18*SZFREG
 -	FREG_L	ft11, sp, 19*SZFREG
 -	FREG_L	ft12, sp, 20*SZFREG
 -	FREG_L	ft13, sp, 21*SZFREG
 -	FREG_L	ft14, sp, 22*SZFREG
 -	FREG_L	ft15, sp, 23*SZFREG
 -	ADDI	sp, sp, FRAME_SIZE_FLOAT
 -
 -.Lfcsr:
 -	/* Restore fcsr0 register.  */
 -	ld.w	t0, sp, FRAME_SIZE + 24
 -	movgr2fcsr  fcsr0, t0
 +#define USE_LASX
 +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lasx
 +#define Lret Lret_lasx
 +#define Lslow Lslow_lasx
 +#include "dl-tlsdesc-dynamic.h"
 +#undef FRAME_SIZE
 +#undef USE_LASX
 +#undef _dl_tlsdesc_dynamic
 +#undef Lret
 +#undef Lslow
 +
 +#define USE_LSX
 +#define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_lsx
 +#define Lret Lret_lsx
 +#define Lslow Lslow_lsx
 +#include "dl-tlsdesc-dynamic.h"
 +#undef FRAME_SIZE
 +#undef USE_LSX
 +#undef _dl_tlsdesc_dynamic
 +#undef Lret
 +#undef Lslow
 #endif /* #ifndef __loongarch_soft_float */
 -	REG_L	ra, sp, 0 * SZREG
 -	REG_L	a1, sp, 1 * SZREG
 -	REG_L	a2, sp, 2 * SZREG
 -	REG_L	a3, sp, 3 * SZREG
 -	REG_L	a4, sp, 4 * SZREG
 -	REG_L	a5, sp, 5 * SZREG
 -	REG_L	a6, sp, 6 * SZREG
 -	REG_L	a7, sp, 7 * SZREG
 -	REG_L	t3, sp, 8 * SZREG
 -	REG_L	t4, sp, 9 * SZREG
 -	REG_L	t5, sp, 10 * SZREG
 -	REG_L	t6, sp, 11 * SZREG
 -	REG_L	t7, sp, 12 * SZREG
 -	REG_L	t8, sp, 13 * SZREG
 -	ADDI	sp, sp, FRAME_SIZE
 -
 -	b	.Lret
 -	cfi_endproc
 -	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
 -	.hidden HIDDEN_JUMPTARGET(__tls_get_addr)
 +#include "dl-tlsdesc-dynamic.h"
 #endif /* #ifdef SHARED */
 diff --git a/sysdeps/loongarch/dl-tlsdesc.h b/sysdeps/loongarch/dl-tlsdesc.h
 index ff8c69cb..45c43a5b 100644
 --- a/sysdeps/loongarch/dl-tlsdesc.h
 +++ b/sysdeps/loongarch/dl-tlsdesc.h
@@ -43,6 +43,10 @@ extern ptrdiff_t attribute_hidden _dl_tlsdesc_undefweak (struct tlsdesc *);
 #ifdef SHARED
 extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
 +#ifndef __loongarch_soft_float
 +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lasx (struct tlsdesc *);
 +extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic_lsx (struct tlsdesc *);
 +#endif
 extern ptrdiff_t attribute_hidden _dl_tlsdesc_dynamic (struct tlsdesc *);
 #endif
 diff --git a/sysdeps/loongarch/tlsdesc.sym b/sysdeps/loongarch/tlsdesc.sym
 index 213d0b30..9f80fcec 100644
 --- a/sysdeps/loongarch/tlsdesc.sym
 +++ b/sysdeps/loongarch/tlsdesc.sym
@@ -4,12 +4,6 @@
 #include <link.h>
 #include <dl-tlsdesc.h>
 -#define SHARED 1
 -
 -#include <ldsodefs.h>
 -
 -#define GLRO_offsetof(name) offsetof (struct rtld_global_ro, _##name)
 -
 --
 -- Abuse tls.h macros to derive offsets relative to the thread register.
@@ -23,6 +17,3 @@ DTV_COUNTER		offsetof(dtv_t, counter)
 TLS_DTV_UNALLOCATED	TLS_DTV_UNALLOCATED
 TLS_DTV_OFFSET		TLS_DTV_OFFSET
 SIZE_OF_TCB		sizeof(tcbhead_t)
 -GLRO_DL_HWCAP_OFFSET    GLRO_offsetof (dl_hwcap)
 -HWCAP_LOONGARCH_LSX	HWCAP_LOONGARCH_LSX
 -HWCAP_LOONGARCH_LASX	HWCAP_LOONGARCH_LASX
 -- 
 2.43.0
--- a/0011-LoongArch-Fix-macro-redefined-warning-in-tls-desc.S.patch
+++ b/0011-LoongArch-Fix-macro-redefined-warning-in-tls-desc.S.patch
@ -0,0 +1,85 @@
 From c07ae520f10f01218640ae2a1f74fd01a4136e1b Mon Sep 17 00:00:00 2001
 From: mengqinggang <mengqinggang@loongson.cn>
 Date: Mon, 2 Sep 2024 09:51:04 +0800
 Subject: [PATCH 11/15] LoongArch: Fix macro redefined warning in tls-desc.S
 Undef macro to avoid redefined warning.
 ---
 sysdeps/loongarch/dl-tlsdesc-dynamic.h |  9 +++------
 sysdeps/loongarch/dl-tlsdesc.S         | 12 ++++++++++++
 2 files changed, 15 insertions(+), 6 deletions(-)
 diff --git a/sysdeps/loongarch/dl-tlsdesc-dynamic.h b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
 index d10f4a88..3daf9910 100644
 --- a/sysdeps/loongarch/dl-tlsdesc-dynamic.h
 +++ b/sysdeps/loongarch/dl-tlsdesc-dynamic.h
@@ -19,9 +19,6 @@
    <https://www.gnu.org/licenses/>.  */
 #define FRAME_SIZE	  (-((-14 * SZREG) & ALMASK))
 -#define FRAME_SIZE_LSX	  (-((-32 * SZVREG) & ALMASK))
 -#define FRAME_SIZE_LASX	  (-((-32 * SZXREG) & ALMASK))
 -#define FRAME_SIZE_FLOAT  (-((-24 * SZFREG) & ALMASK))
 	/* Handler for dynamic TLS symbols.
 	   Prototype:
@@ -155,7 +152,7 @@ Hign address	dynamic_block1 <----- dtv5  */
 #ifdef USE_LASX
   #define V_REG_S xvst
   #define V_REG_L xvld
 -  #define V_SPACE FRAME_SIZE_LASX
 +  #define V_SPACE (-((-32 * SZXREG) & ALMASK)) /* Space for LASX registers.  */
   #define V_REG(n) $xr##n
   #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,  \
 		 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
@@ -163,7 +160,7 @@ Hign address	dynamic_block1 <----- dtv5  */
 #elif defined USE_LSX
   #define V_REG_S vst
   #define V_REG_L vld
 -  #define V_SPACE FRAME_SIZE_LSX
 +  #define V_SPACE (-((-32 * SZVREG) & ALMASK)) /* Space for LSX registers.  */
   #define V_REG(n) $vr##n
   #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,  \
 		 17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
@@ -171,7 +168,7 @@ Hign address	dynamic_block1 <----- dtv5  */
 #else
   #define V_REG_S fst.d
   #define V_REG_L fld.d
 -  #define V_SPACE FRAME_SIZE_FLOAT
 +  #define V_SPACE (-((-24 * SZFREG) & ALMASK)) /* Space for FLOAT registers.  */
   #define V_REG(n) $f##n
   #define V_REGS 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
   #define V_REGSZ SZFREG
 diff --git a/sysdeps/loongarch/dl-tlsdesc.S b/sysdeps/loongarch/dl-tlsdesc.S
 index b6cfd612..be76c07c 100644
 --- a/sysdeps/loongarch/dl-tlsdesc.S
 +++ b/sysdeps/loongarch/dl-tlsdesc.S
@@ -69,6 +69,12 @@ _dl_tlsdesc_undefweak:
 #define Lslow Lslow_lasx
 #include "dl-tlsdesc-dynamic.h"
 #undef FRAME_SIZE
 +#undef V_REG_S
 +#undef V_REG_L
 +#undef V_SPACE
 +#undef V_REG
 +#undef V_REGS
 +#undef V_REGSZ
 #undef USE_LASX
 #undef _dl_tlsdesc_dynamic
 #undef Lret
@@ -80,6 +86,12 @@ _dl_tlsdesc_undefweak:
 #define Lslow Lslow_lsx
 #include "dl-tlsdesc-dynamic.h"
 #undef FRAME_SIZE
 +#undef V_REG_S
 +#undef V_REG_L
 +#undef V_SPACE
 +#undef V_REG
 +#undef V_REGS
 +#undef V_REGSZ
 #undef USE_LSX
 #undef _dl_tlsdesc_dynamic
 #undef Lret
 -- 
 2.43.0
--- a/0012-LoongArch-Undef-__NR_fstat-and-__NR_newfstatat.patch
+++ b/0012-LoongArch-Undef-__NR_fstat-and-__NR_newfstatat.patch
@ -0,0 +1,48 @@
 From 035939a919540b7d4a3e388b47945da28b6900bd Mon Sep 17 00:00:00 2001
 From: caiyinyu <caiyinyu@loongson.cn>
 Date: Tue, 24 Sep 2024 11:09:32 +0800
 Subject: [PATCH 12/15] LoongArch: Undef __NR_fstat and __NR_newfstatat.
 In Linux 6.11, fstat and newfstatat are added back. To avoid the messy
 usage of the fstat, newfstatat, and statx system calls, we will continue
 using statx only in glibc, maintaining consistency with previous versions of
 the LoongArch-specific glibc implementation.
 Signed-off-by: caiyinyu <caiyinyu@loongson.cn>
 Reviewed-by: Xi Ruoyao <xry111@xry111.site>
 Suggested-by: Florian Weimer <fweimer@redhat.com>
 ---
 .../sysv/linux/loongarch/fixup-asm-unistd.h   | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 sysdeps/unix/sysv/linux/loongarch/fixup-asm-unistd.h
 diff --git a/sysdeps/unix/sysv/linux/loongarch/fixup-asm-unistd.h b/sysdeps/unix/sysv/linux/loongarch/fixup-asm-unistd.h
 new file mode 100644
 index 00000000..0062756b
 --- /dev/null
 +++ b/sysdeps/unix/sysv/linux/loongarch/fixup-asm-unistd.h
@@ -0,0 +1,21 @@
 +/* Regularize <asm/unistd.h> definitions.  LoongArch version.
 +   Copyright (C) 2024 Free Software Foundation, Inc.
 +
 +   The GNU C Library is free software; you can redistribute it and/or
 +   modify it under the terms of the GNU Lesser General Public
 +   License as published by the Free Software Foundation; either
 +   version 2.1 of the License, or (at your option) any later version.
 +
 +   The GNU C Library is distributed in the hope that it will be useful,
 +   but WITHOUT ANY WARRANTY; without even the implied warranty of
 +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 +   Lesser General Public License for more details.
 +
 +   You should have received a copy of the GNU Lesser General Public
 +   License along with the GNU C Library; if not, see
 +   <http://www.gnu.org/licenses/>.  */
 +
 +/* To avoid the messy usage of the fstat, newfstatat, and statx system calls, we
 +only use statx.  */
 +#undef __NR_fstat
 +#undef __NR_newfstatat
 -- 
 2.43.0
--- a/0013-From-Adhemerval-Zanella-adhemerval.zanella-linaro.or.patch
+++ b/0013-From-Adhemerval-Zanella-adhemerval.zanella-linaro.or.patch
@ -0,0 +1,60 @@
 From 2353c043849ba92c1e1f42f442a6286cd6f60438 Mon Sep 17 00:00:00 2001
 From: Xing Li <lixing@loongson.cn>
 Date: Thu, 24 Oct 2024 09:49:10 +0800
 Subject: [PATCH 13/15] From: Adhemerval Zanella
 <adhemerval.zanella@linaro.org> Date: Mon, 6 Nov 2023 17:25:46 -0300 Subject:
 [PATCH] elf: Remove LD_PROFILE for static binaries
 The _dl_non_dynamic_init does not parse LD_PROFILE, which does not
 enable profile for dlopen objects.  Since dlopen is deprecated for
 static objects, it is better to remove the support.
 It also allows to trim down libc.a of profile support.
 Checked on x86_64-linux-gnu.
 Reviewed-by: Siddhesh Poyarekar <siddhesh@sourceware.org>
 ---
 sysdeps/loongarch/dl-machine.h    | 2 ++
 sysdeps/loongarch/dl-trampoline.h | 2 ++
 2 files changed, 4 insertions(+)
 diff --git a/sysdeps/loongarch/dl-machine.h b/sysdeps/loongarch/dl-machine.h
 index d934c77f..7fa3b4e9 100644
 --- a/sysdeps/loongarch/dl-machine.h
 +++ b/sysdeps/loongarch/dl-machine.h
@@ -362,6 +362,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	 to intercept the calls to collect information.  In this case we
 	 don't store the address in the GOT so that all future calls also
 	 end in this function.  */
 +#ifdef SHARED
       if (profile != 0)
 	{
 #if !defined __loongarch_soft_float
@@ -380,6 +381,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
 	    GL(dl_profile_map) = l;
 	}
       else
 +#endif
 	{
 	  /* This function will get called to fix up the GOT entry
 	     indicated by the offset on the stack, and then jump to
 diff --git a/sysdeps/loongarch/dl-trampoline.h b/sysdeps/loongarch/dl-trampoline.h
 index e298439d..1da70aeb 100644
 --- a/sysdeps/loongarch/dl-trampoline.h
 +++ b/sysdeps/loongarch/dl-trampoline.h
@@ -126,6 +126,7 @@ ENTRY (_dl_runtime_resolve)
 	jirl	zero, t1, 0
 END (_dl_runtime_resolve)
 +#ifdef SHARED
 #include "dl-link.h"
 ENTRY (_dl_runtime_profile)
@@ -367,3 +368,4 @@ ENTRY (_dl_runtime_profile)
 	jirl	zero, ra, 0
 END (_dl_runtime_profile)
 +#endif /* SHARED */
 -- 
 2.43.0
--- a/0014-loongarch-Remove-duplicate-strnlen-in-libc.a-BZ-3178.patch
+++ b/0014-loongarch-Remove-duplicate-strnlen-in-libc.a-BZ-3178.patch
@ -0,0 +1,28 @@
 From 7ec8b739ef6b859830a445f30689a024b18b5cc6 Mon Sep 17 00:00:00 2001
 From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
 Date: Wed, 22 May 2024 10:21:10 -0300
 Subject: [PATCH 14/15] loongarch: Remove duplicate strnlen in libc.a (BZ
 31785)
 The generic version provides weak definitions of strnlen,
 which are already provided by the ifunc resolver.
 Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
 ---
 sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S | 2 ++
 1 file changed, 2 insertions(+)
 diff --git a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
 index a8296a1b..05837ce7 100644
 --- a/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
 +++ b/sysdeps/loongarch/lp64/multiarch/strnlen-aligned.S
@@ -98,5 +98,7 @@ L(out):
     jr          ra
 END(STRNLEN)
 +#if !IS_IN (libc)
 weak_alias (STRNLEN, strnlen)
 libc_hidden_builtin_def (STRNLEN)
 +#endif
 -- 
 2.43.0
--- a/0015-LoongArch-Change-tunable-for-2.38.patch
+++ b/0015-LoongArch-Change-tunable-for-2.38.patch
@ -0,0 +1,98 @@
 From 1d87ab186cd328420e27fc231a25858fca5e546e Mon Sep 17 00:00:00 2001
 From: Xing Li <lixing@loongson.cn>
 Date: Fri, 25 Oct 2024 07:31:30 +0000
 Subject: [PATCH 15/15] LoongArch: Change tunable for 2.38
 ---
 sysdeps/loongarch/cpu-tunables.c | 54 ++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 17 deletions(-)
 diff --git a/sysdeps/loongarch/cpu-tunables.c b/sysdeps/loongarch/cpu-tunables.c
 index e274e993..3b341a0d 100644
 --- a/sysdeps/loongarch/cpu-tunables.c
 +++ b/sysdeps/loongarch/cpu-tunables.c
@@ -24,14 +24,12 @@
 #include <cpu-features.h>
 #include <ldsodefs.h>
 #include <sys/auxv.h>
 -#include <dl-tunables-parse.h>
 -#include <dl-symbol-redir-ifunc.h>
 -#define CHECK_GLIBC_IFUNC_CPU(f, name, len)			\
 +#define CHECK_GLIBC_IFUNC_CPU(f, name, disable, len)			\
   _Static_assert (sizeof (#name) - 1 == len, #name " != " #len);	\
 -  if (tunable_str_comma_strcmp_cte (&f, #name))				\
 +  if (!memcmp(f, #name, len))				\
     {									\
 -      if (f.disable)							\
 +      if (disable)							\
 	GLRO(dl_larch_cpu_features).hwcap &= (~HWCAP_LOONGARCH_##name);	\
       else								\
 	GLRO(dl_larch_cpu_features).hwcap |= HWCAP_LOONGARCH_##name;	\
@@ -58,29 +56,51 @@ TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *valp)
      NOTE: the IFUNC selection may change over time.  Please check all
      multiarch implementations when experimenting.  */
 -  struct tunable_str_comma_state_t ts;
 -  tunable_str_comma_init (&ts, valp);
 +  const char *p = valp->strval, *c;
 +  size_t len;
 -  struct tunable_str_comma_t n;
 -  while (tunable_str_comma_next (&ts, &n))
 +  do
     {
 -      switch (n.len)
 +      const char *n;
 +      bool disable;
 +      size_t nl;
 +
 +      for (c = p; *c != ','; c++)
 +	if (*c == '\0')
 +	  break;
 +
 +      len = c - p;
 +      disable = *p == '-';
 +      if (disable)
 +	{
 +	  n = p + 1;
 +	  nl = len - 1;
 +	}
 +      else
 +	{
 +	  n = p;
 +	  nl = len;
 +	}
 +      switch (nl)
 	{
 	default:
 	  break;
 	case 3:
 -	  {
 -	    CHECK_GLIBC_IFUNC_CPU (n, LSX, 3);
 -	    CHECK_GLIBC_IFUNC_CPU (n, UAL, 3);
 -	  }
 +	    {
 +	      CHECK_GLIBC_IFUNC_CPU (n, LSX, disable, 3);
 +	      CHECK_GLIBC_IFUNC_CPU (n, UAL, disable, 3);
 +	    }
 	  break;
 	case 4:
 -	  {
 -	    CHECK_GLIBC_IFUNC_CPU (n, LASX, 4);
 -	  }
 +	    {
 +	      CHECK_GLIBC_IFUNC_CPU (n, LASX, disable, 4);
 +	    }
 	  break;
 	}
 +      p += len + 1;
     }
 +  while (*c != '\0');
 +
   /* Ensure that the user has not enabled any unsupported features.  */
   GLRO(dl_larch_cpu_features).hwcap &= GLRO(dl_hwcap);
 -- 
 2.45.2
--- a/glibc.spec
+++ b/glibc.spec
@ -67,7 +67,7 @@
 ##############################################################################
 Name: 	 	glibc
 Version: 	2.38
-Release: 	40
+Release: 	41
 Summary: 	The GNU libc libraries
 License:	%{all_license}
 URL: 		http://www.gnu.org/software/glibc/
@ -225,6 +225,21 @@ Patch135: nptl-Use-support-check.h-facilities-in-tst-setuid3.patch
 Patch136: libio-Attempt-wide-backup-free-only-for-non-legacy-c.patch
 Patch137: Add-crt1-2.0.o-for-glibc-2.0-compatibility-tests.patch
 Patch138: elf-Change-ldconfig-auxcache-magic-number-bug-32231.patch
 Patch139: 0001-LoongArch-Use-builtins-for-ffs-and-ffsll.patch
 Patch140: 0002-elf-Add-new-LoongArch-reloc-types-110-to-126-into-el.patch
 Patch141: 0003-LoongArch-Add-glibc.cpu.hwcap-support.patch
 Patch142: 0004-LoongArch-Add-support-for-TLS-Descriptors.patch
 Patch143: 0005-LoongArch-Fix-tst-gnu2-tls2-compiler-error.patch
 Patch144: 0006-LoongArch-Use-fcsr0-instead-of-r0-in-_FPU_-GET-SET-C.patch
 Patch145: 0007-LoongArch-Ensure-sp-16-byte-aligned-for-tlsdesc.patch
 Patch146: 0008-LoongArch-Fix-_dl_tlsdesc_dynamic-in-LSX-case.patch
 Patch147: 0009-LoongArch-Fix-tst-gnu2-tls2-test-case.patch
 Patch148: 0010-LoongArch-Add-cfi-instructions-for-_dl_tlsdesc_dynam.patch
 Patch149: 0011-LoongArch-Fix-macro-redefined-warning-in-tls-desc.S.patch
 Patch150: 0012-LoongArch-Undef-__NR_fstat-and-__NR_newfstatat.patch
 Patch151: 0013-From-Adhemerval-Zanella-adhemerval.zanella-linaro.or.patch
 Patch152: 0014-loongarch-Remove-duplicate-strnlen-in-libc.a-BZ-3178.patch
 Patch153: 0015-LoongArch-Change-tunable-for-2.38.patch
 #openEuler patch list
 Patch9000: turn-default-value-of-x86_rep_stosb_threshold_form_2K_to_1M.patch
@ -1446,6 +1461,9 @@ fi
 %endif
 %changelog
 * Wed Nov 6 2024 lixing <lixing@loongson.cn> - 2.38-41
 - update LoongArch with tlsdesc and tunable support
 * Tue Nov 5 2024 Qingqing Li <liqingqing3@huawei.com> - 2.38-40
 - elf: Change ldconfig auxcache magic number (bug 32231)
 - Add crt1-2.0.o for glibc 2.0 compatibility tests