!400 LoongArch: Sync patch from gcc upstream

From: @ticat-fp Reviewed-by: @li-yancheng Signed-off-by: @li-yancheng
2024-03-28 01:42:52 +00:00 · 2024-03-28 01:42:52 +00:00 · 1dc8dd9fd8
commit 1dc8dd9fd8
parent 18582a2742 7e7be47bfd
126 changed files with 228142 additions and 9 deletions
--- a/Libvtv-Add-loongarch-support.patch
+++ b/Libvtv-Add-loongarch-support.patch
@ -0,0 +1,59 @@
+From 62ea18c632200edbbf46b4e957bc4d997f1c66f0 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Tue, 27 Sep 2022 15:28:43 +0800
+Subject: [PATCH 024/124] Libvtv: Add loongarch support.
+
+The loongarch64 specification permits page sizes of 4KiB, 16KiB and 64KiB,
+but only 16KiB pages are supported for now.
+
+Co-Authored-By: qijingwen <qijingwen@loongson.cn>
+
+include/ChangeLog:
+
+	* vtv-change-permission.h (defined): Determines whether the macro
+	__loongarch_lp64 is defined
+	(VTV_PAGE_SIZE): Set VTV_PAGE_SIZE to 16KiB for loongarch64.
+
+libvtv/ChangeLog:
+
+	* configure.tgt: Add loongarch support.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ include/vtv-change-permission.h | 4 ++++
+ libvtv/configure.tgt            | 3 +++
+ 2 files changed, 7 insertions(+)
+
+diff --git a/include/vtv-change-permission.h b/include/vtv-change-permission.h
+index 70bdad92b..e7b9294a0 100644
+--- a/include/vtv-change-permission.h
+++ b/include/vtv-change-permission.h
+@@ -48,6 +48,10 @@ extern void __VLTChangePermission (int);
+ #else 
+ #if defined(__sun__) && defined(__svr4__) && defined(__sparc__)
+ #define VTV_PAGE_SIZE 8192
+#elif defined(__loongarch_lp64)
+/* The page size is configurable by the kernel to be 4, 16 or 64 KiB.
+   For now, only the default page size of 16KiB is supported.  */
+#define VTV_PAGE_SIZE 16384
+ #else
+ #define VTV_PAGE_SIZE 4096
+ #endif
+diff --git a/libvtv/configure.tgt b/libvtv/configure.tgt
+index aa2a3f675..6cdd1e97a 100644
+--- a/libvtv/configure.tgt
+++ b/libvtv/configure.tgt
+@@ -50,6 +50,9 @@ case "${target}" in
+ 	;;
+   x86_64-*-darwin[1]* | i?86-*-darwin[1]*)
+ 	;;
+  loongarch*-*-linux*)
+	VTV_SUPPORTED=yes
+	;;
+   *)
+ 	;;
+ esac
+-- 
+2.33.0
+
--- a/LoongArch-Add-Loongson-ASX-base-instruction-support.patch
+++ b/LoongArch-Add-Loongson-ASX-base-instruction-support.patch
--- a/LoongArch-Add-Loongson-ASX-directive-builtin-functio.patch
+++ b/LoongArch-Add-Loongson-ASX-directive-builtin-functio.patch
--- a/LoongArch-Add-Loongson-SX-base-instruction-support.patch
+++ b/LoongArch-Add-Loongson-SX-base-instruction-support.patch
--- a/LoongArch-Add-Loongson-SX-directive-builtin-function.patch
+++ b/LoongArch-Add-Loongson-SX-directive-builtin-function.patch
--- a/LoongArch-Add-built-in-functions-description-of-Loon.patch
+++ b/LoongArch-Add-built-in-functions-description-of-Loon.patch
@ -0,0 +1,166 @@
+From 7cfe6e057045ac794afbe9097b1b211c0e1ea723 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Thu, 6 Apr 2023 16:02:07 +0800
+Subject: [PATCH 039/124] LoongArch: Add built-in functions description of
+ LoongArch Base instruction set instructions.
+
+gcc/ChangeLog:
+
+	* doc/extend.texi: Add section for LoongArch Base Built-in functions.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/doc/extend.texi | 129 ++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 129 insertions(+)
+
+diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
+index 3c101ca89..1d1bac255 100644
+--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
+@@ -14678,6 +14678,7 @@ instructions, but allow the compiler to schedule those calls.
+ * Blackfin Built-in Functions::
+ * BPF Built-in Functions::
+ * FR-V Built-in Functions::
+* LoongArch Base Built-in Functions::
+ * MIPS DSP Built-in Functions::
+ * MIPS Paired-Single Support::
+ * MIPS Loongson Built-in Functions::
+@@ -16128,6 +16129,134 @@ Use the @code{nldub} instruction to load the contents of address @var{x}
+ into the data cache.  The instruction is issued in slot I1@.
+ @end table
+ 
+@node LoongArch Base Built-in Functions
+@subsection LoongArch Base Built-in Functions
+
+These built-in functions are available for LoongArch.
+
+Data Type Description:
+@itemize
+@item @code{imm0_31}, a compile-time constant in range 0 to 31;
+@item @code{imm0_16383}, a compile-time constant in range 0 to 16383;
+@item @code{imm0_32767}, a compile-time constant in range 0 to 32767;
+@item @code{imm_n2048_2047}, a compile-time constant in range -2048 to 2047;
+@end itemize
+
+The intrinsics provided are listed below:
+@smallexample
+    unsigned int __builtin_loongarch_movfcsr2gr (imm0_31)
+    void __builtin_loongarch_movgr2fcsr (imm0_31, unsigned int)
+    void __builtin_loongarch_cacop_d (imm0_31, unsigned long int, imm_n2048_2047)
+    unsigned int __builtin_loongarch_cpucfg (unsigned int)
+    void __builtin_loongarch_asrtle_d (long int, long int)
+    void __builtin_loongarch_asrtgt_d (long int, long int)
+    long int __builtin_loongarch_lddir_d (long int, imm0_31)
+    void __builtin_loongarch_ldpte_d (long int, imm0_31)
+
+    int __builtin_loongarch_crc_w_b_w (char, int)
+    int __builtin_loongarch_crc_w_h_w (short, int)
+    int __builtin_loongarch_crc_w_w_w (int, int)
+    int __builtin_loongarch_crc_w_d_w (long int, int)
+    int __builtin_loongarch_crcc_w_b_w (char, int)
+    int __builtin_loongarch_crcc_w_h_w (short, int)
+    int __builtin_loongarch_crcc_w_w_w (int, int)
+    int __builtin_loongarch_crcc_w_d_w (long int, int)
+
+    unsigned int __builtin_loongarch_csrrd_w (imm0_16383)
+    unsigned int __builtin_loongarch_csrwr_w (unsigned int, imm0_16383)
+    unsigned int __builtin_loongarch_csrxchg_w (unsigned int, unsigned int, imm0_16383)
+    unsigned long int __builtin_loongarch_csrrd_d (imm0_16383)
+    unsigned long int __builtin_loongarch_csrwr_d (unsigned long int, imm0_16383)
+    unsigned long int __builtin_loongarch_csrxchg_d (unsigned long int, unsigned long int, imm0_16383)
+
+    unsigned char __builtin_loongarch_iocsrrd_b (unsigned int)
+    unsigned short __builtin_loongarch_iocsrrd_h (unsigned int)
+    unsigned int __builtin_loongarch_iocsrrd_w (unsigned int)
+    unsigned long int __builtin_loongarch_iocsrrd_d (unsigned int)
+    void __builtin_loongarch_iocsrwr_b (unsigned char, unsigned int)
+    void __builtin_loongarch_iocsrwr_h (unsigned short, unsigned int)
+    void __builtin_loongarch_iocsrwr_w (unsigned int, unsigned int)
+    void __builtin_loongarch_iocsrwr_d (unsigned long int, unsigned int)
+
+    void __builtin_loongarch_dbar (imm0_32767)
+    void __builtin_loongarch_ibar (imm0_32767)
+
+    void __builtin_loongarch_syscall (imm0_32767)
+    void __builtin_loongarch_break (imm0_32767)
+@end smallexample
+
+@emph{Note:} The control registers are divided into 32-bit and 64-bit ones,
+but the access instructions do not distinguish between them, so GCC renames
+the control instructions when implementing the intrinsics.
+
+Taking the csrrd instruction as an example, the built-in functions are implemented as follows:
+@smallexample
+  __builtin_loongarch_csrrd_w  // Use when reading a 32-bit control register.
+  __builtin_loongarch_csrrd_d  // Use when reading a 64-bit control register.
+@end smallexample
+
+For convenience, the built-in functions are wrapped.  The wrapper
+functions and the types @code{__drdtime_t} and @code{__rdtime_t} are
+defined in @code{larchintrin.h}, so you need to include
+@code{larchintrin.h} before calling any of the following functions.
+
+@smallexample
+     typedef struct drdtime@{
+            unsigned long dvalue;
+            unsigned long dtimeid;
+     @} __drdtime_t;
+
+     typedef struct rdtime@{
+            unsigned int value;
+            unsigned int timeid;
+     @} __rdtime_t;
+@end smallexample
+
+@smallexample
+    __drdtime_t __rdtime_d (void)
+    __rdtime_t  __rdtimel_w (void)
+    __rdtime_t  __rdtimeh_w (void)
+    unsigned int  __movfcsr2gr (imm0_31)
+    void __movgr2fcsr (imm0_31, unsigned int)
+    void __cacop_d (imm0_31, unsigned long, imm_n2048_2047)
+    unsigned int  __cpucfg (unsigned int)
+    void __asrtle_d (long int, long int)
+    void __asrtgt_d (long int, long int)
+    long int  __lddir_d (long int, imm0_31)
+    void __ldpte_d (long int, imm0_31)
+
+    int  __crc_w_b_w (char, int)
+    int  __crc_w_h_w (short, int)
+    int  __crc_w_w_w (int, int)
+    int  __crc_w_d_w (long int, int)
+    int  __crcc_w_b_w (char, int)
+    int  __crcc_w_h_w (short, int)
+    int  __crcc_w_w_w (int, int)
+    int  __crcc_w_d_w (long int, int)
+
+    unsigned int  __csrrd_w (imm0_16383)
+    unsigned int  __csrwr_w (unsigned int, imm0_16383)
+    unsigned int  __csrxchg_w (unsigned int, unsigned int, imm0_16383)
+    unsigned long  __csrrd_d (imm0_16383)
+    unsigned long  __csrwr_d (unsigned long, imm0_16383)
+    unsigned long  __csrxchg_d (unsigned long, unsigned long, imm0_16383)
+
+    unsigned char   __iocsrrd_b (unsigned int)
+    unsigned short  __iocsrrd_h (unsigned int)
+    unsigned int  __iocsrrd_w (unsigned int)
+    unsigned long  __iocsrrd_d (unsigned int)
+    void __iocsrwr_b (unsigned char, unsigned int)
+    void __iocsrwr_h (unsigned short, unsigned int)
+    void __iocsrwr_w (unsigned int, unsigned int)
+    void __iocsrwr_d (unsigned long, unsigned int)
+
+    void __dbar (imm0_32767)
+    void __ibar (imm0_32767)
+
+    void __syscall (imm0_32767)
+    void __break (imm0_32767)
+@end smallexample
+
+ @node MIPS DSP Built-in Functions
+ @subsection MIPS DSP Built-in Functions
+ 
+-- 
+2.33.0
+
--- a/LoongArch-Add-fcopysign-instructions.patch
+++ b/LoongArch-Add-fcopysign-instructions.patch
@ -0,0 +1,107 @@
+From 41a4945886631a1b2898ae957389d5db18a07141 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 4 Nov 2022 15:12:22 +0800
+Subject: [PATCH 025/124] LoongArch: Add fcopysign instructions
+
+Add fcopysign.{s,d} with the names copysign{sf,df}3 so GCC will expand
+__builtin_copysign{f,} to a single instruction.
+
+Link: https://sourceware.org/pipermail/libc-alpha/2022-November/143177.html
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (UNSPEC_FCOPYSIGN): New unspec.
+	(type): Add fcopysign.
+	(copysign<mode>3): New instruction template.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/fcopysign.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.md             | 22 ++++++++++++++++++-
+ .../gcc.target/loongarch/fcopysign.c          | 16 ++++++++++++++
+ 2 files changed, 37 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/fcopysign.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 214b14bdd..bda34d0f3 100644
+--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
+@@ -37,6 +37,7 @@
+   UNSPEC_FCLASS
+   UNSPEC_FMAX
+   UNSPEC_FMIN
+  UNSPEC_FCOPYSIGN
+ 
+   ;; Override return address for exception handling.
+   UNSPEC_EH_RETURN
+@@ -214,6 +215,7 @@
+ ;; fabs		floating point absolute value
+ ;; fneg		floating point negation
+ ;; fcmp		floating point compare
+;; fcopysign	floating point copysign
+ ;; fcvt		floating point convert
+ ;; fsqrt	floating point square root
+ ;; frsqrt       floating point reciprocal square root
+@@ -226,7 +228,7 @@
+   "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
+    prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
+    shift,slt,signext,clz,trap,imul,idiv,move,
+-   fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcvt,fsqrt,
+   fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fsqrt,
+    frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
+   (cond [(eq_attr "jirl" "!unset") (const_string "call")
+ 	 (eq_attr "got" "load") (const_string "load")
+@@ -976,6 +978,24 @@
+    (set_attr "mode" "<UNITMODE>")])
+ 
+ ;;
+;;  ....................
+;;
+;;	FLOATING POINT COPYSIGN
+;;
+;;  ....................
+
+(define_insn "copysign<mode>3"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
+		      (match_operand:ANYF 2 "register_operand" "f")]
+		     UNSPEC_FCOPYSIGN))]
+  "TARGET_HARD_FLOAT"
+  "fcopysign.<fmt>\t%0,%1,%2"
+  [(set_attr "type" "fcopysign")
+   (set_attr "mode" "<UNITMODE>")])
+
+
+;;
+ ;;  ...................
+ ;;
+ ;;  Count leading zeroes.
+diff --git a/gcc/testsuite/gcc.target/loongarch/fcopysign.c b/gcc/testsuite/gcc.target/loongarch/fcopysign.c
+new file mode 100644
+index 000000000..058ba2cf5
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/fcopysign.c
+@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mdouble-float" } */
+/* { dg-final { scan-assembler "fcopysign\\.s" } } */
+/* { dg-final { scan-assembler "fcopysign\\.d" } } */
+
+double
+my_copysign (double a, double b)
+{
+  return __builtin_copysign (a, b);
+}
+
+float
+my_copysignf (float a, float b)
+{
+  return __builtin_copysignf (a, b);
+}
+-- 
+2.33.0
+
--- a/LoongArch-Add-flogb.-s-d-instructions-and-expand-log.patch
+++ b/LoongArch-Add-flogb.-s-d-instructions-and-expand-log.patch
@ -0,0 +1,123 @@
+From 2ae587a86bba31b91a127e353c31c9f861ff5326 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 8 Nov 2022 13:42:20 +0800
+Subject: [PATCH 030/124] LoongArch: Add flogb.{s,d} instructions and expand
+ logb{sf,df}2
+
+On LoongArch, flogb instructions extract the exponent of a non-negative
+floating point value, but produce NaN for negative values.  So we need
+to add a fabs instruction when we expand logb.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (UNSPEC_FLOGB): New unspec.
+	(type): Add flogb.
+	(logb_non_negative<mode>2): New instruction template.
+	(logb<mode>2): New define_expand.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/flogb.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.md          | 35 ++++++++++++++++++++--
+ gcc/testsuite/gcc.target/loongarch/flogb.c | 18 +++++++++++
+ 2 files changed, 51 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/flogb.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index c141c9add..682ab9617 100644
+--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
+@@ -42,6 +42,7 @@
+   UNSPEC_FTINTRM
+   UNSPEC_FTINTRP
+   UNSPEC_FSCALEB
+  UNSPEC_FLOGB
+ 
+   ;; Override return address for exception handling.
+   UNSPEC_EH_RETURN
+@@ -217,6 +218,7 @@
+ ;; fdiv		floating point divide
+ ;; frdiv	floating point reciprocal divide
+ ;; fabs		floating point absolute value
+;; flogb	floating point exponent extract
+ ;; fneg		floating point negation
+ ;; fcmp		floating point compare
+ ;; fcopysign	floating point copysign
+@@ -233,8 +235,8 @@
+   "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
+    prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
+    shift,slt,signext,clz,trap,imul,idiv,move,
+-   fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fscaleb,
+-   fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
+   fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,flogb,fneg,fcmp,fcopysign,fcvt,
+   fscaleb,fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
+   (cond [(eq_attr "jirl" "!unset") (const_string "call")
+ 	 (eq_attr "got" "load") (const_string "load")
+ 
+@@ -1039,6 +1041,35 @@
+    (set_attr "mode" "<UNITMODE>")])
+ 
+ ;;
+;;  ....................
+;;
+;;	FLOATING POINT EXPONENT EXTRACT
+;;
+;;  ....................
+
+(define_insn "logb_non_negative<mode>2"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
+		     UNSPEC_FLOGB))]
+  "TARGET_HARD_FLOAT"
+  "flogb.<fmt>\t%0,%1"
+  [(set_attr "type" "flogb")
+   (set_attr "mode" "<UNITMODE>")])
+
+(define_expand "logb<mode>2"
+  [(set (match_operand:ANYF 0 "register_operand")
+	(unspec:ANYF [(abs:ANYF (match_operand:ANYF 1 "register_operand"))]
+		     UNSPEC_FLOGB))]
+  "TARGET_HARD_FLOAT"
+{
+  rtx tmp = gen_reg_rtx (<MODE>mode);
+
+  emit_insn (gen_abs<mode>2 (tmp, operands[1]));
+  emit_insn (gen_logb_non_negative<mode>2 (operands[0], tmp));
+  DONE;
+})
+
+;;
+ ;;  ...................
+ ;;
+ ;;  Count leading zeroes.
+diff --git a/gcc/testsuite/gcc.target/loongarch/flogb.c b/gcc/testsuite/gcc.target/loongarch/flogb.c
+new file mode 100644
+index 000000000..1daefe54e
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/flogb.c
+@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mdouble-float -fno-math-errno" } */
+/* { dg-final { scan-assembler "fabs\\.s" } } */
+/* { dg-final { scan-assembler "fabs\\.d" } } */
+/* { dg-final { scan-assembler "flogb\\.s" } } */
+/* { dg-final { scan-assembler "flogb\\.d" } } */
+
+double
+my_logb (double a)
+{
+  return __builtin_logb (a);
+}
+
+float
+my_logbf (float a)
+{
+  return __builtin_logbf (a);
+}
+-- 
+2.33.0
+
--- a/LoongArch-Add-fscaleb.-s-d-instructions-as-ldexp-sf-.patch
+++ b/LoongArch-Add-fscaleb.-s-d-instructions-as-ldexp-sf-.patch
@ -0,0 +1,155 @@
+From e3d69a3b7a4e00e8bba88b8b4abaa1c17bc083d5 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 8 Nov 2022 12:14:35 +0800
+Subject: [PATCH 029/124] LoongArch: Add fscaleb.{s,d} instructions as
+ ldexp{sf,df}3
+
+This allows optimizing __builtin_ldexp{,f} and __builtin_scalbn{,f} with
+-fno-math-errno.
+
+IMODE is added because we can't hard code SI for operand 2: the fscaleb.d
+instruction always takes the high half of both source registers into
+account.  See my_ldexp_long in the test case.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (UNSPEC_FSCALEB): New unspec.
+	(type): Add fscaleb.
+	(IMODE): New mode attr.
+	(ldexp<mode>3): New instruction template.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/fscaleb.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.md            | 26 ++++++++++-
+ gcc/testsuite/gcc.target/loongarch/fscaleb.c | 48 ++++++++++++++++++++
+ 2 files changed, 72 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/fscaleb.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index eb127c346..c141c9add 100644
+--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
+@@ -41,6 +41,7 @@
+   UNSPEC_FTINT
+   UNSPEC_FTINTRM
+   UNSPEC_FTINTRP
+  UNSPEC_FSCALEB
+ 
+   ;; Override return address for exception handling.
+   UNSPEC_EH_RETURN
+@@ -220,6 +221,7 @@
+ ;; fcmp		floating point compare
+ ;; fcopysign	floating point copysign
+ ;; fcvt		floating point convert
+;; fscaleb	floating point scale
+ ;; fsqrt	floating point square root
+ ;; frsqrt       floating point reciprocal square root
+ ;; multi	multiword sequence (or user asm statements)
+@@ -231,8 +233,8 @@
+   "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
+    prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
+    shift,slt,signext,clz,trap,imul,idiv,move,
+-   fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fsqrt,
+-   frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
+   fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fscaleb,
+   fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
+   (cond [(eq_attr "jirl" "!unset") (const_string "call")
+ 	 (eq_attr "got" "load") (const_string "load")
+ 
+@@ -418,6 +420,10 @@
+ ;; the controlling mode.
+ (define_mode_attr HALFMODE [(DF "SI") (DI "SI") (TF "DI")])
+ 
+;; This attribute gives the integer mode that has the same size of a
+;; floating-point mode.
+(define_mode_attr IMODE [(SF "SI") (DF "DI")])
+
+ ;; This code iterator allows signed and unsigned widening multiplications
+ ;; to use the same template.
+ (define_code_iterator any_extend [sign_extend zero_extend])
+@@ -1014,7 +1020,23 @@
+   "fcopysign.<fmt>\t%0,%1,%2"
+   [(set_attr "type" "fcopysign")
+    (set_attr "mode" "<UNITMODE>")])
+
+;;
+;;  ....................
+;;
+;;	FLOATING POINT SCALE
+;;
+;;  ....................
+ 
+(define_insn "ldexp<mode>3"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(unspec:ANYF [(match_operand:ANYF    1 "register_operand" "f")
+		      (match_operand:<IMODE> 2 "register_operand" "f")]
+		     UNSPEC_FSCALEB))]
+  "TARGET_HARD_FLOAT"
+  "fscaleb.<fmt>\t%0,%1,%2"
+  [(set_attr "type" "fscaleb")
+   (set_attr "mode" "<UNITMODE>")])
+ 
+ ;;
+ ;;  ...................
+diff --git a/gcc/testsuite/gcc.target/loongarch/fscaleb.c b/gcc/testsuite/gcc.target/loongarch/fscaleb.c
+new file mode 100644
+index 000000000..f18470fbb
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/fscaleb.c
+@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno" } */
+/* { dg-final { scan-assembler-times "fscaleb\\.s" 3 } } */
+/* { dg-final { scan-assembler-times "fscaleb\\.d" 4 } } */
+/* { dg-final { scan-assembler-times "slli\\.w" 1 } } */
+
+double
+my_scalbln (double a, long b)
+{
+  return __builtin_scalbln (a, b);
+}
+
+double
+my_scalbn (double a, int b)
+{
+  return __builtin_scalbn (a, b);
+}
+
+double
+my_ldexp (double a, int b)
+{
+  return __builtin_ldexp (a, b);
+}
+
+float
+my_scalblnf (float a, long b)
+{
+  return __builtin_scalblnf (a, b);
+}
+
+float
+my_scalbnf (float a, int b)
+{
+  return __builtin_scalbnf (a, b);
+}
+
+float
+my_ldexpf (float a, int b)
+{
+  return __builtin_ldexpf (a, b);
+}
+
+/* b must be sign-extended */
+double
+my_ldexp_long (double a, long b)
+{
+  return __builtin_ldexp (a, b);
+}
+-- 
+2.33.0
+
--- a/LoongArch-Add-ftint-rm-rp-.-w-l-.-s-d-instructions.patch
+++ b/LoongArch-Add-ftint-rm-rp-.-w-l-.-s-d-instructions.patch
@ -0,0 +1,220 @@
+From 76d599c6d8f9cf78b51cd76a7ca8fbe11e2cda2b Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 6 Nov 2022 23:16:49 +0800
+Subject: [PATCH 028/124] LoongArch: Add ftint{,rm,rp}.{w,l}.{s,d} instructions
+
+This allows optimizing the following builtins with -fno-math-errno:
+
+- __builtin_lrint{,f}
+- __builtin_lfloor{,f}
+- __builtin_lceil{,f}
+
+Inspired by
+https://gcc.gnu.org/pipermail/gcc-patches/2022-November/605287.html.
+
+ANYFI is added so the compiler won't try ftint.l.s if -mfpu=32.  If we
+simply used GPR here an ICE would be triggered with __builtin_lrintf
+and -mfpu=32.
+
+ftint{rm,rp} instructions may raise an inexact exception, so they can't be
+used with -ftrapping-math -fno-fp-int-builtin-inexact.
+
+Note that the .w.{s,d} variants are not tested because we don't support
+ILP32 for now.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (UNSPEC_FTINT): New unspec.
+	(UNSPEC_FTINTRM): Likewise.
+	(UNSPEC_FTINTRP): Likewise.
+	(LRINT): New define_int_iterator.
+	(lrint_pattern): New define_int_attr.
+	(lrint_submenmonic): Likewise.
+	(lrint_allow_inexact): Likewise.
+	(ANYFI): New define_mode_iterator.
+	(lrint<ANYF><ANYFI>): New instruction template.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/ftint.c: New test.
+	* gcc.target/loongarch/ftint-no-inexact.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.md             | 34 ++++++++++++++
+ .../gcc.target/loongarch/ftint-no-inexact.c   | 44 +++++++++++++++++++
+ gcc/testsuite/gcc.target/loongarch/ftint.c    | 44 +++++++++++++++++++
+ 3 files changed, 122 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/ftint.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index a14ab14ac..eb127c346 100644
+--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
+@@ -38,6 +38,9 @@
+   UNSPEC_FMAX
+   UNSPEC_FMIN
+   UNSPEC_FCOPYSIGN
+  UNSPEC_FTINT
+  UNSPEC_FTINTRM
+  UNSPEC_FTINTRP
+ 
+   ;; Override return address for exception handling.
+   UNSPEC_EH_RETURN
+@@ -374,6 +377,11 @@
+ (define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT")
+ 			    (DF "TARGET_DOUBLE_FLOAT")])
+ 
+;; Iterator for fixed-point modes which can be held in a hardware
+;; floating-point register.
+(define_mode_iterator ANYFI [(SI "TARGET_HARD_FLOAT")
+			     (DI "TARGET_DOUBLE_FLOAT")])
+
+ ;; A mode for which moves involving FPRs may need to be split.
+ (define_mode_iterator SPLITF
+   [(DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
+@@ -515,6 +523,19 @@
+ (define_code_attr sel [(eq "masknez") (ne "maskeqz")])
+ (define_code_attr selinv [(eq "maskeqz") (ne "masknez")])
+ 
+;; Iterator and attributes for floating-point to fixed-point conversion
+;; instructions.
+(define_int_iterator LRINT [UNSPEC_FTINT UNSPEC_FTINTRM UNSPEC_FTINTRP])
+(define_int_attr lrint_pattern [(UNSPEC_FTINT "lrint")
+				(UNSPEC_FTINTRM "lfloor")
+				(UNSPEC_FTINTRP "lceil")])
+(define_int_attr lrint_submenmonic [(UNSPEC_FTINT "")
+				    (UNSPEC_FTINTRM "rm")
+				    (UNSPEC_FTINTRP "rp")])
+(define_int_attr lrint_allow_inexact [(UNSPEC_FTINT "1")
+				      (UNSPEC_FTINTRM "0")
+				      (UNSPEC_FTINTRP "0")])
+
+ ;;
+ ;;  ....................
+ ;;
+@@ -2022,6 +2043,19 @@
+   [(set_attr "type" "fcvt")
+    (set_attr "mode" "<MODE>")])
+ 
+;; Convert floating-point numbers to integers
+(define_insn "<lrint_pattern><ANYF:mode><ANYFI:mode>2"
+  [(set (match_operand:ANYFI 0 "register_operand" "=f")
+	(unspec:ANYFI [(match_operand:ANYF 1 "register_operand" "f")]
+		      LRINT))]
+  "TARGET_HARD_FLOAT &&
+   (<lrint_allow_inexact>
+    || flag_fp_int_builtin_inexact
+    || !flag_trapping_math)"
+  "ftint<lrint_submenmonic>.<ANYFI:ifmt>.<ANYF:fmt> %0,%1"
+  [(set_attr "type" "fcvt")
+   (set_attr "mode" "<ANYF:MODE>")])
+
+ ;; Load the low word of operand 0 with operand 1.
+ (define_insn "load_low<mode>"
+   [(set (match_operand:SPLITF 0 "register_operand" "=f,f")
+diff --git a/gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c
+new file mode 100644
+index 000000000..88b83a9c0
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c
+@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact" } */
+/* { dg-final { scan-assembler "ftint\\.l\\.s" } } */
+/* { dg-final { scan-assembler "ftint\\.l\\.d" } } */
+/* { dg-final { scan-assembler-not "ftintrm\\.l\\.s" } } */
+/* { dg-final { scan-assembler-not "ftintrm\\.l\\.d" } } */
+/* { dg-final { scan-assembler-not "ftintrp\\.l\\.s" } } */
+/* { dg-final { scan-assembler-not "ftintrp\\.l\\.d" } } */
+
+long
+my_lrint (double a)
+{
+  return __builtin_lrint (a);
+}
+
+long
+my_lrintf (float a)
+{
+  return __builtin_lrintf (a);
+}
+
+long
+my_lfloor (double a)
+{
+  return __builtin_lfloor (a);
+}
+
+long
+my_lfloorf (float a)
+{
+  return __builtin_lfloorf (a);
+}
+
+long
+my_lceil (double a)
+{
+  return __builtin_lceil (a);
+}
+
+long
+my_lceilf (float a)
+{
+  return __builtin_lceilf (a);
+}
+diff --git a/gcc/testsuite/gcc.target/loongarch/ftint.c b/gcc/testsuite/gcc.target/loongarch/ftint.c
+new file mode 100644
+index 000000000..7a326a454
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/ftint.c
+@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact" } */
+/* { dg-final { scan-assembler "ftint\\.l\\.s" } } */
+/* { dg-final { scan-assembler "ftint\\.l\\.d" } } */
+/* { dg-final { scan-assembler "ftintrm\\.l\\.s" } } */
+/* { dg-final { scan-assembler "ftintrm\\.l\\.d" } } */
+/* { dg-final { scan-assembler "ftintrp\\.l\\.s" } } */
+/* { dg-final { scan-assembler "ftintrp\\.l\\.d" } } */
+
+long
+my_lrint (double a)
+{
+  return __builtin_lrint (a);
+}
+
+long
+my_lrintf (float a)
+{
+  return __builtin_lrintf (a);
+}
+
+long
+my_lfloor (double a)
+{
+  return __builtin_lfloor (a);
+}
+
+long
+my_lfloorf (float a)
+{
+  return __builtin_lfloorf (a);
+}
+
+long
+my_lceil (double a)
+{
+  return __builtin_lceil (a);
+}
+
+long
+my_lceilf (float a)
+{
+  return __builtin_lceilf (a);
+}
+-- 
+2.33.0
+
--- a/LoongArch-Add-new-code-model-medium.patch
+++ b/LoongArch-Add-new-code-model-medium.patch
--- a/LoongArch-Add-prefetch-instructions.patch
+++ b/LoongArch-Add-prefetch-instructions.patch
@ -0,0 +1,158 @@
+From 52a41006c2e8141a42de93ffcc2c040e034244b2 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Wed, 16 Nov 2022 09:25:14 +0800
+Subject: [PATCH 031/124] LoongArch: Add prefetch instructions.
+
+Enable sw prefetching at -O3 and higher.
+
+Co-Authored-By: xujiahao <xujiahao@loongson.cn>
+
+gcc/ChangeLog:
+
+	* config/loongarch/constraints.md (ZD): New constraint.
+	* config/loongarch/loongarch-def.c: Initial number of parallel prefetch.
+	* config/loongarch/loongarch-tune.h (struct loongarch_cache):
+	Define number of parallel prefetch.
+	* config/loongarch/loongarch.cc (loongarch_option_override_internal):
+	Set up parameters to be used in prefetching algorithm.
+	* config/loongarch/loongarch.md (prefetch): New template.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/constraints.md   | 10 ++++++++++
+ gcc/config/loongarch/loongarch-def.c  |  2 ++
+ gcc/config/loongarch/loongarch-tune.h |  1 +
+ gcc/config/loongarch/loongarch.cc     | 28 +++++++++++++++++++++++++++
+ gcc/config/loongarch/loongarch.md     | 14 ++++++++++++++
+ 5 files changed, 55 insertions(+)
+
+diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
+index 43cb7b5f0..46f7f63ae 100644
+--- a/gcc/config/loongarch/constraints.md
+++ b/gcc/config/loongarch/constraints.md
+@@ -86,6 +86,10 @@
+ ;;    "ZB"
+ ;;      "An address that is held in a general-purpose register.
+ ;;      The offset is zero"
+;;    "ZD"
+;;	"An address operand whose address is formed by a base register
+;;	 and offset that is suitable for use in instructions with the same
+;;	 addressing mode as @code{preld}."
+ ;; "<" "Matches a pre-dec or post-dec operand." (Global non-architectural)
+ ;; ">" "Matches a pre-inc or post-inc operand." (Global non-architectural)
+ 
+@@ -190,3 +194,9 @@
+   The offset is zero"
+   (and (match_code "mem")
+        (match_test "REG_P (XEXP (op, 0))")))
+
+(define_address_constraint "ZD"
+  "An address operand whose address is formed by a base register
+   and offset that is suitable for use in instructions with the same
+   addressing mode as @code{preld}."
+   (match_test "loongarch_12bit_offset_address_p (op, mode)"))
+diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
+index cbf995d81..80ab10a52 100644
+--- a/gcc/config/loongarch/loongarch-def.c
+++ b/gcc/config/loongarch/loongarch-def.c
+@@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
+       .l1d_line_size = 64,
+       .l1d_size = 64,
+       .l2d_size = 256,
+      .simultaneous_prefetches = 4,
+   },
+   [CPU_LA464] = {
+       .l1d_line_size = 64,
+       .l1d_size = 64,
+       .l2d_size = 256,
+      .simultaneous_prefetches = 4,
+   },
+ };
+ 
+diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
+index 6f3530f5c..8e3eb2947 100644
+--- a/gcc/config/loongarch/loongarch-tune.h
+++ b/gcc/config/loongarch/loongarch-tune.h
+@@ -45,6 +45,7 @@ struct loongarch_cache {
+     int l1d_line_size;  /* bytes */
+     int l1d_size;       /* KiB */
+     int l2d_size;       /* kiB */
+    int simultaneous_prefetches; /* number of parallel prefetch */
+ };
+ 
+ #endif /* LOONGARCH_TUNE_H */
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index d552b162a..622c9435b 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3.  If not see
+ #include "context.h"
+ #include "builtins.h"
+ #include "rtl-iter.h"
+#include "opts.h"
+ 
+ /* This file should be included last.  */
+ #include "target-def.h"
+@@ -6099,6 +6100,33 @@ loongarch_option_override_internal (struct gcc_options *opts)
+   if (loongarch_branch_cost == 0)
+     loongarch_branch_cost = loongarch_cost->branch_cost;
+ 
+  /* Set up parameters to be used in prefetching algorithm.  */
+  int simultaneous_prefetches
+    = loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches;
+
+  SET_OPTION_IF_UNSET (opts, &global_options_set,
+		       param_simultaneous_prefetches,
+		       simultaneous_prefetches);
+
+  SET_OPTION_IF_UNSET (opts, &global_options_set,
+		       param_l1_cache_line_size,
+		       loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size);
+
+  SET_OPTION_IF_UNSET (opts, &global_options_set,
+		       param_l1_cache_size,
+		       loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size);
+
+  SET_OPTION_IF_UNSET (opts, &global_options_set,
+		       param_l2_cache_size,
+		       loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size);
+
+
+  /* Enable sw prefetching at -O3 and higher.  */
+  if (opts->x_flag_prefetch_loop_arrays < 0
+      && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
+      && !opts->x_optimize_size)
+    opts->x_flag_prefetch_loop_arrays = 1;
+
+   if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
+     error ("%qs cannot be used for compiling a shared library",
+ 	   "-mdirect-extern-access");
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 682ab9617..2fda53819 100644
+--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
+@@ -3282,6 +3282,20 @@
+ ;;  ....................
+ ;;
+ 
+(define_insn "prefetch"
+  [(prefetch (match_operand 0 "address_operand" "ZD")
+	     (match_operand 1 "const_int_operand" "n")
+	     (match_operand 2 "const_int_operand" "n"))]
+  ""
+{
+  switch (INTVAL (operands[1]))
+  {
+    case 0: return "preld\t0,%a0";
+    case 1: return "preld\t8,%a0";
+    default: gcc_unreachable ();
+  }
+})
+
+ (define_insn "nop"
+   [(const_int 0)]
+   ""
+-- 
+2.33.0
+
--- a/LoongArch-Add-support-code-model-extreme.patch
+++ b/LoongArch-Add-support-code-model-extreme.patch
@ -0,0 +1,794 @@
+From b1c92fb9dab678e4c9c23fa77185011494d145b9 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Thu, 18 Aug 2022 17:26:13 +0800
+Subject: [PATCH 011/124] LoongArch: Add support code model extreme.
+
+Use five instructions to calculate a signed 64-bit offset relative to the pc.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch-opts.cc: Allow cmodel to be extreme.
+	* config/loongarch/loongarch.cc (loongarch_call_tls_get_addr):
+	Add extreme support for TLS GD and LD types.
+	(loongarch_legitimize_tls_address): Add extreme support for TLS LE
+	and IE.
+	(loongarch_split_symbol): When compiling with -mcmodel=extreme,
+	the symbol address will be obtained through five instructions.
+	(loongarch_print_operand_reloc): Add support.
+	(loongarch_print_operand): Add support.
+	(loongarch_print_operand_address): Add support.
+	(loongarch_option_override_internal): Set '-mcmodel=extreme' option
+	incompatible with '-mno-explicit-relocs'.
+	* config/loongarch/loongarch.md (@lui_l_hi20<mode>):
+	Loads bits 12-31 of data into registers.
+	(lui_h_lo20): Load bits 32-51 of the data and keep bits 0-31 of
+	the source register.
+	(lui_h_hi12): Load bits 52-63 of the data and keep bits 0-51 of
+	the source register.
+	* config/loongarch/predicates.md: Symbols need to be decomposed
+	when the macro TARGET_CMODEL_EXTREME is defined.
+	* doc/invoke.texi: Modify the description information of cmodel in the document.
+	Document -W[no-]extreme-plt.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/func-call-1.c: Add option '-mcmodel=normal'.
+	* gcc.target/loongarch/func-call-2.c: Likewise.
+	* gcc.target/loongarch/func-call-3.c: Likewise.
+	* gcc.target/loongarch/func-call-4.c: Likewise.
+	* gcc.target/loongarch/func-call-5.c: Likewise.
+	* gcc.target/loongarch/func-call-6.c: Likewise.
+	* gcc.target/loongarch/func-call-7.c: Likewise.
+	* gcc.target/loongarch/func-call-8.c: Likewise.
+	* gcc.target/loongarch/relocs-symbol-noaddend.c: Likewise.
+	* gcc.target/loongarch/func-call-extreme-1.c: New test.
+	* gcc.target/loongarch/func-call-extreme-2.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch-opts.cc        |   3 +-
+ gcc/config/loongarch/loongarch.cc             | 222 +++++++++++++++---
+ gcc/config/loongarch/loongarch.md             |  34 ++-
+ gcc/config/loongarch/predicates.md            |   9 +-
+ gcc/doc/invoke.texi                           |  50 +---
+ .../gcc.target/loongarch/func-call-1.c        |   2 +-
+ .../gcc.target/loongarch/func-call-2.c        |   2 +-
+ .../gcc.target/loongarch/func-call-3.c        |   2 +-
+ .../gcc.target/loongarch/func-call-4.c        |   2 +-
+ .../gcc.target/loongarch/func-call-5.c        |   2 +-
+ .../gcc.target/loongarch/func-call-6.c        |   2 +-
+ .../gcc.target/loongarch/func-call-7.c        |   2 +-
+ .../gcc.target/loongarch/func-call-8.c        |   2 +-
+ .../loongarch/func-call-extreme-1.c           |  32 +++
+ .../loongarch/func-call-extreme-2.c           |  32 +++
+ .../loongarch/relocs-symbol-noaddend.c        |   2 +-
+ 16 files changed, 318 insertions(+), 82 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c
+
+diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
+index 3f70943de..2ae89f234 100644
+--- a/gcc/config/loongarch/loongarch-opts.cc
+++ b/gcc/config/loongarch/loongarch-opts.cc
+@@ -376,14 +376,13 @@ fallback:
+ 
+   /* 5.  Target code model */
+   t.cmodel = constrained.cmodel ? opt_cmodel : CMODEL_NORMAL;
+-  if (t.cmodel != CMODEL_NORMAL)
+  if (t.cmodel != CMODEL_NORMAL && t.cmodel != CMODEL_EXTREME)
+     {
+       warning (0, "%qs is not supported, now cmodel is set to %qs",
+ 	       loongarch_cmodel_strings[t.cmodel], "normal");
+       t.cmodel = CMODEL_NORMAL;
+     }
+ 
+-
+   /* Cleanup and return.  */
+   obstack_free (&msg_obstack, NULL);
+   *target = t;
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 76bf55ea4..1a33f668f 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -2436,7 +2436,19 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
+       /* Split tls symbol to high and low.  */
+       rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc));
+       high = loongarch_force_temporary (tmp, high);
+-      emit_insn (gen_tls_low (Pmode, a0, high, loc));
+
+      if (TARGET_CMODEL_EXTREME)
+	{
+	  gcc_assert (TARGET_EXPLICIT_RELOCS);
+
+	  rtx tmp1 = gen_reg_rtx (Pmode);
+	  emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc));
+	  emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc));
+	  emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loc));
+	  emit_move_insn (a0, gen_rtx_PLUS (Pmode, high, tmp1));
+	}
+      else
+	emit_insn (gen_tls_low (Pmode, a0, high, loc));
+     }
+   else
+     {
+@@ -2449,14 +2461,44 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
+     }
+ 
+   if (flag_plt)
+-    insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol,
+    insn = emit_call_insn (gen_call_value_internal (v0,
+						    loongarch_tls_symbol,
+ 						    const0_rtx));
+   else
+     {
+       rtx dest = gen_reg_rtx (Pmode);
+-      rtx high = gen_reg_rtx (Pmode);
+-      loongarch_emit_move (high, gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
+-      emit_insn (gen_ld_from_got (Pmode, dest, high, loongarch_tls_symbol));
+
+      if (TARGET_CMODEL_EXTREME)
+	{
+	  gcc_assert (TARGET_EXPLICIT_RELOCS);
+
+	  rtx tmp1 = gen_reg_rtx (Pmode);
+	  rtx high = gen_reg_rtx (Pmode);
+
+	  loongarch_emit_move (high,
+			       gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
+	  loongarch_emit_move (tmp1, gen_rtx_LO_SUM (Pmode,
+						     gen_rtx_REG (Pmode, 0),
+						     loongarch_tls_symbol));
+	  emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol));
+	  emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol));
+	  loongarch_emit_move (dest,
+			       gen_rtx_MEM (Pmode,
+					    gen_rtx_PLUS (Pmode, high, tmp1)));
+	}
+      else
+	{
+	  if (TARGET_EXPLICIT_RELOCS)
+	    {
+	      rtx high = gen_reg_rtx (Pmode);
+	      loongarch_emit_move (high,
+				   gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
+	      emit_insn (gen_ld_from_got (Pmode, dest, high,
+					  loongarch_tls_symbol));
+	    }
+	  else
+	    loongarch_emit_move (dest, loongarch_tls_symbol);
+	}
+       insn = emit_call_insn (gen_call_value_internal (v0, dest, const0_rtx));
+     }
+ 
+@@ -2508,7 +2550,23 @@ loongarch_legitimize_tls_address (rtx loc)
+ 	      tmp3 = gen_reg_rtx (Pmode);
+ 	      rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
+ 	      high = loongarch_force_temporary (tmp3, high);
+-	      emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
+
+	      if (TARGET_CMODEL_EXTREME)
+		{
+		  gcc_assert (TARGET_EXPLICIT_RELOCS);
+
+		  rtx tmp3 = gen_reg_rtx (Pmode);
+		  emit_insn (gen_tls_low (Pmode, tmp3,
+					  gen_rtx_REG (Pmode, 0), tmp2));
+		  emit_insn (gen_lui_h_lo20 (tmp3, tmp3, tmp2));
+		  emit_insn (gen_lui_h_hi12 (tmp3, tmp3, tmp2));
+		  emit_move_insn (tmp1,
+				  gen_rtx_MEM (Pmode,
+					       gen_rtx_PLUS (Pmode,
+							     high, tmp3)));
+		}
+	      else
+		emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
+ 	    }
+ 	  else
+ 	    emit_insn (loongarch_got_load_tls_ie (tmp1, loc));
+@@ -2530,11 +2588,18 @@ loongarch_legitimize_tls_address (rtx loc)
+ 	      rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
+ 	      high = loongarch_force_temporary (tmp3, high);
+ 	      emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2));
+
+	      if (TARGET_CMODEL_EXTREME)
+		{
+		  gcc_assert (TARGET_EXPLICIT_RELOCS);
+
+		  emit_insn (gen_lui_h_lo20 (tmp1, tmp1, tmp2));
+		  emit_insn (gen_lui_h_hi12 (tmp1, tmp1, tmp2));
+		}
+ 	    }
+ 	  else
+ 	    emit_insn (loongarch_got_load_tls_le (tmp1, loc));
+ 	  emit_insn (gen_add3_insn (dest, tmp1, tp));
+-
+ 	}
+       break;
+ 
+@@ -2603,7 +2668,6 @@ bool
+ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
+ {
+   enum loongarch_symbol_type symbol_type;
+-  rtx high;
+ 
+   /* If build with '-mno-explicit-relocs', don't split symbol.  */
+   if (!TARGET_EXPLICIT_RELOCS)
+@@ -2615,6 +2679,8 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
+       || !loongarch_split_symbol_type (symbol_type))
+     return false;
+ 
+  rtx high, temp1 = NULL;
+
+   if (temp == NULL)
+     temp = gen_reg_rtx (Pmode);
+ 
+@@ -2622,20 +2688,42 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
+   high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
+   high = loongarch_force_temporary (temp, high);
+ 
+  if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
+    {
+      gcc_assert (TARGET_EXPLICIT_RELOCS);
+
+      temp1 = gen_reg_rtx (Pmode);
+      emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0),
+					     addr));
+      emit_insn (gen_lui_h_lo20 (temp1, temp1, addr));
+      emit_insn (gen_lui_h_hi12 (temp1, temp1, addr));
+    }
+
+   if (low_out)
+     switch (symbol_type)
+       {
+       case SYMBOL_PCREL:
+-	*low_out = gen_rtx_LO_SUM (Pmode, high, addr);
+-	break;
+	{
+	  if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
+	    *low_out = gen_rtx_PLUS (Pmode, high, temp1);
+	  else
+	    *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
+	  break;
+	}
+ 
+       case SYMBOL_GOT_DISP:
+ 	/* SYMBOL_GOT_DISP symbols are loaded from the GOT.  */
+ 	{
+-	  rtx low = gen_rtx_LO_SUM (Pmode, high, addr);
+-	  rtx mem = gen_rtx_MEM (Pmode, low);
+-	  *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
+-				     UNSPEC_LOAD_FROM_GOT);
+	  if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
+	    *low_out = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, high, temp1));
+	  else
+	    {
+	      rtx low = gen_rtx_LO_SUM (Pmode, high, addr);
+	      rtx mem = gen_rtx_MEM (Pmode, low);
+	      *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
+					 UNSPEC_LOAD_FROM_GOT);
+	    }
+
+ 	  break;
+ 	}
+ 
+@@ -4584,34 +4672,86 @@ loongarch_memmodel_needs_release_fence (enum memmodel model)
+    in context CONTEXT.  HI_RELOC indicates a high-part reloc.  */
+ 
+ static void
+-loongarch_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
+loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
+			       bool hi_reloc)
+ {
+   const char *reloc;
+ 
+  if (TARGET_CMODEL_EXTREME)
+    gcc_assert (TARGET_EXPLICIT_RELOCS);
+
+   switch (loongarch_classify_symbolic_expression (op))
+     {
+     case SYMBOL_PCREL:
+-      reloc = hi_reloc ? "%pc_hi20" : "%pc_lo12";
+      if (hi64_part)
+	{
+	  if (TARGET_CMODEL_EXTREME)
+	    reloc = hi_reloc ? "%pc64_hi12" : "%pc64_lo20";
+	  else
+	    gcc_unreachable ();
+	}
+      else
+	reloc = hi_reloc ? "%pc_hi20" : "%pc_lo12";
+       break;
+ 
+     case SYMBOL_GOT_DISP:
+-      reloc = hi_reloc ? "%got_pc_hi20" : "%got_pc_lo12";
+      if (hi64_part)
+	{
+	  if (TARGET_CMODEL_EXTREME)
+	    reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
+	  else
+	    gcc_unreachable ();
+	}
+      else
+	reloc = hi_reloc ? "%got_pc_hi20" : "%got_pc_lo12";
+       break;
+ 
+     case SYMBOL_TLS_IE:
+-      reloc = hi_reloc ? "%ie_pc_hi20" : "%ie_pc_lo12";
+      if (hi64_part)
+	{
+	  if (TARGET_CMODEL_EXTREME)
+	    reloc = hi_reloc ? "%ie64_pc_hi12" : "%ie64_pc_lo20";
+	  else
+	    gcc_unreachable ();
+	}
+      else
+	reloc = hi_reloc ? "%ie_pc_hi20" : "%ie_pc_lo12";
+       break;
+ 
+     case SYMBOL_TLS_LE:
+-      reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
+      if (hi64_part)
+	{
+	  if (TARGET_CMODEL_EXTREME)
+	    reloc = hi_reloc ? "%le64_hi12" : "%le64_lo20";
+	  else
+	    gcc_unreachable ();
+	}
+      else
+	reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
+       break;
+ 
+     case SYMBOL_TLSGD:
+-      reloc = hi_reloc ? "%gd_pc_hi20" : "%got_pc_lo12";
+      if (hi64_part)
+	{
+	  if (TARGET_CMODEL_EXTREME)
+	    reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
+	  else
+	    gcc_unreachable ();
+	}
+      else
+	reloc = hi_reloc ? "%gd_pc_hi20" : "%got_pc_lo12";
+       break;
+ 
+     case SYMBOL_TLSLDM:
+-      reloc = hi_reloc ? "%ld_pc_hi20" : "%got_pc_lo12";
+      if (hi64_part)
+	{
+	  if (TARGET_CMODEL_EXTREME)
+	    reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
+	  else
+	    gcc_unreachable ();
+	}
+      else
+	reloc = hi_reloc ? "%ld_pc_hi20" : "%got_pc_lo12";
+       break;
+ 
+     default:
+@@ -4637,6 +4777,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
+    'L'  Print the low-part relocation associated with OP.
+    'm'	Print one less than CONST_INT OP in decimal.
+    'N'	Print the inverse of the integer branch condition for comparison OP.
+   'r'  Print the relocation for address bits 12-31 associated with OP.
+   'R'  Print the relocation for address bits 32-51 associated with OP.
+    'T'	Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
+ 	      'z' for (eq:?I ...), 'n' for (ne:?I ...).
+    't'	Like 'T', but with the EQ/NE cases reversed
+@@ -4694,7 +4836,13 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
+     case 'h':
+       if (code == HIGH)
+ 	op = XEXP (op, 0);
+-      loongarch_print_operand_reloc (file, op, true /* hi_reloc */);
+      loongarch_print_operand_reloc (file, op, false /* hi64_part */,
+				     true /* hi_reloc */);
+      break;
+
+    case 'H':
+      loongarch_print_operand_reloc (file, op, true /* hi64_part */,
+				     true /* hi_reloc */);
+       break;
+ 
+     case 'i':
+@@ -4703,7 +4851,8 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
+       break;
+ 
+     case 'L':
+-      loongarch_print_operand_reloc (file, op, false /* lo_reloc */);
+      loongarch_print_operand_reloc (file, op, false /* hi64_part*/,
+				     false /* lo_reloc */);
+       break;
+ 
+     case 'm':
+@@ -4718,6 +4867,16 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
+ 					    letter);
+       break;
+ 
+    case 'r':
+      loongarch_print_operand_reloc (file, op, false /* hi64_part */,
+				     true /* lo_reloc */);
+      break;
+
+    case 'R':
+      loongarch_print_operand_reloc (file, op, true /* hi64_part */,
+				     false /* lo_reloc */);
+      break;
+
+     case 't':
+     case 'T':
+       {
+@@ -4848,7 +5007,8 @@ loongarch_print_operand_address (FILE *file, machine_mode /* mode  */, rtx x)
+ 
+       case ADDRESS_LO_SUM:
+ 	fprintf (file, "%s,", reg_names[REGNO (addr.reg)]);
+-	loongarch_print_operand_reloc (file, addr.offset, false /* hi_reloc */);
+	loongarch_print_operand_reloc (file, addr.offset, false /* hi64_part */,
+				       false /* hi_reloc */);
+ 	return;
+ 
+       case ADDRESS_CONST_INT:
+@@ -5821,13 +5981,21 @@ loongarch_option_override_internal (struct gcc_options *opts)
+ 
+   switch (la_target.cmodel)
+     {
+-      case CMODEL_TINY_STATIC:
+       case CMODEL_EXTREME:
+	if (!TARGET_EXPLICIT_RELOCS)
+	  error ("code model %qs needs %s",
+		 "extreme", "-mexplicit-relocs");
+
+ 	if (opts->x_flag_plt)
+-	  error ("code model %qs and %qs not support %s mode",
+-		 "tiny-static", "extreme", "plt");
+	  {
+	    if (global_options_set.x_flag_plt)
+	      error ("code model %qs is not compatible with %s",
+		     "extreme", "-fplt");
+	    opts->x_flag_plt = 0;
+	  }
+ 	break;
+ 
+      case CMODEL_TINY_STATIC:
+       case CMODEL_NORMAL:
+       case CMODEL_TINY:
+       case CMODEL_LARGE:
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 8e8868de9..8fc10444c 100644
+--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
+@@ -60,6 +60,9 @@
+ 
+   UNSPEC_LOAD_FROM_GOT
+   UNSPEC_ORI_L_LO12
+  UNSPEC_LUI_L_HI20
+  UNSPEC_LUI_H_LO20
+  UNSPEC_LUI_H_HI12
+   UNSPEC_TLS_LOW
+ ])
+ 
+@@ -1934,16 +1937,45 @@
+   [(set_attr "type" "move")]
+ )
+ 
+(define_insn "@lui_l_hi20<mode>"
+  [(set (match_operand:P 0 "register_operand" "=r")
+	(unspec:P [(match_operand:P 1 "symbolic_operand")]
+	UNSPEC_LUI_L_HI20))]
+  ""
+  "lu12i.w\t%0,%r1"
+  [(set_attr "type" "move")]
+)
+
+ (define_insn "@ori_l_lo12<mode>"
+   [(set (match_operand:P 0 "register_operand" "=r")
+ 	(unspec:P [(match_operand:P 1 "register_operand" "r")
+-		    (match_operand:P 2 "symbolic_operand")]
+		   (match_operand:P 2 "symbolic_operand")]
+ 	UNSPEC_ORI_L_LO12))]
+   ""
+   "ori\t%0,%1,%L2"
+   [(set_attr "type" "move")]
+ )
+ 
+(define_insn "lui_h_lo20"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "register_operand" "0")
+		    (match_operand:DI 2 "symbolic_operand")]
+	UNSPEC_LUI_H_LO20))]
+  "TARGET_64BIT"
+  "lu32i.d\t%0,%R2"
+  [(set_attr "type" "move")]
+)
+
+(define_insn "lui_h_hi12"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(unspec:DI [(match_operand:DI 1 "register_operand" "r")
+		    (match_operand:DI 2 "symbolic_operand")]
+	UNSPEC_LUI_H_HI12))]
+  "TARGET_64BIT"
+  "lu52i.d\t%0,%1,%H2"
+  [(set_attr "type" "move")]
+)
+
+ ;; Convert floating-point numbers to integers
+ (define_insn "frint_<fmt>"
+   [(set (match_operand:ANYF 0 "register_operand" "=f")
+diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
+index cd3528c7c..e38c6fbdd 100644
+--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
+@@ -111,7 +111,7 @@
+   (match_code "const,symbol_ref,label_ref")
+ {
+   /* Split symbol to high and low if return false.
+-     If defined TARGET_CMODEL_LARGE, all symbol would be splited,
+     If TARGET_CMODEL_EXTREME is defined, all symbols are split,
+      else if offset is not zero, the symbol would be splited.  */
+ 
+   enum loongarch_symbol_type symbol_type;
+@@ -126,10 +126,13 @@
+   switch (symbol_type)
+     {
+     case SYMBOL_PCREL:
+-      return 1;
+      if (TARGET_CMODEL_EXTREME)
+	return false;
+      else
+	return 1;
+ 
+     case SYMBOL_GOT_DISP:
+-      if (TARGET_CMODEL_LARGE || !flag_plt)
+      if (TARGET_CMODEL_EXTREME || !flag_plt)
+ 	return false;
+       else
+ 	return 1;
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 1de2b2bd4..c4f83e62a 100644
+--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
+@@ -1006,6 +1006,7 @@ Objective-C and Objective-C++ Dialects}.
+ -mcond-move-float  -mno-cond-move-float @gol
+ -memcpy  -mno-memcpy -mstrict-align -mno-strict-align @gol
+ -mmax-inline-memcpy-size=@var{n} @gol
+-mexplicit-relocs -mno-explicit-relocs @gol
+ -mcmodel=@var{code-model}}
+ 
+ @emph{M32R/D Options}
+@@ -24617,50 +24618,19 @@ less than or equal to @var{n} bytes.  The default value of @var{n} is 1024.
+ @item -mcmodel=@var{code-model}
+ Set the code model to one of:
+ @table @samp
+-@item tiny-static
+-@itemize @bullet
+-@item
+-local symbol and global strong symbol: The data section must be within +/-2MiB addressing space.
+-The text section must be within +/-128MiB addressing space.
+-@item
+-global weak symbol: The got table must be within +/-2GiB addressing space.
+-@end itemize
+-
+-@item tiny
+-@itemize @bullet
+-@item
+-local symbol: The data section must be within +/-2MiB addressing space.
+-The text section must be within +/-128MiB
+-addressing space.
+-@item
+-global symbol: The got table must be within +/-2GiB addressing space.
+-@end itemize
+@item tiny-static (Not implemented yet)
+@item tiny (Not implemented yet)
+ 
+ @item normal
+-@itemize @bullet
+-@item
+-local symbol: The data section must be within +/-2GiB addressing space.
+-The text section must be within +/-128MiB addressing space.
+-@item
+-global symbol: The got table must be within +/-2GiB addressing space.
+-@end itemize
+The text segment must be within a 128MB address space.  The data segment must
+be within a 2GB address space.
+ 
+-@item large
+-@itemize @bullet
+-@item
+-local symbol: The data section must be within +/-2GiB addressing space.
+-The text section must be within +/-128GiB addressing space.
+-@item
+-global symbol: The got table must be within +/-2GiB addressing space.
+-@end itemize
+@item large (Not implemented yet)
+ 
+-@item extreme(Not implemented yet)
+-@itemize @bullet
+-@item
+-local symbol: The data and text section must be within +/-8EiB addressing space.
+-@item
+-global symbol: The data got table must be within +/-8EiB addressing space.
+-@end itemize
+@item extreme
+This mode does not limit the size of the code segment and data segment.
+The @option{-mcmodel=extreme} option is incompatible with @option{-fplt} and
+@option{-mno-explicit-relocs}.
+ @end table
+ The default code model is @code{normal}.
+ 
+diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-1.c
+index 01b8ea23f..76bf11b0c 100644
+--- a/gcc/testsuite/gcc.target/loongarch/func-call-1.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-1.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mno-explicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mno-explicit-relocs -mcmodel=normal" } */
+ /* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
+ /* { dg-final { scan-assembler "test1:.*bl\t%plt\\(f\\)\n" } } */
+ /* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-2.c b/gcc/testsuite/gcc.target/loongarch/func-call-2.c
+index 4565baaec..4b468fef8 100644
+--- a/gcc/testsuite/gcc.target/loongarch/func-call-2.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-2.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mno-explicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mno-explicit-relocs -mcmodel=normal" } */
+ /* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
+ /* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
+ /* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-3.c
+index 4f669a029..dd3a4882d 100644
+--- a/gcc/testsuite/gcc.target/loongarch/func-call-3.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-3.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mno-explicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mno-explicit-relocs -mcmodel=normal" } */
+ /* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */
+ /* { dg-final { scan-assembler "test1:.*la\.global\t.*f\n\tjirl" } } */
+ /* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-4.c
+index 943adb640..f8158ec34 100644
+--- a/gcc/testsuite/gcc.target/loongarch/func-call-4.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-4.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mno-explicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mno-explicit-relocs -mcmodel=normal" } */
+ /* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */
+ /* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
+ /* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-5.c b/gcc/testsuite/gcc.target/loongarch/func-call-5.c
+index 2c2a1c8a1..37994af43 100644
+--- a/gcc/testsuite/gcc.target/loongarch/func-call-5.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-5.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mexplicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mexplicit-relocs -mcmodel=normal" } */
+ /* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
+ /* { dg-final { scan-assembler "test1:.*bl\t%plt\\(f\\)\n" } } */
+ /* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-6.c b/gcc/testsuite/gcc.target/loongarch/func-call-6.c
+index 4b0e4266e..8e366e376 100644
+--- a/gcc/testsuite/gcc.target/loongarch/func-call-6.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-6.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mexplicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mexplicit-relocs -mcmodel=normal" } */
+ /* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
+ /* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
+ /* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-7.c b/gcc/testsuite/gcc.target/loongarch/func-call-7.c
+index 51792711f..4177c3d96 100644
+--- a/gcc/testsuite/gcc.target/loongarch/func-call-7.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-7.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=normal" } */
+ /* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */
+ /* { dg-final { scan-assembler "test1:.*pcalau12i\t.*%got_pc_hi20\\(f\\)\n\tld\.d\t.*%got_pc_lo12\\(f\\)\n\tjirl" } } */
+ /* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-8.c b/gcc/testsuite/gcc.target/loongarch/func-call-8.c
+index 330140d88..4254eaa16 100644
+--- a/gcc/testsuite/gcc.target/loongarch/func-call-8.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-8.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=normal" } */
+ /* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */
+ /* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
+ /* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c
+new file mode 100644
+index 000000000..db1e0f853
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c
+@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */
+/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
+/* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
+/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
+
+extern void g (void);
+void
+f (void)
+{}
+
+static void
+l (void)
+{}
+
+void
+test (void)
+{
+  g ();
+}
+
+void
+test1 (void)
+{
+  f ();
+}
+
+void
+test2 (void)
+{
+  l ();
+}
+diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c
+new file mode 100644
+index 000000000..21bf81ae8
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c
+@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */
+/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
+/* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
+/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
+
+extern void g (void);
+void
+f (void)
+{}
+
+static void
+l (void)
+{}
+
+void
+test (void)
+{
+  g ();
+}
+
+void
+test1 (void)
+{
+  f ();
+}
+
+void
+test2 (void)
+{
+  l ();
+}
+diff --git a/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c b/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c
+index bfcc9bc33..3ec8bd229 100644
+--- a/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c
+++ b/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-mabi=lp64d -mexplicit-relocs -fno-pic -O2" } */
+/* { dg-options "-mabi=lp64d -mexplicit-relocs -fno-pic -O2 -mcmodel=normal" } */
+ /* { dg-final { scan-assembler "pcalau12i.*%pc_hi20\\(\.LANCHOR0\\)\n" } } */
+ /* { dg-final { scan-assembler "addi\.d.*%pc_lo12\\(\.LANCHOR0\\)\n" } } */
+ /* { dg-final { scan-assembler "ldptr.d\t\\\$r4,.*,0\n" } } */
+-- 
+2.33.0
+
--- a/LoongArch-Add-tests-for-ASX-builtin-functions.patch
+++ b/LoongArch-Add-tests-for-ASX-builtin-functions.patch
--- a/LoongArch-Add-tests-for-ASX-vector-comparison-and-se.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-comparison-and-se.patch
--- a/LoongArch-Add-tests-for-ASX-vector-floating-point-co.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-floating-point-co.patch
--- a/LoongArch-Add-tests-for-ASX-vector-floating-point-op.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-floating-point-op.patch
--- a/LoongArch-Add-tests-for-ASX-vector-subtraction-instr.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-subtraction-instr.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvabsd-xvavg-xvav.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvabsd-xvavg-xvav.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvadd-xvadda-xvad.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvadd-xvadda-xvad.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvand-xvandi-xvan.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvand-xvandi-xvan.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvbitclr-xvbitclr.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvbitclr-xvbitclr.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvext2xv-xvexth-x.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvext2xv-xvexth-x.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvextl-xvsra-xvsr.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvextl-xvsra-xvsr.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvfcmp-caf-ceq-cl.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvfcmp-caf-ceq-cl.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvfcmp-saf-seq-sl.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvfcmp-saf-seq-sl.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvfnmadd-xvfrstp-.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvfnmadd-xvfrstp-.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvhadd-xvhaddw-xv.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvhadd-xvhaddw-xv.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvldi-xvmskgez-xv.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvldi-xvmskgez-xv.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvmax-xvmaxi-xvmi.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvmax-xvmaxi-xvmi.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvmul-xvmod-xvdiv.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvmul-xvmod-xvdiv.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvpackev-xvpackod.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvpackev-xvpackod.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvsll-xvsrl-instr.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvsll-xvsrl-instr.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvssran-xvssrani-.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvssran-xvssrani-.patch
--- a/LoongArch-Add-tests-for-ASX-vector-xvssrln-xvssrlni-.patch
+++ b/LoongArch-Add-tests-for-ASX-vector-xvssrln-xvssrlni-.patch
--- a/LoongArch-Add-tests-for-ASX-xvldrepl-xvstelm-instruc.patch
+++ b/LoongArch-Add-tests-for-ASX-xvldrepl-xvstelm-instruc.patch
@ -0,0 +1,65 @@
+From 2ef90d604d7bae207d5b2067b4ce38d04d4835be Mon Sep 17 00:00:00 2001
+From: Xiaolong Chen <chenxiaolong@loongson.cn>
+Date: Tue, 12 Sep 2023 16:00:48 +0800
+Subject: [PATCH 110/124] LoongArch: Add tests for ASX xvldrepl/xvstelm
+ instruction generation.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c: New test.
+	* gcc.target/loongarch/vector/lasx/lasx-xvstelm.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ .../loongarch/vector/lasx/lasx-xvldrepl.c        | 16 ++++++++++++++++
+ .../loongarch/vector/lasx/lasx-xvstelm.c         | 14 ++++++++++++++
+ 2 files changed, 30 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
+new file mode 100644
+index 000000000..105567951
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
+@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mlasx" } */
+/* { dg-final { scan-assembler-times "xvldrepl.w" 2} } */
+
+#define N 258
+
+float a[N], b[N], c[N];
+
+void
+test ()
+{
+  for (int i = 0; i < 256; i++)
+    {
+      a[i] = c[0] * b[i] + c[1];
+    }
+}
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
+new file mode 100644
+index 000000000..1a7b0e86f
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
+@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mlasx" } */
+/* { dg-final { scan-assembler-times "xvstelm.w" 8} } */
+
+#define LEN 256
+
+float a[LEN], b[LEN], c[LEN];
+
+void
+test ()
+{
+  for (int i = 0; i < LEN; i += 2)
+    a[i] = b[i] + c[i];
+}
+-- 
+2.33.0
+
--- a/LoongArch-Add-tests-for-Loongson-SX-builtin-function.patch
+++ b/LoongArch-Add-tests-for-Loongson-SX-builtin-function.patch
--- a/LoongArch-Add-tests-for-SX-vector-addition-instructi.patch
+++ b/LoongArch-Add-tests-for-SX-vector-addition-instructi.patch
--- a/LoongArch-Add-tests-for-SX-vector-addition-vsadd-ins.patch
+++ b/LoongArch-Add-tests-for-SX-vector-addition-vsadd-ins.patch
@ -0,0 +1,715 @@
+From 243656b5b87a3125c2a885d11f022a79cca98b39 Mon Sep 17 00:00:00 2001
+From: Xiaolong Chen <chenxiaolong@loongson.cn>
+Date: Mon, 11 Sep 2023 10:07:24 +0800
+Subject: [PATCH 082/124] LoongArch: Add tests for SX vector addition vsadd
+ instructions.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c: New test.
+	* gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ .../loongarch/vector/lsx/lsx-vsadd-1.c        | 335 +++++++++++++++++
+ .../loongarch/vector/lsx/lsx-vsadd-2.c        | 345 ++++++++++++++++++
+ 2 files changed, 680 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
+new file mode 100644
+index 000000000..1bc27c983
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
+@@ -0,0 +1,335 @@
+/* { dg-do run } */
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include <lsxintrin.h>
+
+int
+main ()
+{
+  __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+  __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+  __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+  int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+  long int long_op0, long_op1, long_op2, lont_out, lont_result;
+  long int long_int_out, long_int_result;
+  unsigned int unsigned_int_out, unsigned_int_result;
+  unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x00000000ffffffff;
+  *((unsigned long *)&__m128i_op1[0]) = 0x00000000ffffffff;
+  *((unsigned long *)&__m128i_result[1]) = 0x00000000ffffffff;
+  *((unsigned long *)&__m128i_result[0]) = 0x00000000ffffffff;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_result[0]) = 0xfefefefefefefefe;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0xffffffff3c992b2e;
+  *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffff730f;
+  *((unsigned long *)&__m128i_result[1]) = 0xffffffff3c992b2e;
+  *((unsigned long *)&__m128i_result[0]) = 0xffffffffffff730f;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x00007fff00007fff;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x000000002bfd9461;
+  *((unsigned long *)&__m128i_result[1]) = 0x00007fff00007fff;
+  *((unsigned long *)&__m128i_result[0]) = 0x000000002bfd9461;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x00d3012acc56f9bb;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000001021;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x00d3012acc56f9bb;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000001021;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000001000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000001000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000001000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000001000;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x80808080806b000b;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x80808080806b000b;
+  __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_op0[0]) = 0xffffffffff01ff01;
+  *((unsigned long *)&__m128i_op1[1]) = 0x3c600000ff800000;
+  *((unsigned long *)&__m128i_op1[0]) = 0xfffffffffffffffe;
+  *((unsigned long *)&__m128i_result[1]) = 0x3c5fffffff7fffff;
+  *((unsigned long *)&__m128i_result[0]) = 0xfffefffeff00feff;
+  __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+  __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x00ff00ff00ff00ff;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x00ff00ff00ff00ff;
+  __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x00000000ffffffff;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x00000000ffffffff;
+  __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x3ff0000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x40f3fa0000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x3ff0000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x40f3fa0000000000;
+  __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000008a0000008a;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000008900000009;
+  *((unsigned long *)&__m128i_op1[1]) = 0x63637687636316bb;
+  *((unsigned long *)&__m128i_op1[0]) = 0x6363636363636363;
+  *((unsigned long *)&__m128i_result[1]) = 0x6363771163631745;
+  *((unsigned long *)&__m128i_result[0]) = 0x636363ec6363636c;
+  __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000004;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000004;
+  __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000080000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000080000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000080000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000080000000;
+  __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0xfffffffffefefe6a;
+  *((unsigned long *)&__m128i_op0[0]) = 0x00000000c2bac2c2;
+  *((unsigned long *)&__m128i_op1[1]) = 0x00000001fffffffe;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x00000000fefefe68;
+  *((unsigned long *)&__m128i_result[0]) = 0x00000000c2bac2c2;
+  __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x027c027c000027c0;
+  *((unsigned long *)&__m128i_op1[1]) = 0x001ffff0003ffff0;
+  *((unsigned long *)&__m128i_op1[0]) = 0x000fffefffefffef;
+  *((unsigned long *)&__m128i_result[1]) = 0x001ffff0003ffff0;
+  *((unsigned long *)&__m128i_result[0]) = 0x028c026bfff027af;
+  __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0007000000040000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0003000000010000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0007000000040000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0003000000010000;
+  __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x3f8000003f800000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x3f8000003f800000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x3fffff0000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x3fffff0000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x7f7fff003f800000;
+  *((unsigned long *)&__m128i_result[0]) = 0x7f7fff003f800000;
+  __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000820202020;
+  *((unsigned long *)&__m128i_op0[0]) = 0x00fe01fc0005fff4;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000003a24;
+  *((unsigned long *)&__m128i_op1[0]) = 0x003dbe88077c78c1;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000820205a44;
+  *((unsigned long *)&__m128i_result[0]) = 0x013bc084078278b5;
+  __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000001;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000140001;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000001;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000140001;
+  __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+  __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x67eb85afb2ebb000;
+  *((unsigned long *)&__m128i_op0[0]) = 0xc8847ef6ed3f2000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000100000001;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x67eb85b0b2ebb001;
+  *((unsigned long *)&__m128i_result[0]) = 0xc8847ef6ed3f2000;
+  __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+  __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0xffffffff00000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0xffff000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000014eb54ab;
+  *((unsigned long *)&__m128i_op1[0]) = 0x14eb6a002a406a00;
+  *((unsigned long *)&__m128i_result[1]) = 0xffffffff14eb54ab;
+  *((unsigned long *)&__m128i_result[0]) = 0x14ea6a002a406a00;
+  __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000004;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000004;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0xce9035c49ffff570;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000004;
+  *((unsigned long *)&__m128i_result[0]) = 0xce9035c49ffff574;
+  __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000010;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000010;
+  __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x000000000000000d;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000400;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x000000000000040d;
+  __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000001300000013;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000001300000013;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000001300000013;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000001300000013;
+  __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+  __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000100000100;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000100000100;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000100000100;
+  *((unsigned long *)&__m128i_result[0]) = 0x00000001000000ff;
+  __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000300000001;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000100010001;
+  *((unsigned long *)&__m128i_op1[1]) = 0xfffffffffffffffa;
+  *((unsigned long *)&__m128i_op1[0]) = 0xfffffffffffffffa;
+  *((unsigned long *)&__m128i_result[1]) = 0x00000002fffffffb;
+  *((unsigned long *)&__m128i_result[0]) = 0x000000010000fffb;
+  __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+  __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  return 0;
+}
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c
+new file mode 100644
+index 000000000..67d189991
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c
+@@ -0,0 +1,345 @@
+/* { dg-do run } */
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include <lsxintrin.h>
+
+int
+main ()
+{
+  __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+  __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+  __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+  int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+  long int long_op0, long_op1, long_op2, lont_out, lont_result;
+  long int long_int_out, long_int_result;
+  unsigned int unsigned_int_out, unsigned_int_result;
+  unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x10f917d72d3d01e4;
+  *((unsigned long *)&__m128i_op1[0]) = 0x203e16d116de012b;
+  *((unsigned long *)&__m128i_result[1]) = 0x10f917d72d3d01e4;
+  *((unsigned long *)&__m128i_result[0]) = 0x203e16d116de012b;
+  __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0xfffebd06fffe820c;
+  *((unsigned long *)&__m128i_op0[0]) = 0x7fff7ffe7fff3506;
+  *((unsigned long *)&__m128i_op1[1]) = 0xfffebd06fffe820c;
+  *((unsigned long *)&__m128i_op1[0]) = 0x7fff7ffe7fff3506;
+  *((unsigned long *)&__m128i_result[1]) = 0xffffff0cffffff18;
+  *((unsigned long *)&__m128i_result[0]) = 0xfefffefffeff6a0c;
+  __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+  __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+  __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_op1[1]) = 0x4f804f804f804f80;
+  *((unsigned long *)&__m128i_op1[0]) = 0x4f804f804f804f80;
+  *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
+  __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0xfffff60ca7104649;
+  *((unsigned long *)&__m128i_op0[0]) = 0xfffff790a15db63d;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000001;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000001;
+  *((unsigned long *)&__m128i_result[1]) = 0xfffff60ca710464a;
+  *((unsigned long *)&__m128i_result[0]) = 0xfffff790a15db63e;
+  __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0xfffffffffffffffe;
+  *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffff46;
+  *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
+  __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x00fe000100cf005f;
+  *((unsigned long *)&__m128i_op0[0]) = 0x7fff7fff7fff7fff;
+  *((unsigned long *)&__m128i_op1[1]) = 0x5f675e96e29a5a60;
+  *((unsigned long *)&__m128i_op1[0]) = 0x7fff7fff7fff7fff;
+  *((unsigned long *)&__m128i_result[1]) = 0x5fff5e97e2ff5abf;
+  *((unsigned long *)&__m128i_result[0]) = 0xfefffefffefffeff;
+  __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000001000100010;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0001000100010058;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0001001100110068;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+  __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x7fffffff7fffffff;
+  *((unsigned long *)&__m128i_op0[0]) = 0x7fffffff7fffffff;
+  *((unsigned long *)&__m128i_op1[1]) = 0x7fff010181010102;
+  *((unsigned long *)&__m128i_op1[0]) = 0x7fffffff81010102;
+  *((unsigned long *)&__m128i_result[1]) = 0xfeffffffffffffff;
+  *((unsigned long *)&__m128i_result[0]) = 0xfeffffffffffffff;
+  __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000ebd20000714f;
+  *((unsigned long *)&__m128i_op0[0]) = 0x00012c8a0000a58a;
+  *((unsigned long *)&__m128i_op1[1]) = 0xffffffffb81a6f70;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000d48eaa1a2;
+  *((unsigned long *)&__m128i_result[1]) = 0xffffffffb81ae0bf;
+  *((unsigned long *)&__m128i_result[0]) = 0x00012c9748eaffff;
+  __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0177fff0fffffff0;
+  *((unsigned long *)&__m128i_op0[0]) = 0x00000000011ff8bc;
+  *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
+  __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000200;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000200;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000200;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000200;
+  __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000001;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000001;
+  __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+  __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+  __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000d0000000d;
+  *((unsigned long *)&__m128i_op1[1]) = 0x8006000000040000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x8002000000000007;
+  *((unsigned long *)&__m128i_result[1]) = 0x8006000000040000;
+  *((unsigned long *)&__m128i_result[0]) = 0x8002000d00000014;
+  __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000014;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000014;
+  __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+  __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+  __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  int_out = __lsx_vpickve2gr_h (__m128i_op0, 0x1);
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000600007fff;
+  *((unsigned long *)&__m128i_op0[0]) = 0x00000008ffffa209;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000600007fff;
+  *((unsigned long *)&__m128i_result[0]) = 0x00000008ffffa209;
+  __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x636363633f3e47c1;
+  *((unsigned long *)&__m128i_op0[0]) = 0x41f8e080f1ef4eaa;
+  *((unsigned long *)&__m128i_op1[1]) = 0x00000807bf0a1f80;
+  *((unsigned long *)&__m128i_op1[0]) = 0x00000800ecedee68;
+  *((unsigned long *)&__m128i_result[1]) = 0x63636b6afe486741;
+  *((unsigned long *)&__m128i_result[0]) = 0x41f8e880ffffffff;
+  __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000ebd20000714f;
+  *((unsigned long *)&__m128i_op0[0]) = 0x00012c8a0000a58a;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000ebd20000714f;
+  *((unsigned long *)&__m128i_op1[0]) = 0x00012c8a0000a58a;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000ffff0000e29e;
+  *((unsigned long *)&__m128i_result[0]) = 0x000259140000ffff;
+  __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0xfffffffeffffffff;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[0]) = 0xfffffffeffffffff;
+  __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0c03e17edd781b11;
+  *((unsigned long *)&__m128i_op0[0]) = 0x342caf9be55700b5;
+  *((unsigned long *)&__m128i_op1[1]) = 0x00040003ff83ff84;
+  *((unsigned long *)&__m128i_op1[0]) = 0x00040003ff4dffca;
+  *((unsigned long *)&__m128i_result[1]) = 0x0c07e181ffffffff;
+  *((unsigned long *)&__m128i_result[0]) = 0x3430af9effffffff;
+  __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x00000000ffa8ff9f;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000ffffffabff99;
+  *((unsigned long *)&__m128i_op1[1]) = 0x000100000002007d;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0001000000020001;
+  *((unsigned long *)&__m128i_result[1]) = 0x00010000ffab001c;
+  *((unsigned long *)&__m128i_result[0]) = 0x0001ffffffadff9a;
+  __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0800080008000800;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0800080008000800;
+  *((unsigned long *)&__m128i_result[1]) = 0x0800080008000800;
+  *((unsigned long *)&__m128i_result[0]) = 0x0800080008000800;
+  __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000001;
+  *((unsigned long *)&__m128i_op0[0]) = 0x76f424887fffffff;
+  *((unsigned long *)&__m128i_op1[1]) = 0xc110000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0xc00d060000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0xc110000000000001;
+  *((unsigned long *)&__m128i_result[0]) = 0xffffffff7fffffff;
+  __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x000000000000002f;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000029;
+  *((unsigned long *)&__m128i_op1[1]) = 0xfbfbfb17fbfb38ea;
+  *((unsigned long *)&__m128i_op1[0]) = 0xfbfb47fbfbfb0404;
+  *((unsigned long *)&__m128i_result[1]) = 0xfbfbfb17fbfb3919;
+  *((unsigned long *)&__m128i_result[0]) = 0xfbfb47fbfbfb042d;
+  __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x8080808080808081;
+  *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_op1[0]) = 0x00000000ffffffff;
+  *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+  *((unsigned long *)&__m128i_result[0]) = 0x80808080ffffffff;
+  __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x00123fff00120012;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0012001200120012;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x000000000005003a;
+  *((unsigned long *)&__m128i_result[1]) = 0x00123fff00120012;
+  *((unsigned long *)&__m128i_result[0]) = 0x001200120017004c;
+  __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0xbfd10d0d7b6b6b73;
+  *((unsigned long *)&__m128i_op1[0]) = 0xc5c534920000c4ed;
+  *((unsigned long *)&__m128i_result[1]) = 0xbfd10d0d7b6b6b73;
+  *((unsigned long *)&__m128i_result[0]) = 0xc5c534920000c4ed;
+  __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x000aa822a79308f6;
+  *((unsigned long *)&__m128i_op0[0]) = 0x00000000084d12ce;
+  *((unsigned long *)&__m128i_op1[1]) = 0x000aa822a79308f6;
+  *((unsigned long *)&__m128i_op1[0]) = 0x03aa558e1d37b5a1;
+  *((unsigned long *)&__m128i_result[1]) = 0x00155044ffffffff;
+  *((unsigned long *)&__m128i_result[0]) = 0x03aa558e2584c86f;
+  __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x021b7d24c9678a35;
+  *((unsigned long *)&__m128i_op0[0]) = 0x030298a6a1030a49;
+  *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_result[1]) = 0x021b7d24c9678a35;
+  *((unsigned long *)&__m128i_result[0]) = 0x030298a6a1030a49;
+  __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x00007a8000000480;
+  *((unsigned long *)&__m128i_op0[0]) = 0x00000485000004cc;
+  *((unsigned long *)&__m128i_op1[1]) = 0x00007a8000000480;
+  *((unsigned long *)&__m128i_op1[0]) = 0x00000485000004cc;
+  *((unsigned long *)&__m128i_result[1]) = 0x0000f50000000900;
+  *((unsigned long *)&__m128i_result[0]) = 0x0000090a00000998;
+  __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+  *((unsigned long *)&__m128i_op1[1]) = 0x004eff6200d2ff76;
+  *((unsigned long *)&__m128i_op1[0]) = 0xff70002800be00a0;
+  *((unsigned long *)&__m128i_result[1]) = 0x004eff6200d2ff76;
+  *((unsigned long *)&__m128i_result[0]) = 0xff70002800be00a0;
+  __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+  ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+  return 0;
+}
+-- 
+2.33.0
+
--- a/LoongArch-Add-tests-for-SX-vector-floating-point-ari.patch
+++ b/LoongArch-Add-tests-for-SX-vector-floating-point-ari.patch
--- a/LoongArch-Add-tests-for-SX-vector-floating-point-ins.patch
+++ b/LoongArch-Add-tests-for-SX-vector-floating-point-ins.patch
--- a/LoongArch-Add-tests-for-SX-vector-handling-and-shuff.patch
+++ b/LoongArch-Add-tests-for-SX-vector-handling-and-shuff.patch
--- a/LoongArch-Add-tests-for-SX-vector-subtraction-instru.patch
+++ b/LoongArch-Add-tests-for-SX-vector-subtraction-instru.patch
--- a/LoongArch-Add-tests-for-SX-vector-vabsd-vmskgez-vmsk.patch
+++ b/LoongArch-Add-tests-for-SX-vector-vabsd-vmskgez-vmsk.patch
--- a/LoongArch-Add-tests-for-SX-vector-vand-vandi-vandn-v.patch
+++ b/LoongArch-Add-tests-for-SX-vector-vand-vandi-vandn-v.patch
--- a/LoongArch-Add-tests-for-SX-vector-vavg-vavgr-instruc.patch
+++ b/LoongArch-Add-tests-for-SX-vector-vavg-vavgr-instruc.patch
--- a/LoongArch-Add-tests-for-SX-vector-vbitclr-vbitclri-v.patch
+++ b/LoongArch-Add-tests-for-SX-vector-vbitclr-vbitclri-v.patch
--- a/LoongArch-Add-tests-for-SX-vector-vdiv-vmod-instruct.patch
+++ b/LoongArch-Add-tests-for-SX-vector-vdiv-vmod-instruct.patch
--- a/LoongArch-Add-tests-for-SX-vector-vexth-vextl-vldi-v.patch
+++ b/LoongArch-Add-tests-for-SX-vector-vexth-vextl-vldi-v.patch
--- a/LoongArch-Add-tests-for-SX-vector-vfcmp-instructions.patch
+++ b/LoongArch-Add-tests-for-SX-vector-vfcmp-instructions.patch
--- a/LoongArch-Add-tests-for-SX-vector-vfmadd-vfnmadd-vld.patch
+++ b/LoongArch-Add-tests-for-SX-vector-vfmadd-vfnmadd-vld.patch
--- a/LoongArch-Add-tests-for-SX-vector-vfrstp-vfrstpi-vse.patch
+++ b/LoongArch-Add-tests-for-SX-vector-vfrstp-vfrstpi-vse.patch
--- a/LoongArch-Add-tests-for-SX-vector-vmax-vmaxi-vmin-vm.patch
+++ b/LoongArch-Add-tests-for-SX-vector-vmax-vmaxi-vmin-vm.patch
--- a/LoongArch-Add-tests-for-SX-vector-vrotr-vrotri-vsra-.patch
+++ b/LoongArch-Add-tests-for-SX-vector-vrotr-vrotri-vsra-.patch
--- a/LoongArch-Add-tests-for-SX-vector-vsll-vslli-vsrl-vs.patch
+++ b/LoongArch-Add-tests-for-SX-vector-vsll-vslli-vsrl-vs.patch
--- a/LoongArch-Add-tests-for-SX-vector-vssran-vssrani-vss.patch
+++ b/LoongArch-Add-tests-for-SX-vector-vssran-vssrani-vss.patch
--- a/LoongArch-Add-tests-for-the-SX-vector-multiplication.patch
+++ b/LoongArch-Add-tests-for-the-SX-vector-multiplication.patch
--- a/LoongArch-Add-tests-of-mstrict-align-option.patch
+++ b/LoongArch-Add-tests-of-mstrict-align-option.patch
@ -0,0 +1,37 @@
+From f07b91862055533d779fbf76c12cb7c0ae75b53d Mon Sep 17 00:00:00 2001
+From: Xiaolong Chen <chenxiaolong@loongson.cn>
+Date: Mon, 11 Sep 2023 09:35:24 +0800
+Subject: [PATCH 076/124] LoongArch: Add tests of -mstrict-align option.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/strict-align.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/testsuite/gcc.target/loongarch/strict-align.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/strict-align.c
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/strict-align.c b/gcc/testsuite/gcc.target/loongarch/strict-align.c
+new file mode 100644
+index 000000000..040d84958
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/strict-align.c
+@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mstrict-align -mlasx" } */
+/* { dg-final { scan-assembler-not "vfadd.s" } } */
+/* Four adjacent float adds; dg-final above requires no vfadd.s output.  */
+void
+foo (float *restrict x, float *restrict y)
+{
+  x[0] = x[0] + y[0];
+  x[1] = x[1] + y[1];
+  x[2] = x[2] + y[2];
+  x[3] = x[3] + y[3];
+}
+-- 
+2.33.0
+
--- a/LoongArch-Add-testsuite-framework-for-Loongson-SX-AS.patch
+++ b/LoongArch-Add-testsuite-framework-for-Loongson-SX-AS.patch
@ -0,0 +1,131 @@
+From aebd03c944312be767f03d129eeebc0c4cdf5b4a Mon Sep 17 00:00:00 2001
+From: Xiaolong Chen <chenxiaolong@loongson.cn>
+Date: Mon, 11 Sep 2023 09:36:35 +0800
+Subject: [PATCH 077/124] LoongArch: Add testsuite framework for Loongson
+ SX/ASX.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/vector/loongarch-vector.exp: New test.
+	* gcc.target/loongarch/vector/simd_correctness_check.h: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ .../loongarch/vector/loongarch-vector.exp     | 42 +++++++++++++++
+ .../loongarch/vector/simd_correctness_check.h | 54 +++++++++++++++++++
+ 2 files changed, 96 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
+new file mode 100644
+index 000000000..2c37aa91d
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
+@@ -0,0 +1,42 @@
+# Copyright (C) 2023 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't a LoongArch target.
+if ![istarget loongarch*-*-*] then {
+    return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# If a testcase doesn't have special options, use these.
+global DEFAULT_CFLAGS
+if ![info exists DEFAULT_CFLAGS] then {
+    set DEFAULT_CFLAGS " "
+}
+
+# Initialize `dg'.
+dg-init
+
+# Main loop: run the lsx/ tests with -mlsx, then the lasx/ tests with -mlasx.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lsx/*.\[cS\]]] \
+	" -mlsx" $DEFAULT_CFLAGS
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lasx/*.\[cS\]]] \
+	" -mlasx" $DEFAULT_CFLAGS
+# All done.
+dg-finish
+diff --git a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
+new file mode 100644
+index 000000000..eb7fbd59c
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
+@@ -0,0 +1,54 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+/* Print every 64-bit lane of RES whose bits differ from REF, then abort.  */
+#define ASSERTEQ_64(line, ref, res)                                           \
+  do                                                                          \
+    {                                                                         \
+      int fail = 0;                                                           \
+      for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i)             \
+        {                                                                     \
+          long *temp_ref = (long *) &ref[i], *temp_res = (long *) &res[i];    \
+          if (*temp_ref != *temp_res) /* abs () would truncate to int.  */    \
+            {                                                                 \
+              printf (" error: %s at line %ld , expected " #ref               \
+                      "[%ld]:0x%lx, got: 0x%lx\n", __FILE__, (long) (line),   \
+                      (long) i, *temp_ref, *temp_res);                        \
+              fail = 1;                                                       \
+            }                                                                 \
+        }                                                                     \
+      if (fail == 1)                                                          \
+        abort ();                                                             \
+    }                                                                         \
+  while (0)
+/* Print every 32-bit lane of RES whose bits differ from REF, then abort.  */
+#define ASSERTEQ_32(line, ref, res)                                           \
+  do                                                                          \
+    {                                                                         \
+      int fail = 0;                                                           \
+      for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i)             \
+        {                                                                     \
+          int *temp_ref = (int *) &ref[i], *temp_res = (int *) &res[i];       \
+          if (*temp_ref != *temp_res) /* abs (a - b) can overflow.  */        \
+            {                                                                 \
+              printf (" error: %s at line %ld , expected " #ref               \
+                      "[%ld]:0x%x, got: 0x%x\n", __FILE__, (long) (line),     \
+                      (long) i, *temp_ref, *temp_res);                        \
+              fail = 1;                                                       \
+            }                                                                 \
+        }                                                                     \
+      if (fail == 1)                                                          \
+        abort ();                                                             \
+    }                                                                         \
+  while (0)
+/* Report a mismatch between scalars REF and RES; this does not abort.  */
+#define ASSERTEQ_int(line, ref, res)                                          \
+  do                                                                          \
+    {                                                                         \
+      if ((ref) != (res))                                                     \
+        {                                                                     \
+          printf (" error: %s at line %ld , expected %d, got %d\n", __FILE__, \
+                  (long) (line), (ref), (res));                               \
+        }                                                                     \
+    }                                                                         \
+  while (0)
+-- 
+2.33.0
+
--- a/LoongArch-Adjust-C-multilib-header-layout.patch
+++ b/LoongArch-Adjust-C-multilib-header-layout.patch
@ -0,0 +1,53 @@
+From a4bf17e87a965ed7f2bb1d2921fb9dd820c79a96 Mon Sep 17 00:00:00 2001
+From: Yang Yujie <yangyujie@loongson.cn>
+Date: Thu, 7 Sep 2023 14:50:10 +0800
+Subject: [PATCH 069/124] LoongArch: Adjust C++ multilib header layout.
+
+For LoongArch, the toplevel library build is always aliased to
+one of the multilib variants.  This patch installs it with the
+actual MULTISUBDIR (instead of ".") so that the headers can be
+reached by the compiler.
+
+This patch is an update of
+https://gcc.gnu.org/pipermail/gcc-patches/2023-September/629435.html
+
+libstdc++-v3/ChangeLog:
+
+	* configure.host: Register t-loongarch in tmake_file.
+	* config/cpu/loongarch/t-loongarch: New file.  Manually refresh
+	MULTISUBDIR with $(shell $(CXX) --print-multi-directory).
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ libstdc++-v3/config/cpu/loongarch/t-loongarch | 1 +
+ libstdc++-v3/configure.host                   | 5 ++++-
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+ create mode 100644 libstdc++-v3/config/cpu/loongarch/t-loongarch
+
+diff --git a/libstdc++-v3/config/cpu/loongarch/t-loongarch b/libstdc++-v3/config/cpu/loongarch/t-loongarch
+new file mode 100644
+index 000000000..adfc8ebb9
+--- /dev/null
+++ b/libstdc++-v3/config/cpu/loongarch/t-loongarch
+@@ -0,0 +1 @@
+AM_MAKEFLAGS += " MULTISUBDIR=/$(shell $(CXX) --print-multi-directory)"
+diff --git a/libstdc++-v3/configure.host b/libstdc++-v3/configure.host
+index ec32980aa..592160e6d 100644
+--- a/libstdc++-v3/configure.host
+++ b/libstdc++-v3/configure.host
+@@ -315,7 +315,10 @@ esac
+ # Set any OS-dependent and CPU-dependent bits.
+ # THIS TABLE IS SORTED.  KEEP IT THAT WAY.
+ case "${host}" in
+-  *-*-linux* | *-*-uclinux*)
+ loongarch*)
+    tmake_file="cpu/loongarch/t-loongarch"
+    ;;
+ *-*-linux* | *-*-uclinux*)
+     case "${host_cpu}" in
+       i[567]86)
+         abi_baseline_pair=i486-linux-gnu
+-- 
+2.33.0
+
--- a/LoongArch-Avoid-RTL-flag-check-failure-in-loongarch_.patch
+++ b/LoongArch-Avoid-RTL-flag-check-failure-in-loongarch_.patch
@ -0,0 +1,55 @@
+From e82403e918e18fa8e8ecd0c9e26f2657cc814e12 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 24 Aug 2022 21:31:34 +0800
+Subject: [PATCH 013/124] LoongArch: Avoid RTL flag check failure in
+ loongarch_classify_symbol
+
+SYMBOL_REF_TLS_MODEL invokes SYMBOL_REF_FLAGS, and SYMBOL_REF_FLAGS
+invokes RTL_FLAG_CHECK1 and aborts when RTL code is not SYMBOL_REF.
+
+r13-1833 removed "gcc_assert (SYMBOL_REF_P (x))" before invoking
+"SYMBOL_REF_TLS_MODEL (x)", indicating that it's now possible that "x"
+is not a SYMBOL_REF.  So we need to check if "x" is SYMBOL_REF first.
+
+This fixes a test failure happening with r13-2173 with RTL flag
+checking enabled:
+
+    pr106096.C:26:1: internal compiler error: RTL flag check:
+    SYMBOL_REF_FLAGS used with unexpected rtx code 'const' in
+    loongarch_classify_symbol
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_classify_symbol):
+	Return early if the rtx is not SYMBOL_REF.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.cc | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 04c4ddaed..452aba9d4 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -1633,14 +1633,13 @@ loongarch_rtx_constant_in_small_data_p (machine_mode mode)
+ static enum loongarch_symbol_type
+ loongarch_classify_symbol (const_rtx x)
+ {
+-  if (LABEL_REF_P (x))
+  if (!SYMBOL_REF_P (x))
+     return SYMBOL_PCREL;
+ 
+   if (SYMBOL_REF_TLS_MODEL (x))
+     return SYMBOL_TLS;
+ 
+-  if (SYMBOL_REF_P (x)
+-      && !loongarch_symbol_binds_local_p (x))
+  if (!loongarch_symbol_binds_local_p (x))
+     return SYMBOL_GOT_DISP;
+ 
+   return SYMBOL_PCREL;
+-- 
+2.33.0
+
--- a/LoongArch-Avoid-non-returning-indirect-jumps-through.patch
+++ b/LoongArch-Avoid-non-returning-indirect-jumps-through.patch
@ -0,0 +1,62 @@
+From 7e759740048ee6f24c1055c32868fa21cabb4f75 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Wed, 7 Jun 2023 10:21:58 +0800
+Subject: [PATCH 048/124] LoongArch: Avoid non-returning indirect jumps through
+ $ra [PR110136]
+
+Micro-architecture unconditionally treats a "jr $ra" as "return from subroutine",
+hence doing "jr $ra" would interfere with both subroutine return prediction and
+the more general indirect branch prediction.
+
+Therefore, a problem like PR110136 can cause a significant increase in the branch
+misprediction rate and hurt performance. The same problem exists with "indirect_jump".
+
+gcc/ChangeLog:
+
+	PR target/110136
+	* config/loongarch/loongarch.md: Modify the register constraints for template
+	"jumptable" and "indirect_jump" from "r" to "e".
+
+Co-authored-by: Andrew Pinski <apinski@marvell.com>
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.md | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index b23248c33..c79951c1d 100644
+--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
+@@ -2895,6 +2895,10 @@
+ }
+   [(set_attr "type" "branch")])
+ 
+;; Micro-architecture unconditionally treats a "jr $ra" as "return from subroutine",
+;; non-returning indirect jumps through $ra would interfere with both subroutine
+;; return prediction and the more general indirect branch prediction.
+
+ (define_expand "indirect_jump"
+   [(set (pc) (match_operand 0 "register_operand"))]
+   ""
+@@ -2905,7 +2909,7 @@
+ })
+ 
+ (define_insn "@indirect_jump<mode>"
+-  [(set (pc) (match_operand:P 0 "register_operand" "r"))]
+  [(set (pc) (match_operand:P 0 "register_operand" "e"))]
+   ""
+   "jr\t%0"
+   [(set_attr "type" "jump")
+@@ -2928,7 +2932,7 @@
+ 
+ (define_insn "@tablejump<mode>"
+   [(set (pc)
+-	(match_operand:P 0 "register_operand" "r"))
+	(match_operand:P 0 "register_operand" "e"))
+    (use (label_ref (match_operand 1 "" "")))]
+   ""
+   "jr\t%0"
+-- 
+2.33.0
+
--- a/LoongArch-Change-the-default-value-of-LARCH_CALL_RAT.patch
+++ b/LoongArch-Change-the-default-value-of-LARCH_CALL_RAT.patch
@ -0,0 +1,41 @@
+From 59824f1062d77d0e02ea82d47415bf95c235de87 Mon Sep 17 00:00:00 2001
+From: chenxiaolong <chenxl04200420@163.com>
+Date: Thu, 15 Jun 2023 02:46:24 +0000
+Subject: [PATCH 046/124] LoongArch: Change the default value of
+ LARCH_CALL_RATIO to 6.
+
+During regression testing of GCC on the LoongArch architecture, it was found
+that the tests in the pr90883.C file failed. The problem was investigated, and
+the failure turned out to be caused by the macro LARCH_CALL_RATIO being set to
+too large a value. Taking the actual LoongArch hardware into account, the
+thresholds that satisfy the test conditions were then benchmarked with SPEC
+CPU 2006, and the results showed that the optimal threshold should be set
+to 6.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.h (LARCH_CALL_RATIO): Modify the value
+	of macro LARCH_CALL_RATIO on LoongArch to make it perform optimally.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 44ebadfaa..0e35d4dec 100644
+--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
+@@ -1073,7 +1073,7 @@ typedef struct {
+ /* The base cost of a memcpy call, for MOVE_RATIO and friends.  These
+    values were determined experimentally by benchmarking with CSiBE.
+ */
+-#define LARCH_CALL_RATIO 8
+#define LARCH_CALL_RATIO 6
+ 
+ /* Any loop-based implementation of cpymemsi will have at least
+    LARCH_MAX_MOVE_OPS_PER_LOOP_ITER memory-to-memory
+-- 
+2.33.0
+
--- a/LoongArch-Change-the-value-of-branch_cost-from-2-to-.patch
+++ b/LoongArch-Change-the-value-of-branch_cost-from-2-to-.patch
@ -0,0 +1,69 @@
+From 7e843ed8da168a05eb04eee0b14cbe681bf798fe Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Wed, 13 Sep 2023 11:01:34 +0800
+Subject: [PATCH 123/124] LoongArch: Change the value of branch_cost from 2 to
+ 6.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch-def.c: Modify the default value of
+	branch_cost.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/cmov_ii.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch-def.c         |  4 ++--
+ gcc/testsuite/gcc.target/loongarch/cmov_ii.c | 15 +++++++++++++++
+ 2 files changed, 17 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/cmov_ii.c
+
+diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
+index d29d5f001..eeb32dbf6 100644
+--- a/gcc/config/loongarch/loongarch-def.c
+++ b/gcc/config/loongarch/loongarch-def.c
+@@ -85,7 +85,7 @@ loongarch_cpu_align[N_TUNE_TYPES] = {
+     .int_mult_di	= COSTS_N_INSNS (1),	\
+     .int_div_si		= COSTS_N_INSNS (4),	\
+     .int_div_di		= COSTS_N_INSNS (6),	\
+-    .branch_cost	= 2,			\
+    .branch_cost	= 6,			\
+     .memory_latency	= 4
+ 
+ /* The following properties cannot be looked up directly using "cpucfg".
+@@ -118,7 +118,7 @@ loongarch_rtx_cost_optimize_size = {
+     .int_mult_di      = 4,
+     .int_div_si	      = 4,
+     .int_div_di	      = 4,
+-    .branch_cost      = 2,
+    .branch_cost      = 6,
+     .memory_latency   = 4,
+ };
+ 
+diff --git a/gcc/testsuite/gcc.target/loongarch/cmov_ii.c b/gcc/testsuite/gcc.target/loongarch/cmov_ii.c
+new file mode 100644
+index 000000000..21b468e8a
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/cmov_ii.c
+@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler "test:.*xor.*maskeqz.*masknez.*or.*" } } */
+/* Select c or d on a == b; dg-final above expects xor/maskeqz/masknez/or.  */
+extern void foo_ii (int *, int *, int *, int *);
+
+int
+test (void)
+{
+  int a, b;
+  int c, d, out;
+  foo_ii (&a, &b, &c, &d);
+  out = a == b ? c : d;
+  return out;
+}
+-- 
+2.33.0
+
--- a/LoongArch-Change-the-value-of-macro-TRY_EMPTY_VM_SPA.patch
+++ b/LoongArch-Change-the-value-of-macro-TRY_EMPTY_VM_SPA.patch
@ -0,0 +1,49 @@
+From 6e9265e571a63deb2584704a0b088a6d67ec8af5 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Mon, 20 Feb 2023 16:47:11 +0800
+Subject: [PATCH 037/124] LoongArch: Change the value of macro
+ TRY_EMPTY_VM_SPACE from 0x8000000000 to 0x1000000000.
+
+The PCH mechanism first tries to map the .gch file to the virtual memory
+space pointed to by TRY_EMPTY_VM_SPACE during the compilation process.
+
+The original value of the TRY_EMPTY_VM_SPACE macro is 0x8000000000,
+but a CPU like the LA464 has only 40 bits of virtual address space, so
+this value just exceeds the address range.
+
+If we wanted to support chips with fewer than 40 bits of virtual address,
+the value of this macro would need to be set smaller. I think setting
+this value too small will increase the probability of virtual address
+mapping failure. The purpose of PCH is to make compilation faster,
+but I think we rarely compile on embedded systems, so this situation
+may not need to be considered.
+
+So change the value of this macro to 0x1000000000.
+
+gcc/ChangeLog:
+
+	* config/host-linux.cc (TRY_EMPTY_VM_SPACE): Modify the value of
+	the macro to 0x1000000000.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/host-linux.cc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/host-linux.cc b/gcc/config/host-linux.cc
+index 817d3c087..d93cfc064 100644
+--- a/gcc/config/host-linux.cc
+++ b/gcc/config/host-linux.cc
+@@ -99,7 +99,7 @@
+ #elif defined(__riscv) && defined (__LP64__)
+ # define TRY_EMPTY_VM_SPACE	0x1000000000
+ #elif defined(__loongarch__) && defined(__LP64__)
+-# define TRY_EMPTY_VM_SPACE	0x8000000000
+# define TRY_EMPTY_VM_SPACE	0x1000000000
+ #else
+ # define TRY_EMPTY_VM_SPACE	0
+ #endif
+-- 
+2.33.0
+
--- a/LoongArch-Define-the-macro-ASM_PREFERRED_EH_DATA_FOR.patch
+++ b/LoongArch-Define-the-macro-ASM_PREFERRED_EH_DATA_FOR.patch
@ -0,0 +1,139 @@
+From 05c1df09c70cd0ed48f0644890f69a0128b17a98 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Fri, 29 Jul 2022 09:44:52 +0800
+Subject: [PATCH 008/124] LoongArch: Define the macro
+ ASM_PREFERRED_EH_DATA_FORMAT by checking the assembler's support for eh_frame
+ encoding.
+
+The .eh_frame DW_EH_PE_pcrel encoding format is not supported by gas <= 2.39.
+Check whether the assembler supports the DW_EH_PE_pcrel encoding and select
+the .eh_frame encoding type accordingly.
+
+gcc/ChangeLog:
+
+	* config.in: Regenerate.
+	* config/loongarch/loongarch.h (ASM_PREFERRED_EH_DATA_FORMAT):
+	Select the value of the macro definition according to whether
+	HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT is defined.
+	* configure: Regenerate.
+	* configure.ac: Reinstate HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config.in                    |  8 +++++++-
+ gcc/config/loongarch/loongarch.h |  5 +++++
+ gcc/configure                    | 34 ++++++++++++++++++++++++++++++++
+ gcc/configure.ac                 |  8 ++++++++
+ 4 files changed, 54 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/config.in b/gcc/config.in
+index 64c27c9cf..67ce422f2 100644
+--- a/gcc/config.in
+++ b/gcc/config.in
+@@ -404,13 +404,19 @@
+ #endif
+ 
+ 
+/* Define if your assembler supports eh_frame pcrel encoding. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT
+#endif
+
+
+ /* Define if your assembler supports the R_PPC64_ENTRY relocation. */
+ #ifndef USED_FOR_TARGET
+ #undef HAVE_AS_ENTRY_MARKERS
+ #endif
+ 
+ 
+-/* Define if your assembler supports explicit relocations. */
+/* Define if your assembler supports explicit relocation. */
+ #ifndef USED_FOR_TARGET
+ #undef HAVE_AS_EXPLICIT_RELOCS
+ #endif
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 12f209047..a52a81adf 100644
+--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
+@@ -1130,8 +1130,13 @@ struct GTY (()) machine_function
+ };
+ #endif
+ 
+#ifdef HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+  (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4)
+#else
+ #define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+   (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_absptr)
+#endif
+ 
+ /* Do emit .note.GNU-stack by default.  */
+ #ifndef NEED_INDICATE_EXEC_STACK
+diff --git a/gcc/configure b/gcc/configure
+index 840eddc7c..3788e240a 100755
+--- a/gcc/configure
+++ b/gcc/configure
+@@ -28857,6 +28857,40 @@ if test $gcc_cv_as_loongarch_explicit_relocs = yes; then
+ 
+ $as_echo "#define HAVE_AS_EXPLICIT_RELOCS 1" >>confdefs.h
+ 
+fi
+
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for eh_frame pcrel encoding support" >&5
+$as_echo_n "checking assembler for eh_frame pcrel encoding support... " >&6; }
+if ${gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support=no
+  if test x$gcc_cv_as != x; then
+    $as_echo '.cfi_startproc
+       .cfi_personality 0x9b,a
+       .cfi_lsda 0x1b,b
+       .cfi_endproc' > conftest.s
+    if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s >&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+    then
+	gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support=yes
+    else
+      echo "configure: failed program was" >&5
+      cat conftest.s >&5
+    fi
+    rm -f conftest.o conftest.s
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support" >&5
+$as_echo "$gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support" >&6; }
+if test $gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support = yes; then
+
+$as_echo "#define HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT 1" >>confdefs.h
+
+ fi
+ 
+     ;;
+diff --git a/gcc/configure.ac b/gcc/configure.ac
+index 975c852c6..1c376e0d4 100644
+--- a/gcc/configure.ac
+++ b/gcc/configure.ac
+@@ -5324,6 +5324,14 @@ x:
+       [a:pcalau12i $t0,%pc_hi20(a)],,
+       [AC_DEFINE(HAVE_AS_EXPLICIT_RELOCS, 1,
+ 	  [Define if your assembler supports explicit relocation.])])
+    gcc_GAS_CHECK_FEATURE([eh_frame pcrel encoding support],
+      gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support,,
+      [.cfi_startproc
+       .cfi_personality 0x9b,a
+       .cfi_lsda 0x1b,b
+       .cfi_endproc],,
+      [AC_DEFINE(HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT, 1,
+	  [Define if your assembler supports eh_frame pcrel encoding.])])
+     ;;
+     s390*-*-*)
+     gcc_GAS_CHECK_FEATURE([.gnu_attribute support],
+-- 
+2.33.0
+
--- a/LoongArch-Don-t-add-crtfastmath.o-for-shared.patch
+++ b/LoongArch-Don-t-add-crtfastmath.o-for-shared.patch
@ -0,0 +1,34 @@
+From 2e19311d1bf4f932f5e67f6866123b895b12c97f Mon Sep 17 00:00:00 2001
+From: Richard Biener <rguenther@suse.de>
+Date: Fri, 13 Jan 2023 09:01:12 +0100
+Subject: [PATCH 035/124] LoongArch: Don't add crtfastmath.o for -shared
+
+Don't add crtfastmath.o for -shared to avoid altering the FP
+environment when loading a shared library.
+
+	PR target/55522
+	* config/loongarch/gnu-user.h (GNU_USER_TARGET_MATHFILE_SPEC):
+	Don't add crtfastmath.o for -shared.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/gnu-user.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
+index c5b1afe53..1dc6add62 100644
+--- a/gcc/config/loongarch/gnu-user.h
+++ b/gcc/config/loongarch/gnu-user.h
+@@ -49,7 +49,7 @@ along with GCC; see the file COPYING3.  If not see
+ /* Similar to standard Linux, but adding -ffast-math support.  */
+ #undef GNU_USER_TARGET_MATHFILE_SPEC
+ #define GNU_USER_TARGET_MATHFILE_SPEC \
+-  "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
+  "%{Ofast|ffast-math|funsafe-math-optimizations:%{!shared:crtfastmath.o%s}}"
+ 
+ #undef LIB_SPEC
+ #define LIB_SPEC GNU_USER_TARGET_LIB_SPEC
+-- 
+2.33.0
+
--- a/LoongArch-Enable-free-starting-at-O2.patch
+++ b/LoongArch-Enable-free-starting-at-O2.patch
@ -0,0 +1,71 @@
+From 0369836718ffb25ac64c135e748f409302068a56 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Mon, 28 Aug 2023 11:30:21 +0800
+Subject: [PATCH 052/124] LoongArch: Enable '-free' starting at -O2.
+
+gcc/ChangeLog:
+
+	* common/config/loongarch/loongarch-common.cc:
+	Enable '-free' on O2 and above.
+	* doc/invoke.texi: Modify the description information
+	of the '-free' compilation option and add the LoongArch
+	description.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/sign-extend.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ .../config/loongarch/loongarch-common.cc      |  1 +
+ .../gcc.target/loongarch/sign-extend.c        | 25 +++++++++++++++++++
+ 2 files changed, 26 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend.c
+
+diff --git a/gcc/common/config/loongarch/loongarch-common.cc b/gcc/common/config/loongarch/loongarch-common.cc
+index f8b4660fa..309fcb280 100644
+--- a/gcc/common/config/loongarch/loongarch-common.cc
+++ b/gcc/common/config/loongarch/loongarch-common.cc
+@@ -35,6 +35,7 @@ static const struct default_options loongarch_option_optimization_table[] =
+ {
+   { OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 },
+   { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
+  { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
+   { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+ 
+diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend.c b/gcc/testsuite/gcc.target/loongarch/sign-extend.c
+new file mode 100644
+index 000000000..3f339d06b
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/sign-extend.c
+@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2" } */
+/* { dg-final { scan-assembler-times "slli.w" 1 } } */
+
+extern int PL_savestack_ix;
+extern int PL_regsize;
+extern int PL_savestack_max;
+void Perl_savestack_grow_cnt (int need);
+extern void Perl_croak (char *);
+
+int
+S_regcppush(int parenfloor)
+{
+  int retval = PL_savestack_ix;
+  int paren_elems_to_push = (PL_regsize - parenfloor) * 4;
+  int p;
+
+  if (paren_elems_to_push < 0)
+    Perl_croak ("panic: paren_elems_to_push < 0");
+
+  if (PL_savestack_ix + (paren_elems_to_push + 6) > PL_savestack_max)
+    Perl_savestack_grow_cnt (paren_elems_to_push + 6);
+
+  return retval;
+}
+-- 
+2.33.0
+
--- a/LoongArch-Enable-fsched-pressure-by-default-at-O1-an.patch
+++ b/LoongArch-Enable-fsched-pressure-by-default-at-O1-an.patch
@ -0,0 +1,33 @@
+From a9f72e237d5c176e4ef8ba03a8b4ee5c5daa25fb Mon Sep 17 00:00:00 2001
+From: Guo Jie <guojie@loongson.cn>
+Date: Fri, 8 Sep 2023 10:00:21 +0800
+Subject: [PATCH 071/124] LoongArch: Enable -fsched-pressure by default at -O1
+ and higher.
+
+gcc/ChangeLog:
+
+	* common/config/loongarch/loongarch-common.cc:
+	(default_options loongarch_option_optimization_table):
+	Default to -fsched-pressure.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/common/config/loongarch/loongarch-common.cc | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/gcc/common/config/loongarch/loongarch-common.cc b/gcc/common/config/loongarch/loongarch-common.cc
+index 309fcb280..c8bc5718d 100644
+--- a/gcc/common/config/loongarch/loongarch-common.cc
+++ b/gcc/common/config/loongarch/loongarch-common.cc
+@@ -36,6 +36,7 @@ static const struct default_options loongarch_option_optimization_table[] =
+   { OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 },
+   { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
+   { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
+  { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
+   { OPT_LEVELS_NONE, 0, NULL, 0 }
+ };
+ 
+-- 
+2.33.0
+
--- a/LoongArch-Enable-shrink-wrapping.patch
+++ b/LoongArch-Enable-shrink-wrapping.patch
@ -0,0 +1,309 @@
+From e86c9ece7ae922fe80017ba2ffe22f6267531682 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 23 Apr 2023 20:52:22 +0800
+Subject: [PATCH 045/124] LoongArch: Enable shrink wrapping
+
+This commit implements the target macros for shrink wrapping of function
+prologues/epilogues on LoongArch.
+
+Bootstrapped and regtested on loongarch64-linux-gnu.  I don't have an
+access to SPEC CPU so I hope the reviewer can perform a benchmark to see
+if there is real benefit.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.h (struct machine_function): Add
+	reg_is_wrapped_separately array for register wrapping
+	information.
+	* config/loongarch/loongarch.cc
+	(loongarch_get_separate_components): New function.
+	(loongarch_components_for_bb): Likewise.
+	(loongarch_disqualify_components): Likewise.
+	(loongarch_process_components): Likewise.
+	(loongarch_emit_prologue_components): Likewise.
+	(loongarch_emit_epilogue_components): Likewise.
+	(loongarch_set_handled_components): Likewise.
+	(TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS): Define.
+	(TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB): Likewise.
+	(TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS): Likewise.
+	(TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS): Likewise.
+	(TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS): Likewise.
+	(TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS): Likewise.
+	(loongarch_for_each_saved_reg): Skip registers that are wrapped
+	separately.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/shrink-wrap.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.cc             | 179 +++++++++++++++++-
+ gcc/config/loongarch/loongarch.h              |   2 +
+ .../gcc.target/loongarch/shrink-wrap.c        |  19 ++
+ 3 files changed, 197 insertions(+), 3 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index d3c6f22ad..4c0f393b6 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3.  If not see
+ #include "builtins.h"
+ #include "rtl-iter.h"
+ #include "opts.h"
+#include "function-abi.h"
+ 
+ /* This file should be included last.  */
+ #include "target-def.h"
+@@ -1014,19 +1015,23 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
+   for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+     if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
+       {
+-	loongarch_save_restore_reg (word_mode, regno, offset, fn);
+	if (!cfun->machine->reg_is_wrapped_separately[regno])
+	  loongarch_save_restore_reg (word_mode, regno, offset, fn);
+
+ 	offset -= UNITS_PER_WORD;
+       }
+ 
+   /* This loop must iterate over the same space as its companion in
+      loongarch_compute_frame_info.  */
+   offset = cfun->machine->frame.fp_sp_offset - sp_offset;
+  machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
+
+   for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+     if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
+       {
+-	machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
+	if (!cfun->machine->reg_is_wrapped_separately[regno])
+	  loongarch_save_restore_reg (word_mode, regno, offset, fn);
+ 
+-	loongarch_save_restore_reg (mode, regno, offset, fn);
+ 	offset -= GET_MODE_SIZE (mode);
+       }
+ }
+@@ -6630,6 +6635,151 @@ loongarch_asan_shadow_offset (void)
+   return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
+ }
+ 
+static sbitmap
+loongarch_get_separate_components (void)
+{
+  HOST_WIDE_INT offset;
+  sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
+  bitmap_clear (components);
+  offset = cfun->machine->frame.gp_sp_offset;
+
+  /* The stack should be aligned to 16-bytes boundary, so we can make the use
+     of ldptr instructions.  */
+  gcc_assert (offset % UNITS_PER_WORD == 0);
+
+  for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
+      {
+	/* We can wrap general registers saved at [sp, sp + 32768) using the
+	   ldptr/stptr instructions.  For large offsets a pseudo register
+	   might be needed which cannot be created during the shrink
+	   wrapping pass.
+
+	   TODO: This may need a revise when we add LA32 as ldptr.w is not
+	   guaranteed available by the manual.  */
+	if (offset < 32768)
+	  bitmap_set_bit (components, regno);
+
+	offset -= UNITS_PER_WORD;
+      }
+
+  offset = cfun->machine->frame.fp_sp_offset;
+  for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+    if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
+      {
+	/* We can only wrap FP registers with imm12 offsets.  For large
+	   offsets a pseudo register might be needed which cannot be
+	   created during the shrink wrapping pass.  */
+	if (IMM12_OPERAND (offset))
+	  bitmap_set_bit (components, regno);
+
+	offset -= UNITS_PER_FPREG;
+      }
+
+  /* Don't mess with the hard frame pointer.  */
+  if (frame_pointer_needed)
+    bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
+
+  bitmap_clear_bit (components, RETURN_ADDR_REGNUM);
+
+  return components;
+}
+
+static sbitmap
+loongarch_components_for_bb (basic_block bb)
+{
+  /* Registers are used in a bb if they are in the IN, GEN, or KILL sets.  */
+  auto_bitmap used;
+  bitmap_copy (used, DF_LIVE_IN (bb));
+  bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->gen);
+  bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->kill);
+
+  sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
+  bitmap_clear (components);
+
+  function_abi_aggregator callee_abis;
+  rtx_insn *insn;
+  FOR_BB_INSNS (bb, insn)
+    if (CALL_P (insn))
+      callee_abis.note_callee_abi (insn_callee_abi (insn));
+
+  HARD_REG_SET extra_caller_saves =
+    callee_abis.caller_save_regs (*crtl->abi);
+
+  for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+    if (!fixed_regs[regno]
+	&& !crtl->abi->clobbers_full_reg_p (regno)
+	&& (TEST_HARD_REG_BIT (extra_caller_saves, regno) ||
+	    bitmap_bit_p (used, regno)))
+      bitmap_set_bit (components, regno);
+
+  for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+    if (!fixed_regs[regno]
+	&& !crtl->abi->clobbers_full_reg_p (regno)
+	&& (TEST_HARD_REG_BIT (extra_caller_saves, regno) ||
+	    bitmap_bit_p (used, regno)))
+      bitmap_set_bit (components, regno);
+
+  return components;
+}
+
+static void
+loongarch_disqualify_components (sbitmap, edge, sbitmap, bool)
+{
+  /* Do nothing.  */
+}
+
+static void
+loongarch_process_components (sbitmap components, loongarch_save_restore_fn fn)
+{
+  HOST_WIDE_INT offset = cfun->machine->frame.gp_sp_offset;
+
+  for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+    if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
+      {
+	if (bitmap_bit_p (components, regno))
+	  loongarch_save_restore_reg (word_mode, regno, offset, fn);
+
+	offset -= UNITS_PER_WORD;
+      }
+
+  offset = cfun->machine->frame.fp_sp_offset;
+  machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
+
+  for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+    if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
+      {
+	if (bitmap_bit_p (components, regno))
+	  loongarch_save_restore_reg (mode, regno, offset, fn);
+
+	offset -= UNITS_PER_FPREG;
+      }
+}
+
+static void
+loongarch_emit_prologue_components (sbitmap components)
+{
+  loongarch_process_components (components, loongarch_save_reg);
+}
+
+static void
+loongarch_emit_epilogue_components (sbitmap components)
+{
+  loongarch_process_components (components, loongarch_restore_reg);
+}
+
+static void
+loongarch_set_handled_components (sbitmap components)
+{
+    for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+      if (bitmap_bit_p (components, regno))
+	cfun->machine->reg_is_wrapped_separately[regno] = true;
+
+    for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+      if (bitmap_bit_p (components, regno))
+	cfun->machine->reg_is_wrapped_separately[regno] = true;
+}
+
+ /* Initialize the GCC target structure.  */
+ #undef TARGET_ASM_ALIGNED_HI_OP
+ #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
+@@ -6827,6 +6977,29 @@ loongarch_asan_shadow_offset (void)
+ #undef TARGET_ASAN_SHADOW_OFFSET
+ #define TARGET_ASAN_SHADOW_OFFSET loongarch_asan_shadow_offset
+ 
+#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
+#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
+  loongarch_get_separate_components
+
+#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
+#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB loongarch_components_for_bb
+
+#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
+#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
+  loongarch_disqualify_components
+
+#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
+  loongarch_emit_prologue_components
+
+#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
+  loongarch_emit_epilogue_components
+
+#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
+#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
+  loongarch_set_handled_components
+
+ struct gcc_target targetm = TARGET_INITIALIZER;
+ 
+ #include "gt-loongarch.h"
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index af24bfa01..44ebadfaa 100644
+--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
+@@ -1147,6 +1147,8 @@ struct GTY (()) machine_function
+   /* The current frame information, calculated by loongarch_compute_frame_info.
+    */
+   struct loongarch_frame_info frame;
+
+  bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];
+ };
+ #endif
+ 
+diff --git a/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c b/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
+new file mode 100644
+index 000000000..1431536c5
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
+@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fshrink-wrap" } */
+
+/* We should not save anything before checking the value of x.  */
+/* { dg-final { scan-assembler-not "st(ptr)?\\\.\[dw\].*b(eq|ne)z" } } */
+
+int
+foo (int x)
+{
+  __asm__ ("nop" :);
+  if (x)
+    {
+      __asm__ ("" ::: "s0", "s1");
+      return x;
+    }
+
+  __asm__ ("" ::: "s2", "s3");
+  return 0;
+}
+-- 
+2.33.0
+
--- a/LoongArch-Fix-MUSL_DYNAMIC_LINKER.patch
+++ b/LoongArch-Fix-MUSL_DYNAMIC_LINKER.patch
@ -0,0 +1,43 @@
+From 3db61acfbaa773568fad2bc31d950c6d9b3729b0 Mon Sep 17 00:00:00 2001
+From: Peng Fan <fanpeng@loongson.cn>
+Date: Wed, 19 Apr 2023 16:23:42 +0800
+Subject: [PATCH 044/124] LoongArch: Fix MUSL_DYNAMIC_LINKER
+
+Systems based on musl have no '/lib64', so use '/lib' for the dynamic linker.
+
+See the "Multilib/multi-arch" section of
+https://wiki.musl-libc.org/guidelines-for-distributions.html for details.
+
+gcc/
+	* config/loongarch/gnu-user.h (MUSL_DYNAMIC_LINKER): Redefine.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Suggested-by: Xi Ruoyao <xry111@xry111.site>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/gnu-user.h | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
+index 1dc6add62..44e4f2575 100644
+--- a/gcc/config/loongarch/gnu-user.h
+++ b/gcc/config/loongarch/gnu-user.h
+@@ -33,9 +33,14 @@ along with GCC; see the file COPYING3.  If not see
+ #define GLIBC_DYNAMIC_LINKER \
+   "/lib" ABI_GRLEN_SPEC "/ld-linux-loongarch-" ABI_SPEC ".so.1"
+ 
+#define MUSL_ABI_SPEC \
+  "%{mabi=lp64d:-lp64d}" \
+  "%{mabi=lp64f:-lp64f}" \
+  "%{mabi=lp64s:-lp64s}"
+
+ #undef MUSL_DYNAMIC_LINKER
+ #define MUSL_DYNAMIC_LINKER \
+-  "/lib" ABI_GRLEN_SPEC "/ld-musl-loongarch-" ABI_SPEC ".so.1"
+  "/lib/ld-musl-loongarch" ABI_GRLEN_SPEC MUSL_ABI_SPEC ".so.1"
+ 
+ #undef GNU_USER_TARGET_LINK_SPEC
+ #define GNU_USER_TARGET_LINK_SPEC \
+-- 
+2.33.0
+
--- a/LoongArch-Fix-bug-in-loongarch_emit_stack_tie-PR1104.patch
+++ b/LoongArch-Fix-bug-in-loongarch_emit_stack_tie-PR1104.patch
@ -0,0 +1,43 @@
+From 7c8fc6b414dc1718e71e0d05c7a78498e06eb499 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Thu, 29 Jun 2023 19:30:59 +0800
+Subject: [PATCH 053/124] LoongArch: Fix bug in loongarch_emit_stack_tie
+ [PR110484].
+
+The stack tie always referenced $fp implicitly, even when frame_pointer_needed is false.
+That may make regs_ever_live[$fp] true when $fp is not explicitly used,
+resulting in $fp being used as the target replacement register in the rnreg pass.
+
+The bug originates from SPEC2017 541.leela_r(-flto).
+
+gcc/ChangeLog:
+
+	PR target/110484
+	* config/loongarch/loongarch.cc (loongarch_emit_stack_tie): Use the
+	frame_pointer_needed to determine whether to use the $fp register.
+
+Co-authored-by: Guo Jie <guojie@loongson.cn>
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.cc | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index caacfa8a3..7b48e3216 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -1109,7 +1109,9 @@ loongarch_first_stack_step (struct loongarch_frame_info *frame)
+ static void
+ loongarch_emit_stack_tie (void)
+ {
+-  emit_insn (gen_stack_tie (Pmode, stack_pointer_rtx, hard_frame_pointer_rtx));
+  emit_insn (gen_stack_tie (Pmode, stack_pointer_rtx,
+			    frame_pointer_needed ? hard_frame_pointer_rtx
+			    : stack_pointer_rtx));
+ }
+ 
+ #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
+-- 
+2.33.0
+
--- a/LoongArch-Fix-bug-of-optab-di3_fake.patch
+++ b/LoongArch-Fix-bug-of-optab-di3_fake.patch
@ -0,0 +1,123 @@
+From df1df2e7b7e27bd9fba77f572d74d833aff4a202 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Mon, 11 Sep 2023 16:20:29 +0800
+Subject: [PATCH 122/124] LoongArch: Fix bug of '<optab>di3_fake'.
+
+	PR target/111334
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md: Fix bug of '<optab>di3_fake'.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/pr111334.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.md             | 20 ++++++----
+ gcc/testsuite/gcc.target/loongarch/pr111334.c | 39 +++++++++++++++++++
+ 2 files changed, 52 insertions(+), 7 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/pr111334.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 264cd325c..7746116e6 100644
+--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
+@@ -72,6 +72,9 @@
+   UNSPEC_LUI_H_HI12
+   UNSPEC_TLS_LOW
+ 
+  ;; Fake div.w[u] mod.w[u]
+  UNSPEC_FAKE_ANY_DIV
+
+   UNSPEC_SIBCALL_VALUE_MULTIPLE_INTERNAL_1
+   UNSPEC_CALL_VALUE_MULTIPLE_INTERNAL_1
+ ])
+@@ -900,7 +903,7 @@
+ 		     (match_operand:GPR 2 "register_operand")))]
+   ""
+ {
+- if (GET_MODE (operands[0]) == SImode)
+ if (GET_MODE (operands[0]) == SImode && TARGET_64BIT)
+   {
+     rtx reg1 = gen_reg_rtx (DImode);
+     rtx reg2 = gen_reg_rtx (DImode);
+@@ -920,9 +923,9 @@
+ })
+ 
+ (define_insn "*<optab><mode>3"
+-  [(set (match_operand:GPR 0 "register_operand" "=r,&r,&r")
+-	(any_div:GPR (match_operand:GPR 1 "register_operand" "r,r,0")
+-		     (match_operand:GPR 2 "register_operand" "r,r,r")))]
+  [(set (match_operand:X 0 "register_operand" "=r,&r,&r")
+	(any_div:X (match_operand:X 1 "register_operand" "r,r,0")
+		   (match_operand:X 2 "register_operand" "r,r,r")))]
+   ""
+ {
+   return loongarch_output_division ("<insn>.<d><u>\t%0,%1,%2", operands);
+@@ -938,9 +941,12 @@
+ (define_insn "<optab>di3_fake"
+   [(set (match_operand:DI 0 "register_operand" "=r,&r,&r")
+ 	(sign_extend:DI
+-	  (any_div:SI (match_operand:DI 1 "register_operand" "r,r,0")
+-		      (match_operand:DI 2 "register_operand" "r,r,r"))))]
+-  ""
+	  (unspec:SI
+	   [(subreg:SI
+	     (any_div:DI (match_operand:DI 1 "register_operand" "r,r,0")
+			 (match_operand:DI 2 "register_operand" "r,r,r")) 0)]
+	  UNSPEC_FAKE_ANY_DIV)))]
+  "TARGET_64BIT"
+ {
+   return loongarch_output_division ("<insn>.w<u>\t%0,%1,%2", operands);
+ }
+diff --git a/gcc/testsuite/gcc.target/loongarch/pr111334.c b/gcc/testsuite/gcc.target/loongarch/pr111334.c
+new file mode 100644
+index 000000000..47366afcb
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr111334.c
+@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned
+util_next_power_of_two (unsigned x)
+{
+  return (1 << __builtin_clz (x - 1));
+}
+
+extern int create_vec_from_array (void);
+
+struct ac_shader_args {
+    struct {
+	unsigned char offset;
+	unsigned char size;
+    } args[384];
+};
+
+struct isel_context {
+    const struct ac_shader_args* args;
+    int arg_temps[384];
+};
+
+
+void
+add_startpgm (struct isel_context* ctx, unsigned short arg_count)
+{
+
+  for (unsigned i = 0, arg = 0; i < arg_count; i++)
+    {
+      unsigned size = ctx->args->args[i].size;
+      unsigned reg = ctx->args->args[i].offset;
+
+      if (reg % ( 4 < util_next_power_of_two (size)
+		 ? 4 : util_next_power_of_two (size)))
+	  ctx->arg_temps[i] = create_vec_from_array ();
+    }
+}
+
+-- 
+2.33.0
+
--- a/LoongArch-Fix-pr106828-by-define-hook-TARGET_ASAN_SH.patch
+++ b/LoongArch-Fix-pr106828-by-define-hook-TARGET_ASAN_SH.patch
@ -0,0 +1,69 @@
+From a70fe51d9813d490a89cbc8da1ae4b040bf8b37e Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Wed, 7 Sep 2022 11:25:45 +0800
+Subject: [PATCH 017/124] LoongArch: Fix pr106828 by define hook
+ TARGET_ASAN_SHADOW_OFFSET in loongarch backend [PR106828].
+
+gcc/ChangeLog:
+
+	PR target/106828
+	* config/loongarch/loongarch.cc (loongarch_asan_shadow_offset): New.
+	(TARGET_ASAN_SHADOW_OFFSET): New.
+
+gcc/testsuite/ChangeLog:
+
+	PR target/106828
+	* g++.target/loongarch/pr106828.C: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.cc             | 13 +++++++++++++
+ gcc/testsuite/g++.target/loongarch/pr106828.C |  4 ++++
+ 2 files changed, 17 insertions(+)
+ create mode 100644 gcc/testsuite/g++.target/loongarch/pr106828.C
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index c9187bf81..98c0e26cd 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -6466,6 +6466,16 @@ loongarch_use_anchors_for_symbol_p (const_rtx symbol)
+   return default_use_anchors_for_symbol_p (symbol);
+ }
+ 
+/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
+
+static unsigned HOST_WIDE_INT
+loongarch_asan_shadow_offset (void)
+{
+  /* We only have libsanitizer support for LOONGARCH64 at present.
+     This value is taken from the file libsanitizer/asan/asan_mappint.h.  */
+  return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
+}
+
+ /* Initialize the GCC target structure.  */
+ #undef TARGET_ASM_ALIGNED_HI_OP
+ #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
+@@ -6660,6 +6670,9 @@ loongarch_use_anchors_for_symbol_p (const_rtx symbol)
+ #undef  TARGET_USE_ANCHORS_FOR_SYMBOL_P
+ #define TARGET_USE_ANCHORS_FOR_SYMBOL_P loongarch_use_anchors_for_symbol_p
+ 
+#undef TARGET_ASAN_SHADOW_OFFSET
+#define TARGET_ASAN_SHADOW_OFFSET loongarch_asan_shadow_offset
+
+ struct gcc_target targetm = TARGET_INITIALIZER;
+ 
+ #include "gt-loongarch.h"
+diff --git a/gcc/testsuite/g++.target/loongarch/pr106828.C b/gcc/testsuite/g++.target/loongarch/pr106828.C
+new file mode 100644
+index 000000000..190c1db71
+--- /dev/null
+++ b/gcc/testsuite/g++.target/loongarch/pr106828.C
+@@ -0,0 +1,4 @@
+/* { dg-do-preprocess } */
+/* { dg-options "-mabi=lp64d -fsanitize=address" } */
+
+/* Tests whether the compiler supports compile option '-fsanitize=address'.  */
+-- 
+2.33.0
+
--- a/LoongArch-Fix-unintentional-bash-ism-in-r14-3665.patch
+++ b/LoongArch-Fix-unintentional-bash-ism-in-r14-3665.patch
@ -0,0 +1,31 @@
+From 8e5c9f349877af07dde4804974d47625c1292956 Mon Sep 17 00:00:00 2001
+From: Yang Yujie <yangyujie@loongson.cn>
+Date: Wed, 6 Sep 2023 17:57:47 +0800
+Subject: [PATCH 070/124] LoongArch: Fix unintentional bash-ism in r14-3665.
+
+gcc/ChangeLog:
+
+	* config.gcc: remove non-POSIX syntax "<<<".
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config.gcc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config.gcc b/gcc/config.gcc
+index 19f584344..57e724080 100644
+--- a/gcc/config.gcc
+++ b/gcc/config.gcc
+@@ -5263,7 +5263,7 @@ case "${target}" in
+ 				if test x${parse_state} = x"abi-base"; then
+ 					# Base ABI type
+ 					case ${component} in
+-					lp64d | lp64f | lp64s) elem_tmp="ABI_BASE_$(tr a-z A-Z <<< ${component}),";;
+					lp64d | lp64f | lp64s) elem_tmp="ABI_BASE_$(echo ${component} | tr a-z A-Z),";;
+ 					*)
+ 						echo "Unknown base ABI \"${component}\" in --with-multilib-list." 1>&2
+ 						exit 1
+-- 
+2.33.0
+
--- a/LoongArch-Fix-unintentionally-breakage-in-r14-3665.patch
+++ b/LoongArch-Fix-unintentionally-breakage-in-r14-3665.patch
@ -0,0 +1,34 @@
+From 8de6f5e1aad2a1ff85ff3a4b732055d625c61139 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 5 Sep 2023 20:02:51 +0800
+Subject: [PATCH 067/124] LoongArch: Fix unintentionally breakage in r14-3665
+
+Fix a build failure with no system assembler or an old system assembler.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch-opts.h (HAVE_AS_EXPLICIT_RELOCS):
+	Define to 0 if not defined yet.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch-opts.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
+index e3f9b6f99..0d148e43b 100644
+--- a/gcc/config/loongarch/loongarch-opts.h
+++ b/gcc/config/loongarch/loongarch-opts.h
+@@ -93,4 +93,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
+    while -m[no]-memcpy imposes a global constraint.  */
+ #define TARGET_DO_OPTIMIZE_BLOCK_MOVE_P  loongarch_do_optimize_block_move_p()
+ 
+#ifndef HAVE_AS_EXPLICIT_RELOCS
+#define HAVE_AS_EXPLICIT_RELOCS 0
+#endif
+
+ #endif /* LOONGARCH_OPTS_H */
+-- 
+2.33.0
+
--- a/LoongArch-Fix-up-memcpy-vec-3.c-test-case.patch
+++ b/LoongArch-Fix-up-memcpy-vec-3.c-test-case.patch
@ -0,0 +1,33 @@
+From 78896e68f50164af7827e8da01a7220764d1e296 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sat, 9 Sep 2023 16:18:06 +0800
+Subject: [PATCH 075/124] LoongArch: Fix up memcpy-vec-3.c test case
+
+The generic code will split a 16-byte copy into two 8-byte copies, so the
+vector code wouldn't be used even with -mno-strict-align.  This
+contradicted the purpose of this test case.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/memcpy-vec-3.c: Increase the amount of
+	copied bytes to 32.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c b/gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c
+index 233ed2150..db2ea510b 100644
+--- a/gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c
+++ b/gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c
+@@ -3,4 +3,4 @@
+ /* { dg-final { scan-assembler-not "vst" } } */
+ 
+ extern char a[], b[];
+-void test() { __builtin_memcpy(a, b, 16); }
+void test() { __builtin_memcpy(a, b, 32); }
+-- 
+2.33.0
+
--- a/LoongArch-Fixed-a-bug-in-the-loongarch-architecture-.patch
+++ b/LoongArch-Fixed-a-bug-in-the-loongarch-architecture-.patch
@ -0,0 +1,43 @@
+From 80ed9ab39d9b1b08ad9d054f16d65b2a249a89e5 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Wed, 12 Oct 2022 11:02:11 +0800
+Subject: [PATCH 022/124] LoongArch: Fixed a bug in the loongarch architecture
+ of libitm package.
+
+Add a soft floating point condition to the register recovery part of the code.
+
+libitm/ChangeLog:
+
+	* config/loongarch/sjlj.S: Add a soft floating point condition to the
+	register recovery part of the code.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ libitm/config/loongarch/sjlj.S | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/libitm/config/loongarch/sjlj.S b/libitm/config/loongarch/sjlj.S
+index a5f9fadde..f896e400e 100644
+--- a/libitm/config/loongarch/sjlj.S
+++ b/libitm/config/loongarch/sjlj.S
+@@ -104,6 +104,8 @@ GTM_longjmp:
+         GPR_L  $s7, $r5, 10*SZ_GPR
+         GPR_L  $s8, $r5, 11*SZ_GPR
+ 
+#if !defined(__loongarch_soft_float)
+        /* Callee-saved scratch FPRs (f24-f31) */
+         FPR_L  $f24, $r5, 12*SZ_GPR + 0*SZ_FPR
+         FPR_L  $f25, $r5, 12*SZ_GPR + 1*SZ_FPR
+         FPR_L  $f26, $r5, 12*SZ_GPR + 2*SZ_FPR
+@@ -112,6 +114,7 @@ GTM_longjmp:
+         FPR_L  $f29, $r5, 12*SZ_GPR + 5*SZ_FPR
+         FPR_L  $f30, $r5, 12*SZ_GPR + 6*SZ_FPR
+         FPR_L  $f31, $r5, 12*SZ_GPR + 7*SZ_FPR
+#endif
+ 
+         GPR_L  $r7, $r5, 2*SZ_GPR
+         GPR_L  $fp, $r5, 0*SZ_GPR
+-- 
+2.33.0
+
--- a/LoongArch-Fixed-a-compilation-failure-with-c-in-inli.patch
+++ b/LoongArch-Fixed-a-compilation-failure-with-c-in-inli.patch
@ -0,0 +1,182 @@
+From 49a63dbaf3b4296f0b1f8a0e11790cc3455aeec7 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Wed, 18 Jan 2023 11:06:56 +0800
+Subject: [PATCH 034/124] LoongArch: Fixed a compilation failure with '%c' in
+ inline assembly [PR107731].
+
+Co-authored-by: Yang Yujie <yangyujie@loongson.cn>
+
+	PR target/107731
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_classify_address):
+	Add processing for CONST_INT.
+	(loongarch_print_operand_reloc): Operand modifier 'c' is supported.
+	(loongarch_print_operand): Add processing of '%c'.
+	* doc/extend.texi: Add documentation for LoongArch operand modifiers,
+	and port the generic operand modifier information to this document.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/tst-asm-const.c: Moved to...
+	* gcc.target/loongarch/pr107731.c: ...here.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.cc             | 14 +++++
+ gcc/doc/extend.texi                           | 51 +++++++++++++++++--
+ .../loongarch/{tst-asm-const.c => pr107731.c} |  6 +--
+ 3 files changed, 64 insertions(+), 7 deletions(-)
+ rename gcc/testsuite/gcc.target/loongarch/{tst-asm-const.c => pr107731.c} (78%)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index e59edc4cd..1a4686f03 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -2074,6 +2074,11 @@ loongarch_classify_address (struct loongarch_address_info *info, rtx x,
+       return (loongarch_valid_base_register_p (info->reg, mode, strict_p)
+ 	      && loongarch_valid_lo_sum_p (info->symbol_type, mode,
+ 					   info->offset));
+    case CONST_INT:
+      /* Small-integer addresses don't occur very often, but they
+	 are legitimate if $r0 is a valid base register.  */
+      info->type = ADDRESS_CONST_INT;
+      return IMM12_OPERAND (INTVAL (x));
+ 
+     default:
+       return false;
+@@ -4932,6 +4937,7 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
+ 
+    'A'	Print a _DB suffix if the memory model requires a release.
+    'b'	Print the address of a memory operand, without offset.
+   'c'  Print an integer.
+    'C'	Print the integer branch condition for comparison OP.
+    'd'	Print CONST_INT OP in decimal.
+    'F'	Print the FPU branch condition for comparison OP.
+@@ -4978,6 +4984,14 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
+        fputs ("_db", file);
+       break;
+ 
+    case 'c':
+      if (CONST_INT_P (op))
+	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op));
+      else
+	output_operand_lossage ("unsupported operand for code '%c'", letter);
+
+      break;
+
+     case 'C':
+       loongarch_print_int_branch_condition (file, code, letter);
+       break;
+diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
+index da2840c23..3c101ca89 100644
+--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
+@@ -10414,8 +10414,10 @@ ensures that modifying @var{a} does not affect the address referenced by
+ is undefined if @var{a} is modified before using @var{b}.
+ 
+ @code{asm} supports operand modifiers on operands (for example @samp{%k2} 
+-instead of simply @samp{%2}). Typically these qualifiers are hardware 
+-dependent. The list of supported modifiers for x86 is found at 
+instead of simply @samp{%2}). @ref{GenericOperandmodifiers,
+Generic Operand modifiers} lists the modifiers that are available
+on all targets.  Other modifiers are hardware dependent.
+For example, the list of supported modifiers for x86 is found at
+ @ref{x86Operandmodifiers,x86 Operand modifiers}.
+ 
+ If the C code that follows the @code{asm} makes no use of any of the output 
+@@ -10683,8 +10685,10 @@ optimizers may discard the @code{asm} statement as unneeded
+ (see @ref{Volatile}).
+ 
+ @code{asm} supports operand modifiers on operands (for example @samp{%k2} 
+-instead of simply @samp{%2}). Typically these qualifiers are hardware 
+-dependent. The list of supported modifiers for x86 is found at 
+instead of simply @samp{%2}). @ref{GenericOperandmodifiers,
+Generic Operand modifiers} lists the modifiers that are available
+on all targets.  Other modifiers are hardware dependent.
+For example, the list of supported modifiers for x86 is found at
+ @ref{x86Operandmodifiers,x86 Operand modifiers}.
+ 
+ In this example using the fictitious @code{combine} instruction, the 
+@@ -11036,6 +11040,30 @@ lab:
+ @}
+ @end example
+ 
+@anchor{GenericOperandmodifiers}
+@subsubsection Generic Operand Modifiers
+@noindent
+The following table shows the modifiers supported by all targets and their effects:
+
+@multitable {Modifier} {Description} {Example}
+@headitem Modifier @tab Description @tab Example
+@item @code{c}
+@tab Require a constant operand and print the constant expression with no punctuation.
+@tab @code{%c0}
+@item @code{n}
+@tab Like @samp{%c} except that the value of the constant is negated before printing.
+@tab @code{%n0}
+@item @code{a}
+@tab Substitute a memory reference, with the actual operand treated as the address.
+This may be useful when outputting a ``load address'' instruction, because
+often the assembler syntax for such an instruction requires you to write the
+operand as if it were a memory reference.
+@tab @code{%a0}
+@item @code{l}
+@tab Print the label name with no punctuation.
+@tab @code{%l0}
+@end multitable
+
+ @anchor{x86Operandmodifiers}
+ @subsubsection x86 Operand Modifiers
+ 
+@@ -11386,6 +11414,21 @@ constant.  Used to select the specified bit position.
+ @item @code{x} @tab Equivialent to @code{X}, but only for pointers.
+ @end multitable
+ 
+@anchor{loongarchOperandmodifiers}
+@subsubsection LoongArch Operand Modifiers
+
+The list below describes the supported modifiers and their effects for LoongArch.
+
+@multitable @columnfractions .10 .90
+@headitem Modifier @tab Description
+@item @code{d} @tab Same as @code{c}.
+@item @code{i} @tab Print the character ``@code{i}'' if the operand is not a register.
+@item @code{m} @tab Same as @code{c}, but the printed value is @code{operand - 1}.
+@item @code{X} @tab Print a constant integer operand in hexadecimal.
+@item @code{z} @tab Print the operand in its unmodified form, followed by a comma.
+@end multitable
+
+
+ @lowersections
+ @include md.texi
+ @raisesections
+diff --git a/gcc/testsuite/gcc.target/loongarch/tst-asm-const.c b/gcc/testsuite/gcc.target/loongarch/pr107731.c
+similarity index 78%
+rename from gcc/testsuite/gcc.target/loongarch/tst-asm-const.c
+rename to gcc/testsuite/gcc.target/loongarch/pr107731.c
+index 2e04b99e3..80d84c48c 100644
+--- a/gcc/testsuite/gcc.target/loongarch/tst-asm-const.c
+++ b/gcc/testsuite/gcc.target/loongarch/pr107731.c
+@@ -1,13 +1,13 @@
+-/* Test asm const. */
+ /* { dg-do compile } */
+ /* { dg-final { scan-assembler-times "foo:.*\\.long 1061109567.*\\.long 52" 1 } } */
+
+ int foo ()
+ {
+   __asm__ volatile (
+           "foo:"
+           "\n\t"
+-	  ".long %a0\n\t"
+-	  ".long %a1\n\t"
+	  ".long %c0\n\t"
+	  ".long %c1\n\t"
+ 	  :
+ 	  :"i"(0x3f3f3f3f), "i"(52)
+ 	  :
+-- 
+2.33.0
+
--- a/LoongArch-Fixed-a-typo-in-the-comment-information-of.patch
+++ b/LoongArch-Fixed-a-typo-in-the-comment-information-of.patch
@ -0,0 +1,33 @@
+From cbb5f181544e35b119fee4ed150bec24eee7179c Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Wed, 28 Sep 2022 16:35:06 +0800
+Subject: [PATCH 020/124] LoongArch: Fixed a typo in the comment information of
+ the function loongarch_asan_shadow_offset.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_asan_shadow_offset):
+	Fixed typo in "asan_mapping.h".
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.cc | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 98c0e26cd..e9ba3374e 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -6472,7 +6472,7 @@ static unsigned HOST_WIDE_INT
+ loongarch_asan_shadow_offset (void)
+ {
+   /* We only have libsanitizer support for LOONGARCH64 at present.
+-     This value is taken from the file libsanitizer/asan/asan_mappint.h.  */
+     This value is taken from the file libsanitizer/asan/asan_mapping.h.  */
+   return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
+ }
+ 
+-- 
+2.33.0
+
--- a/LoongArch-Generate-bytepick.-wd-for-suitable-bit-ope.patch
+++ b/LoongArch-Generate-bytepick.-wd-for-suitable-bit-ope.patch
@ -0,0 +1,196 @@
+From 9311c0f56086e38fe5e9bf4bbfc2e37d0f18347c Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 3 Feb 2023 17:06:06 +0800
+Subject: [PATCH 036/124] LoongArch: Generate bytepick.[wd] for suitable bit
+ operation pattern
+
+We can use bytepick.[wd] for
+
+    a << (8 * x) | b >> (8 * (sizeof(a) - x))
+
+while a and b are uint32_t or uint64_t.  This is useful for some cases,
+for example:
+https://sourceware.org/pipermail/libc-alpha/2023-February/145203.html
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (bytepick_w_ashift_amount):
+	New define_int_iterator.
+	(bytepick_d_ashift_amount): Likewise.
+	(bytepick_imm): New define_int_attr.
+	(bytepick_w_lshiftrt_amount): Likewise.
+	(bytepick_d_lshiftrt_amount): Likewise.
+	(bytepick_w_<bytepick_imm>): New define_insn template.
+	(bytepick_w_<bytepick_imm>_extend): Likewise.
+	(bytepick_d_<bytepick_imm>): Likewise.
+	(bytepick_w): Remove unused define_insn.
+	(bytepick_d): Likewise.
+	(UNSPEC_BYTEPICK_W): Remove unused unspec.
+	(UNSPEC_BYTEPICK_D): Likewise.
+	* config/loongarch/predicates.md (const_0_to_3_operand):
+	Remove unused define_predicate.
+	(const_0_to_7_operand): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+	* g++.target/loongarch/bytepick.C: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.md             | 60 ++++++++++++++-----
+ gcc/config/loongarch/predicates.md            |  8 ---
+ gcc/testsuite/g++.target/loongarch/bytepick.C | 32 ++++++++++
+ 3 files changed, 77 insertions(+), 23 deletions(-)
+ create mode 100644 gcc/testsuite/g++.target/loongarch/bytepick.C
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index f61db66d5..833b94753 100644
+--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
+@@ -48,8 +48,6 @@
+   UNSPEC_EH_RETURN
+ 
+   ;; Bit operation
+-  UNSPEC_BYTEPICK_W
+-  UNSPEC_BYTEPICK_D
+   UNSPEC_BITREV_4B
+   UNSPEC_BITREV_8B
+ 
+@@ -544,6 +542,27 @@
+ 				      (UNSPEC_FTINTRM "0")
+ 				      (UNSPEC_FTINTRP "0")])
+ 
+;; Iterators and attributes for bytepick.w and bytepick.d
+(define_int_iterator bytepick_w_ashift_amount [8 16 24])
+(define_int_attr bytepick_w_lshiftrt_amount [(8 "24")
+					     (16 "16")
+					     (24 "8")])
+(define_int_iterator bytepick_d_ashift_amount [8 16 24 32 40 48 56])
+(define_int_attr bytepick_d_lshiftrt_amount [(8 "56")
+					     (16 "48")
+					     (24 "40")
+					     (32 "32")
+					     (40 "24")
+					     (48 "16")
+					     (56 "8")])
+(define_int_attr bytepick_imm [(8 "1")
+				 (16 "2")
+				 (24 "3")
+				 (32 "4")
+				 (40 "5")
+				 (48 "6")
+				 (56 "7")])
+
+ ;;
+ ;;  ....................
+ ;;
+@@ -3364,24 +3383,35 @@
+   [(set_attr "type" "unknown")
+    (set_attr "mode" "<MODE>")])
+ 
+-(define_insn "bytepick_w"
+(define_insn "bytepick_w_<bytepick_imm>"
+   [(set (match_operand:SI 0 "register_operand" "=r")
+-	(unspec:SI [(match_operand:SI 1 "register_operand" "r")
+-		    (match_operand:SI 2 "register_operand" "r")
+-		    (match_operand:SI 3 "const_0_to_3_operand" "n")]
+-		    UNSPEC_BYTEPICK_W))]
+	(ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r")
+			  (const_int <bytepick_w_lshiftrt_amount>))
+		(ashift (match_operand:SI 2 "register_operand" "r")
+			(const_int bytepick_w_ashift_amount))))]
+   ""
+-  "bytepick.w\t%0,%1,%2,%z3"
+  "bytepick.w\t%0,%1,%2,<bytepick_imm>"
+   [(set_attr "mode" "SI")])
+ 
+-(define_insn "bytepick_d"
+(define_insn "bytepick_w_<bytepick_imm>_extend"
+   [(set (match_operand:DI 0 "register_operand" "=r")
+-	(unspec:DI [(match_operand:DI 1 "register_operand" "r")
+-		    (match_operand:DI 2 "register_operand" "r")
+-		    (match_operand:DI 3 "const_0_to_7_operand" "n")]
+-		    UNSPEC_BYTEPICK_D))]
+-  ""
+-  "bytepick.d\t%0,%1,%2,%z3"
+	(sign_extend:DI
+	  (ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r")
+			    (const_int <bytepick_w_lshiftrt_amount>))
+		  (ashift (match_operand:SI 2 "register_operand" "r")
+			  (const_int bytepick_w_ashift_amount)))))]
+  "TARGET_64BIT"
+  "bytepick.w\t%0,%1,%2,<bytepick_imm>"
+  [(set_attr "mode" "SI")])
+
+(define_insn "bytepick_d_<bytepick_imm>"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(ior:DI (lshiftrt (match_operand:DI 1 "register_operand" "r")
+			  (const_int <bytepick_d_lshiftrt_amount>))
+		(ashift (match_operand:DI 2 "register_operand" "r")
+			(const_int bytepick_d_ashift_amount))))]
+  "TARGET_64BIT"
+  "bytepick.d\t%0,%1,%2,<bytepick_imm>"
+   [(set_attr "mode" "DI")])
+ 
+ (define_insn "bitrev_4b"
+diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
+index 58c3dc226..3c32b2987 100644
+--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
+@@ -91,14 +91,6 @@
+   (ior (match_operand 0 "const_1_operand")
+        (match_operand 0 "register_operand")))
+ 
+-(define_predicate "const_0_to_3_operand"
+-  (and (match_code "const_int")
+-       (match_test "IN_RANGE (INTVAL (op), 0, 3)")))
+-
+-(define_predicate "const_0_to_7_operand"
+-  (and (match_code "const_int")
+-       (match_test "IN_RANGE (INTVAL (op), 0, 7)")))
+-
+ (define_predicate "lu52i_mask_operand"
+   (and (match_code "const_int")
+        (match_test "UINTVAL (op) == 0xfffffffffffff")))
+diff --git a/gcc/testsuite/g++.target/loongarch/bytepick.C b/gcc/testsuite/g++.target/loongarch/bytepick.C
+new file mode 100644
+index 000000000..a39e2fa65
+--- /dev/null
+++ b/gcc/testsuite/g++.target/loongarch/bytepick.C
+@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d" } */
+/* { dg-final { scan-assembler-times "bytepick.w\t\\\$r4,\\\$r5,\\\$r4" 3 } } */
+/* { dg-final { scan-assembler-times "bytepick.d\t\\\$r4,\\\$r5,\\\$r4" 7 } } */
+/* { dg-final { scan-assembler-not "slli.w" } } */
+
+template <class T, int offs>
+T
+merge (T a, T b)
+{
+  return a << offs | b >> (8 * sizeof (T) - offs);
+}
+
+using u32 = __UINT32_TYPE__;
+using u64 = __UINT64_TYPE__;
+using i64 = __INT64_TYPE__;
+
+template u32 merge<u32, 8> (u32, u32);
+template u32 merge<u32, 16> (u32, u32);
+template u32 merge<u32, 24> (u32, u32);
+
+template u64 merge<u64, 8> (u64, u64);
+template u64 merge<u64, 16> (u64, u64);
+template u64 merge<u64, 24> (u64, u64);
+template u64 merge<u64, 32> (u64, u64);
+template u64 merge<u64, 40> (u64, u64);
+template u64 merge<u64, 48> (u64, u64);
+template u64 merge<u64, 56> (u64, u64);
+
+/* we cannot use bytepick for the following cases */
+template i64 merge<i64, 8> (i64, i64);
+template u64 merge<u64, 42> (u64, u64);
+-- 
+2.33.0
+
--- a/LoongArch-Get-__tls_get_addr-address-through-got-tab.patch
+++ b/LoongArch-Get-__tls_get_addr-address-through-got-tab.patch
@ -0,0 +1,71 @@
+From a96dee6ba3c916f9a4329b196a0c5a1652fe294f Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Thu, 18 Aug 2022 09:57:14 +0800
+Subject: [PATCH 010/124] LoongArch: Get __tls_get_addr address through got
+ table when disable plt.
+
+Fix an ICE with TLS gd/ld variables when compiling with -fno-plt.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_call_tls_get_addr):
+	Get __tls_get_addr address through got table when disable plt.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/tls-gd-noplt.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.cc                 | 14 ++++++++++++--
+ gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c | 12 ++++++++++++
+ 2 files changed, 24 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 1b5af2c7d..76bf55ea4 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -2448,8 +2448,18 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
+ 	gcc_unreachable ();
+     }
+ 
+-  insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol,
+-						  const0_rtx));
+  if (flag_plt)
+    insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol,
+						    const0_rtx));
+  else
+    {
+      rtx dest = gen_reg_rtx (Pmode);
+      rtx high = gen_reg_rtx (Pmode);
+      loongarch_emit_move (high, gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
+      emit_insn (gen_ld_from_got (Pmode, dest, high, loongarch_tls_symbol));
+      insn = emit_call_insn (gen_call_value_internal (v0, dest, const0_rtx));
+    }
+
+   RTL_CONST_CALL_P (insn) = 1;
+   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
+   insn = get_insns ();
+diff --git a/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c b/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c
+new file mode 100644
+index 000000000..32a0acf9b
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c
+@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-plt -mcmodel=normal" } */
+/* { dg-final { scan-assembler "pcalau12i\t.*%got_pc_hi20\\(__tls_get_addr\\)" } } */
+
+__attribute__ ((tls_model ("global-dynamic"))) __thread int a;
+
+void
+test (void)
+{
+  a = 10;
+}
+
+-- 
+2.33.0
+
--- a/LoongArch-Implement-128-bit-floating-point-functions.patch
+++ b/LoongArch-Implement-128-bit-floating-point-functions.patch
@ -0,0 +1,204 @@
+From 12ab9eae9e8a5b83c778182f15c6216bcbc3dc36 Mon Sep 17 00:00:00 2001
+From: chenxiaolong <chenxiaolong@loongson.cn>
+Date: Fri, 1 Sep 2023 11:22:42 +0800
+Subject: [PATCH 054/124] LoongArch: Implement 128-bit floating point functions
+ in gcc.
+
+During implementation, float128_type_node is bound to the type "__float128"
+so that the compiler can correctly identify the type of the function.  The
+"q" suffix is associated with the "f128" function, which makes GCC more
+flexible in supporting different user input cases, implementing functions such
+as __builtin_{huge_valq, infq, fabsq, copysignq, nanq, nansq}.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch-builtins.cc (loongarch_init_builtins):
+	Associate the __float128 type to float128_type_node so that it can
+	be recognized by the compiler.
+	* config/loongarch/loongarch-c.cc (loongarch_cpu_cpp_builtins):
+	Add the flag "FLOAT128_TYPE" to gcc and associate a function
+	with the suffix "q" to "f128".
+	* doc/extend.texi: Add support for 128-bit floating-point functions on
+	the LoongArch architecture.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/math-float-128.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch-builtins.cc    |  5 ++
+ gcc/config/loongarch/loongarch-c.cc           | 11 +++
+ gcc/doc/extend.texi                           | 20 ++++-
+ .../gcc.target/loongarch/math-float-128.c     | 81 +++++++++++++++++++
+ 4 files changed, 114 insertions(+), 3 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/math-float-128.c
+
+diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
+index 64fe11168..cb0ea1664 100644
+--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
+@@ -256,6 +256,11 @@ loongarch_init_builtins (void)
+   unsigned int i;
+   tree type;
+ 
+  /* Register the type float128_type_node as a built-in type and
+     give it an alias "__float128".  */
+  (*lang_hooks.types.register_builtin_type) (float128_type_node,
+					    "__float128");
+
+   /* Iterate through all of the bdesc arrays, initializing all of the
+      builtin functions.  */
+   for (i = 0; i < ARRAY_SIZE (loongarch_builtins); i++)
+diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc
+index d6e3e19f0..f779a7355 100644
+--- a/gcc/config/loongarch/loongarch-c.cc
+++ b/gcc/config/loongarch/loongarch-c.cc
+@@ -99,6 +99,17 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile)
+   else
+     builtin_define ("__loongarch_frlen=0");
+ 
+  /* Add support for FLOAT128_TYPE on the LoongArch architecture.  */
+  builtin_define ("__FLOAT128_TYPE__");
+
+  /* Map the old _Float128 'q' builtins into the new 'f128' builtins.  */
+  builtin_define ("__builtin_fabsq=__builtin_fabsf128");
+  builtin_define ("__builtin_copysignq=__builtin_copysignf128");
+  builtin_define ("__builtin_nanq=__builtin_nanf128");
+  builtin_define ("__builtin_nansq=__builtin_nansf128");
+  builtin_define ("__builtin_infq=__builtin_inff128");
+  builtin_define ("__builtin_huge_valq=__builtin_huge_valf128");
+
+   /* Native Data Sizes.  */
+   builtin_define_with_int_value ("_LOONGARCH_SZINT", INT_TYPE_SIZE);
+   builtin_define_with_int_value ("_LOONGARCH_SZLONG", LONG_TYPE_SIZE);
+diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
+index 1d1bac255..bb19d0f27 100644
+--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
+@@ -1085,10 +1085,10 @@ types.
+ As an extension, GNU C and GNU C++ support additional floating
+ types, which are not supported by all targets.
+ @itemize @bullet
+-@item @code{__float128} is available on i386, x86_64, IA-64, and
+-hppa HP-UX, as well as on PowerPC GNU/Linux targets that enable
+@item @code{__float128} is available on i386, x86_64, IA-64, LoongArch
+and hppa HP-UX, as well as on PowerPC GNU/Linux targets that enable
+ the vector scalar (VSX) instruction set.  @code{__float128} supports
+-the 128-bit floating type.  On i386, x86_64, PowerPC, and IA-64
+the 128-bit floating type.  On i386, x86_64, PowerPC, LoongArch and IA-64,
+ other than HP-UX, @code{__float128} is an alias for @code{_Float128}.
+ On hppa and IA-64 HP-UX, @code{__float128} is an alias for @code{long
+ double}.
+@@ -16257,6 +16257,20 @@ function you need to include @code{larchintrin.h}.
+     void __break (imm0_32767)
+ @end smallexample
+ 
+Additional built-in functions are available for LoongArch family
+processors to efficiently use 128-bit floating-point (__float128)
+values.
+
+The following are the basic built-in functions supported.
+@smallexample
+__float128 __builtin_fabsq (__float128);
+__float128 __builtin_copysignq (__float128, __float128);
+__float128 __builtin_infq (void);
+__float128 __builtin_huge_valq (void);
+__float128 __builtin_nanq (const char *);
+__float128 __builtin_nansq (const char *);
+@end smallexample
+
+ @node MIPS DSP Built-in Functions
+ @subsection MIPS DSP Built-in Functions
+ 
+diff --git a/gcc/testsuite/gcc.target/loongarch/math-float-128.c b/gcc/testsuite/gcc.target/loongarch/math-float-128.c
+new file mode 100644
+index 000000000..387566a57
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/math-float-128.c
+@@ -0,0 +1,81 @@
+/* { dg-do compile } */
+/* { dg-options " -march=loongarch64 -O2 " } */
+/* { dg-final { scan-assembler-not "my_fabsq2:.*\\bl\t%plt\\(__builtin_fabsq\\).*my_fabsq2" } } */
+/* { dg-final { scan-assembler-not "my_copysignq2:.*\\bl\t%plt\\(__builtin_copysignq\\).*my_copysignq2" } } */
+/* { dg-final { scan-assembler-not "my_infq2:.*\\bl\t%plt\\(__builtin_infq\\).*my_infq2" } } */
+/* { dg-final { scan-assembler-not "my_huge_valq2:.*\\bl\t%plt\\(__builtin_huge_valq\\).*my_huge_valq2" } } */
+/* { dg-final { scan-assembler-not "my_nanq2:.*\\bl\t%plt\\(__builtin_nanq\\).*my_nanq2" } } */
+/* { dg-final { scan-assembler-not "my_nansq2:.*\\bl\t%plt\\(__builtin_nansq\\).*my_nansq2" } } */
+
+__float128
+my_fabsq1 (__float128 a)
+{
+  return __builtin_fabsq (a);
+}
+
+_Float128
+my_fabsq2 (_Float128 a)
+{
+  return __builtin_fabsq (a);
+}
+
+__float128
+my_copysignq1 (__float128 a, __float128 b)
+{
+  return __builtin_copysignq (a, b);
+}
+
+_Float128
+my_copysignq2 (_Float128 a, _Float128 b)
+{
+  return __builtin_copysignq (a, b);
+}
+
+__float128
+my_infq1 (void)
+{
+  return __builtin_infq ();
+}
+
+_Float128
+my_infq2 (void)
+{
+  return __builtin_infq ();
+}
+
+__float128
+my_huge_valq1 (void)
+{
+  return __builtin_huge_valq ();
+}
+
+_Float128
+my_huge_valq2 (void)
+{
+  return __builtin_huge_valq ();
+}
+
+__float128
+my_nanq1 (void)
+{
+  return __builtin_nanq ("");
+}
+
+_Float128
+my_nanq2 (void)
+{
+  return __builtin_nanq ("");
+}
+
+__float128
+my_nansq1 (void)
+{
+  return __builtin_nansq ("");
+}
+
+_Float128
+my_nansq2 (void)
+{
+  return __builtin_nansq ("");
+}
+
+-- 
+2.33.0
+
--- a/LoongArch-Improve-GAR-store-for-va_list.patch
+++ b/LoongArch-Improve-GAR-store-for-va_list.patch
@ -0,0 +1,83 @@
+From 4075f299ca6a5d15fdb46f877cbe11b7166a19ff Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 29 Mar 2023 01:36:09 +0800
+Subject: [PATCH 042/124] LoongArch: Improve GAR store for va_list
+
+LoongArch backend used to save all GARs for a function with variable
+arguments.  But sometimes a function only accepts variable arguments for
+a purpose like C++ function overloading.  For example, POSIX defines
+open() as:
+
+    int open(const char *path, int oflag, ...);
+
+But only two forms are actually used:
+
+    int open(const char *pathname, int flags);
+    int open(const char *pathname, int flags, mode_t mode);
+
+So it's obviously a waste to save all 8 GARs in open().  We can use the
+cfun->va_list_gpr_size field set by the stdarg pass to only save the
+GARs necessary to be saved.
+
+If the va_list escapes (for example, in fprintf() we pass it to
+vfprintf()), stdarg would set cfun->va_list_gpr_size to 255 so we
+don't need a special case.
+
+With this patch, only one GAR ($a2/$r6) is saved in open().  Ideally
+even this stack store should be omitted too, but doing so is not trivial
+and AFAIK there are no compilers (for any target) performing the "ideal"
+optimization here, see https://godbolt.org/z/n1YqWq9c9.
+
+Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for trunk
+(GCC 14 or now)?
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc
+	(loongarch_setup_incoming_varargs): Don't save more GARs than
+	cfun->va_list_gpr_size / UNITS_PER_WORD.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/va_arg.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/testsuite/gcc.target/loongarch/va_arg.c | 24 +++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/va_arg.c
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/va_arg.c b/gcc/testsuite/gcc.target/loongarch/va_arg.c
+new file mode 100644
+index 000000000..980c96d0e
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/va_arg.c
+@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Technically we shouldn't save any register for this function: it should be
+   compiled as if it accepts 3 named arguments.  But AFAIK no compilers can
+   achieve this "perfect" optimization now, so just ensure we are using the
+   knowledge provided by stdarg pass and we won't save GARs impossible to be
+   accessed with __builtin_va_arg () when the va_list does not escape.  */
+
+/* { dg-final { scan-assembler-not "st.*r7" } } */
+
+int
+test (int a0, ...)
+{
+  void *arg;
+  int a1, a2;
+
+  __builtin_va_start (arg, a0);
+  a1 = __builtin_va_arg (arg, int);
+  a2 = __builtin_va_arg (arg, int);
+  __builtin_va_end (arg);
+
+  return a0 + a1 + a2;
+}
+-- 
+2.33.0
+
--- a/LoongArch-Improve-cpymemsi-expansion-PR109465.patch
+++ b/LoongArch-Improve-cpymemsi-expansion-PR109465.patch
@ -0,0 +1,339 @@
+From 33fff578e7df7aa7e236efc6c9c85c595918d86a Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 12 Apr 2023 11:45:48 +0000
+Subject: [PATCH 043/124] LoongArch: Improve cpymemsi expansion [PR109465]
+
+We'd been generating really bad block move sequences, which kernel
+developers who tried __builtin_memcpy recently complained about.  To improve
+it:
+
+1. Take advantage of -mno-strict-align.  When it is set, set mode
+   size to UNITS_PER_WORD regardless of the alignment.
+2. Halve the mode size when (block size) % (mode size) != 0, instead of
+   falling back to ld.bu/st.b at once.
+3. Limit the length of block move sequence considering the number of
+   instructions, not the size of block.  When -mstrict-align is set and
+   the block is not aligned, the old size limit for straight-line
+   implementation (64 bytes) was definitely too large (we don't have 64
+   registers anyway).
+
+Change since v1: add a comment about the calculation of num_reg.
+
+gcc/ChangeLog:
+
+	PR target/109465
+	* config/loongarch/loongarch-protos.h
+	(loongarch_expand_block_move): Add a parameter as alignment RTX.
+	* config/loongarch/loongarch.h:
+	(LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER): Remove.
+	(LARCH_MAX_MOVE_BYTES_STRAIGHT): Remove.
+	(LARCH_MAX_MOVE_OPS_PER_LOOP_ITER): Define.
+	(LARCH_MAX_MOVE_OPS_STRAIGHT): Define.
+	(MOVE_RATIO): Use LARCH_MAX_MOVE_OPS_PER_LOOP_ITER instead of
+	LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER.
+	* config/loongarch/loongarch.cc (loongarch_expand_block_move):
+	Take the alignment from the parameter, but set it to
+	UNITS_PER_WORD if !TARGET_STRICT_ALIGN.  Limit the length of
+	straight-line implementation with LARCH_MAX_MOVE_OPS_STRAIGHT
+	instead of LARCH_MAX_MOVE_BYTES_STRAIGHT.
+	(loongarch_block_move_straight): When there are left-over bytes,
+	halve the mode size instead of falling back to byte mode at once.
+	(loongarch_block_move_loop): Limit the length of loop body with
+	LARCH_MAX_MOVE_OPS_PER_LOOP_ITER instead of
+	LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER.
+	* config/loongarch/loongarch.md (cpymemsi): Pass the alignment
+	to loongarch_expand_block_move.
+
+gcc/testsuite/ChangeLog:
+
+	PR target/109465
+	* gcc.target/loongarch/pr109465-1.c: New test.
+	* gcc.target/loongarch/pr109465-2.c: New test.
+	* gcc.target/loongarch/pr109465-3.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch-protos.h       |  2 +-
+ gcc/config/loongarch/loongarch.cc             | 95 +++++++++++--------
+ gcc/config/loongarch/loongarch.h              | 10 +-
+ gcc/config/loongarch/loongarch.md             |  3 +-
+ .../gcc.target/loongarch/pr109465-1.c         |  9 ++
+ .../gcc.target/loongarch/pr109465-2.c         |  9 ++
+ .../gcc.target/loongarch/pr109465-3.c         | 12 +++
+ 7 files changed, 91 insertions(+), 49 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/pr109465-1.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/pr109465-2.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/pr109465-3.c
+
+diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
+index 0a9b47722..3ac3b5e19 100644
+--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
+@@ -95,7 +95,7 @@ extern void loongarch_expand_conditional_trap (rtx);
+ #endif
+ extern void loongarch_set_return_address (rtx, rtx);
+ extern bool loongarch_move_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int);
+-extern bool loongarch_expand_block_move (rtx, rtx, rtx);
+extern bool loongarch_expand_block_move (rtx, rtx, rtx, rtx);
+ extern bool loongarch_do_optimize_block_move_p (void);
+ 
+ extern bool loongarch_expand_ext_as_unaligned_load (rtx, rtx, HOST_WIDE_INT,
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 233dddbac..d3c6f22ad 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -4456,41 +4456,46 @@ loongarch_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
+    Assume that the areas do not overlap.  */
+ 
+ static void
+-loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length)
+loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length,
+			       HOST_WIDE_INT delta)
+ {
+-  HOST_WIDE_INT offset, delta;
+-  unsigned HOST_WIDE_INT bits;
+  HOST_WIDE_INT offs, delta_cur;
+   int i;
+   machine_mode mode;
+   rtx *regs;
+ 
+-  bits = MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest)));
+-
+-  mode = int_mode_for_size (bits, 0).require ();
+-  delta = bits / BITS_PER_UNIT;
+  /* Calculate how many registers we'll need for the block move.
+     We'll emit length / delta move operations with delta as the size
+     first.  Then we may still have length % delta bytes not copied.
+     We handle these remaining bytes by move operations with smaller
+     (halved) sizes.  For example, if length = 21 and delta = 8, we'll
+     emit two ld.d/st.d pairs, one ld.w/st.w pair, and one ld.b/st.b
+     pair.  For each load/store pair we use a dedicated register to keep
+     the pipeline as populated as possible.  */
+  HOST_WIDE_INT num_reg = length / delta;
+  for (delta_cur = delta / 2; delta_cur != 0; delta_cur /= 2)
+    num_reg += !!(length & delta_cur);
+ 
+   /* Allocate a buffer for the temporary registers.  */
+-  regs = XALLOCAVEC (rtx, length / delta);
+  regs = XALLOCAVEC (rtx, num_reg);
+ 
+-  /* Load as many BITS-sized chunks as possible.  Use a normal load if
+-     the source has enough alignment, otherwise use left/right pairs.  */
+-  for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+  for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2)
+     {
+-      regs[i] = gen_reg_rtx (mode);
+-      loongarch_emit_move (regs[i], adjust_address (src, mode, offset));
+-    }
+      mode = int_mode_for_size (delta_cur * BITS_PER_UNIT, 0).require ();
+ 
+-  for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+-    loongarch_emit_move (adjust_address (dest, mode, offset), regs[i]);
+      for (; offs + delta_cur <= length; offs += delta_cur, i++)
+	{
+	  regs[i] = gen_reg_rtx (mode);
+	  loongarch_emit_move (regs[i], adjust_address (src, mode, offs));
+	}
+    }
+ 
+-  /* Mop up any left-over bytes.  */
+-  if (offset < length)
+  for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2)
+     {
+-      src = adjust_address (src, BLKmode, offset);
+-      dest = adjust_address (dest, BLKmode, offset);
+-      move_by_pieces (dest, src, length - offset,
+-		      MIN (MEM_ALIGN (src), MEM_ALIGN (dest)),
+-		      (enum memop_ret) 0);
+      mode = int_mode_for_size (delta_cur * BITS_PER_UNIT, 0).require ();
+
+      for (; offs + delta_cur <= length; offs += delta_cur, i++)
+	loongarch_emit_move (adjust_address (dest, mode, offs), regs[i]);
+     }
+ }
+ 
+@@ -4520,10 +4525,11 @@ loongarch_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
+ 
+ static void
+ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
+-			   HOST_WIDE_INT bytes_per_iter)
+			   HOST_WIDE_INT align)
+ {
+   rtx_code_label *label;
+   rtx src_reg, dest_reg, final_src, test;
+  HOST_WIDE_INT bytes_per_iter = align * LARCH_MAX_MOVE_OPS_PER_LOOP_ITER;
+   HOST_WIDE_INT leftover;
+ 
+   leftover = length % bytes_per_iter;
+@@ -4543,7 +4549,7 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
+   emit_label (label);
+ 
+   /* Emit the loop body.  */
+-  loongarch_block_move_straight (dest, src, bytes_per_iter);
+  loongarch_block_move_straight (dest, src, bytes_per_iter, align);
+ 
+   /* Move on to the next block.  */
+   loongarch_emit_move (src_reg,
+@@ -4560,7 +4566,7 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
+ 
+   /* Mop up any left-over bytes.  */
+   if (leftover)
+-    loongarch_block_move_straight (dest, src, leftover);
+    loongarch_block_move_straight (dest, src, leftover, align);
+   else
+     /* Temporary fix for PR79150.  */
+     emit_insn (gen_nop ());
+@@ -4570,25 +4576,32 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
+    memory reference SRC to memory reference DEST.  */
+ 
+ bool
+-loongarch_expand_block_move (rtx dest, rtx src, rtx length)
+loongarch_expand_block_move (rtx dest, rtx src, rtx r_length, rtx r_align)
+ {
+-  int max_move_bytes = LARCH_MAX_MOVE_BYTES_STRAIGHT;
+  if (!CONST_INT_P (r_length))
+    return false;
+
+  HOST_WIDE_INT length = INTVAL (r_length);
+  if (length > loongarch_max_inline_memcpy_size)
+    return false;
+
+  HOST_WIDE_INT align = INTVAL (r_align);
+
+  if (!TARGET_STRICT_ALIGN || align > UNITS_PER_WORD)
+    align = UNITS_PER_WORD;
+ 
+-  if (CONST_INT_P (length)
+-      && INTVAL (length) <= loongarch_max_inline_memcpy_size)
+  if (length <= align * LARCH_MAX_MOVE_OPS_STRAIGHT)
+     {
+-      if (INTVAL (length) <= max_move_bytes)
+-	{
+-	  loongarch_block_move_straight (dest, src, INTVAL (length));
+-	  return true;
+-	}
+-      else if (optimize)
+-	{
+-	  loongarch_block_move_loop (dest, src, INTVAL (length),
+-				     LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER);
+-	  return true;
+-	}
+      loongarch_block_move_straight (dest, src, length, align);
+      return true;
+    }
+
+  if (optimize)
+    {
+      loongarch_block_move_loop (dest, src, length, align);
+      return true;
+     }
+
+   return false;
+ }
+ 
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 9d3cd9ca0..af24bfa01 100644
+--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
+@@ -1062,13 +1062,13 @@ typedef struct {
+ 
+ /* The maximum number of bytes that can be copied by one iteration of
+    a cpymemsi loop; see loongarch_block_move_loop.  */
+-#define LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER (UNITS_PER_WORD * 4)
+#define LARCH_MAX_MOVE_OPS_PER_LOOP_ITER 4
+ 
+ /* The maximum number of bytes that can be copied by a straight-line
+    implementation of cpymemsi; see loongarch_block_move_straight.  We want
+    to make sure that any loop-based implementation will iterate at
+    least twice.  */
+-#define LARCH_MAX_MOVE_BYTES_STRAIGHT (LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER * 2)
+#define LARCH_MAX_MOVE_OPS_STRAIGHT (LARCH_MAX_MOVE_OPS_PER_LOOP_ITER * 2)
+ 
+ /* The base cost of a memcpy call, for MOVE_RATIO and friends.  These
+    values were determined experimentally by benchmarking with CSiBE.
+@@ -1076,7 +1076,7 @@ typedef struct {
+ #define LARCH_CALL_RATIO 8
+ 
+ /* Any loop-based implementation of cpymemsi will have at least
+-   LARCH_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD memory-to-memory
+   LARCH_MAX_MOVE_OPS_PER_LOOP_ITER memory-to-memory
+    moves, so allow individual copies of fewer elements.
+ 
+    When cpymemsi is not available, use a value approximating
+@@ -1087,9 +1087,7 @@ typedef struct {
+    value of LARCH_CALL_RATIO to take that into account.  */
+ 
+ #define MOVE_RATIO(speed) \
+-  (HAVE_cpymemsi \
+-   ? LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER / UNITS_PER_WORD \
+-   : CLEAR_RATIO (speed) / 2)
+  (HAVE_cpymemsi ? LARCH_MAX_MOVE_OPS_PER_LOOP_ITER : CLEAR_RATIO (speed) / 2)
+ 
+ /* For CLEAR_RATIO, when optimizing for size, give a better estimate
+    of the length of a memset call, but use the default otherwise.  */
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index b2f7c7f78..b23248c33 100644
+--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
+@@ -2488,7 +2488,8 @@
+   ""
+ {
+   if (TARGET_DO_OPTIMIZE_BLOCK_MOVE_P
+-      && loongarch_expand_block_move (operands[0], operands[1], operands[2]))
+      && loongarch_expand_block_move (operands[0], operands[1],
+				      operands[2], operands[3]))
+     DONE;
+   else
+     FAIL;
+diff --git a/gcc/testsuite/gcc.target/loongarch/pr109465-1.c b/gcc/testsuite/gcc.target/loongarch/pr109465-1.c
+new file mode 100644
+index 000000000..4cd35d139
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr109465-1.c
+@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d -mno-strict-align" } */
+/* { dg-final { scan-assembler-times "st\\.d|stptr\\.d" 1 } } */
+/* { dg-final { scan-assembler-times "st\\.w|stptr\\.w" 1 } } */
+/* { dg-final { scan-assembler-times "st\\.h" 1 } } */
+/* { dg-final { scan-assembler-times "st\\.b" 1 } } */
+
+extern char a[], b[];
+void test() { __builtin_memcpy(a, b, 15); }
+diff --git a/gcc/testsuite/gcc.target/loongarch/pr109465-2.c b/gcc/testsuite/gcc.target/loongarch/pr109465-2.c
+new file mode 100644
+index 000000000..703eb951c
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr109465-2.c
+@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d -mstrict-align" } */
+/* { dg-final { scan-assembler-times "st\\.d|stptr\\.d" 1 } } */
+/* { dg-final { scan-assembler-times "st\\.w|stptr\\.w" 1 } } */
+/* { dg-final { scan-assembler-times "st\\.h" 1 } } */
+/* { dg-final { scan-assembler-times "st\\.b" 1 } } */
+
+extern long a[], b[];
+void test() { __builtin_memcpy(a, b, 15); }
+diff --git a/gcc/testsuite/gcc.target/loongarch/pr109465-3.c b/gcc/testsuite/gcc.target/loongarch/pr109465-3.c
+new file mode 100644
+index 000000000..d6a80659b
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr109465-3.c
+@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d -mstrict-align" } */
+
+/* The loop body (run three times) contains 4 st.b; 3 more st.b follow the loop.  */
+/* { dg-final { scan-assembler-times "st\\.b" 7 } } */
+
+/* { dg-final { scan-assembler-not "st\\.h" } } */
+/* { dg-final { scan-assembler-not "st\\.w|stptr\\.w" } } */
+/* { dg-final { scan-assembler-not "st\\.d|stptr\\.d" } } */
+
+extern char a[], b[];
+void test() { __builtin_memcpy(a, b, 15); }
+-- 
+2.33.0
+
--- a/LoongArch-Libitm-add-LoongArch-support.patch
+++ b/LoongArch-Libitm-add-LoongArch-support.patch
@ -0,0 +1,291 @@
+From 7f9f1dd3c87cffeab58150997e22e8fff707646b Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Mon, 26 Sep 2022 09:42:51 +0800
+Subject: [PATCH 019/124] LoongArch: Libitm add LoongArch support.
+
+Co-Authored-By: Yang Yujie <yangyujie@loongson.cn>
+
+libitm/ChangeLog:
+
+	* configure.tgt: Add loongarch support.
+	* config/loongarch/asm.h: New file.
+	* config/loongarch/sjlj.S: New file.
+	* config/loongarch/target.h: New file.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ libitm/config/loongarch/asm.h    |  54 +++++++++++++
+ libitm/config/loongarch/sjlj.S   | 127 +++++++++++++++++++++++++++++++
+ libitm/config/loongarch/target.h |  50 ++++++++++++
+ libitm/configure.tgt             |   2 +
+ 4 files changed, 233 insertions(+)
+ create mode 100644 libitm/config/loongarch/asm.h
+ create mode 100644 libitm/config/loongarch/sjlj.S
+ create mode 100644 libitm/config/loongarch/target.h
+
+diff --git a/libitm/config/loongarch/asm.h b/libitm/config/loongarch/asm.h
+new file mode 100644
+index 000000000..a8e3304bb
+--- /dev/null
+++ b/libitm/config/loongarch/asm.h
+@@ -0,0 +1,54 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+   Contributed by Loongson Co. Ltd.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _LA_ASM_H
+#define _LA_ASM_H
+
+#if defined(__loongarch_lp64)
+#  define GPR_L ld.d
+#  define GPR_S st.d
+#  define SZ_GPR 8
+#  define ADDSP(si)   addi.d  $sp, $sp, si
+#elif defined(__loongarch64_ilp32)
+#  define GPR_L ld.w
+#  define GPR_S st.w
+#  define SZ_GPR 4
+#  define ADDSP(si)   addi.w  $sp, $sp, si
+#else
+#  error Unsupported GPR size (must be 64-bit or 32-bit).
+#endif
+
+#if defined(__loongarch_double_float)
+#  define FPR_L fld.d
+#  define FPR_S fst.d
+#  define SZ_FPR 8
+#elif defined(__loongarch_single_float)
+#  define FPR_L fld.s
+#  define FPR_S fst.s
+#  define SZ_FPR 4
+#else
+#  define SZ_FPR 0
+#endif
+
+#endif  /* _LA_ASM_H */
+diff --git a/libitm/config/loongarch/sjlj.S b/libitm/config/loongarch/sjlj.S
+new file mode 100644
+index 000000000..a5f9fadde
+--- /dev/null
+++ b/libitm/config/loongarch/sjlj.S
+@@ -0,0 +1,127 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+   Contributed by Loongson Co. Ltd.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include "asmcfi.h"
+#include "asm.h"
+
+	.text
+	.align	2
+	.global	_ITM_beginTransaction
+	.type	_ITM_beginTransaction, @function
+
+_ITM_beginTransaction:
+        cfi_startproc
+        move   $r5, $sp
+        ADDSP(-(12*SZ_GPR+8*SZ_FPR))
+        cfi_adjust_cfa_offset(12*SZ_GPR+8*SZ_FPR)
+
+        /* Frame Pointer */
+        GPR_S  $fp, $sp, 0*SZ_GPR
+        cfi_rel_offset(22, 0)
+
+        /* Return Address */
+        GPR_S  $r1, $sp, 1*SZ_GPR
+        cfi_rel_offset(1, SZ_GPR)
+
+        /* Caller's $sp */
+        GPR_S  $r5, $sp, 2*SZ_GPR
+
+        /* Callee-saved scratch GPRs (r23-r31) */
+        GPR_S  $s0, $sp, 3*SZ_GPR
+        GPR_S  $s1, $sp, 4*SZ_GPR
+        GPR_S  $s2, $sp, 5*SZ_GPR
+        GPR_S  $s3, $sp, 6*SZ_GPR
+        GPR_S  $s4, $sp, 7*SZ_GPR
+        GPR_S  $s5, $sp, 8*SZ_GPR
+        GPR_S  $s6, $sp, 9*SZ_GPR
+        GPR_S  $s7, $sp, 10*SZ_GPR
+        GPR_S  $s8, $sp, 11*SZ_GPR
+
+#if !defined(__loongarch_soft_float)
+        /* Callee-saved scratch FPRs (f24-f31) */
+        FPR_S  $f24, $sp, 12*SZ_GPR + 0*SZ_FPR
+        FPR_S  $f25, $sp, 12*SZ_GPR + 1*SZ_FPR
+        FPR_S  $f26, $sp, 12*SZ_GPR + 2*SZ_FPR
+        FPR_S  $f27, $sp, 12*SZ_GPR + 3*SZ_FPR
+        FPR_S  $f28, $sp, 12*SZ_GPR + 4*SZ_FPR
+        FPR_S  $f29, $sp, 12*SZ_GPR + 5*SZ_FPR
+        FPR_S  $f30, $sp, 12*SZ_GPR + 6*SZ_FPR
+        FPR_S  $f31, $sp, 12*SZ_GPR + 7*SZ_FPR
+#endif
+        move   $fp, $sp
+
+        /* Invoke GTM_begin_transaction with the struct we've just built.  */
+        move   $r5, $sp
+        bl     %plt(GTM_begin_transaction)
+
+        /* Return. (no call-saved scratch reg needs to be restored here)  */
+        GPR_L  $fp, $sp, 0*SZ_GPR
+        cfi_restore(22)
+        GPR_L  $r1, $sp, 1*SZ_GPR
+        cfi_restore(1)
+
+        ADDSP(12*SZ_GPR+8*SZ_FPR)
+        cfi_adjust_cfa_offset(-(12*SZ_GPR+8*SZ_FPR))
+
+        jr     $r1
+        cfi_endproc
+	.size	_ITM_beginTransaction, . - _ITM_beginTransaction
+
+	.align	2
+	.global	GTM_longjmp
+	.hidden	GTM_longjmp
+	.type	GTM_longjmp, @function
+
+GTM_longjmp:
+        cfi_startproc
+        GPR_L  $s0, $r5, 3*SZ_GPR
+        GPR_L  $s1, $r5, 4*SZ_GPR
+        GPR_L  $s2, $r5, 5*SZ_GPR
+        GPR_L  $s3, $r5, 6*SZ_GPR
+        GPR_L  $s4, $r5, 7*SZ_GPR
+        GPR_L  $s5, $r5, 8*SZ_GPR
+        GPR_L  $s6, $r5, 9*SZ_GPR
+        GPR_L  $s7, $r5, 10*SZ_GPR
+        GPR_L  $s8, $r5, 11*SZ_GPR
+
+        FPR_L  $f24, $r5, 12*SZ_GPR + 0*SZ_FPR
+        FPR_L  $f25, $r5, 12*SZ_GPR + 1*SZ_FPR
+        FPR_L  $f26, $r5, 12*SZ_GPR + 2*SZ_FPR
+        FPR_L  $f27, $r5, 12*SZ_GPR + 3*SZ_FPR
+        FPR_L  $f28, $r5, 12*SZ_GPR + 4*SZ_FPR
+        FPR_L  $f29, $r5, 12*SZ_GPR + 5*SZ_FPR
+        FPR_L  $f30, $r5, 12*SZ_GPR + 6*SZ_FPR
+        FPR_L  $f31, $r5, 12*SZ_GPR + 7*SZ_FPR
+
+        GPR_L  $r7, $r5, 2*SZ_GPR
+        GPR_L  $fp, $r5, 0*SZ_GPR
+        GPR_L  $r1, $r5, 1*SZ_GPR
+        cfi_def_cfa(5, 0)
+        move   $sp, $r7
+        jr     $r1
+        cfi_endproc
+	.size	GTM_longjmp, . - GTM_longjmp
+
+#ifdef __linux__
+.section .note.GNU-stack, "", @progbits
+#endif
+diff --git a/libitm/config/loongarch/target.h b/libitm/config/loongarch/target.h
+new file mode 100644
+index 000000000..0c5cf3ada
+--- /dev/null
+++ b/libitm/config/loongarch/target.h
+@@ -0,0 +1,50 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+   Contributed by Loongson Co. Ltd.
+
+   This file is part of the GNU Transactional Memory Library (libitm).
+
+   Libitm is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+namespace GTM HIDDEN {
+
+typedef struct gtm_jmpbuf
+  {
+    long int fp;        /* Frame Pointer: r22 */
+    long int pc;        /* Return Address: r1 */
+    void *cfa;          /* CFA: r3 */
+    long int gpr[9];	/* Callee-saved scratch GPRs: r23(s0)-r31(s8) */
+
+    /* Callee-saved scratch FPRs: f24-f31 */
+#if defined(__loongarch_double_float)
+    double fpr[8];
+#elif defined(__loongarch_single_float)
+    float fpr[8];
+#endif
+  } gtm_jmpbuf;
+
+#define HW_CACHELINE_SIZE 128
+
+static inline void
+cpu_relax (void)
+{
+    __asm__ volatile ("" : : : "memory");
+}
+
+} // namespace GTM
+diff --git a/libitm/configure.tgt b/libitm/configure.tgt
+index 06e90973e..4c0e78cff 100644
+--- a/libitm/configure.tgt
+++ b/libitm/configure.tgt
+@@ -80,6 +80,8 @@ EOF
+ 	ARCH=x86
+ 	;;
+ 
+  loongarch*)   ARCH=loongarch ;;
+
+   sh*)		ARCH=sh ;;
+ 
+   sparc)
+-- 
+2.33.0
+
--- a/LoongArch-Modify-the-output-message-string-of-the-wa.patch
+++ b/LoongArch-Modify-the-output-message-string-of-the-wa.patch
@ -0,0 +1,37 @@
+From 83d6cfbbdc41766af9d7941d00204cc0f26ff40c Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Tue, 26 Jul 2022 21:03:52 +0800
+Subject: [PATCH 005/124] LoongArch: Modify the output message string of the
+ warning.
+
+Fix bug for "error: spurious trailing punctuation sequence '.' in format [-Werror=format-diag]".
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch-opts.cc: Modify the output message string
+	of the warning.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch-opts.cc | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
+index fc477bfd4..3f70943de 100644
+--- a/gcc/config/loongarch/loongarch-opts.cc
+++ b/gcc/config/loongarch/loongarch-opts.cc
+@@ -378,8 +378,8 @@ fallback:
+   t.cmodel = constrained.cmodel ? opt_cmodel : CMODEL_NORMAL;
+   if (t.cmodel != CMODEL_NORMAL)
+     {
+-      warning (0, "%qs is not supported, now cmodel is set to 'normal'.",
+-	       loongarch_cmodel_strings[t.cmodel]);
+      warning (0, "%qs is not supported, now cmodel is set to %qs",
+	       loongarch_cmodel_strings[t.cmodel], "normal");
+       t.cmodel = CMODEL_NORMAL;
+     }
+ 
+-- 
+2.33.0
+
--- a/LoongArch-Optimize-additions-with-immediates.patch
+++ b/LoongArch-Optimize-additions-with-immediates.patch
@ -0,0 +1,445 @@
+From a31baa1e437fa4acedfaf03db91c1d6e5ce78013 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 2 Apr 2023 21:37:49 +0800
+Subject: [PATCH 041/124] LoongArch: Optimize additions with immediates
+
+1. Use addu16i.d for TARGET_64BIT and suitable immediates.
+2. Split one addition with immediate into two addu16i.d or addi.{d/w}
+   instructions if possible.  This can avoid using a temp register without
+   increasing the count of instructions.
+
+Inspired by https://reviews.llvm.org/D143710 and
+https://reviews.llvm.org/D147222.
+
+Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for GCC 14?
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch-protos.h
+	(loongarch_addu16i_imm12_operand_p): New function prototype.
+	(loongarch_split_plus_constant): Likewise.
+	* config/loongarch/loongarch.cc
+	(loongarch_addu16i_imm12_operand_p): New function.
+	(loongarch_split_plus_constant): Likewise.
+	* config/loongarch/loongarch.h (ADDU16I_OPERAND): New macro.
+	(DUAL_IMM12_OPERAND): Likewise.
+	(DUAL_ADDU16I_OPERAND): Likewise.
+	* config/loongarch/constraints.md (La, Lb, Lc, Ld, Le): New
+	constraint.
+	* config/loongarch/predicates.md (const_dual_imm12_operand): New
+	predicate.
+	(const_addu16i_operand): Likewise.
+	(const_addu16i_imm12_di_operand): Likewise.
+	(const_addu16i_imm12_si_operand): Likewise.
+	(plus_di_operand): Likewise.
+	(plus_si_operand): Likewise.
+	(plus_si_extend_operand): Likewise.
+	* config/loongarch/loongarch.md (add<mode>3): Convert to
+	define_insn_and_split.  Use plus_<mode>_operand predicate
+	instead of arith_operand.  Add alternatives for La, Lb, Lc, Ld,
+	and Le constraints.
+	(*addsi3_extended): Convert to define_insn_and_split.  Use
+	plus_si_extend_operand instead of arith_operand.  Add
+	alternatives for La and Le constraints.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/add-const.c: New test.
+	* gcc.target/loongarch/stack-check-cfa-1.c: Adjust for stack
+	frame size change.
+	* gcc.target/loongarch/stack-check-cfa-2.c: Likewise.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/constraints.md           | 46 ++++++++++++-
+ gcc/config/loongarch/loongarch-protos.h       |  2 +
+ gcc/config/loongarch/loongarch.cc             | 44 +++++++++++++
+ gcc/config/loongarch/loongarch.h              | 19 ++++++
+ gcc/config/loongarch/loongarch.md             | 66 +++++++++++++++----
+ gcc/config/loongarch/predicates.md            | 36 ++++++++++
+ .../gcc.target/loongarch/add-const.c          | 45 +++++++++++++
+ .../gcc.target/loongarch/stack-check-cfa-1.c  |  2 +-
+ .../gcc.target/loongarch/stack-check-cfa-2.c  |  2 +-
+ 9 files changed, 246 insertions(+), 16 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/add-const.c
+
+diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
+index 46f7f63ae..25f3cda35 100644
+--- a/gcc/config/loongarch/constraints.md
+++ b/gcc/config/loongarch/constraints.md
+@@ -60,7 +60,22 @@
+ ;; "I" "A signed 12-bit constant (for arithmetic instructions)."
+ ;; "J" "Integer zero."
+ ;; "K" "An unsigned 12-bit constant (for logic instructions)."
+-;; "L" <-----unused
+;; "L" -
+;;     "La"
+;;	 "A signed constant in [-4096, -2048) or (2047, 4094]."
+;;     "Lb"
+;;	 "A signed 32-bit constant and low 16-bit is zero, which can be
+;;	  added onto a register with addu16i.d.  It matches nothing if
+;;	  the addu16i.d instruction is not available."
+;;     "Lc"
+;;	 "A signed 64-bit constant can be expressed as Lb + I, but not a
+;;	  single Lb or I."
+;;     "Ld"
+;;	 "A signed 64-bit constant can be expressed as Lb + Lb, but not a
+;;	  single Lb."
+;;     "Le"
+;;	 "A signed 32-bit constant can be expressed as Lb + I, but not a
+;;	  single Lb or I."
+ ;; "M" <-----unused
+ ;; "N" <-----unused
+ ;; "O" <-----unused
+@@ -170,6 +185,35 @@
+   (and (match_code "const_int")
+        (match_test "IMM12_OPERAND_UNSIGNED (ival)")))
+ 
+(define_constraint "La"
+  "A signed constant in [-4096, -2048) or (2047, 4094]."
+  (and (match_code "const_int")
+       (match_test "DUAL_IMM12_OPERAND (ival)")))
+
+(define_constraint "Lb"
+  "A signed 32-bit constant and low 16-bit is zero, which can be added
+   onto a register with addu16i.d."
+  (and (match_code "const_int")
+       (match_test "ADDU16I_OPERAND (ival)")))
+
+(define_constraint "Lc"
+  "A signed 64-bit constant can be expressed as Lb + I, but not a single Lb
+   or I."
+  (and (match_code "const_int")
+       (match_test "loongarch_addu16i_imm12_operand_p (ival, DImode)")))
+
+(define_constraint "Ld"
+  "A signed 64-bit constant can be expressed as Lb + Lb, but not a single
+   Lb."
+  (and (match_code "const_int")
+       (match_test "DUAL_ADDU16I_OPERAND (ival)")))
+
+(define_constraint "Le"
+  "A signed 32-bit constant can be expressed as Lb + I, but not a single Lb
+   or I."
+  (and (match_code "const_int")
+       (match_test "loongarch_addu16i_imm12_operand_p (ival, SImode)")))
+
+ (define_constraint "Yd"
+   "@internal
+    A constant @code{move_operand} that can be safely loaded using
+diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
+index 77b221724..0a9b47722 100644
+--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
+@@ -83,6 +83,8 @@ extern rtx loongarch_legitimize_call_address (rtx);
+ extern rtx loongarch_subword (rtx, bool);
+ extern bool loongarch_split_move_p (rtx, rtx);
+ extern void loongarch_split_move (rtx, rtx, rtx);
+extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
+extern void loongarch_split_plus_constant (rtx *, machine_mode);
+ extern const char *loongarch_output_move (rtx, rtx);
+ extern bool loongarch_cfun_has_cprestore_slot_p (void);
+ #ifdef RTX_CODE
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 1a4686f03..233dddbac 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -3753,6 +3753,50 @@ loongarch_split_move (rtx dest, rtx src, rtx insn_)
+     }
+ }
+ 
+/* Check if adding an integer constant value for a specific mode can be
+   performed with an addu16i.d instruction and an addi.{w/d}
+   instruction.  */
+
+bool
+loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT value, machine_mode mode)
+{
+  /* Not necessary, but avoid unnecessary calculation if !TARGET_64BIT.  */
+  if (!TARGET_64BIT)
+    return false;
+
+  if ((value & 0xffff) == 0)
+    return false;
+
+  if (IMM12_OPERAND (value))
+    return false;
+
+  value = (value & ~HWIT_UC_0xFFF) + ((value & 0x800) << 1);
+  return ADDU16I_OPERAND (trunc_int_for_mode (value, mode));
+}
+
+/* Split integer constant op[0] into two addends op[1] and op[2] for a plus
+   operation in MODE.  Each split constant can be added onto a register with
+   a single instruction (addi.{d/w} or addu16i.d).  */
+
+void
+loongarch_split_plus_constant (rtx *op, machine_mode mode)
+{
+  HOST_WIDE_INT v = INTVAL (op[0]), a;
+  /* A becomes op[1], the first addend; the remainder V - A becomes op[2].  */
+  if (DUAL_IMM12_OPERAND (v))
+    a = (v > 0 ? 2047 : -2048);
+  else if (loongarch_addu16i_imm12_operand_p (v, mode))
+    a = (v & ~HWIT_UC_0xFFF) + ((v & 0x800) << 1);
+  else if (mode == DImode && DUAL_ADDU16I_OPERAND (v))
+    a = (v > 0 ? 0x7fff : -0x8000) * 65536;
+  else
+    gcc_unreachable ();
+  /* Subtract as unsigned so the difference wraps instead of overflowing.  */
+  op[1] = gen_int_mode (a, mode);
+  v = v - (unsigned HOST_WIDE_INT) a;
+  op[2] = gen_int_mode (v, mode);
+}
+
+ /* Return true if a move from SRC to DEST in INSN should be split.  */
+ 
+ static bool
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index c6e37b1b4..9d3cd9ca0 100644
+--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
+@@ -612,6 +612,25 @@ enum reg_class
+ 
+ #define CONST_LOW_PART(VALUE) ((VALUE) - CONST_HIGH_PART (VALUE))
+ 
+/* True if VALUE can be added onto a register with one addu16i.d
+   instruction.  */
+
+#define ADDU16I_OPERAND(VALUE)			\
+  (TARGET_64BIT && (((VALUE) & 0xffff) == 0	\
+   && IMM16_OPERAND ((HOST_WIDE_INT) (VALUE) / 65536)))
+
+/* True if VALUE can be added onto a register with two addi.{d/w}
+   instructions, but not one addi.{d/w} instruction.  */
+#define DUAL_IMM12_OPERAND(VALUE) \
+  (IN_RANGE ((VALUE), -4096, 4094) && !IMM12_OPERAND (VALUE))
+
+/* True if VALUE can be added onto a register with two addu16i.d
+   instructions, but not one addu16i.d instruction.  */
+#define DUAL_ADDU16I_OPERAND(VALUE)		\
+  (TARGET_64BIT && (((VALUE) & 0xffff) == 0	\
+   && !ADDU16I_OPERAND (VALUE)			\
+   && IN_RANGE ((VALUE) / 65536, -0x10000, 0xfffe)))
+
+ #define IMM12_INT(X) IMM12_OPERAND (INTVAL (X))
+ #define IMM12_INT_UNSIGNED(X) IMM12_OPERAND_UNSIGNED (INTVAL (X))
+ #define LU12I_INT(X) LU12I_OPERAND (INTVAL (X))
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 833b94753..b2f7c7f78 100644
+--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
+@@ -598,24 +598,64 @@
+   [(set_attr "type" "fadd")
+    (set_attr "mode" "<UNITMODE>")])
+ 
+-(define_insn "add<mode>3"
+-  [(set (match_operand:GPR 0 "register_operand" "=r,r")
+-	(plus:GPR (match_operand:GPR 1 "register_operand" "r,r")
+-		  (match_operand:GPR 2 "arith_operand" "r,I")))]
+(define_insn_and_split "add<mode>3"
+  [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r")
+	(plus:GPR (match_operand:GPR 1 "register_operand" "r,r,r,r,r,r,r")
+		  (match_operand:GPR 2 "plus_<mode>_operand"
+				       "r,I,La,Lb,Lc,Ld,Le")))]
+   ""
+-  "add%i2.<d>\t%0,%1,%2";
+  "@
+   add.<d>\t%0,%1,%2
+   addi.<d>\t%0,%1,%2
+   #
+   * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \
+     return \"addu16i.d\t%0,%1,%2\";
+   #
+   #
+   #"
+  "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
+   && !ADDU16I_OPERAND (INTVAL (operands[2]))"
+  [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3)))
+   (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))]
+  {
+    loongarch_split_plus_constant (&operands[2], <MODE>mode);
+  }
+   [(set_attr "alu_type" "add")
+-   (set_attr "mode" "<MODE>")])
+-
+-(define_insn "*addsi3_extended"
+-  [(set (match_operand:DI 0 "register_operand" "=r,r")
+   (set_attr "mode" "<MODE>")
+   (set_attr "insn_count" "1,1,2,1,2,2,2")
+   (set (attr "enabled")
+      (cond
+	[(match_test "<MODE>mode != DImode && which_alternative == 4")
+	 (const_string "no")
+	 (match_test "<MODE>mode != DImode && which_alternative == 5")
+	 (const_string "no")
+	 (match_test "<MODE>mode != SImode && which_alternative == 6")
+	 (const_string "no")]
+	(const_string "yes")))])
+
+(define_insn_and_split "*addsi3_extended"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
+ 	(sign_extend:DI
+-	     (plus:SI (match_operand:SI 1 "register_operand" "r,r")
+-		      (match_operand:SI 2 "arith_operand" "r,I"))))]
+	     (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r")
+		      (match_operand:SI 2 "plus_si_extend_operand"
+					  "r,I,La,Le"))))]
+   "TARGET_64BIT"
+-  "add%i2.w\t%0,%1,%2"
+  "@
+   add.w\t%0,%1,%2
+   addi.w\t%0,%1,%2
+   #
+   #"
+  "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2])"
+  [(set (subreg:SI (match_dup 0) 0) (plus:SI (match_dup 1) (match_dup 3)))
+   (set (match_dup 0)
+	(sign_extend:DI (plus:SI (subreg:SI (match_dup 0) 0)
+				 (match_dup 4))))]
+  {
+    loongarch_split_plus_constant (&operands[2], SImode);
+  }
+   [(set_attr "alu_type" "add")
+-   (set_attr "mode" "SI")])
+   (set_attr "mode" "SI")
+   (set_attr "insn_count" "1,1,2,2")])
+ 
+ 
+ ;;
+diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
+index 3c32b2987..4966d5569 100644
+--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
+@@ -39,14 +39,50 @@
+   (and (match_code "const_int")
+        (match_test "IMM12_OPERAND (INTVAL (op))")))
+ 
+(define_predicate "const_dual_imm12_operand"
+  (and (match_code "const_int")
+       (match_test "DUAL_IMM12_OPERAND (INTVAL (op))")))
+
+ (define_predicate "const_imm16_operand"
+   (and (match_code "const_int")
+        (match_test "IMM16_OPERAND (INTVAL (op))")))
+ 
+(define_predicate "const_addu16i_operand"
+  (and (match_code "const_int")
+       (match_test "ADDU16I_OPERAND (INTVAL (op))")))
+
+(define_predicate "const_addu16i_imm12_di_operand"
+  (and (match_code "const_int")
+       (match_test "loongarch_addu16i_imm12_operand_p (INTVAL (op), DImode)")))
+
+(define_predicate "const_addu16i_imm12_si_operand"
+  (and (match_code "const_int")
+       (match_test "loongarch_addu16i_imm12_operand_p (INTVAL (op), SImode)")))
+
+(define_predicate "const_dual_addu16i_operand"
+  (and (match_code "const_int")
+       (match_test "DUAL_ADDU16I_OPERAND (INTVAL (op))")))
+
+ (define_predicate "arith_operand"
+   (ior (match_operand 0 "const_arith_operand")
+        (match_operand 0 "register_operand")))
+ 
+(define_predicate "plus_di_operand"
+  (ior (match_operand 0 "arith_operand")
+       (match_operand 0 "const_dual_imm12_operand")
+       (match_operand 0 "const_addu16i_operand")
+       (match_operand 0 "const_addu16i_imm12_di_operand")
+       (match_operand 0 "const_dual_addu16i_operand")))
+
+(define_predicate "plus_si_extend_operand"
+  (ior (match_operand 0 "arith_operand")
+       (match_operand 0 "const_dual_imm12_operand")
+       (match_operand 0 "const_addu16i_imm12_si_operand")))
+
+(define_predicate "plus_si_operand"
+  (ior (match_operand 0 "plus_si_extend_operand")
+       (match_operand 0 "const_addu16i_operand")))
+
+ (define_predicate "const_immalsl_operand"
+   (and (match_code "const_int")
+        (match_test "IN_RANGE (INTVAL (op), 1, 4)")))
+diff --git a/gcc/testsuite/gcc.target/loongarch/add-const.c b/gcc/testsuite/gcc.target/loongarch/add-const.c
+new file mode 100644
+index 000000000..7b6a7cb92
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/add-const.c
+@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mabi=lp64d" } */
+
+/* None of these functions should load the const operand into a temp
+   register.  */
+
+/* { dg-final { scan-assembler-not "add\\.[dw]" } } */
+
+unsigned long f01 (unsigned long x) { return x + 1; }
+unsigned long f02 (unsigned long x) { return x - 1; }
+unsigned long f03 (unsigned long x) { return x + 2047; }
+unsigned long f04 (unsigned long x) { return x + 4094; }
+unsigned long f05 (unsigned long x) { return x - 2048; }
+unsigned long f06 (unsigned long x) { return x - 4096; }
+unsigned long f07 (unsigned long x) { return x + 0x7fff0000; }
+unsigned long f08 (unsigned long x) { return x - 0x80000000l; }
+unsigned long f09 (unsigned long x) { return x + 0x7fff0000l * 2; }
+unsigned long f10 (unsigned long x) { return x - 0x80000000l * 2; }
+unsigned long f11 (unsigned long x) { return x + 0x7fff0000 + 0x1; }
+unsigned long f12 (unsigned long x) { return x + 0x7fff0000 - 0x1; }
+unsigned long f13 (unsigned long x) { return x + 0x7fff0000 + 0x7ff; }
+unsigned long f14 (unsigned long x) { return x + 0x7fff0000 - 0x800; }
+unsigned long f15 (unsigned long x) { return x - 0x80000000l - 1; }
+unsigned long f16 (unsigned long x) { return x - 0x80000000l + 1; }
+unsigned long f17 (unsigned long x) { return x - 0x80000000l - 0x800; }
+unsigned long f18 (unsigned long x) { return x - 0x80000000l + 0x7ff; }
+
+unsigned int g01 (unsigned int x) { return x + 1; }
+unsigned int g02 (unsigned int x) { return x - 1; }
+unsigned int g03 (unsigned int x) { return x + 2047; }
+unsigned int g04 (unsigned int x) { return x + 4094; }
+unsigned int g05 (unsigned int x) { return x - 2048; }
+unsigned int g06 (unsigned int x) { return x - 4096; }
+unsigned int g07 (unsigned int x) { return x + 0x7fff0000; }
+unsigned int g08 (unsigned int x) { return x - 0x80000000l; }
+unsigned int g09 (unsigned int x) { return x + 0x7fff0000l * 2; }
+unsigned int g10 (unsigned int x) { return x - 0x80000000l * 2; }
+unsigned int g11 (unsigned int x) { return x + 0x7fff0000 + 0x1; }
+unsigned int g12 (unsigned int x) { return x + 0x7fff0000 - 0x1; }
+unsigned int g13 (unsigned int x) { return x + 0x7fff0000 + 0x7ff; }
+unsigned int g14 (unsigned int x) { return x + 0x7fff0000 - 0x800; }
+unsigned int g15 (unsigned int x) { return x - 0x80000000l - 1; }
+unsigned int g16 (unsigned int x) { return x - 0x80000000l + 1; }
+unsigned int g17 (unsigned int x) { return x - 0x80000000l - 0x800; }
+unsigned int g18 (unsigned int x) { return x - 0x80000000l + 0x7ff; }
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
+index 3533fe7b6..cd72154f4 100644
+--- a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
+@@ -6,7 +6,7 @@
+ #define SIZE 128*1024
+ #include "stack-check-prologue.h"
+ 
+-/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 131088} 1 } } */
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 131072} 1 } } */
+ /* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1 } } */
+ 
+ /* Checks that the CFA notes are correct for every sp adjustment.  */
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
+index e5e711105..3e5ca05b2 100644
+--- a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
+@@ -6,7 +6,7 @@
+ #define SIZE 1280*1024 + 512
+ #include "stack-check-prologue.h"
+ 
+-/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 1311248} 1 } } */
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 1311232} 1 } } */
+ /* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1 } } */
+ 
+ /* Checks that the CFA notes are correct for every sp adjustment.  */
+-- 
+2.33.0
+
--- a/LoongArch-Optimize-immediate-load.patch
+++ b/LoongArch-Optimize-immediate-load.patch
@ -0,0 +1,338 @@
+From b533b615ae47b97d51eeb83e1a63f7c72407430f Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Thu, 17 Nov 2022 17:08:36 +0800
+Subject: [PATCH 032/124] LoongArch: Optimize immediate load.
+
+The immediate number is split in the Split pass, not in the expand pass.
+
+The loop2_invariant pass hoists loop-invariant instructions out of the loop,
+but some instructions no longer meet the hoisting conditions if immediates
+are decomposed during the expand pass, so the decomposition is deferred to
+the split pass.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (enum loongarch_load_imm_method):
+	Remove the member METHOD_INSV that is not currently used.
+	(struct loongarch_integer_op): Define a new member curr_value,
+	that records the value of the number stored in the destination
+	register immediately after the current instruction has run.
+	(loongarch_build_integer): Assign a value to the curr_value member variable.
+	(loongarch_move_integer): Adds information for the immediate load instruction.
+	* config/loongarch/loongarch.md (*movdi_32bit): Redefine as define_insn_and_split.
+	(*movdi_64bit): Likewise.
+	(*movsi_internal): Likewise.
+	(*movhi_internal): Likewise.
+	* config/loongarch/predicates.md: Return true as long as it is CONST_INT, ensure
+	that the immediate number is not optimized by decomposition during expand
+	optimization loop.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/imm-load.c: New test.
+	* gcc.target/loongarch/imm-load1.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.cc             | 62 ++++++++++---------
+ gcc/config/loongarch/loongarch.md             | 44 +++++++++++--
+ gcc/config/loongarch/predicates.md            |  2 +-
+ gcc/testsuite/gcc.target/loongarch/imm-load.c | 10 +++
+ .../gcc.target/loongarch/imm-load1.c          | 26 ++++++++
+ 5 files changed, 110 insertions(+), 34 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/imm-load.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/imm-load1.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 622c9435b..f45a49f90 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -139,22 +139,21 @@ struct loongarch_address_info
+ 
+    METHOD_LU52I:
+      Load 52-63 bit of the immediate number.
+-
+-   METHOD_INSV:
+-     immediate like 0xfff00000fffffxxx
+-   */
+*/
+ enum loongarch_load_imm_method
+ {
+   METHOD_NORMAL,
+   METHOD_LU32I,
+-  METHOD_LU52I,
+-  METHOD_INSV
+  METHOD_LU52I
+ };
+ 
+ struct loongarch_integer_op
+ {
+   enum rtx_code code;
+   HOST_WIDE_INT value;
+  /* The value held in the destination register immediately after the
+     instruction for this step has run.  */
+  HOST_WIDE_INT curr_value;
+   enum loongarch_load_imm_method method;
+ };
+ 
+@@ -1474,24 +1473,27 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
+     {
+       /* The value of the lower 32 bit be loaded with one instruction.
+ 	 lu12i.w.  */
+-      codes[0].code = UNKNOWN;
+-      codes[0].method = METHOD_NORMAL;
+-      codes[0].value = low_part;
+      codes[cost].code = UNKNOWN;
+      codes[cost].method = METHOD_NORMAL;
+      codes[cost].value = low_part;
+      codes[cost].curr_value = low_part;
+       cost++;
+     }
+   else
+     {
+       /* lu12i.w + ior.  */
+-      codes[0].code = UNKNOWN;
+-      codes[0].method = METHOD_NORMAL;
+-      codes[0].value = low_part & ~(IMM_REACH - 1);
+      codes[cost].code = UNKNOWN;
+      codes[cost].method = METHOD_NORMAL;
+      codes[cost].value = low_part & ~(IMM_REACH - 1);
+      codes[cost].curr_value = codes[cost].value;
+       cost++;
+       HOST_WIDE_INT iorv = low_part & (IMM_REACH - 1);
+       if (iorv != 0)
+ 	{
+-	  codes[1].code = IOR;
+-	  codes[1].method = METHOD_NORMAL;
+-	  codes[1].value = iorv;
+	  codes[cost].code = IOR;
+	  codes[cost].method = METHOD_NORMAL;
+	  codes[cost].value = iorv;
+	  codes[cost].curr_value = low_part;
+ 	  cost++;
+ 	}
+     }
+@@ -1514,11 +1516,14 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
+ 	{
+ 	  codes[cost].method = METHOD_LU52I;
+ 	  codes[cost].value = value & LU52I_B;
+	  codes[cost].curr_value = value;
+ 	  return cost + 1;
+ 	}
+ 
+       codes[cost].method = METHOD_LU32I;
+       codes[cost].value = (value & LU32I_B) | (sign51 ? LU52I_B : 0);
+      codes[cost].curr_value = (value & 0xfffffffffffff)
+	| (sign51 ? LU52I_B : 0);
+       cost++;
+ 
+       /* Determine whether the 52-61 bits are sign-extended from the low order,
+@@ -1527,6 +1532,7 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
+ 	{
+ 	  codes[cost].method = METHOD_LU52I;
+ 	  codes[cost].value = value & LU52I_B;
+	  codes[cost].curr_value = value;
+ 	  cost++;
+ 	}
+     }
+@@ -2910,6 +2916,9 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
+       else
+ 	x = force_reg (mode, x);
+ 
+      set_unique_reg_note (get_last_insn (), REG_EQUAL,
+			   GEN_INT (codes[i-1].curr_value));
+
+       switch (codes[i].method)
+ 	{
+ 	case METHOD_NORMAL:
+@@ -2917,22 +2926,17 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
+ 			      GEN_INT (codes[i].value));
+ 	  break;
+ 	case METHOD_LU32I:
+-	  emit_insn (
+-	    gen_rtx_SET (x,
+-			 gen_rtx_IOR (DImode,
+-				      gen_rtx_ZERO_EXTEND (
+-					DImode, gen_rtx_SUBREG (SImode, x, 0)),
+-				      GEN_INT (codes[i].value))));
+	  gcc_assert (mode == DImode);
+	  x = gen_rtx_IOR (DImode,
+			   gen_rtx_ZERO_EXTEND (DImode,
+						gen_rtx_SUBREG (SImode, x, 0)),
+			   GEN_INT (codes[i].value));
+ 	  break;
+ 	case METHOD_LU52I:
+-	  emit_insn (gen_lu52i_d (x, x, GEN_INT (0xfffffffffffff),
+-				  GEN_INT (codes[i].value)));
+-	  break;
+-	case METHOD_INSV:
+-	  emit_insn (
+-	    gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, x, GEN_INT (20),
+-					       GEN_INT (32)),
+-			 gen_rtx_REG (DImode, 0)));
+	  gcc_assert (mode == DImode);
+	  x = gen_rtx_IOR (DImode,
+			   gen_rtx_AND (DImode, x, GEN_INT (0xfffffffffffff)),
+			   GEN_INT (codes[i].value));
+ 	  break;
+ 	default:
+ 	  gcc_unreachable ();
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 2fda53819..f61db66d5 100644
+--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
+@@ -1718,23 +1718,41 @@
+     DONE;
+ })
+ 
+-(define_insn "*movdi_32bit"
+(define_insn_and_split "*movdi_32bit"
+   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m")
+        (match_operand:DI 1 "move_operand" "r,i,w,r,*J*r,*m,*f,*f"))]
+   "!TARGET_64BIT
+    && (register_operand (operands[0], DImode)
+        || reg_or_0_operand (operands[1], DImode))"
+   { return loongarch_output_move (operands[0], operands[1]); }
+  "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO
+  (operands[0]))"
+  [(const_int 0)]
+  "
+{
+  loongarch_move_integer (operands[0], operands[0], INTVAL (operands[1]));
+  DONE;
+}
+  "
+   [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore")
+    (set_attr "mode" "DI")])
+ 
+-(define_insn "*movdi_64bit"
+(define_insn_and_split "*movdi_64bit"
+   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m")
+ 	(match_operand:DI 1 "move_operand" "r,Yd,w,rJ,*r*J,*m,*f,*f"))]
+   "TARGET_64BIT
+    && (register_operand (operands[0], DImode)
+        || reg_or_0_operand (operands[1], DImode))"
+   { return loongarch_output_move (operands[0], operands[1]); }
+  "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO
+  (operands[0]))"
+  [(const_int 0)]
+  "
+{
+  loongarch_move_integer (operands[0], operands[0], INTVAL (operands[1]));
+  DONE;
+}
+  "
+   [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore")
+    (set_attr "mode" "DI")])
+ 
+@@ -1749,12 +1767,21 @@
+     DONE;
+ })
+ 
+-(define_insn "*movsi_internal"
+(define_insn_and_split "*movsi_internal"
+   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m,*r,*z")
+ 	(match_operand:SI 1 "move_operand" "r,Yd,w,rJ,*r*J,*m,*f,*f,*z,*r"))]
+   "(register_operand (operands[0], SImode)
+     || reg_or_0_operand (operands[1], SImode))"
+   { return loongarch_output_move (operands[0], operands[1]); }
+  "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO
+  (operands[0]))"
+  [(const_int 0)]
+  "
+{
+  loongarch_move_integer (operands[0], operands[0], INTVAL (operands[1]));
+  DONE;
+}
+  "
+   [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore,mftg,mgtf")
+    (set_attr "mode" "SI")])
+ 
+@@ -1774,12 +1801,21 @@
+     DONE;
+ })
+ 
+-(define_insn "*movhi_internal"
+(define_insn_and_split "*movhi_internal"
+   [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,r,m,r,k")
+ 	(match_operand:HI 1 "move_operand" "r,Yd,I,m,rJ,k,rJ"))]
+   "(register_operand (operands[0], HImode)
+        || reg_or_0_operand (operands[1], HImode))"
+   { return loongarch_output_move (operands[0], operands[1]); }
+  "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO
+  (operands[0]))"
+  [(const_int 0)]
+  "
+{
+  loongarch_move_integer (operands[0], operands[0], INTVAL (operands[1]));
+  DONE;
+}
+  "
+   [(set_attr "move_type" "move,const,const,load,store,load,store")
+    (set_attr "mode" "HI")])
+ 
+diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
+index 8bd0c1376..58c3dc226 100644
+--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
+@@ -226,7 +226,7 @@
+   switch (GET_CODE (op))
+     {
+     case CONST_INT:
+-      return !splittable_const_int_operand (op, mode);
+      return true;
+ 
+     case CONST:
+     case SYMBOL_REF:
+diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load.c b/gcc/testsuite/gcc.target/loongarch/imm-load.c
+new file mode 100644
+index 000000000..c04ca3399
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/imm-load.c
+@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2 -fdump-rtl-split1" } */
+
+long int
+test (void)
+{
+  return 0x1234567890abcdef;
+}
+/* { dg-final { scan-rtl-dump-times "scanning new insn with uid" 6 "split1" } } */
+
+diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load1.c b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
+new file mode 100644
+index 000000000..2ff029712
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
+@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2" } */
+/* { dg-final { scan-assembler "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
+
+
+extern long long b[10];
+static inline long long
+repeat_bytes (void)
+{
+  long long r = 0x0101010101010101;
+
+  return r;
+}
+
+static inline long long
+highbit_mask (long long m)
+{
+  return m & repeat_bytes ();
+}
+
+void test(long long *a)
+{
+  for (int i = 0; i < 10; i++)
+    b[i] = highbit_mask (a[i]);
+
+}
+-- 
+2.33.0
+
--- a/LoongArch-Optimize-switch-with-sign-extended-index.patch
+++ b/LoongArch-Optimize-switch-with-sign-extended-index.patch
@ -0,0 +1,114 @@
+From c6d4efda0fa064dfe1d3cc1b9abf8051a82cd74f Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Sat, 2 Sep 2023 10:59:55 +0800
+Subject: [PATCH 056/124] LoongArch: Optimize switch with sign-extended index.
+
+The patch refers to the submission of RISCV
+7bbce9b50302959286381d9177818642bceaf301.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_extend_comparands):
+	In unsigned QImode test, check for sign extended subreg and/or
+	constant operands, and do a sign extension in that case.
+	* config/loongarch/loongarch.md (TARGET_64BIT): Define
+	template cbranchqi4.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/switch-qi.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.cc              | 16 +++++++++++++---
+ gcc/config/loongarch/loongarch.md              | 10 +++++++---
+ gcc/testsuite/gcc.target/loongarch/switch-qi.c | 16 ++++++++++++++++
+ 3 files changed, 36 insertions(+), 6 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/switch-qi.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 7b48e3216..41d344b82 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -4221,11 +4221,21 @@ loongarch_allocate_fcc (machine_mode mode)
+ static void
+ loongarch_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
+ {
+-  /* Comparisons consider all XLEN bits, so extend sub-XLEN values.  */
+  /* Comparisons consider all GRLEN bits, so extend sub-GRLEN values.  */
+   if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)))
+     {
+-      /* TODO: checkout It is more profitable to zero-extend QImode values.  */
+-      if (unsigned_condition (code) == code && GET_MODE (*op0) == QImode)
+      /* It is more profitable to zero-extend QImode values.  But not if the
+	 first operand has already been sign-extended, and the second one
+	 is a constant or has also already been sign-extended.  */
+      if (unsigned_condition (code) == code
+	  && (GET_MODE (*op0) == QImode
+	      && ! (GET_CODE (*op0) == SUBREG
+		    && SUBREG_PROMOTED_VAR_P (*op0)
+		    && SUBREG_PROMOTED_SIGNED_P (*op0)
+		    && (CONST_INT_P (*op1)
+			|| (GET_CODE (*op1) == SUBREG
+			    && SUBREG_PROMOTED_VAR_P (*op1)
+			    && SUBREG_PROMOTED_SIGNED_P (*op1))))))
+ 	{
+ 	  *op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0);
+ 	  if (CONST_INT_P (*op1))
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index cf7441e0b..a5e9352ca 100644
+--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
+@@ -357,7 +357,7 @@
+ ;; pointer-sized quantities.  Exactly one of the two alternatives will match.
+ (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
+ 
+-;; Likewise, but for XLEN-sized quantities.
+;; Likewise, but for GRLEN-sized quantities.
+ (define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")])
+ 
+ ;; 64-bit modes for which we provide move patterns.
+@@ -2733,11 +2733,15 @@
+   [(set_attr "type" "branch")])
+ 
+ 
+;; Branches operate on GRLEN-sized quantities, but for LoongArch64 we accept
+;; QImode values so we can force zero-extension.
+(define_mode_iterator BR [(QI "TARGET_64BIT") SI (DI "TARGET_64BIT")])
+
+ (define_expand "cbranch<mode>4"
+   [(set (pc)
+ 	(if_then_else (match_operator 0 "comparison_operator"
+-			[(match_operand:GPR 1 "register_operand")
+-			 (match_operand:GPR 2 "nonmemory_operand")])
+			[(match_operand:BR 1 "register_operand")
+			 (match_operand:BR 2 "nonmemory_operand")])
+ 		      (label_ref (match_operand 3 ""))
+ 		      (pc)))]
+   ""
+diff --git a/gcc/testsuite/gcc.target/loongarch/switch-qi.c b/gcc/testsuite/gcc.target/loongarch/switch-qi.c
+new file mode 100644
+index 000000000..dd192fd49
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/switch-qi.c
+@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=loongarch64 -mabi=lp64d" } */
+/* { dg-final { scan-assembler-not "bstrpick" } } */
+
+/* Test for loongarch_extend_comparands patch.  */
+extern void asdf (int);
+void
+foo (signed char x) {
+    switch (x) {
+      case 0: asdf (10); break;
+      case 1: asdf (11); break;
+      case 2: asdf (12); break;
+      case 3: asdf (13); break;
+      case 4: asdf (14); break;
+    }
+}
+-- 
+2.33.0
+
--- a/LoongArch-Optimize-the-implementation-of-stack-check.patch
+++ b/LoongArch-Optimize-the-implementation-of-stack-check.patch
@ -0,0 +1,810 @@
+From d3615b555d6885dba298f7b339740be11cb65a8f Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Tue, 29 Nov 2022 16:06:12 +0800
+Subject: [PATCH 033/124] LoongArch: Optimize the implementation of stack
+ check.
+
+The old stack check was performed before the stack was dropped,
+which would cause the detection tool to report a memory leak.
+
+The current stack check scheme is as follows:
+
+'-fstack-clash-protection':
+1. When the frame->total_size is smaller than the guard page size,
+   the stack is dropped according to the original scheme, and there
+   is no need to perform stack detection in the prologue.
+2. When frame->total_size is greater than or equal to guard page size,
+   the first step to drop the stack is to drop the space required by
+   the caller-save registers. This space needs to save the caller-save
+   registers, so an implicit stack check is performed.
+   So only the rest of the stack space needs to be checked.
+
+'-fstack-check':
+The stack is not dropped all at once and then probed page by page as
+described in the document.  As with '-fstack-clash-protection', each
+page is probed immediately after the stack is dropped by that page.
+
+When frame->total_size is not 0, the first stack drop only covers the
+size required to save the s registers.
+
+The test cases are referenced from aarch64.
+
+gcc/ChangeLog:
+
+	* config/loongarch/linux.h (STACK_CHECK_MOVING_SP):
+	Define this macro to 1.
+	* config/loongarch/loongarch.cc (STACK_CLASH_PROTECTION_GUARD_SIZE):
+	Size of guard page.
+	(loongarch_first_stack_step): Return the size of the first drop stack
+	according to whether stack checking is performed.
+	(loongarch_emit_probe_stack_range): Adjust the method of stack checking in prologue.
+	(loongarch_output_probe_stack_range): Delete useless code.
+	(loongarch_expand_prologue): Adjust the method of stack checking in prologue.
+	(loongarch_option_override_internal): Enforce that interval is the same
+	size as size so the mid-end does the right thing.
+	* config/loongarch/loongarch.h (STACK_CLASH_MAX_UNROLL_PAGES):
+	New macro to decide whether to loop stack detection.
+
+gcc/testsuite/ChangeLog:
+
+	* lib/target-supports.exp: Add loongarch64 support.
+	* gcc.target/loongarch/stack-check-alloca-1.c: New test.
+	* gcc.target/loongarch/stack-check-alloca-2.c: New test.
+	* gcc.target/loongarch/stack-check-alloca-3.c: New test.
+	* gcc.target/loongarch/stack-check-alloca-4.c: New test.
+	* gcc.target/loongarch/stack-check-alloca-5.c: New test.
+	* gcc.target/loongarch/stack-check-alloca-6.c: New test.
+	* gcc.target/loongarch/stack-check-alloca.h: New test.
+	* gcc.target/loongarch/stack-check-cfa-1.c: New test.
+	* gcc.target/loongarch/stack-check-cfa-2.c: New test.
+	* gcc.target/loongarch/stack-check-prologue-1.c: New test.
+	* gcc.target/loongarch/stack-check-prologue-2.c: New test.
+	* gcc.target/loongarch/stack-check-prologue-3.c: New test.
+	* gcc.target/loongarch/stack-check-prologue-4.c: New test.
+	* gcc.target/loongarch/stack-check-prologue-5.c: New test.
+	* gcc.target/loongarch/stack-check-prologue-6.c: New test.
+	* gcc.target/loongarch/stack-check-prologue-7.c: New test.
+	* gcc.target/loongarch/stack-check-prologue.h: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/linux.h                  |   3 +
+ gcc/config/loongarch/loongarch.cc             | 248 +++++++++++-------
+ gcc/config/loongarch/loongarch.h              |   4 +
+ .../loongarch/stack-check-alloca-1.c          |  15 ++
+ .../loongarch/stack-check-alloca-2.c          |  12 +
+ .../loongarch/stack-check-alloca-3.c          |  12 +
+ .../loongarch/stack-check-alloca-4.c          |  12 +
+ .../loongarch/stack-check-alloca-5.c          |  13 +
+ .../loongarch/stack-check-alloca-6.c          |  13 +
+ .../gcc.target/loongarch/stack-check-alloca.h |  15 ++
+ .../gcc.target/loongarch/stack-check-cfa-1.c  |  12 +
+ .../gcc.target/loongarch/stack-check-cfa-2.c  |  12 +
+ .../loongarch/stack-check-prologue-1.c        |  11 +
+ .../loongarch/stack-check-prologue-2.c        |  11 +
+ .../loongarch/stack-check-prologue-3.c        |  11 +
+ .../loongarch/stack-check-prologue-4.c        |  11 +
+ .../loongarch/stack-check-prologue-5.c        |  12 +
+ .../loongarch/stack-check-prologue-6.c        |  11 +
+ .../loongarch/stack-check-prologue-7.c        |  12 +
+ .../loongarch/stack-check-prologue.h          |   5 +
+ gcc/testsuite/lib/target-supports.exp         |   7 +-
+ 21 files changed, 361 insertions(+), 101 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-2.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-3.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-4.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-5.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-6.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca.h
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-1.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-2.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-3.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-4.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-5.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-6.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-7.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue.h
+
+diff --git a/gcc/config/loongarch/linux.h b/gcc/config/loongarch/linux.h
+index 110d0fab9..00039ac18 100644
+--- a/gcc/config/loongarch/linux.h
+++ b/gcc/config/loongarch/linux.h
+@@ -48,3 +48,6 @@ along with GCC; see the file COPYING3.  If not see
+ #define STACK_CHECK_PROTECT (TARGET_64BIT ? 16 * 1024 : 12 * 1024)
+ 
+ #define TARGET_ASM_FILE_END file_end_indicate_exec_stack
+
+/* The stack pointer needs to be moved while checking the stack.  */
+#define STACK_CHECK_MOVING_SP 1
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index f45a49f90..e59edc4cd 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -257,6 +257,10 @@ const char *const
+ loongarch_fp_conditions[16]= {LARCH_FP_CONDITIONS (STRINGIFY)};
+ #undef STRINGIFY
+ 
+/* Size of guard page.  */
+#define STACK_CLASH_PROTECTION_GUARD_SIZE \
+  (1 << param_stack_clash_protection_guard_size)
+
+ /* Implement TARGET_FUNCTION_ARG_BOUNDARY.  Every parameter gets at
+    least PARM_BOUNDARY bits of alignment, but will be given anything up
+    to PREFERRED_STACK_BOUNDARY bits if the type requires it.  */
+@@ -1069,11 +1073,20 @@ loongarch_restore_reg (rtx reg, rtx mem)
+ static HOST_WIDE_INT
+ loongarch_first_stack_step (struct loongarch_frame_info *frame)
+ {
+  HOST_WIDE_INT min_first_step
+    = LARCH_STACK_ALIGN (frame->total_size - frame->fp_sp_offset);
+
+  /* When static builtin stack checking is enabled, or when stack clash
+     protection is enabled and frame->total_size is greater than the
+     stack clash protection guard size, return min_first_step.  */
+  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+      || (flag_stack_clash_protection
+	  && frame->total_size > STACK_CLASH_PROTECTION_GUARD_SIZE))
+    return min_first_step;
+
+   if (IMM12_OPERAND (frame->total_size))
+     return frame->total_size;
+ 
+-  HOST_WIDE_INT min_first_step
+-    = LARCH_STACK_ALIGN (frame->total_size - frame->fp_sp_offset);
+   HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
+   HOST_WIDE_INT min_second_step = frame->total_size - max_first_step;
+   gcc_assert (min_first_step <= max_first_step);
+@@ -1106,103 +1119,109 @@ loongarch_emit_stack_tie (void)
+ static void
+ loongarch_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
+ {
+-  /* See if we have a constant small number of probes to generate.  If so,
+-     that's the easy case.  */
+-  if ((TARGET_64BIT && (first + size <= 32768))
+-      || (!TARGET_64BIT && (first + size <= 2048)))
+-    {
+-      HOST_WIDE_INT i;
+  HOST_WIDE_INT rounded_size;
+  HOST_WIDE_INT interval;
+ 
+-      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
+-	 it exceeds SIZE.  If only one probe is needed, this will not
+-	 generate any code.  Then probe at FIRST + SIZE.  */
+-      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
+-	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+-					 -(first + i)));
+  if (flag_stack_clash_protection)
+    interval = STACK_CLASH_PROTECTION_GUARD_SIZE;
+  else
+    interval = PROBE_INTERVAL;
+ 
+-      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
+-				       -(first + size)));
+-    }
+  rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode);
+  rtx r14 = LARCH_PROLOGUE_TEMP3 (Pmode);
+ 
+-  /* Otherwise, do the same as above, but in a loop.  Note that we must be
+-     extra careful with variables wrapping around because we might be at
+-     the very top (or the very bottom) of the address space and we have
+-     to be able to handle this case properly; in particular, we use an
+-     equality test for the loop condition.  */
+-  else
+-    {
+-      HOST_WIDE_INT rounded_size;
+-      rtx r13 = LARCH_PROLOGUE_TEMP (Pmode);
+-      rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode);
+-      rtx r14 = LARCH_PROLOGUE_TEMP3 (Pmode);
+  size = size + first;
+ 
+-      /* Sanity check for the addressing mode we're going to use.  */
+-      gcc_assert (first <= 16384);
+  /* Sanity check for the addressing mode we're going to use.  */
+  gcc_assert (first <= 16384);
+ 
+  /* Step 1: round SIZE to the previous multiple of the interval.  */
+ 
+-      /* Step 1: round SIZE to the previous multiple of the interval.  */
+  rounded_size = ROUND_DOWN (size, interval);
+ 
+-      rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
+  /* Step 2: compute initial and final value of the loop counter.  */
+ 
+-      /* TEST_ADDR = SP + FIRST */
+-      if (first != 0)
+-	{
+-	  emit_move_insn (r14, GEN_INT (first));
+-	  emit_insn (gen_rtx_SET (r13, gen_rtx_MINUS (Pmode,
+-						      stack_pointer_rtx,
+-						      r14)));
+-	}
+-      else
+-	emit_move_insn (r13, stack_pointer_rtx);
+  emit_move_insn (r14, GEN_INT (interval));
+
+  /* If rounded_size is zero, it means that the space requested by
+     the local variable is less than the interval, and there is no
+     need to display and detect the allocated space.  */
+  if (rounded_size != 0)
+    {
+      /* Step 3: the loop
+
+	 do
+	 {
+	 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
+	 probe at TEST_ADDR
+	 }
+	 while (TEST_ADDR != LAST_ADDR)
+ 
+-      /* Step 2: compute initial and final value of the loop counter.  */
+	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
+	 until it is equal to ROUNDED_SIZE.  */
+ 
+-      emit_move_insn (r14, GEN_INT (PROBE_INTERVAL));
+-      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
+-      if (rounded_size == 0)
+-	emit_move_insn (r12, r13);
+      if (rounded_size <= STACK_CLASH_MAX_UNROLL_PAGES * interval)
+	{
+	  for (HOST_WIDE_INT i = 0; i < rounded_size; i += interval)
+	    {
+	      emit_insn (gen_rtx_SET (stack_pointer_rtx,
+				      gen_rtx_MINUS (Pmode,
+						     stack_pointer_rtx,
+						     r14)));
+	      emit_move_insn (gen_rtx_MEM (Pmode,
+					   gen_rtx_PLUS (Pmode,
+							 stack_pointer_rtx,
+							 const0_rtx)),
+			      const0_rtx);
+	      emit_insn (gen_blockage ());
+	    }
+	  dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
+	}
+       else
+ 	{
+ 	  emit_move_insn (r12, GEN_INT (rounded_size));
+-	  emit_insn (gen_rtx_SET (r12, gen_rtx_MINUS (Pmode, r13, r12)));
+-	  /* Step 3: the loop
+-
+-	     do
+-	     {
+-	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
+-	     probe at TEST_ADDR
+-	     }
+-	     while (TEST_ADDR != LAST_ADDR)
+-
+-	     probes at FIRST + N * PROBE_INTERVAL for values of N from 1
+-	     until it is equal to ROUNDED_SIZE.  */
+-
+-	  emit_insn (gen_probe_stack_range (Pmode, r13, r13, r12, r14));
+	  emit_insn (gen_rtx_SET (r12,
+				  gen_rtx_MINUS (Pmode,
+						 stack_pointer_rtx,
+						 r12)));
+
+	  emit_insn (gen_probe_stack_range (Pmode, stack_pointer_rtx,
+					    stack_pointer_rtx, r12, r14));
+	  emit_insn (gen_blockage ());
+	  dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
+ 	}
+    }
+  else
+    dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
+
+ 
+-      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
+-	 that SIZE is equal to ROUNDED_SIZE.  */
+  /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
+     that SIZE is equal to ROUNDED_SIZE.  */
+ 
+-      if (size != rounded_size)
+  if (size != rounded_size)
+    {
+      if (size - rounded_size >= 2048)
+ 	{
+-	  if (TARGET_64BIT)
+-	    emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
+-	  else
+-	    {
+-	      HOST_WIDE_INT i;
+-	      for (i = 2048; i < (size - rounded_size); i += 2048)
+-		{
+-		  emit_stack_probe (plus_constant (Pmode, r12, -i));
+-		  emit_insn (gen_rtx_SET (r12,
+-					  plus_constant (Pmode, r12, -2048)));
+-		}
+-	      rtx r1 = plus_constant (Pmode, r12,
+-				      -(size - rounded_size - i + 2048));
+-	      emit_stack_probe (r1);
+-	    }
+	  emit_move_insn (r14, GEN_INT (size - rounded_size));
+	  emit_insn (gen_rtx_SET (stack_pointer_rtx,
+				  gen_rtx_MINUS (Pmode,
+						 stack_pointer_rtx,
+						 r14)));
+ 	}
+      else
+	emit_insn (gen_rtx_SET (stack_pointer_rtx,
+				gen_rtx_PLUS (Pmode,
+					      stack_pointer_rtx,
+					      GEN_INT (rounded_size - size))));
+     }
+ 
+  if (first)
+    {
+      emit_move_insn (r12, GEN_INT (first));
+      emit_insn (gen_rtx_SET (stack_pointer_rtx,
+			      gen_rtx_PLUS (Pmode,
+					    stack_pointer_rtx, r12)));
+    }
+   /* Make sure nothing is scheduled before we are done.  */
+   emit_insn (gen_blockage ());
+ }
+@@ -1223,7 +1242,6 @@ loongarch_output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3)
+ 
+   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
+   xops[0] = reg1;
+-  xops[1] = GEN_INT (-PROBE_INTERVAL);
+   xops[2] = reg3;
+   if (TARGET_64BIT)
+     output_asm_insn ("sub.d\t%0,%0,%2", xops);
+@@ -1249,28 +1267,11 @@ loongarch_expand_prologue (void)
+ {
+   struct loongarch_frame_info *frame = &cfun->machine->frame;
+   HOST_WIDE_INT size = frame->total_size;
+-  HOST_WIDE_INT tmp;
+   rtx insn;
+ 
+   if (flag_stack_usage_info)
+     current_function_static_stack_size = size;
+ 
+-  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+-      || flag_stack_clash_protection)
+-    {
+-      if (crtl->is_leaf && !cfun->calls_alloca)
+-	{
+-	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
+-	    {
+-	      tmp = size - get_stack_check_protect ();
+-	      loongarch_emit_probe_stack_range (get_stack_check_protect (),
+-						tmp);
+-	    }
+-	}
+-      else if (size > 0)
+-	loongarch_emit_probe_stack_range (get_stack_check_protect (), size);
+-    }
+-
+   /* Save the registers.  */
+   if ((frame->mask | frame->fmask) != 0)
+     {
+@@ -1283,7 +1284,6 @@ loongarch_expand_prologue (void)
+       loongarch_for_each_saved_reg (size, loongarch_save_reg);
+     }
+ 
+-
+   /* Set up the frame pointer, if we're using one.  */
+   if (frame_pointer_needed)
+     {
+@@ -1294,7 +1294,45 @@ loongarch_expand_prologue (void)
+       loongarch_emit_stack_tie ();
+     }
+ 
+-  /* Allocate the rest of the frame.  */
+  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+       || flag_stack_clash_protection)
+    {
+      HOST_WIDE_INT first = get_stack_check_protect ();
+
+      if (frame->total_size == 0)
+	{
+	  /* do nothing.  */
+	  dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
+	  return;
+	}
+
+      if (crtl->is_leaf && !cfun->calls_alloca)
+	{
+	  HOST_WIDE_INT interval;
+
+	  if (flag_stack_clash_protection)
+	    interval = STACK_CLASH_PROTECTION_GUARD_SIZE;
+	  else
+	    interval = PROBE_INTERVAL;
+
+	  if (size > interval && size > first)
+	    loongarch_emit_probe_stack_range (first, size - first);
+	  else
+	    loongarch_emit_probe_stack_range (first, size);
+	}
+      else
+	loongarch_emit_probe_stack_range (first, size);
+
+      if (size > 0)
+	{
+	  /* Describe the effect of the previous instructions.  */
+	  insn = plus_constant (Pmode, stack_pointer_rtx, -size);
+	  insn = gen_rtx_SET (stack_pointer_rtx, insn);
+	  loongarch_set_frame_expr (insn);
+	}
+      return;
+    }
+
+   if (size > 0)
+     {
+       if (IMM12_OPERAND (-size))
+@@ -1305,7 +1343,8 @@ loongarch_expand_prologue (void)
+ 	}
+       else
+ 	{
+-	  loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (-size));
+	  loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode),
+			       GEN_INT (-size));
+ 	  emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
+ 				    LARCH_PROLOGUE_TEMP (Pmode)));
+ 
+@@ -6162,6 +6201,15 @@ loongarch_option_override_internal (struct gcc_options *opts)
+ 	gcc_unreachable ();
+     }
+ 
+  /* Validate the guard size.  */
+  int guard_size = param_stack_clash_protection_guard_size;
+
+  /* Enforce that interval is the same size as size so the mid-end does the
+     right thing.  */
+  SET_OPTION_IF_UNSET (opts, &global_options_set,
+		       param_stack_clash_protection_probe_interval,
+		       guard_size);
+
+   loongarch_init_print_operand_punct ();
+ 
+   /* Set up array to map GCC register number to debug register number.
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index a52a81adf..392597943 100644
+--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
+@@ -668,6 +668,10 @@ enum reg_class
+ 
+ #define STACK_BOUNDARY (TARGET_ABI_LP64 ? 128 : 64)
+ 
+/* This value controls how many pages we manually unroll the loop for when
+   generating stack clash probes.  */
+#define STACK_CLASH_MAX_UNROLL_PAGES 4
+
+ /* Symbolic macros for the registers used to return integer and floating
+    point values.  */
+ 
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c
+new file mode 100644
+index 000000000..6ee589c4b
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c
+@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-require-effective-target alloca } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE y
+#include "stack-check-alloca.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r\d{1,2},-8} 1 } } */
+/* { dg-final { scan-assembler-times {stx\.d\t\$r0,\$r3,\$r12} 1 } } */
+
+/* Dynamic alloca, expect loop, and 1 probes with top at sp.
+   1st probe is inside the loop for the full guard-size allocations, second
+   probe is for the case where residual is zero.  */
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-2.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-2.c
+new file mode 100644
+index 000000000..8deaa5873
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-2.c
+@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-require-effective-target alloca } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 0
+#include "stack-check-alloca.h"
+
+/* { dg-final { scan-assembler-not {stp*t*r*\.d\t\$r0,\$r3,4088} } } */
+
+/* Alloca of 0 should emit no probes, boundary condition.  */
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-3.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-3.c
+new file mode 100644
+index 000000000..e326ba9a0
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-3.c
+@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-require-effective-target alloca } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 100
+#include "stack-check-alloca.h"
+
+/* { dg-final { scan-assembler-times {st\.d\t\$r0,\$r3,104} 1 } } */
+
+/* Alloca is less than guard-size, 1 probe at the top of the new allocation.  */
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-4.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-4.c
+new file mode 100644
+index 000000000..b9f7572de
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-4.c
+@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-require-effective-target alloca } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 64 * 1024
+#include "stack-check-alloca.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r\d{1,2},-8} 1 } } */
+
+/* Alloca is exactly one guard-size, 1 probe expected at top.  */
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-5.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-5.c
+new file mode 100644
+index 000000000..0ff6e493f
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-5.c
+@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-require-effective-target alloca } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 65 * 1024
+#include "stack-check-alloca.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r\d{1,2},-8} 1 } } */
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,1016} 1 } } */
+
+/* Alloca is more than one guard-page. 2 probes expected.  */
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-6.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-6.c
+new file mode 100644
+index 000000000..c5cf74fcb
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-6.c
+@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-require-effective-target alloca } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 127 * 64 * 1024
+#include "stack-check-alloca.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r\d{1,2},-8} 1 } } */
+
+/* Large alloca of a constant amount which is a multiple of a guard-size.
+   Loop expected with top probe.  */
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca.h b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca.h
+new file mode 100644
+index 000000000..8c75f6c0f
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca.h
+@@ -0,0 +1,15 @@
+
+/* Avoid inclusion of alloca.h, unavailable on some systems.  */
+#define alloca __builtin_alloca
+
+__attribute__((noinline, noipa))
+void g (char* ptr, int y)
+{
+  ptr[y] = '\0';
+}
+
+void f_caller (int y)
+{
+  char* pStr = alloca(SIZE);
+  g (pStr, y);
+}
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
+new file mode 100644
+index 000000000..f0c6877fc
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
+@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -funwind-tables" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 128*1024
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 131088} 1 } } */
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1 } } */
+
+/* Checks that the CFA notes are correct for every sp adjustment.  */
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
+new file mode 100644
+index 000000000..c6e07bc56
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
+@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -funwind-tables" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 1280*1024 + 512
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 1311248} 1 } } */
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1 } } */
+
+/* Checks that the CFA notes are correct for every sp adjustment.  */
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-1.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-1.c
+new file mode 100644
+index 000000000..351bc1f61
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-1.c
+@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 128
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,0} 0 } } */
+
+/* SIZE is smaller than guard-size so no probe expected.  */
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-2.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-2.c
+new file mode 100644
+index 000000000..6bba659a3
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-2.c
+@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 63 * 1024
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*.d\t\$r0,\$r3,0} 0 } } */
+
+/* SIZE is smaller than guard-size so no probe expected.  */
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-3.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-3.c
+new file mode 100644
+index 000000000..164956c37
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-3.c
+@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 64 * 1024
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,0} 1 } } */
+
+/* SIZE is equal to guard-size, 1 probe expected, boundary condition.  */
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-4.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-4.c
+new file mode 100644
+index 000000000..f53da6b0d
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-4.c
+@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 65 * 1024
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,0} 1 } } */
+
+/* SIZE is more than guard-size, 1 probe expected.  */
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-5.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-5.c
+new file mode 100644
+index 000000000..c092317ea
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-5.c
+@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 127 * 1024
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,0} 1 } } */
+
+/* SIZE is more than 1x guard-size and remainder smaller than guard-size,
+   1 probe expected, unrolled, no loop.  */
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-6.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-6.c
+new file mode 100644
+index 000000000..70a2f53f6
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-6.c
+@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 128 * 1024
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,0} 2 } } */
+
+/* SIZE is more than 2x guard-size and no remainder, unrolled, no loop.  */
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-7.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-7.c
+new file mode 100644
+index 000000000..e2df89acc
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-7.c
+@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 6 * 64 * 1024
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*.d\t\$r0,\$r3,0} 1 } } */
+
+/* SIZE is more than 4x guard-size and no remainder, 1 probe expected in a loop
+   and no residual probe.  */
+diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue.h b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue.h
+new file mode 100644
+index 000000000..b7e06aedb
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue.h
+@@ -0,0 +1,5 @@
+int f_test (int x)
+{
+  char arr[SIZE];
+  return arr[x];
+}
+diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
+index c858bd93b..3a326ea1c 100644
+--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
+@@ -11292,7 +11292,8 @@ proc check_effective_target_supports_stack_clash_protection { } {
+ 
+     if { [istarget x86_64-*-*] || [istarget i?86-*-*] 
+ 	  || [istarget powerpc*-*-*] || [istarget rs6000*-*-*]
+-	  || [istarget aarch64*-**] || [istarget s390*-*-*] } {
+	  || [istarget aarch64*-**] || [istarget s390*-*-*]
+	  || [istarget loongarch64*-**] } {
+ 	return 1
+     }
+   return 0
+@@ -11343,6 +11344,10 @@ proc check_effective_target_caller_implicit_probes { } {
+ 	return 1;
+   }
+ 
+  if { [istarget loongarch64*-*-*] } {
+	return 1;
+  }
+
+   return 0
+ }
+ 
+-- 
+2.33.0
+
--- a/LoongArch-Optimized-multiply-instruction-generation.patch
+++ b/LoongArch-Optimized-multiply-instruction-generation.patch
@ -0,0 +1,232 @@
+From aa1dc79c9a5ff3df241a94cbfb1c857cfa89c686 Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Tue, 5 Sep 2023 11:09:03 +0800
+Subject: [PATCH 074/124] LoongArch: Optimized multiply instruction generation.
+
+	1. Can generate mulh.w[u] instruction.
+	2. Can generate mulw.d.wu instruction.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (mulsidi3_64bit):
+	Field unsigned extension support.
+	(<u>muldi3_highpart): Modify template name.
+	(<u>mulsi3_highpart): Likewise.
+	(<u>mulsidi3_64bit): Field unsigned extension support.
+	(<su>muldi3_highpart): Modify muldi3_highpart to
+	smuldi3_highpart.
+	(<su>mulsi3_highpart): Modify mulsi3_highpart to
+	smulsi3_highpart.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/mulw_d_wu.c: New test.
+	* gcc.target/loongarch/smuldi3_highpart.c: New test.
+	* gcc.target/loongarch/smulsi3_highpart.c: New test.
+	* gcc.target/loongarch/umulsi3_highpart.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.md             | 66 ++++++++++++-------
+ .../gcc.target/loongarch/mulw_d_wu.c          |  9 +++
+ .../gcc.target/loongarch/smuldi3_highpart.c   | 13 ++++
+ .../gcc.target/loongarch/smulsi3_highpart.c   | 15 +++++
+ .../gcc.target/loongarch/umulsi3_highpart.c   | 14 ++++
+ 5 files changed, 94 insertions(+), 23 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 11c18bf15..264cd325c 100644
+--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
+@@ -750,15 +750,6 @@
+   [(set_attr "type" "imul")
+    (set_attr "mode" "<MODE>")])
+ 
+-(define_insn "mulsidi3_64bit"
+-  [(set (match_operand:DI 0 "register_operand" "=r")
+-	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
+-		 (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
+-  "TARGET_64BIT"
+-  "mulw.d.w\t%0,%1,%2"
+-  [(set_attr "type" "imul")
+-   (set_attr "mode" "DI")])
+-
+ (define_insn "*mulsi3_extended"
+   [(set (match_operand:DI 0 "register_operand" "=r")
+ 	(sign_extend:DI
+@@ -787,14 +778,14 @@
+   emit_insn (gen_muldi3 (low, operands[1], operands[2]));
+ 
+   rtx high = gen_reg_rtx (DImode);
+-  emit_insn (gen_<u>muldi3_highpart (high, operands[1], operands[2]));
+  emit_insn (gen_<su>muldi3_highpart (high, operands[1], operands[2]));
+ 
+   emit_move_insn (gen_lowpart (DImode, operands[0]), low);
+   emit_move_insn (gen_highpart (DImode, operands[0]), high);
+   DONE;
+ })
+ 
+-(define_insn "<u>muldi3_highpart"
+(define_insn "<su>muldi3_highpart"
+   [(set (match_operand:DI 0 "register_operand" "=r")
+ 	(truncate:DI
+ 	  (lshiftrt:TI
+@@ -809,22 +800,34 @@
+    (set_attr "mode" "DI")])
+ 
+ (define_expand "<u>mulsidi3"
+-  [(set (match_operand:DI 0 "register_operand" "=r")
+  [(set (match_operand:DI 0 "register_operand")
+ 	(mult:DI (any_extend:DI
+-		   (match_operand:SI 1 "register_operand" " r"))
+		   (match_operand:SI 1 "register_operand"))
+ 		 (any_extend:DI
+-		   (match_operand:SI 2 "register_operand" " r"))))]
+-  "!TARGET_64BIT"
+		   (match_operand:SI 2 "register_operand"))))]
+  ""
+ {
+-  rtx temp = gen_reg_rtx (SImode);
+-  emit_insn (gen_mulsi3 (temp, operands[1], operands[2]));
+-  emit_insn (gen_<u>mulsi3_highpart (loongarch_subword (operands[0], true),
+-				     operands[1], operands[2]));
+-  emit_insn (gen_movsi (loongarch_subword (operands[0], false), temp));
+-  DONE;
+  if (!TARGET_64BIT)
+  {
+    rtx temp = gen_reg_rtx (SImode);
+    emit_insn (gen_mulsi3 (temp, operands[1], operands[2]));
+    emit_insn (gen_<su>mulsi3_highpart (loongarch_subword (operands[0], true),
+				       operands[1], operands[2]));
+    emit_insn (gen_movsi (loongarch_subword (operands[0], false), temp));
+    DONE;
+  }
+ })
+ 
+-(define_insn "<u>mulsi3_highpart"
+(define_insn "<u>mulsidi3_64bit"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(mult:DI (any_extend:DI (match_operand:SI 1 "register_operand" "r"))
+		 (any_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
+  "TARGET_64BIT"
+  "mulw.d.w<u>\t%0,%1,%2"
+  [(set_attr "type" "imul")
+   (set_attr "mode" "DI")])
+
+(define_insn "<su>mulsi3_highpart"
+   [(set (match_operand:SI 0 "register_operand" "=r")
+ 	(truncate:SI
+ 	  (lshiftrt:DI
+@@ -833,11 +836,28 @@
+ 		     (any_extend:DI
+ 		       (match_operand:SI 2 "register_operand" " r")))
+ 	    (const_int 32))))]
+-  "!TARGET_64BIT"
+  ""
+   "mulh.w<u>\t%0,%1,%2"
+   [(set_attr "type" "imul")
+    (set_attr "mode" "SI")])
+ 
+;; Under the LoongArch architecture, the mulh.w[u] instruction performs
+;; sign extension by default, so the sign extension instruction can be
+;; eliminated.
+(define_peephole
+  [(set (match_operand:SI 0 "register_operand")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI (any_extend:DI
+		       (match_operand:SI 1 "register_operand"))
+		     (any_extend:DI
+		       (match_operand:SI 2 "register_operand")))
+	    (const_int 32))))
+   (set (match_operand:DI 3 "register_operand")
+	(sign_extend:DI (match_dup 0)))]
+   "TARGET_64BIT && REGNO (operands[0]) == REGNO (operands[3])"
+   "mulh.w<u>\t%0,%1,%2")
+
+ ;;
+ ;;  ....................
+ ;;
+diff --git a/gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c b/gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c
+new file mode 100644
+index 000000000..16163d667
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c
+@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d" } */
+/* { dg-final { scan-assembler "mulw.d.wu" } } */
+
+__attribute__((noipa, noinline)) unsigned long
+f(unsigned long a, unsigned long b)
+{
+  return (unsigned long)(unsigned int)a * (unsigned long)(unsigned int)b;
+}
+diff --git a/gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c b/gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c
+new file mode 100644
+index 000000000..6f5c686ca
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c
+@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2 -fdump-rtl-expand-all" } */
+
+typedef int TI __attribute ((mode(TI)));
+typedef int DI __attribute__((mode(DI)));
+
+DI
+test (DI x, DI y)
+{
+  return ((TI)x * y) >> 64;
+}
+
+/* { dg-final { scan-rtl-dump "highparttmp" "expand" } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c b/gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c
+new file mode 100644
+index 000000000..c4dbf8afc
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c
+@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-expand-all" } */
+
+typedef unsigned int DI __attribute__((mode(DI)));
+typedef unsigned int SI __attribute__((mode(SI)));
+
+SI
+f (SI x, SI y)
+{
+  return ((DI) x * y) >> 32;
+}
+
+/* { dg-final { scan-rtl-dump "highparttmp" "expand" } } */
+/* { dg-final { scan-assembler "mulh\\.w" } } */
+/* { dg-final { scan-assembler-not "slli\\.w" } } */
+diff --git a/gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c b/gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c
+new file mode 100644
+index 000000000..e208803e2
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c
+@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef unsigned int DI __attribute__((mode(DI)));
+typedef unsigned int SI __attribute__((mode(SI)));
+
+SI
+f (SI x, SI y)
+{
+  return ((DI) x * y) >> 32;
+}
+
+/* { dg-final { scan-assembler "mulh\\.wu" } } */
+/* { dg-final { scan-assembler-not "slli\\.w" } } */
+-- 
+2.33.0
+
--- a/LoongArch-Prepare-static-PIE-support.patch
+++ b/LoongArch-Prepare-static-PIE-support.patch
@ -0,0 +1,44 @@
+From aa2d9e0e1dc4bf0b612618cf0e3fcea514f92f95 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 13 Sep 2022 23:21:39 +0800
+Subject: [PATCH 018/124] LoongArch: Prepare static PIE support
+
+Static PIE allows us to extend the ASLR to cover static executables and
+it's not too difficult to support it.  On GCC side, we just pass a group
+of options to the linker, like other ports with static PIE support.
+
+The real implementation of static PIE (rcrt1.o) will be added into Glibc
+later.
+
+gcc/ChangeLog:
+
+	* config/loongarch/gnu-user.h (GNU_USER_TARGET_LINK_SPEC): For
+	-static-pie, pass -static -pie --no-dynamic-linker -z text to
+	the linker, and do not pass --dynamic-linker.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/gnu-user.h | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
+index 664dc9206..c5b1afe53 100644
+--- a/gcc/config/loongarch/gnu-user.h
+++ b/gcc/config/loongarch/gnu-user.h
+@@ -40,8 +40,10 @@ along with GCC; see the file COPYING3.  If not see
+ #undef GNU_USER_TARGET_LINK_SPEC
+ #define GNU_USER_TARGET_LINK_SPEC \
+   "%{G*} %{shared} -m " GNU_USER_LINK_EMULATION \
+-  "%{!shared: %{static} %{!static: %{rdynamic:-export-dynamic} " \
+-  "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}"
+  "%{!shared: %{static} " \
+  "%{!static: %{!static-pie: %{rdynamic:-export-dynamic} " \
+  "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} " \
+  "%{static-pie: -static -pie --no-dynamic-linker -z text}}"
+ 
+ 
+ /* Similar to standard Linux, but adding -ffast-math support.  */
+-- 
+2.33.0
+
--- a/LoongArch-Provide-fmin-fmax-RTL-pattern.patch
+++ b/LoongArch-Provide-fmin-fmax-RTL-pattern.patch
@ -0,0 +1,100 @@
+From b065c84206cdf463a377ca28f719dae7acbed0f7 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Tue, 16 Aug 2022 15:34:36 +0800
+Subject: [PATCH 009/124] LoongArch: Provide fmin/fmax RTL pattern
+
+We already had smin/smax RTL pattern using fmin/fmax instruction.  But
+for smin/smax, it's unspecified what will happen if either operand is
+NaN.  So we would generate calls to libc fmin/fmax functions with
+-fno-finite-math-only (the default for all optimization levels except
+-Ofast).
+
+But, LoongArch fmin/fmax instruction is IEEE-754-2008 conformant so we
+can also use the instruction for fmin/fmax pattern and avoid the library
+function call.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (fmax<mode>3): New RTL pattern.
+	(fmin<mode>3): Likewise.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/fmax-fmin.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.md             | 18 +++++++++++
+ .../gcc.target/loongarch/fmax-fmin.c          | 30 +++++++++++++++++++
+ 2 files changed, 48 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/fmax-fmin.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index 6b6df22a5..8e8868de9 100644
+--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
+@@ -1023,6 +1023,24 @@
+   [(set_attr "type" "fmove")
+    (set_attr "mode" "<MODE>")])
+ 
+(define_insn "fmax<mode>3"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(smax:ANYF (match_operand:ANYF 1 "register_operand" "f")
+		   (match_operand:ANYF 2 "register_operand" "f")))]
+  ""
+  "fmax.<fmt>\t%0,%1,%2"
+  [(set_attr "type" "fmove")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "fmin<mode>3"
+  [(set (match_operand:ANYF 0 "register_operand" "=f")
+	(smin:ANYF (match_operand:ANYF 1 "register_operand" "f")
+		   (match_operand:ANYF 2 "register_operand" "f")))]
+  ""
+  "fmin.<fmt>\t%0,%1,%2"
+  [(set_attr "type" "fmove")
+   (set_attr "mode" "<MODE>")])
+
+ (define_insn "smaxa<mode>3"
+   [(set (match_operand:ANYF 0 "register_operand" "=f")
+ 	(if_then_else:ANYF
+diff --git a/gcc/testsuite/gcc.target/loongarch/fmax-fmin.c b/gcc/testsuite/gcc.target/loongarch/fmax-fmin.c
+new file mode 100644
+index 000000000..92cf8a150
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/fmax-fmin.c
+@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mdouble-float -fno-finite-math-only" } */
+/* { dg-final { scan-assembler "fmin\\.s" } } */
+/* { dg-final { scan-assembler "fmin\\.d" } } */
+/* { dg-final { scan-assembler "fmax\\.s" } } */
+/* { dg-final { scan-assembler "fmax\\.d" } } */
+
+double
+_fmax(double a, double b)
+{
+  return __builtin_fmax(a, b);
+}
+
+float
+_fmaxf(float a, float b)
+{
+  return __builtin_fmaxf(a, b);
+}
+
+double
+_fmin(double a, double b)
+{
+  return __builtin_fmin(a, b);
+}
+
+float
+_fminf(float a, float b)
+{
+  return __builtin_fminf(a, b);
+}
+-- 
+2.33.0
+
--- a/LoongArch-Remove-redundant-sign-extension-instructio.patch
+++ b/LoongArch-Remove-redundant-sign-extension-instructio.patch
@ -0,0 +1,180 @@
+From fbe6421c5600a151dbae96d18db2fd31aca2fe7c Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Thu, 24 Aug 2023 16:44:56 +0800
+Subject: [PATCH 051/124] LoongArch: Remove redundant sign extension
+ instructions caused by SLT instructions.
+
+Since the SLT instruction does not distinguish between 64-bit operations and 32-bit
+operations under the 64-bit LoongArch architecture, if an operand of slt is SImode,
+the operand must be sign-extended explicitly.
+
+But in cases like the test case below, the sign extension is redundant:
+
+	extern int src1, src2, src3;
+
+	int
+	test (void)
+	{
+	  int data1 = src1 + src2;
+	  int data2 = src1 + src3;
+	  return data1 > data2 ? data1 : data2;
+	}
+Assembly code before optimization:
+ 	...
+	add.w	$r4,$r4,$r14
+	add.w	$r13,$r13,$r14
+	slli.w	$r12,$r4,0
+	slli.w	$r14,$r13,0
+	slt	$r12,$r12,$r14
+	masknez	$r4,$r4,$r12
+	maskeqz	$r12,$r13,$r12
+	or	$r4,$r4,$r12
+	slli.w	$r4,$r4,0
+	...
+
+After optimization:
+	...
+	add.w	$r12,$r12,$r14
+	add.w	$r13,$r13,$r14
+	slt	$r4,$r12,$r13
+	masknez	$r12,$r12,$r4
+	maskeqz	$r4,$r13,$r4
+	or	$r4,$r12,$r4
+	...
+
+In this test example, the two operands of SLT are produced by addition
+operations, and add.w implicitly sign-extends its result, so the two operands
+of SLT do not require an explicit sign extension.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
+	Optimize the function implementation.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/slt-sign-extend.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.cc             | 53 +++++++++++++++++--
+ .../gcc.target/loongarch/slt-sign-extend.c    | 14 +++++
+ 2 files changed, 63 insertions(+), 4 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index f14de5cce..caacfa8a3 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -4380,14 +4380,30 @@ loongarch_expand_conditional_move (rtx *operands)
+   enum rtx_code code = GET_CODE (operands[1]);
+   rtx op0 = XEXP (operands[1], 0);
+   rtx op1 = XEXP (operands[1], 1);
+  rtx op0_extend = op0;
+  rtx op1_extend = op1;
+
+  /* Record whether operands[2] and operands[3] modes are promoted to word_mode.  */
+  bool promote_p = false;
+  machine_mode mode = GET_MODE (operands[0]);
+ 
+   if (FLOAT_MODE_P (GET_MODE (op1)))
+     loongarch_emit_float_compare (&code, &op0, &op1);
+   else
+     {
+      if ((REGNO (op0) == REGNO (operands[2])
+	   || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
+	  && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
+	{
+	  mode = word_mode;
+	  promote_p = true;
+	}
+
+       loongarch_extend_comparands (code, &op0, &op1);
+ 
+       op0 = force_reg (word_mode, op0);
+      op0_extend = op0;
+      op1_extend = force_reg (word_mode, op1);
+ 
+       if (code == EQ || code == NE)
+ 	{
+@@ -4414,23 +4430,52 @@ loongarch_expand_conditional_move (rtx *operands)
+       && register_operand (operands[2], VOIDmode)
+       && register_operand (operands[3], VOIDmode))
+     {
+-      machine_mode mode = GET_MODE (operands[0]);
+      rtx op2 = operands[2];
+      rtx op3 = operands[3];
+
+      if (promote_p)
+	{
+	  if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
+	    op2 = op0_extend;
+	  else
+	    {
+	      loongarch_extend_comparands (code, &op2, &const0_rtx);
+	      op2 = force_reg (mode, op2);
+	    }
+
+	  if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
+	    op3 = op1_extend;
+	  else
+	    {
+	      loongarch_extend_comparands (code, &op3, &const0_rtx);
+	      op3 = force_reg (mode, op3);
+	    }
+	}
+
+       rtx temp = gen_reg_rtx (mode);
+       rtx temp2 = gen_reg_rtx (mode);
+ 
+       emit_insn (gen_rtx_SET (temp,
+ 			      gen_rtx_IF_THEN_ELSE (mode, cond,
+-						    operands[2], const0_rtx)));
+						    op2, const0_rtx)));
+ 
+       /* Flip the test for the second operand.  */
+       cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1);
+ 
+       emit_insn (gen_rtx_SET (temp2,
+ 			      gen_rtx_IF_THEN_ELSE (mode, cond,
+-						    operands[3], const0_rtx)));
+						    op3, const0_rtx)));
+ 
+       /* Merge the two results, at least one is guaranteed to be zero.  */
+-      emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
+      if (promote_p)
+	{
+	  rtx temp3 = gen_reg_rtx (mode);
+	  emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
+	  temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
+	  loongarch_emit_move (operands[0], temp3);
+	}
+      else
+	emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
+     }
+   else
+     emit_insn (gen_rtx_SET (operands[0],
+diff --git a/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
+new file mode 100644
+index 000000000..ea6b28b7c
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
+@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2" } */
+/* { dg-final { scan-assembler-not "slli.w" } } */
+
+extern int src1, src2, src3;
+
+int
+test (void)
+{
+  int data1 = src1 + src2;
+  int data2 = src1 + src3;
+
+  return data1 > data2 ? data1 : data2;
+}
+-- 
+2.33.0
+
--- a/LoongArch-Remove-the-definition-of-the-macro-LOGICAL.patch
+++ b/LoongArch-Remove-the-definition-of-the-macro-LOGICAL.patch
@ -0,0 +1,36 @@
+From 297b8c5770ad85bf468526602e28aff8a66dc01a Mon Sep 17 00:00:00 2001
+From: Lulu Cheng <chenglulu@loongson.cn>
+Date: Thu, 13 Apr 2023 19:24:38 +0800
+Subject: [PATCH 040/124] LoongArch: Remove the definition of the macro
+ LOGICAL_OP_NON_SHORT_CIRCUIT under the architecture and use the default
+ definition instead.
+
+In some cases, setting this macro as the default can reduce the number of conditional
+branch instructions.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Remove the macro
+	definition.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.h | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
+index 392597943..c6e37b1b4 100644
+--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
+@@ -836,7 +836,6 @@ typedef struct {
+    1 is the default; other values are interpreted relative to that.  */
+ 
+ #define BRANCH_COST(speed_p, predictable_p) loongarch_branch_cost
+-#define LOGICAL_OP_NON_SHORT_CIRCUIT 0
+ 
+ /* Return the asm template for a conditional branch instruction.
+    OPCODE is the opcode's mnemonic and OPERANDS is the asm template for
+-- 
+2.33.0
+
--- a/LoongArch-Rename-frint_-fmt-to-rint-mode-2.patch
+++ b/LoongArch-Rename-frint_-fmt-to-rint-mode-2.patch
@ -0,0 +1,65 @@
+From 7584716b03b13c06b8bb9956b9f49e0cfc29c6b3 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Sun, 6 Nov 2022 20:41:38 +0800
+Subject: [PATCH 027/124] LoongArch: Rename frint_<fmt> to rint<mode>2
+
+Use standard name so __builtin_rint{,f} can be expanded to one
+instruction.
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.md (frint_<fmt>): Rename to ..
+	(rint<mode>2): .. this.
+
+gcc/testsuite/ChangeLog:
+
+	* gcc.target/loongarch/frint.c: New test.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.md          |  4 ++--
+ gcc/testsuite/gcc.target/loongarch/frint.c | 16 ++++++++++++++++
+ 2 files changed, 18 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.target/loongarch/frint.c
+
+diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
+index bda34d0f3..a14ab14ac 100644
+--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
+@@ -2012,8 +2012,8 @@
+   [(set_attr "type" "move")]
+ )
+ 
+-;; Convert floating-point numbers to integers
+-(define_insn "frint_<fmt>"
+;; Round floating-point numbers to integers
+(define_insn "rint<mode>2"
+   [(set (match_operand:ANYF 0 "register_operand" "=f")
+ 	(unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
+ 		      UNSPEC_FRINT))]
+diff --git a/gcc/testsuite/gcc.target/loongarch/frint.c b/gcc/testsuite/gcc.target/loongarch/frint.c
+new file mode 100644
+index 000000000..3ee6a8f97
+--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/frint.c
+@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mdouble-float" } */
+/* { dg-final { scan-assembler "frint\\.s" } } */
+/* { dg-final { scan-assembler "frint\\.d" } } */
+
+double
+my_rint (double a)
+{
+  return __builtin_rint (a);
+}
+
+float
+my_rintf (float a)
+{
+  return __builtin_rintf (a);
+}
+-- 
+2.33.0
+
--- a/LoongArch-Set-default-alignment-for-functions-and-la.patch
+++ b/LoongArch-Set-default-alignment-for-functions-and-la.patch
@ -0,0 +1,113 @@
+From 129d96b9ab5a2445d0fc2c3f7b72baa0453bd93f Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Wed, 14 Jun 2023 08:24:05 +0800
+Subject: [PATCH 047/124] LoongArch: Set default alignment for functions and
+ labels with -mtune
+
+The LA464 micro-architecture is sensitive to alignment of code.  The
+Loongson team has benchmarked various combinations of function and label
+alignment; the results [1] show that 16-byte label alignment together with
+32-byte function alignment gives the best results in terms of SPEC score.
+
+Add a mtune-based table-driven mechanism to set the default of
+-falign-{functions,labels}.  As LA464 is the first (and the only for
+now) uarch supported by GCC, the same setting is also used for
+the "generic" -mtune=loongarch64.  In the future we may set different
+settings for LA{2,3,6}64 once we add the support for them.
+
+Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for trunk?
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch-tune.h (loongarch_align): New
+	struct.
+	* config/loongarch/loongarch-def.h (loongarch_cpu_align): New
+	array.
+	* config/loongarch/loongarch-def.c (loongarch_cpu_align): Define
+	the array.
+	* config/loongarch/loongarch.cc
+	(loongarch_option_override_internal): Set the value of
+	-falign-functions= if -falign-functions is enabled but no value
+	is given.  Likewise for -falign-labels=.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch-def.c  | 12 ++++++++++++
+ gcc/config/loongarch/loongarch-def.h  |  1 +
+ gcc/config/loongarch/loongarch-tune.h |  8 ++++++++
+ gcc/config/loongarch/loongarch.cc     |  6 ++++++
+ 4 files changed, 27 insertions(+)
+
+diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
+index 80ab10a52..74d422ce0 100644
+--- a/gcc/config/loongarch/loongarch-def.c
+++ b/gcc/config/loongarch/loongarch-def.c
+@@ -72,6 +72,18 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
+   },
+ };
+ 
+struct loongarch_align
+loongarch_cpu_align[N_TUNE_TYPES] = {
+  [CPU_LOONGARCH64] = {
+    .function = "32",
+    .label = "16",
+  },
+  [CPU_LA464] = {
+    .function = "32",
+    .label = "16",
+  },
+};
+
+ /* The following properties cannot be looked up directly using "cpucfg".
+  So it is necessary to provide a default value for "unknown native"
+  tune targets (i.e. -mtune=native while PRID does not correspond to
+diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
+index b5985f070..eb87a79a5 100644
+--- a/gcc/config/loongarch/loongarch-def.h
+++ b/gcc/config/loongarch/loongarch-def.h
+@@ -144,6 +144,7 @@ extern int loongarch_cpu_issue_rate[];
+ extern int loongarch_cpu_multipass_dfa_lookahead[];
+ 
+ extern struct loongarch_cache loongarch_cpu_cache[];
+extern struct loongarch_align loongarch_cpu_align[];
+ extern struct loongarch_rtx_cost_data loongarch_cpu_rtx_cost_data[];
+ 
+ #ifdef __cplusplus
+diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
+index 8e3eb2947..d961963f0 100644
+--- a/gcc/config/loongarch/loongarch-tune.h
+++ b/gcc/config/loongarch/loongarch-tune.h
+@@ -48,4 +48,12 @@ struct loongarch_cache {
+     int simultaneous_prefetches; /* number of parallel prefetch */
+ };
+ 
+/* Alignment for functions and labels for best performance.  For new uarchs
+   the value should be measured via benchmarking.  See the documentation for
+   -falign-functions and -falign-labels in invoke.texi for the format.  */
+struct loongarch_align {
+  const char *function;	/* default value for -falign-functions */
+  const char *label;	/* default value for -falign-labels */
+};
+
+ #endif /* LOONGARCH_TUNE_H */
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index 4c0f393b6..f14de5cce 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -6246,6 +6246,12 @@ loongarch_option_override_internal (struct gcc_options *opts)
+       && !opts->x_optimize_size)
+     opts->x_flag_prefetch_loop_arrays = 1;
+ 
+  if (opts->x_flag_align_functions && !opts->x_str_align_functions)
+    opts->x_str_align_functions = loongarch_cpu_align[LARCH_ACTUAL_TUNE].function;
+
+  if (opts->x_flag_align_labels && !opts->x_str_align_labels)
+    opts->x_str_align_labels = loongarch_cpu_align[LARCH_ACTUAL_TUNE].label;
+
+   if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
+     error ("%qs cannot be used for compiling a shared library",
+ 	   "-mdirect-extern-access");
+-- 
+2.33.0
+
--- a/LoongArch-Slightly-simplify-loongarch_block_move_str.patch
+++ b/LoongArch-Slightly-simplify-loongarch_block_move_str.patch
@ -0,0 +1,37 @@
+From f4a0248c80fedff3a6841407ff95b732dfbb93a1 Mon Sep 17 00:00:00 2001
+From: Xi Ruoyao <xry111@xry111.site>
+Date: Fri, 8 Sep 2023 00:29:57 +0800
+Subject: [PATCH 073/124] LoongArch: Slightly simplify
+ loongarch_block_move_straight
+
+gcc/ChangeLog:
+
+	* config/loongarch/loongarch.cc (loongarch_block_move_straight):
+	Check precondition (delta must be a power of 2) and use
+	popcount_hwi instead of a homebrew loop.
+
+Signed-off-by: Peng Fan <fanpeng@loongson.cn>
+Signed-off-by: ticat_fp <fanpeng@loongson.cn>
+---
+ gcc/config/loongarch/loongarch.cc | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
+index baa5c2354..baa9831aa 100644
+--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
+@@ -5221,9 +5221,8 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length,
+      emit two ld.d/st.d pairs, one ld.w/st.w pair, and one ld.b/st.b
+      pair.  For each load/store pair we use a dedicated register to keep
+      the pipeline as populated as possible.  */
+-  HOST_WIDE_INT num_reg = length / delta;
+-  for (delta_cur = delta / 2; delta_cur != 0; delta_cur /= 2)
+-    num_reg += !!(length & delta_cur);
+  gcc_assert (pow2p_hwi (delta));
+  HOST_WIDE_INT num_reg = length / delta + popcount_hwi (length % delta);
+ 
+   /* Allocate a buffer for the temporary registers.  */
+   regs = XALLOCAVEC (rtx, num_reg);
+-- 
+2.33.0
+
--- a/LoongArch-Subdivision-symbol-type-add-SYMBOL_PCREL-s.patch
+++ b/LoongArch-Subdivision-symbol-type-add-SYMBOL_PCREL-s.patch
--- a/Show More
+++ b/Show More