LoongArch: Sync patch from gcc upstream

Signed-off-by: ticat_fp <fanpeng@loongson.cn>
This commit is contained in:
ticat_fp 2024-03-27 09:22:13 +08:00
parent 18582a2742
commit 7e7be47bfd
126 changed files with 228142 additions and 9 deletions

View File

@ -0,0 +1,59 @@
From 62ea18c632200edbbf46b4e957bc4d997f1c66f0 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Tue, 27 Sep 2022 15:28:43 +0800
Subject: [PATCH 024/124] Libvtv: Add loongarch support.
The loongarch64 specification permits page sizes of 4KiB, 16KiB and 64KiB,
but only 16KiB pages are supported for now.
Co-Authored-By: qijingwen <qijingwen@loongson.cn>
include/ChangeLog:
* vtv-change-permission.h (defined): Determines whether the macro
__loongarch_lp64 is defined
(VTV_PAGE_SIZE): Set VTV_PAGE_SIZE to 16KiB for loongarch64.
libvtv/ChangeLog:
* configure.tgt: Add loongarch support.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
include/vtv-change-permission.h | 4 ++++
libvtv/configure.tgt | 3 +++
2 files changed, 7 insertions(+)
diff --git a/include/vtv-change-permission.h b/include/vtv-change-permission.h
index 70bdad92b..e7b9294a0 100644
--- a/include/vtv-change-permission.h
+++ b/include/vtv-change-permission.h
@@ -48,6 +48,10 @@ extern void __VLTChangePermission (int);
#else
#if defined(__sun__) && defined(__svr4__) && defined(__sparc__)
#define VTV_PAGE_SIZE 8192
+#elif defined(__loongarch_lp64)
+/* The page size is configurable by the kernel to be 4, 16 or 64 KiB.
+ For now, only the default page size of 16KiB is supported. */
+#define VTV_PAGE_SIZE 16384
#else
#define VTV_PAGE_SIZE 4096
#endif
diff --git a/libvtv/configure.tgt b/libvtv/configure.tgt
index aa2a3f675..6cdd1e97a 100644
--- a/libvtv/configure.tgt
+++ b/libvtv/configure.tgt
@@ -50,6 +50,9 @@ case "${target}" in
;;
x86_64-*-darwin[1]* | i?86-*-darwin[1]*)
;;
+ loongarch*-*-linux*)
+ VTV_SUPPORTED=yes
+ ;;
*)
;;
esac
--
2.33.0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,166 @@
From 7cfe6e057045ac794afbe9097b1b211c0e1ea723 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Thu, 6 Apr 2023 16:02:07 +0800
Subject: [PATCH 039/124] LoongArch: Add built-in functions description of
LoongArch Base instruction set instructions.
gcc/ChangeLog:
* doc/extend.texi: Add section for LoongArch Base Built-in functions.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/doc/extend.texi | 129 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 129 insertions(+)
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 3c101ca89..1d1bac255 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -14678,6 +14678,7 @@ instructions, but allow the compiler to schedule those calls.
* Blackfin Built-in Functions::
* BPF Built-in Functions::
* FR-V Built-in Functions::
+* LoongArch Base Built-in Functions::
* MIPS DSP Built-in Functions::
* MIPS Paired-Single Support::
* MIPS Loongson Built-in Functions::
@@ -16128,6 +16129,134 @@ Use the @code{nldub} instruction to load the contents of address @var{x}
into the data cache. The instruction is issued in slot I1@.
@end table
+@node LoongArch Base Built-in Functions
+@subsection LoongArch Base Built-in Functions
+
+These built-in functions are available for LoongArch.
+
+Data Type Description:
+@itemize
+@item @code{imm0_31}, a compile-time constant in range 0 to 31;
+@item @code{imm0_16383}, a compile-time constant in range 0 to 16383;
+@item @code{imm0_32767}, a compile-time constant in range 0 to 32767;
+@item @code{imm_n2048_2047}, a compile-time constant in range -2048 to 2047;
+@end itemize
+
+The intrinsics provided are listed below:
+@smallexample
+ unsigned int __builtin_loongarch_movfcsr2gr (imm0_31)
+ void __builtin_loongarch_movgr2fcsr (imm0_31, unsigned int)
+ void __builtin_loongarch_cacop_d (imm0_31, unsigned long int, imm_n2048_2047)
+ unsigned int __builtin_loongarch_cpucfg (unsigned int)
+ void __builtin_loongarch_asrtle_d (long int, long int)
+ void __builtin_loongarch_asrtgt_d (long int, long int)
+ long int __builtin_loongarch_lddir_d (long int, imm0_31)
+ void __builtin_loongarch_ldpte_d (long int, imm0_31)
+
+ int __builtin_loongarch_crc_w_b_w (char, int)
+ int __builtin_loongarch_crc_w_h_w (short, int)
+ int __builtin_loongarch_crc_w_w_w (int, int)
+ int __builtin_loongarch_crc_w_d_w (long int, int)
+ int __builtin_loongarch_crcc_w_b_w (char, int)
+ int __builtin_loongarch_crcc_w_h_w (short, int)
+ int __builtin_loongarch_crcc_w_w_w (int, int)
+ int __builtin_loongarch_crcc_w_d_w (long int, int)
+
+ unsigned int __builtin_loongarch_csrrd_w (imm0_16383)
+ unsigned int __builtin_loongarch_csrwr_w (unsigned int, imm0_16383)
+ unsigned int __builtin_loongarch_csrxchg_w (unsigned int, unsigned int, imm0_16383)
+ unsigned long int __builtin_loongarch_csrrd_d (imm0_16383)
+ unsigned long int __builtin_loongarch_csrwr_d (unsigned long int, imm0_16383)
+ unsigned long int __builtin_loongarch_csrxchg_d (unsigned long int, unsigned long int, imm0_16383)
+
+ unsigned char __builtin_loongarch_iocsrrd_b (unsigned int)
+ unsigned short __builtin_loongarch_iocsrrd_h (unsigned int)
+ unsigned int __builtin_loongarch_iocsrrd_w (unsigned int)
+ unsigned long int __builtin_loongarch_iocsrrd_d (unsigned int)
+ void __builtin_loongarch_iocsrwr_b (unsigned char, unsigned int)
+ void __builtin_loongarch_iocsrwr_h (unsigned short, unsigned int)
+ void __builtin_loongarch_iocsrwr_w (unsigned int, unsigned int)
+ void __builtin_loongarch_iocsrwr_d (unsigned long int, unsigned int)
+
+ void __builtin_loongarch_dbar (imm0_32767)
+ void __builtin_loongarch_ibar (imm0_32767)
+
+ void __builtin_loongarch_syscall (imm0_32767)
+ void __builtin_loongarch_break (imm0_32767)
+@end smallexample
+
+@emph{Note:}Since the control register is divided into 32-bit and 64-bit,
+but the access instruction is not distinguished. So GCC renames the control
+instructions when implementing intrinsics.
+
+Take the csrrd instruction as an example, built-in functions are implemented as follows:
+@smallexample
+ __builtin_loongarch_csrrd_w // When reading the 32-bit control register use.
+ __builtin_loongarch_csrrd_d // When reading the 64-bit control register use.
+@end smallexample
+
+For the convenience of use, the built-in functions are encapsulated,
+the encapsulated functions and @code{__drdtime_t, __rdtime_t} are
+defined in the @code{larchintrin.h}. So if you call the following
+function you need to include @code{larchintrin.h}.
+
+@smallexample
+ typedef struct drdtime@{
+ unsigned long dvalue;
+ unsigned long dtimeid;
+ @} __drdtime_t;
+
+ typedef struct rdtime@{
+ unsigned int value;
+ unsigned int timeid;
+ @} __rdtime_t;
+@end smallexample
+
+@smallexample
+ __drdtime_t __rdtime_d (void)
+ __rdtime_t __rdtimel_w (void)
+ __rdtime_t __rdtimeh_w (void)
+ unsigned int __movfcsr2gr (imm0_31)
+ void __movgr2fcsr (imm0_31, unsigned int)
+ void __cacop_d (imm0_31, unsigned long, imm_n2048_2047)
+ unsigned int __cpucfg (unsigned int)
+ void __asrtle_d (long int, long int)
+ void __asrtgt_d (long int, long int)
+ long int __lddir_d (long int, imm0_31)
+ void __ldpte_d (long int, imm0_31)
+
+ int __crc_w_b_w (char, int)
+ int __crc_w_h_w (short, int)
+ int __crc_w_w_w (int, int)
+ int __crc_w_d_w (long int, int)
+ int __crcc_w_b_w (char, int)
+ int __crcc_w_h_w (short, int)
+ int __crcc_w_w_w (int, int)
+ int __crcc_w_d_w (long int, int)
+
+ unsigned int __csrrd_w (imm0_16383)
+ unsigned int __csrwr_w (unsigned int, imm0_16383)
+ unsigned int __csrxchg_w (unsigned int, unsigned int, imm0_16383)
+ unsigned long __csrrd_d (imm0_16383)
+ unsigned long __csrwr_d (unsigned long, imm0_16383)
+ unsigned long __csrxchg_d (unsigned long, unsigned long, imm0_16383)
+
+ unsigned char __iocsrrd_b (unsigned int)
+ unsigned short __iocsrrd_h (unsigned int)
+ unsigned int __iocsrrd_w (unsigned int)
+ unsigned long __iocsrrd_d (unsigned int)
+ void __iocsrwr_b (unsigned char, unsigned int)
+ void __iocsrwr_h (unsigned short, unsigned int)
+ void __iocsrwr_w (unsigned int, unsigned int)
+ void __iocsrwr_d (unsigned long, unsigned int)
+
+ void __dbar (imm0_32767)
+ void __ibar (imm0_32767)
+
+ void __syscall (imm0_32767)
+ void __break (imm0_32767)
+@end smallexample
+
@node MIPS DSP Built-in Functions
@subsection MIPS DSP Built-in Functions
--
2.33.0

View File

@ -0,0 +1,107 @@
From 41a4945886631a1b2898ae957389d5db18a07141 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Fri, 4 Nov 2022 15:12:22 +0800
Subject: [PATCH 025/124] LoongArch: Add fcopysign instructions
Add fcopysign.{s,d} with the names copysign{sf,df}3 so GCC will expand
__builtin_copysign{f,} to a single instruction.
Link: https://sourceware.org/pipermail/libc-alpha/2022-November/143177.html
gcc/ChangeLog:
* config/loongarch/loongarch.md (UNSPEC_FCOPYSIGN): New unspec.
(type): Add fcopysign.
(copysign<mode>3): New instruction template.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/fcopysign.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.md | 22 ++++++++++++++++++-
.../gcc.target/loongarch/fcopysign.c | 16 ++++++++++++++
2 files changed, 37 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/fcopysign.c
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 214b14bdd..bda34d0f3 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -37,6 +37,7 @@
UNSPEC_FCLASS
UNSPEC_FMAX
UNSPEC_FMIN
+ UNSPEC_FCOPYSIGN
;; Override return address for exception handling.
UNSPEC_EH_RETURN
@@ -214,6 +215,7 @@
;; fabs floating point absolute value
;; fneg floating point negation
;; fcmp floating point compare
+;; fcopysign floating point copysign
;; fcvt floating point convert
;; fsqrt floating point square root
;; frsqrt floating point reciprocal square root
@@ -226,7 +228,7 @@
"unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
shift,slt,signext,clz,trap,imul,idiv,move,
- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcvt,fsqrt,
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fsqrt,
frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
(cond [(eq_attr "jirl" "!unset") (const_string "call")
(eq_attr "got" "load") (const_string "load")
@@ -976,6 +978,24 @@
(set_attr "mode" "<UNITMODE>")])
;;
+;; ....................
+;;
+;; FLOATING POINT COPYSIGN
+;;
+;; ....................
+
+(define_insn "copysign<mode>3"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:ANYF 2 "register_operand" "f")]
+ UNSPEC_FCOPYSIGN))]
+ "TARGET_HARD_FLOAT"
+ "fcopysign.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fcopysign")
+ (set_attr "mode" "<UNITMODE>")])
+
+
+;;
;; ...................
;;
;; Count leading zeroes.
diff --git a/gcc/testsuite/gcc.target/loongarch/fcopysign.c b/gcc/testsuite/gcc.target/loongarch/fcopysign.c
new file mode 100644
index 000000000..058ba2cf5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/fcopysign.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mdouble-float" } */
+/* { dg-final { scan-assembler "fcopysign\\.s" } } */
+/* { dg-final { scan-assembler "fcopysign\\.d" } } */
+
+double
+my_copysign (double a, double b)
+{
+ return __builtin_copysign (a, b);
+}
+
+float
+my_copysignf (float a, float b)
+{
+ return __builtin_copysignf (a, b);
+}
--
2.33.0

View File

@ -0,0 +1,123 @@
From 2ae587a86bba31b91a127e353c31c9f861ff5326 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Tue, 8 Nov 2022 13:42:20 +0800
Subject: [PATCH 030/124] LoongArch: Add flogb.{s,d} instructions and expand
logb{sf,df}2
On LoongArch, flogb instructions extract the exponent of a non-negative
floating point value, but produces NaN for negative values. So we need
to add a fabs instruction when we expand logb.
gcc/ChangeLog:
* config/loongarch/loongarch.md (UNSPEC_FLOGB): New unspec.
(type): Add flogb.
(logb_non_negative<mode>2): New instruction template.
(logb<mode>2): New define_expand.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/flogb.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.md | 35 ++++++++++++++++++++--
gcc/testsuite/gcc.target/loongarch/flogb.c | 18 +++++++++++
2 files changed, 51 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/flogb.c
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index c141c9add..682ab9617 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -42,6 +42,7 @@
UNSPEC_FTINTRM
UNSPEC_FTINTRP
UNSPEC_FSCALEB
+ UNSPEC_FLOGB
;; Override return address for exception handling.
UNSPEC_EH_RETURN
@@ -217,6 +218,7 @@
;; fdiv floating point divide
;; frdiv floating point reciprocal divide
;; fabs floating point absolute value
+;; flogb floating point exponent extract
;; fneg floating point negation
;; fcmp floating point compare
;; fcopysign floating point copysign
@@ -233,8 +235,8 @@
"unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
shift,slt,signext,clz,trap,imul,idiv,move,
- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fscaleb,
- fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,flogb,fneg,fcmp,fcopysign,fcvt,
+ fscaleb,fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
(cond [(eq_attr "jirl" "!unset") (const_string "call")
(eq_attr "got" "load") (const_string "load")
@@ -1039,6 +1041,35 @@
(set_attr "mode" "<UNITMODE>")])
;;
+;; ....................
+;;
+;; FLOATING POINT EXPONENT EXTRACT
+;;
+;; ....................
+
+(define_insn "logb_non_negative<mode>2"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
+ UNSPEC_FLOGB))]
+ "TARGET_HARD_FLOAT"
+ "flogb.<fmt>\t%0,%1"
+ [(set_attr "type" "flogb")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_expand "logb<mode>2"
+ [(set (match_operand:ANYF 0 "register_operand")
+ (unspec:ANYF [(abs:ANYF (match_operand:ANYF 1 "register_operand"))]
+ UNSPEC_FLOGB))]
+ "TARGET_HARD_FLOAT"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_abs<mode>2 (tmp, operands[1]));
+ emit_insn (gen_logb_non_negative<mode>2 (operands[0], tmp));
+ DONE;
+})
+
+;;
;; ...................
;;
;; Count leading zeroes.
diff --git a/gcc/testsuite/gcc.target/loongarch/flogb.c b/gcc/testsuite/gcc.target/loongarch/flogb.c
new file mode 100644
index 000000000..1daefe54e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/flogb.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mdouble-float -fno-math-errno" } */
+/* { dg-final { scan-assembler "fabs\\.s" } } */
+/* { dg-final { scan-assembler "fabs\\.d" } } */
+/* { dg-final { scan-assembler "flogb\\.s" } } */
+/* { dg-final { scan-assembler "flogb\\.d" } } */
+
+double
+my_logb (double a)
+{
+ return __builtin_logb (a);
+}
+
+float
+my_logbf (float a)
+{
+ return __builtin_logbf (a);
+}
--
2.33.0

View File

@ -0,0 +1,155 @@
From e3d69a3b7a4e00e8bba88b8b4abaa1c17bc083d5 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Tue, 8 Nov 2022 12:14:35 +0800
Subject: [PATCH 029/124] LoongArch: Add fscaleb.{s,d} instructions as
ldexp{sf,df}3
This allows optimizing __builtin_ldexp{,f} and __builtin_scalbn{,f} with
-fno-math-errno.
IMODE is added because we can't hard code SI for operand 2: fscaleb.d
instruction always take the high half of both source registers into
account. See my_ldexp_long in the test case.
gcc/ChangeLog:
* config/loongarch/loongarch.md (UNSPEC_FSCALEB): New unspec.
(type): Add fscaleb.
(IMODE): New mode attr.
(ldexp<mode>3): New instruction template.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/fscaleb.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.md | 26 ++++++++++-
gcc/testsuite/gcc.target/loongarch/fscaleb.c | 48 ++++++++++++++++++++
2 files changed, 72 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/fscaleb.c
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index eb127c346..c141c9add 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -41,6 +41,7 @@
UNSPEC_FTINT
UNSPEC_FTINTRM
UNSPEC_FTINTRP
+ UNSPEC_FSCALEB
;; Override return address for exception handling.
UNSPEC_EH_RETURN
@@ -220,6 +221,7 @@
;; fcmp floating point compare
;; fcopysign floating point copysign
;; fcvt floating point convert
+;; fscaleb floating point scale
;; fsqrt floating point square root
;; frsqrt floating point reciprocal square root
;; multi multiword sequence (or user asm statements)
@@ -231,8 +233,8 @@
"unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
shift,slt,signext,clz,trap,imul,idiv,move,
- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fsqrt,
- frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fscaleb,
+ fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
(cond [(eq_attr "jirl" "!unset") (const_string "call")
(eq_attr "got" "load") (const_string "load")
@@ -418,6 +420,10 @@
;; the controlling mode.
(define_mode_attr HALFMODE [(DF "SI") (DI "SI") (TF "DI")])
+;; This attribute gives the integer mode that has the same size of a
+;; floating-point mode.
+(define_mode_attr IMODE [(SF "SI") (DF "DI")])
+
;; This code iterator allows signed and unsigned widening multiplications
;; to use the same template.
(define_code_iterator any_extend [sign_extend zero_extend])
@@ -1014,7 +1020,23 @@
"fcopysign.<fmt>\t%0,%1,%2"
[(set_attr "type" "fcopysign")
(set_attr "mode" "<UNITMODE>")])
+
+;;
+;; ....................
+;;
+;; FLOATING POINT SCALE
+;;
+;; ....................
+(define_insn "ldexp<mode>3"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:<IMODE> 2 "register_operand" "f")]
+ UNSPEC_FSCALEB))]
+ "TARGET_HARD_FLOAT"
+ "fscaleb.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fscaleb")
+ (set_attr "mode" "<UNITMODE>")])
;;
;; ...................
diff --git a/gcc/testsuite/gcc.target/loongarch/fscaleb.c b/gcc/testsuite/gcc.target/loongarch/fscaleb.c
new file mode 100644
index 000000000..f18470fbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/fscaleb.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno" } */
+/* { dg-final { scan-assembler-times "fscaleb\\.s" 3 } } */
+/* { dg-final { scan-assembler-times "fscaleb\\.d" 4 } } */
+/* { dg-final { scan-assembler-times "slli\\.w" 1 } } */
+
+double
+my_scalbln (double a, long b)
+{
+ return __builtin_scalbln (a, b);
+}
+
+double
+my_scalbn (double a, int b)
+{
+ return __builtin_scalbn (a, b);
+}
+
+double
+my_ldexp (double a, int b)
+{
+ return __builtin_ldexp (a, b);
+}
+
+float
+my_scalblnf (float a, long b)
+{
+ return __builtin_scalblnf (a, b);
+}
+
+float
+my_scalbnf (float a, int b)
+{
+ return __builtin_scalbnf (a, b);
+}
+
+float
+my_ldexpf (float a, int b)
+{
+ return __builtin_ldexpf (a, b);
+}
+
+/* b must be sign-extended */
+double
+my_ldexp_long (double a, long b)
+{
+ return __builtin_ldexp (a, b);
+}
--
2.33.0

View File

@ -0,0 +1,220 @@
From 76d599c6d8f9cf78b51cd76a7ca8fbe11e2cda2b Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sun, 6 Nov 2022 23:16:49 +0800
Subject: [PATCH 028/124] LoongArch: Add ftint{,rm,rp}.{w,l}.{s,d} instructions
This allows to optimize the following builtins if -fno-math-errno:
- __builtin_lrint{,f}
- __builtin_lfloor{,f}
- __builtin_lceil{,f}
Inspired by
https://gcc.gnu.org/pipermail/gcc-patches/2022-November/605287.html.
ANYFI is added so the compiler won't try ftint.l.s if -mfpu=32. If we
simply used GPR here an ICE would be triggered with __builtin_lrintf
and -mfpu=32.
ftint{rm,rp} instructions may raise inexact exception, so they can't be
used if -fno-trapping-math -fno-fp-int-builtin-inexact.
Note that the .w.{s,d} variants are not tested because we don't support
ILP32 for now.
gcc/ChangeLog:
* config/loongarch/loongarch.md (UNSPEC_FTINT): New unspec.
(UNSPEC_FTINTRM): Likewise.
(UNSPEC_FTINTRP): Likewise.
(LRINT): New define_int_iterator.
(lrint_pattern): New define_int_attr.
(lrint_submenmonic): Likewise.
(lrint_allow_inexact): Likewise.
(ANYFI): New define_mode_iterator.
(lrint<ANYF><ANYFI>): New instruction template.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/ftint.c: New test.
* gcc.target/loongarch/ftint-no-inexact.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.md | 34 ++++++++++++++
.../gcc.target/loongarch/ftint-no-inexact.c | 44 +++++++++++++++++++
gcc/testsuite/gcc.target/loongarch/ftint.c | 44 +++++++++++++++++++
3 files changed, 122 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/ftint.c
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index a14ab14ac..eb127c346 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -38,6 +38,9 @@
UNSPEC_FMAX
UNSPEC_FMIN
UNSPEC_FCOPYSIGN
+ UNSPEC_FTINT
+ UNSPEC_FTINTRM
+ UNSPEC_FTINTRP
;; Override return address for exception handling.
UNSPEC_EH_RETURN
@@ -374,6 +377,11 @@
(define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT")
(DF "TARGET_DOUBLE_FLOAT")])
+;; Iterator for fixed-point modes which can be hold by a hardware
+;; floating-point register.
+(define_mode_iterator ANYFI [(SI "TARGET_HARD_FLOAT")
+ (DI "TARGET_DOUBLE_FLOAT")])
+
;; A mode for which moves involving FPRs may need to be split.
(define_mode_iterator SPLITF
[(DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
@@ -515,6 +523,19 @@
(define_code_attr sel [(eq "masknez") (ne "maskeqz")])
(define_code_attr selinv [(eq "maskeqz") (ne "masknez")])
+;; Iterator and attributes for floating-point to fixed-point conversion
+;; instructions.
+(define_int_iterator LRINT [UNSPEC_FTINT UNSPEC_FTINTRM UNSPEC_FTINTRP])
+(define_int_attr lrint_pattern [(UNSPEC_FTINT "lrint")
+ (UNSPEC_FTINTRM "lfloor")
+ (UNSPEC_FTINTRP "lceil")])
+(define_int_attr lrint_submenmonic [(UNSPEC_FTINT "")
+ (UNSPEC_FTINTRM "rm")
+ (UNSPEC_FTINTRP "rp")])
+(define_int_attr lrint_allow_inexact [(UNSPEC_FTINT "1")
+ (UNSPEC_FTINTRM "0")
+ (UNSPEC_FTINTRP "0")])
+
;;
;; ....................
;;
@@ -2022,6 +2043,19 @@
[(set_attr "type" "fcvt")
(set_attr "mode" "<MODE>")])
+;; Convert floating-point numbers to integers
+(define_insn "<lrint_pattern><ANYF:mode><ANYFI:mode>2"
+ [(set (match_operand:ANYFI 0 "register_operand" "=f")
+ (unspec:ANYFI [(match_operand:ANYF 1 "register_operand" "f")]
+ LRINT))]
+ "TARGET_HARD_FLOAT &&
+ (<lrint_allow_inexact>
+ || flag_fp_int_builtin_inexact
+ || !flag_trapping_math)"
+ "ftint<lrint_submenmonic>.<ANYFI:ifmt>.<ANYF:fmt> %0,%1"
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "<ANYF:MODE>")])
+
;; Load the low word of operand 0 with operand 1.
(define_insn "load_low<mode>"
[(set (match_operand:SPLITF 0 "register_operand" "=f,f")
diff --git a/gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c
new file mode 100644
index 000000000..88b83a9c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact" } */
+/* { dg-final { scan-assembler "ftint\\.l\\.s" } } */
+/* { dg-final { scan-assembler "ftint\\.l\\.d" } } */
+/* { dg-final { scan-assembler-not "ftintrm\\.l\\.s" } } */
+/* { dg-final { scan-assembler-not "ftintrm\\.l\\.d" } } */
+/* { dg-final { scan-assembler-not "ftintrp\\.l\\.s" } } */
+/* { dg-final { scan-assembler-not "ftintrp\\.l\\.d" } } */
+
+long
+my_lrint (double a)
+{
+ return __builtin_lrint (a);
+}
+
+long
+my_lrintf (float a)
+{
+ return __builtin_lrintf (a);
+}
+
+long
+my_lfloor (double a)
+{
+ return __builtin_lfloor (a);
+}
+
+long
+my_lfloorf (float a)
+{
+ return __builtin_lfloorf (a);
+}
+
+long
+my_lceil (double a)
+{
+ return __builtin_lceil (a);
+}
+
+long
+my_lceilf (float a)
+{
+ return __builtin_lceilf (a);
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/ftint.c b/gcc/testsuite/gcc.target/loongarch/ftint.c
new file mode 100644
index 000000000..7a326a454
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/ftint.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact" } */
+/* { dg-final { scan-assembler "ftint\\.l\\.s" } } */
+/* { dg-final { scan-assembler "ftint\\.l\\.d" } } */
+/* { dg-final { scan-assembler "ftintrm\\.l\\.s" } } */
+/* { dg-final { scan-assembler "ftintrm\\.l\\.d" } } */
+/* { dg-final { scan-assembler "ftintrp\\.l\\.s" } } */
+/* { dg-final { scan-assembler "ftintrp\\.l\\.d" } } */
+
+long
+my_lrint (double a)
+{
+ return __builtin_lrint (a);
+}
+
+long
+my_lrintf (float a)
+{
+ return __builtin_lrintf (a);
+}
+
+long
+my_lfloor (double a)
+{
+ return __builtin_lfloor (a);
+}
+
+long
+my_lfloorf (float a)
+{
+ return __builtin_lfloorf (a);
+}
+
+long
+my_lceil (double a)
+{
+ return __builtin_lceil (a);
+}
+
+long
+my_lceilf (float a)
+{
+ return __builtin_lceilf (a);
+}
--
2.33.0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,158 @@
From 52a41006c2e8141a42de93ffcc2c040e034244b2 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Wed, 16 Nov 2022 09:25:14 +0800
Subject: [PATCH 031/124] LoongArch: Add prefetch instructions.
Enable sw prefetching at -O3 and higher.
Co-Authored-By: xujiahao <xujiahao@loongson.cn>
gcc/ChangeLog:
* config/loongarch/constraints.md (ZD): New constraint.
* config/loongarch/loongarch-def.c: Initial number of parallel prefetch.
* config/loongarch/loongarch-tune.h (struct loongarch_cache):
Define number of parallel prefetch.
* config/loongarch/loongarch.cc (loongarch_option_override_internal):
Set up parameters to be used in prefetching algorithm.
* config/loongarch/loongarch.md (prefetch): New template.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/constraints.md | 10 ++++++++++
gcc/config/loongarch/loongarch-def.c | 2 ++
gcc/config/loongarch/loongarch-tune.h | 1 +
gcc/config/loongarch/loongarch.cc | 28 +++++++++++++++++++++++++++
gcc/config/loongarch/loongarch.md | 14 ++++++++++++++
5 files changed, 55 insertions(+)
diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
index 43cb7b5f0..46f7f63ae 100644
--- a/gcc/config/loongarch/constraints.md
+++ b/gcc/config/loongarch/constraints.md
@@ -86,6 +86,10 @@
;; "ZB"
;; "An address that is held in a general-purpose register.
;; The offset is zero"
+;; "ZD"
+;; "An address operand whose address is formed by a base register
+;; and offset that is suitable for use in instructions with the same
+;; addressing mode as @code{preld}."
;; "<" "Matches a pre-dec or post-dec operand." (Global non-architectural)
;; ">" "Matches a pre-inc or post-inc operand." (Global non-architectural)
@@ -190,3 +194,9 @@
The offset is zero"
(and (match_code "mem")
(match_test "REG_P (XEXP (op, 0))")))
+
+(define_address_constraint "ZD"
+ "An address operand whose address is formed by a base register
+ and offset that is suitable for use in instructions with the same
+ addressing mode as @code{preld}."
+ (match_test "loongarch_12bit_offset_address_p (op, mode)"))
diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
index cbf995d81..80ab10a52 100644
--- a/gcc/config/loongarch/loongarch-def.c
+++ b/gcc/config/loongarch/loongarch-def.c
@@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
.l1d_line_size = 64,
.l1d_size = 64,
.l2d_size = 256,
+ .simultaneous_prefetches = 4,
},
[CPU_LA464] = {
.l1d_line_size = 64,
.l1d_size = 64,
.l2d_size = 256,
+ .simultaneous_prefetches = 4,
},
};
diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
index 6f3530f5c..8e3eb2947 100644
--- a/gcc/config/loongarch/loongarch-tune.h
+++ b/gcc/config/loongarch/loongarch-tune.h
@@ -45,6 +45,7 @@ struct loongarch_cache {
int l1d_line_size; /* bytes */
int l1d_size; /* KiB */
int l2d_size; /* kiB */
+ int simultaneous_prefetches; /* number of parallel prefetch */
};
#endif /* LOONGARCH_TUNE_H */
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index d552b162a..622c9435b 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3. If not see
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"
+#include "opts.h"
/* This file should be included last. */
#include "target-def.h"
@@ -6099,6 +6100,33 @@ loongarch_option_override_internal (struct gcc_options *opts)
if (loongarch_branch_cost == 0)
loongarch_branch_cost = loongarch_cost->branch_cost;
+ /* Set up parameters to be used in prefetching algorithm. */
+ int simultaneous_prefetches
+ = loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches;
+
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ param_simultaneous_prefetches,
+ simultaneous_prefetches);
+
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ param_l1_cache_line_size,
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size);
+
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ param_l1_cache_size,
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size);
+
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ param_l2_cache_size,
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size);
+
+
+ /* Enable sw prefetching at -O3 and higher. */
+ if (opts->x_flag_prefetch_loop_arrays < 0
+ && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
+ && !opts->x_optimize_size)
+ opts->x_flag_prefetch_loop_arrays = 1;
+
if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
error ("%qs cannot be used for compiling a shared library",
"-mdirect-extern-access");
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 682ab9617..2fda53819 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -3282,6 +3282,20 @@
;; ....................
;;
+(define_insn "prefetch"
+ [(prefetch (match_operand 0 "address_operand" "ZD")
+ (match_operand 1 "const_int_operand" "n")
+ (match_operand 2 "const_int_operand" "n"))]
+ ""
+{
+ switch (INTVAL (operands[1]))
+ {
+ case 0: return "preld\t0,%a0";
+ case 1: return "preld\t8,%a0";
+ default: gcc_unreachable ();
+ }
+})
+
(define_insn "nop"
[(const_int 0)]
""
--
2.33.0

View File

@ -0,0 +1,794 @@
From b1c92fb9dab678e4c9c23fa77185011494d145b9 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Thu, 18 Aug 2022 17:26:13 +0800
Subject: [PATCH 011/124] LoongArch: Add support code model extreme.
Use five instructions to calculate a signed 64-bit offset relative to the pc.
gcc/ChangeLog:
* config/loongarch/loongarch-opts.cc: Allow cmodel to be extreme.
* config/loongarch/loongarch.cc (loongarch_call_tls_get_addr):
Add extreme support for TLS GD and LD types.
(loongarch_legitimize_tls_address): Add extreme support for TLS LE
and IE.
(loongarch_split_symbol): When compiling with -mcmodel=extreme,
the symbol address will be obtained through five instructions.
(loongarch_print_operand_reloc): Add support.
(loongarch_print_operand): Add support.
(loongarch_print_operand_address): Add support.
(loongarch_option_override_internal): Set '-mcmodel=extreme' option
incompatible with '-mno-explicit-relocs'.
* config/loongarch/loongarch.md (@lui_l_hi20<mode>):
Loads bits 12-31 of data into registers.
(lui_h_lo20): Load bits 32-51 of the data and spell bits 0-31 of
the source register.
(lui_h_hi12): Load bits 52-63 of the data and spell bits 0-51 of
the source register.
* config/loongarch/predicates.md: Symbols need to be decomposed
when defining the macro TARGET_CMODEL_EXTREME
* doc/invoke.texi: Modify the description information of cmodel in the document.
Document -W[no-]extreme-plt.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/func-call-1.c: Add option '-mcmodel=normal'.
* gcc.target/loongarch/func-call-2.c: Likewise.
* gcc.target/loongarch/func-call-3.c: Likewise.
* gcc.target/loongarch/func-call-4.c: Likewise.
* gcc.target/loongarch/func-call-5.c: Likewise.
* gcc.target/loongarch/func-call-6.c: Likewise.
* gcc.target/loongarch/func-call-7.c: Likewise.
* gcc.target/loongarch/func-call-8.c: Likewise.
* gcc.target/loongarch/relocs-symbol-noaddend.c: Likewise.
* gcc.target/loongarch/func-call-extreme-1.c: New test.
* gcc.target/loongarch/func-call-extreme-2.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch-opts.cc | 3 +-
gcc/config/loongarch/loongarch.cc | 222 +++++++++++++++---
gcc/config/loongarch/loongarch.md | 34 ++-
gcc/config/loongarch/predicates.md | 9 +-
gcc/doc/invoke.texi | 50 +---
.../gcc.target/loongarch/func-call-1.c | 2 +-
.../gcc.target/loongarch/func-call-2.c | 2 +-
.../gcc.target/loongarch/func-call-3.c | 2 +-
.../gcc.target/loongarch/func-call-4.c | 2 +-
.../gcc.target/loongarch/func-call-5.c | 2 +-
.../gcc.target/loongarch/func-call-6.c | 2 +-
.../gcc.target/loongarch/func-call-7.c | 2 +-
.../gcc.target/loongarch/func-call-8.c | 2 +-
.../loongarch/func-call-extreme-1.c | 32 +++
.../loongarch/func-call-extreme-2.c | 32 +++
.../loongarch/relocs-symbol-noaddend.c | 2 +-
16 files changed, 318 insertions(+), 82 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c
diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
index 3f70943de..2ae89f234 100644
--- a/gcc/config/loongarch/loongarch-opts.cc
+++ b/gcc/config/loongarch/loongarch-opts.cc
@@ -376,14 +376,13 @@ fallback:
/* 5. Target code model */
t.cmodel = constrained.cmodel ? opt_cmodel : CMODEL_NORMAL;
- if (t.cmodel != CMODEL_NORMAL)
+ if (t.cmodel != CMODEL_NORMAL && t.cmodel != CMODEL_EXTREME)
{
warning (0, "%qs is not supported, now cmodel is set to %qs",
loongarch_cmodel_strings[t.cmodel], "normal");
t.cmodel = CMODEL_NORMAL;
}
-
/* Cleanup and return. */
obstack_free (&msg_obstack, NULL);
*target = t;
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 76bf55ea4..1a33f668f 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2436,7 +2436,19 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
/* Split tls symbol to high and low. */
rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc));
high = loongarch_force_temporary (tmp, high);
- emit_insn (gen_tls_low (Pmode, a0, high, loc));
+
+ if (TARGET_CMODEL_EXTREME)
+ {
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
+
+ rtx tmp1 = gen_reg_rtx (Pmode);
+ emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc));
+ emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc));
+ emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loc));
+ emit_move_insn (a0, gen_rtx_PLUS (Pmode, high, tmp1));
+ }
+ else
+ emit_insn (gen_tls_low (Pmode, a0, high, loc));
}
else
{
@@ -2449,14 +2461,44 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
}
if (flag_plt)
- insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol,
+ insn = emit_call_insn (gen_call_value_internal (v0,
+ loongarch_tls_symbol,
const0_rtx));
else
{
rtx dest = gen_reg_rtx (Pmode);
- rtx high = gen_reg_rtx (Pmode);
- loongarch_emit_move (high, gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
- emit_insn (gen_ld_from_got (Pmode, dest, high, loongarch_tls_symbol));
+
+ if (TARGET_CMODEL_EXTREME)
+ {
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
+
+ rtx tmp1 = gen_reg_rtx (Pmode);
+ rtx high = gen_reg_rtx (Pmode);
+
+ loongarch_emit_move (high,
+ gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
+ loongarch_emit_move (tmp1, gen_rtx_LO_SUM (Pmode,
+ gen_rtx_REG (Pmode, 0),
+ loongarch_tls_symbol));
+ emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol));
+ emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol));
+ loongarch_emit_move (dest,
+ gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode, high, tmp1)));
+ }
+ else
+ {
+ if (TARGET_EXPLICIT_RELOCS)
+ {
+ rtx high = gen_reg_rtx (Pmode);
+ loongarch_emit_move (high,
+ gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
+ emit_insn (gen_ld_from_got (Pmode, dest, high,
+ loongarch_tls_symbol));
+ }
+ else
+ loongarch_emit_move (dest, loongarch_tls_symbol);
+ }
insn = emit_call_insn (gen_call_value_internal (v0, dest, const0_rtx));
}
@@ -2508,7 +2550,23 @@ loongarch_legitimize_tls_address (rtx loc)
tmp3 = gen_reg_rtx (Pmode);
rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
high = loongarch_force_temporary (tmp3, high);
- emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
+
+ if (TARGET_CMODEL_EXTREME)
+ {
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
+
+ rtx tmp3 = gen_reg_rtx (Pmode);
+ emit_insn (gen_tls_low (Pmode, tmp3,
+ gen_rtx_REG (Pmode, 0), tmp2));
+ emit_insn (gen_lui_h_lo20 (tmp3, tmp3, tmp2));
+ emit_insn (gen_lui_h_hi12 (tmp3, tmp3, tmp2));
+ emit_move_insn (tmp1,
+ gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode,
+ high, tmp3)));
+ }
+ else
+ emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
}
else
emit_insn (loongarch_got_load_tls_ie (tmp1, loc));
@@ -2530,11 +2588,18 @@ loongarch_legitimize_tls_address (rtx loc)
rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
high = loongarch_force_temporary (tmp3, high);
emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2));
+
+ if (TARGET_CMODEL_EXTREME)
+ {
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
+
+ emit_insn (gen_lui_h_lo20 (tmp1, tmp1, tmp2));
+ emit_insn (gen_lui_h_hi12 (tmp1, tmp1, tmp2));
+ }
}
else
emit_insn (loongarch_got_load_tls_le (tmp1, loc));
emit_insn (gen_add3_insn (dest, tmp1, tp));
-
}
break;
@@ -2603,7 +2668,6 @@ bool
loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
{
enum loongarch_symbol_type symbol_type;
- rtx high;
/* If build with '-mno-explicit-relocs', don't split symbol. */
if (!TARGET_EXPLICIT_RELOCS)
@@ -2615,6 +2679,8 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
|| !loongarch_split_symbol_type (symbol_type))
return false;
+ rtx high, temp1 = NULL;
+
if (temp == NULL)
temp = gen_reg_rtx (Pmode);
@@ -2622,20 +2688,42 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
high = loongarch_force_temporary (temp, high);
+ if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
+ {
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
+
+ temp1 = gen_reg_rtx (Pmode);
+ emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0),
+ addr));
+ emit_insn (gen_lui_h_lo20 (temp1, temp1, addr));
+ emit_insn (gen_lui_h_hi12 (temp1, temp1, addr));
+ }
+
if (low_out)
switch (symbol_type)
{
case SYMBOL_PCREL:
- *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
- break;
+ {
+ if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
+ *low_out = gen_rtx_PLUS (Pmode, high, temp1);
+ else
+ *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
+ break;
+ }
case SYMBOL_GOT_DISP:
/* SYMBOL_GOT_DISP symbols are loaded from the GOT. */
{
- rtx low = gen_rtx_LO_SUM (Pmode, high, addr);
- rtx mem = gen_rtx_MEM (Pmode, low);
- *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
- UNSPEC_LOAD_FROM_GOT);
+ if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
+ *low_out = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, high, temp1));
+ else
+ {
+ rtx low = gen_rtx_LO_SUM (Pmode, high, addr);
+ rtx mem = gen_rtx_MEM (Pmode, low);
+ *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
+ UNSPEC_LOAD_FROM_GOT);
+ }
+
break;
}
@@ -4584,34 +4672,86 @@ loongarch_memmodel_needs_release_fence (enum memmodel model)
in context CONTEXT. HI_RELOC indicates a high-part reloc. */
static void
-loongarch_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
+loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
+ bool hi_reloc)
{
const char *reloc;
+ if (TARGET_CMODEL_EXTREME)
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
+
switch (loongarch_classify_symbolic_expression (op))
{
case SYMBOL_PCREL:
- reloc = hi_reloc ? "%pc_hi20" : "%pc_lo12";
+ if (hi64_part)
+ {
+ if (TARGET_CMODEL_EXTREME)
+ reloc = hi_reloc ? "%pc64_hi12" : "%pc64_lo20";
+ else
+ gcc_unreachable ();
+ }
+ else
+ reloc = hi_reloc ? "%pc_hi20" : "%pc_lo12";
break;
case SYMBOL_GOT_DISP:
- reloc = hi_reloc ? "%got_pc_hi20" : "%got_pc_lo12";
+ if (hi64_part)
+ {
+ if (TARGET_CMODEL_EXTREME)
+ reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
+ else
+ gcc_unreachable ();
+ }
+ else
+ reloc = hi_reloc ? "%got_pc_hi20" : "%got_pc_lo12";
break;
case SYMBOL_TLS_IE:
- reloc = hi_reloc ? "%ie_pc_hi20" : "%ie_pc_lo12";
+ if (hi64_part)
+ {
+ if (TARGET_CMODEL_EXTREME)
+ reloc = hi_reloc ? "%ie64_pc_hi12" : "%ie64_pc_lo20";
+ else
+ gcc_unreachable ();
+ }
+ else
+ reloc = hi_reloc ? "%ie_pc_hi20" : "%ie_pc_lo12";
break;
case SYMBOL_TLS_LE:
- reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
+ if (hi64_part)
+ {
+ if (TARGET_CMODEL_EXTREME)
+ reloc = hi_reloc ? "%le64_hi12" : "%le64_lo20";
+ else
+ gcc_unreachable ();
+ }
+ else
+ reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
break;
case SYMBOL_TLSGD:
- reloc = hi_reloc ? "%gd_pc_hi20" : "%got_pc_lo12";
+ if (hi64_part)
+ {
+ if (TARGET_CMODEL_EXTREME)
+ reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
+ else
+ gcc_unreachable ();
+ }
+ else
+ reloc = hi_reloc ? "%gd_pc_hi20" : "%got_pc_lo12";
break;
case SYMBOL_TLSLDM:
- reloc = hi_reloc ? "%ld_pc_hi20" : "%got_pc_lo12";
+ if (hi64_part)
+ {
+ if (TARGET_CMODEL_EXTREME)
+ reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
+ else
+ gcc_unreachable ();
+ }
+ else
+ reloc = hi_reloc ? "%ld_pc_hi20" : "%got_pc_lo12";
break;
default:
@@ -4637,6 +4777,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
'L' Print the low-part relocation associated with OP.
'm' Print one less than CONST_INT OP in decimal.
'N' Print the inverse of the integer branch condition for comparison OP.
+ 'r' Print address 12-31bit relocation associated with OP.
+ 'R' Print address 32-51bit relocation associated with OP.
'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
'z' for (eq:?I ...), 'n' for (ne:?I ...).
't' Like 'T', but with the EQ/NE cases reversed
@@ -4694,7 +4836,13 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
case 'h':
if (code == HIGH)
op = XEXP (op, 0);
- loongarch_print_operand_reloc (file, op, true /* hi_reloc */);
+ loongarch_print_operand_reloc (file, op, false /* hi64_part */,
+ true /* hi_reloc */);
+ break;
+
+ case 'H':
+ loongarch_print_operand_reloc (file, op, true /* hi64_part */,
+ true /* hi_reloc */);
break;
case 'i':
@@ -4703,7 +4851,8 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
break;
case 'L':
- loongarch_print_operand_reloc (file, op, false /* lo_reloc */);
+ loongarch_print_operand_reloc (file, op, false /* hi64_part*/,
+ false /* lo_reloc */);
break;
case 'm':
@@ -4718,6 +4867,16 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
letter);
break;
+ case 'r':
+ loongarch_print_operand_reloc (file, op, false /* hi64_part */,
+ true /* lo_reloc */);
+ break;
+
+ case 'R':
+ loongarch_print_operand_reloc (file, op, true /* hi64_part */,
+ false /* lo_reloc */);
+ break;
+
case 't':
case 'T':
{
@@ -4848,7 +5007,8 @@ loongarch_print_operand_address (FILE *file, machine_mode /* mode */, rtx x)
case ADDRESS_LO_SUM:
fprintf (file, "%s,", reg_names[REGNO (addr.reg)]);
- loongarch_print_operand_reloc (file, addr.offset, false /* hi_reloc */);
+ loongarch_print_operand_reloc (file, addr.offset, false /* hi64_part */,
+ false /* hi_reloc */);
return;
case ADDRESS_CONST_INT:
@@ -5821,13 +5981,21 @@ loongarch_option_override_internal (struct gcc_options *opts)
switch (la_target.cmodel)
{
- case CMODEL_TINY_STATIC:
case CMODEL_EXTREME:
+ if (!TARGET_EXPLICIT_RELOCS)
+ error ("code model %qs needs %s",
+ "extreme", "-mexplicit-relocs");
+
if (opts->x_flag_plt)
- error ("code model %qs and %qs not support %s mode",
- "tiny-static", "extreme", "plt");
+ {
+ if (global_options_set.x_flag_plt)
+ error ("code model %qs is not compatible with %s",
+ "extreme", "-fplt");
+ opts->x_flag_plt = 0;
+ }
break;
+ case CMODEL_TINY_STATIC:
case CMODEL_NORMAL:
case CMODEL_TINY:
case CMODEL_LARGE:
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 8e8868de9..8fc10444c 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -60,6 +60,9 @@
UNSPEC_LOAD_FROM_GOT
UNSPEC_ORI_L_LO12
+ UNSPEC_LUI_L_HI20
+ UNSPEC_LUI_H_LO20
+ UNSPEC_LUI_H_HI12
UNSPEC_TLS_LOW
])
@@ -1934,16 +1937,45 @@
[(set_attr "type" "move")]
)
+(define_insn "@lui_l_hi20<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec:P [(match_operand:P 1 "symbolic_operand")]
+ UNSPEC_LUI_L_HI20))]
+ ""
+ "lu12i.w\t%0,%r1"
+ [(set_attr "type" "move")]
+)
+
(define_insn "@ori_l_lo12<mode>"
[(set (match_operand:P 0 "register_operand" "=r")
(unspec:P [(match_operand:P 1 "register_operand" "r")
- (match_operand:P 2 "symbolic_operand")]
+ (match_operand:P 2 "symbolic_operand")]
UNSPEC_ORI_L_LO12))]
""
"ori\t%0,%1,%L2"
[(set_attr "type" "move")]
)
+(define_insn "lui_h_lo20"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "symbolic_operand")]
+ UNSPEC_LUI_H_LO20))]
+ "TARGET_64BIT"
+ "lu32i.d\t%0,%R2"
+ [(set_attr "type" "move")]
+)
+
+(define_insn "lui_h_hi12"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "symbolic_operand")]
+ UNSPEC_LUI_H_HI12))]
+ "TARGET_64BIT"
+ "lu52i.d\t%0,%1,%H2"
+ [(set_attr "type" "move")]
+)
+
;; Convert floating-point numbers to integers
(define_insn "frint_<fmt>"
[(set (match_operand:ANYF 0 "register_operand" "=f")
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
index cd3528c7c..e38c6fbdd 100644
--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
@@ -111,7 +111,7 @@
(match_code "const,symbol_ref,label_ref")
{
/* Split symbol to high and low if return false.
- If defined TARGET_CMODEL_LARGE, all symbol would be splited,
+ If defined TARGET_CMODEL_EXTREME, all symbol would be splited,
else if offset is not zero, the symbol would be splited. */
enum loongarch_symbol_type symbol_type;
@@ -126,10 +126,13 @@
switch (symbol_type)
{
case SYMBOL_PCREL:
- return 1;
+ if (TARGET_CMODEL_EXTREME)
+ return false;
+ else
+ return 1;
case SYMBOL_GOT_DISP:
- if (TARGET_CMODEL_LARGE || !flag_plt)
+ if (TARGET_CMODEL_EXTREME || !flag_plt)
return false;
else
return 1;
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 1de2b2bd4..c4f83e62a 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1006,6 +1006,7 @@ Objective-C and Objective-C++ Dialects}.
-mcond-move-float -mno-cond-move-float @gol
-memcpy -mno-memcpy -mstrict-align -mno-strict-align @gol
-mmax-inline-memcpy-size=@var{n} @gol
+-mexplicit-relocs -mno-explicit-relocs @gol
-mcmodel=@var{code-model}}
@emph{M32R/D Options}
@@ -24617,50 +24618,19 @@ less than or equal to @var{n} bytes. The default value of @var{n} is 1024.
@item -mcmodel=@var{code-model}
Set the code model to one of:
@table @samp
-@item tiny-static
-@itemize @bullet
-@item
-local symbol and global strong symbol: The data section must be within +/-2MiB addressing space.
-The text section must be within +/-128MiB addressing space.
-@item
-global weak symbol: The got table must be within +/-2GiB addressing space.
-@end itemize
-
-@item tiny
-@itemize @bullet
-@item
-local symbol: The data section must be within +/-2MiB addressing space.
-The text section must be within +/-128MiB
-addressing space.
-@item
-global symbol: The got table must be within +/-2GiB addressing space.
-@end itemize
+@item tiny-static (Not implemented yet)
+@item tiny (Not implemented yet)
@item normal
-@itemize @bullet
-@item
-local symbol: The data section must be within +/-2GiB addressing space.
-The text section must be within +/-128MiB addressing space.
-@item
-global symbol: The got table must be within +/-2GiB addressing space.
-@end itemize
+The text segment must be within 128MB addressing space. The data segment must
+be within 2GB addressing space.
-@item large
-@itemize @bullet
-@item
-local symbol: The data section must be within +/-2GiB addressing space.
-The text section must be within +/-128GiB addressing space.
-@item
-global symbol: The got table must be within +/-2GiB addressing space.
-@end itemize
+@item large (Not implemented yet)
-@item extreme(Not implemented yet)
-@itemize @bullet
-@item
-local symbol: The data and text section must be within +/-8EiB addressing space.
-@item
-global symbol: The data got table must be within +/-8EiB addressing space.
-@end itemize
+@item extreme
+This mode does not limit the size of the code segment and data segment.
+The @option{-mcmodel=extreme} option is incompatible with @option{-fplt} and
+@option{-mno-explicit-relocs}.
@end table
The default code model is @code{normal}.
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-1.c
index 01b8ea23f..76bf11b0c 100644
--- a/gcc/testsuite/gcc.target/loongarch/func-call-1.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mno-explicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mno-explicit-relocs -mcmodel=normal" } */
/* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
/* { dg-final { scan-assembler "test1:.*bl\t%plt\\(f\\)\n" } } */
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-2.c b/gcc/testsuite/gcc.target/loongarch/func-call-2.c
index 4565baaec..4b468fef8 100644
--- a/gcc/testsuite/gcc.target/loongarch/func-call-2.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-2.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mno-explicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mno-explicit-relocs -mcmodel=normal" } */
/* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
/* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-3.c
index 4f669a029..dd3a4882d 100644
--- a/gcc/testsuite/gcc.target/loongarch/func-call-3.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-3.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mno-explicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mno-explicit-relocs -mcmodel=normal" } */
/* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */
/* { dg-final { scan-assembler "test1:.*la\.global\t.*f\n\tjirl" } } */
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-4.c
index 943adb640..f8158ec34 100644
--- a/gcc/testsuite/gcc.target/loongarch/func-call-4.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-4.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mno-explicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mno-explicit-relocs -mcmodel=normal" } */
/* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */
/* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-5.c b/gcc/testsuite/gcc.target/loongarch/func-call-5.c
index 2c2a1c8a1..37994af43 100644
--- a/gcc/testsuite/gcc.target/loongarch/func-call-5.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-5.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mexplicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mexplicit-relocs -mcmodel=normal" } */
/* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
/* { dg-final { scan-assembler "test1:.*bl\t%plt\\(f\\)\n" } } */
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-6.c b/gcc/testsuite/gcc.target/loongarch/func-call-6.c
index 4b0e4266e..8e366e376 100644
--- a/gcc/testsuite/gcc.target/loongarch/func-call-6.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-6.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mexplicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mexplicit-relocs -mcmodel=normal" } */
/* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
/* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-7.c b/gcc/testsuite/gcc.target/loongarch/func-call-7.c
index 51792711f..4177c3d96 100644
--- a/gcc/testsuite/gcc.target/loongarch/func-call-7.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-7.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=normal" } */
/* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */
/* { dg-final { scan-assembler "test1:.*pcalau12i\t.*%got_pc_hi20\\(f\\)\n\tld\.d\t.*%got_pc_lo12\\(f\\)\n\tjirl" } } */
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-8.c b/gcc/testsuite/gcc.target/loongarch/func-call-8.c
index 330140d88..4254eaa16 100644
--- a/gcc/testsuite/gcc.target/loongarch/func-call-8.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-8.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=normal" } */
/* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */
/* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c
new file mode 100644
index 000000000..db1e0f853
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */
+/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
+/* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
+/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
+
+extern void g (void);
+void
+f (void)
+{}
+
+static void
+l (void)
+{}
+
+void
+test (void)
+{
+ g ();
+}
+
+void
+test1 (void)
+{
+ f ();
+}
+
+void
+test2 (void)
+{
+ l ();
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c
new file mode 100644
index 000000000..21bf81ae8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */
+/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
+/* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
+/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
+
+extern void g (void);
+void
+f (void)
+{}
+
+static void
+l (void)
+{}
+
+void
+test (void)
+{
+ g ();
+}
+
+void
+test1 (void)
+{
+ f ();
+}
+
+void
+test2 (void)
+{
+ l ();
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c b/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c
index bfcc9bc33..3ec8bd229 100644
--- a/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c
+++ b/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -mexplicit-relocs -fno-pic -O2" } */
+/* { dg-options "-mabi=lp64d -mexplicit-relocs -fno-pic -O2 -mcmodel=normal" } */
/* { dg-final { scan-assembler "pcalau12i.*%pc_hi20\\(\.LANCHOR0\\)\n" } } */
/* { dg-final { scan-assembler "addi\.d.*%pc_lo12\\(\.LANCHOR0\\)\n" } } */
/* { dg-final { scan-assembler "ldptr.d\t\\\$r4,.*,0\n" } } */
--
2.33.0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,65 @@
From 2ef90d604d7bae207d5b2067b4ce38d04d4835be Mon Sep 17 00:00:00 2001
From: Xiaolong Chen <chenxiaolong@loongson.cn>
Date: Tue, 12 Sep 2023 16:00:48 +0800
Subject: [PATCH 110/124] LoongArch: Add tests for ASX xvldrepl/xvstelm
instruction generation.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c: New test.
* gcc.target/loongarch/vector/lasx/lasx-xvstelm.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
.../loongarch/vector/lasx/lasx-xvldrepl.c | 16 ++++++++++++++++
.../loongarch/vector/lasx/lasx-xvstelm.c | 14 ++++++++++++++
2 files changed, 30 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
new file mode 100644
index 000000000..105567951
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mlasx" } */
+/* { dg-final { scan-assembler-times "xvldrepl.w" 2} } */
+
+#define N 258
+
+float a[N], b[N], c[N];
+
+void
+test ()
+{
+ for (int i = 0; i < 256; i++)
+ {
+ a[i] = c[0] * b[i] + c[1];
+ }
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
new file mode 100644
index 000000000..1a7b0e86f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mlasx" } */
+/* { dg-final { scan-assembler-times "xvstelm.w" 8} } */
+
+#define LEN 256
+
+float a[LEN], b[LEN], c[LEN];
+
+void
+test ()
+{
+ for (int i = 0; i < LEN; i += 2)
+ a[i] = b[i] + c[i];
+}
--
2.33.0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,715 @@
From 243656b5b87a3125c2a885d11f022a79cca98b39 Mon Sep 17 00:00:00 2001
From: Xiaolong Chen <chenxiaolong@loongson.cn>
Date: Mon, 11 Sep 2023 10:07:24 +0800
Subject: [PATCH 082/124] LoongArch: Add tests for SX vector addition vsadd
instructions.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
.../loongarch/vector/lsx/lsx-vsadd-1.c | 335 +++++++++++++++++
.../loongarch/vector/lsx/lsx-vsadd-2.c | 345 ++++++++++++++++++
2 files changed, 680 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
new file mode 100644
index 000000000..1bc27c983
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
@@ -0,0 +1,335 @@
+/* { dg-do run } */
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include <lsxintrin.h>
+
+int
+main ()
+{
+ __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+ __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+ __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+ int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+ long int long_op0, long_op1, long_op2, lont_out, lont_result;
+ long int long_int_out, long_int_result;
+ unsigned int unsigned_int_out, unsigned_int_result;
+ unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x00000000ffffffff;
+ *((unsigned long *)&__m128i_op1[0]) = 0x00000000ffffffff;
+ *((unsigned long *)&__m128i_result[1]) = 0x00000000ffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0x00000000ffffffff;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0xfefefefefefefefe;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffff3c992b2e;
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffff730f;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffff3c992b2e;
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffff730f;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x00007fff00007fff;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x000000002bfd9461;
+ *((unsigned long *)&__m128i_result[1]) = 0x00007fff00007fff;
+ *((unsigned long *)&__m128i_result[0]) = 0x000000002bfd9461;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x00d3012acc56f9bb;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000001021;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x00d3012acc56f9bb;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000001021;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000001000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000001000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000001000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000001000;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x80808080806b000b;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x80808080806b000b;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op0[0]) = 0xffffffffff01ff01;
+ *((unsigned long *)&__m128i_op1[1]) = 0x3c600000ff800000;
+ *((unsigned long *)&__m128i_op1[0]) = 0xfffffffffffffffe;
+ *((unsigned long *)&__m128i_result[1]) = 0x3c5fffffff7fffff;
+ *((unsigned long *)&__m128i_result[0]) = 0xfffefffeff00feff;
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x00ff00ff00ff00ff;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x00ff00ff00ff00ff;
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000000ffffffff;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x00000000ffffffff;
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x3ff0000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x40f3fa0000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x3ff0000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x40f3fa0000000000;
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000008a0000008a;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000008900000009;
+ *((unsigned long *)&__m128i_op1[1]) = 0x63637687636316bb;
+ *((unsigned long *)&__m128i_op1[0]) = 0x6363636363636363;
+ *((unsigned long *)&__m128i_result[1]) = 0x6363771163631745;
+ *((unsigned long *)&__m128i_result[0]) = 0x636363ec6363636c;
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000004;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000004;
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000080000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000080000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000080000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000080000000;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xfffffffffefefe6a;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000000c2bac2c2;
+ *((unsigned long *)&__m128i_op1[1]) = 0x00000001fffffffe;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x00000000fefefe68;
+ *((unsigned long *)&__m128i_result[0]) = 0x00000000c2bac2c2;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x027c027c000027c0;
+ *((unsigned long *)&__m128i_op1[1]) = 0x001ffff0003ffff0;
+ *((unsigned long *)&__m128i_op1[0]) = 0x000fffefffefffef;
+ *((unsigned long *)&__m128i_result[1]) = 0x001ffff0003ffff0;
+ *((unsigned long *)&__m128i_result[0]) = 0x028c026bfff027af;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0007000000040000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0003000000010000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0007000000040000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0003000000010000;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x3f8000003f800000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x3f8000003f800000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x3fffff0000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x3fffff0000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x7f7fff003f800000;
+ *((unsigned long *)&__m128i_result[0]) = 0x7f7fff003f800000;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000820202020;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00fe01fc0005fff4;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000003a24;
+ *((unsigned long *)&__m128i_op1[0]) = 0x003dbe88077c78c1;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000820205a44;
+ *((unsigned long *)&__m128i_result[0]) = 0x013bc084078278b5;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000001;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000140001;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000001;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000140001;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x67eb85afb2ebb000;
+ *((unsigned long *)&__m128i_op0[0]) = 0xc8847ef6ed3f2000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000100000001;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x67eb85b0b2ebb001;
+ *((unsigned long *)&__m128i_result[0]) = 0xc8847ef6ed3f2000;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffff00000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0xffff000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000014eb54ab;
+ *((unsigned long *)&__m128i_op1[0]) = 0x14eb6a002a406a00;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffff14eb54ab;
+ *((unsigned long *)&__m128i_result[0]) = 0x14ea6a002a406a00;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000004;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000004;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0xce9035c49ffff570;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000004;
+ *((unsigned long *)&__m128i_result[0]) = 0xce9035c49ffff574;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000010;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000010;
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x000000000000000d;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000400;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x000000000000040d;
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000001300000013;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000001300000013;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000001300000013;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000001300000013;
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000100000100;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000100000100;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000100000100;
+ *((unsigned long *)&__m128i_result[0]) = 0x00000001000000ff;
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000300000001;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000100010001;
+ *((unsigned long *)&__m128i_op1[1]) = 0xfffffffffffffffa;
+ *((unsigned long *)&__m128i_op1[0]) = 0xfffffffffffffffa;
+ *((unsigned long *)&__m128i_result[1]) = 0x00000002fffffffb;
+ *((unsigned long *)&__m128i_result[0]) = 0x000000010000fffb;
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c
new file mode 100644
index 000000000..67d189991
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c
@@ -0,0 +1,345 @@
+/* { dg-do run } */
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include <lsxintrin.h>
+
+int
+main ()
+{
+ __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+ __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+ __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+ int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+ long int long_op0, long_op1, long_op2, lont_out, lont_result;
+ long int long_int_out, long_int_result;
+ unsigned int unsigned_int_out, unsigned_int_result;
+ unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x10f917d72d3d01e4;
+ *((unsigned long *)&__m128i_op1[0]) = 0x203e16d116de012b;
+ *((unsigned long *)&__m128i_result[1]) = 0x10f917d72d3d01e4;
+ *((unsigned long *)&__m128i_result[0]) = 0x203e16d116de012b;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xfffebd06fffe820c;
+ *((unsigned long *)&__m128i_op0[0]) = 0x7fff7ffe7fff3506;
+ *((unsigned long *)&__m128i_op1[1]) = 0xfffebd06fffe820c;
+ *((unsigned long *)&__m128i_op1[0]) = 0x7fff7ffe7fff3506;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffff0cffffff18;
+ *((unsigned long *)&__m128i_result[0]) = 0xfefffefffeff6a0c;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op1[1]) = 0x4f804f804f804f80;
+ *((unsigned long *)&__m128i_op1[0]) = 0x4f804f804f804f80;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xfffff60ca7104649;
+ *((unsigned long *)&__m128i_op0[0]) = 0xfffff790a15db63d;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000001;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000001;
+ *((unsigned long *)&__m128i_result[1]) = 0xfffff60ca710464a;
+ *((unsigned long *)&__m128i_result[0]) = 0xfffff790a15db63e;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xfffffffffffffffe;
+ *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffff46;
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x00fe000100cf005f;
+ *((unsigned long *)&__m128i_op0[0]) = 0x7fff7fff7fff7fff;
+ *((unsigned long *)&__m128i_op1[1]) = 0x5f675e96e29a5a60;
+ *((unsigned long *)&__m128i_op1[0]) = 0x7fff7fff7fff7fff;
+ *((unsigned long *)&__m128i_result[1]) = 0x5fff5e97e2ff5abf;
+ *((unsigned long *)&__m128i_result[0]) = 0xfefffefffefffeff;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000001000100010;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0001000100010058;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0001001100110068;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x7fffffff7fffffff;
+ *((unsigned long *)&__m128i_op0[0]) = 0x7fffffff7fffffff;
+ *((unsigned long *)&__m128i_op1[1]) = 0x7fff010181010102;
+ *((unsigned long *)&__m128i_op1[0]) = 0x7fffffff81010102;
+ *((unsigned long *)&__m128i_result[1]) = 0xfeffffffffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0xfeffffffffffffff;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000ebd20000714f;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00012c8a0000a58a;
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffb81a6f70;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000d48eaa1a2;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffb81ae0bf;
+ *((unsigned long *)&__m128i_result[0]) = 0x00012c9748eaffff;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0177fff0fffffff0;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000000011ff8bc;
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000200;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000200;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000200;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000200;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000001;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000001;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000d0000000d;
+ *((unsigned long *)&__m128i_op1[1]) = 0x8006000000040000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x8002000000000007;
+ *((unsigned long *)&__m128i_result[1]) = 0x8006000000040000;
+ *((unsigned long *)&__m128i_result[0]) = 0x8002000d00000014;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000014;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000014;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ int_out = __lsx_vpickve2gr_h (__m128i_op0, 0x1);
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000600007fff;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000008ffffa209;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000600007fff;
+ *((unsigned long *)&__m128i_result[0]) = 0x00000008ffffa209;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x636363633f3e47c1;
+ *((unsigned long *)&__m128i_op0[0]) = 0x41f8e080f1ef4eaa;
+ *((unsigned long *)&__m128i_op1[1]) = 0x00000807bf0a1f80;
+ *((unsigned long *)&__m128i_op1[0]) = 0x00000800ecedee68;
+ *((unsigned long *)&__m128i_result[1]) = 0x63636b6afe486741;
+ *((unsigned long *)&__m128i_result[0]) = 0x41f8e880ffffffff;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000ebd20000714f;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00012c8a0000a58a;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000ebd20000714f;
+ *((unsigned long *)&__m128i_op1[0]) = 0x00012c8a0000a58a;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000ffff0000e29e;
+ *((unsigned long *)&__m128i_result[0]) = 0x000259140000ffff;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0xfffffffeffffffff;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0xfffffffeffffffff;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0c03e17edd781b11;
+ *((unsigned long *)&__m128i_op0[0]) = 0x342caf9be55700b5;
+ *((unsigned long *)&__m128i_op1[1]) = 0x00040003ff83ff84;
+ *((unsigned long *)&__m128i_op1[0]) = 0x00040003ff4dffca;
+ *((unsigned long *)&__m128i_result[1]) = 0x0c07e181ffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0x3430af9effffffff;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x00000000ffa8ff9f;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000ffffffabff99;
+ *((unsigned long *)&__m128i_op1[1]) = 0x000100000002007d;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0001000000020001;
+ *((unsigned long *)&__m128i_result[1]) = 0x00010000ffab001c;
+ *((unsigned long *)&__m128i_result[0]) = 0x0001ffffffadff9a;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0800080008000800;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0800080008000800;
+ *((unsigned long *)&__m128i_result[1]) = 0x0800080008000800;
+ *((unsigned long *)&__m128i_result[0]) = 0x0800080008000800;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000001;
+ *((unsigned long *)&__m128i_op0[0]) = 0x76f424887fffffff;
+ *((unsigned long *)&__m128i_op1[1]) = 0xc110000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0xc00d060000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0xc110000000000001;
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffff7fffffff;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x000000000000002f;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000029;
+ *((unsigned long *)&__m128i_op1[1]) = 0xfbfbfb17fbfb38ea;
+ *((unsigned long *)&__m128i_op1[0]) = 0xfbfb47fbfbfb0404;
+ *((unsigned long *)&__m128i_result[1]) = 0xfbfbfb17fbfb3919;
+ *((unsigned long *)&__m128i_result[0]) = 0xfbfb47fbfbfb042d;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x8080808080808081;
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op1[0]) = 0x00000000ffffffff;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0x80808080ffffffff;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x00123fff00120012;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0012001200120012;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x000000000005003a;
+ *((unsigned long *)&__m128i_result[1]) = 0x00123fff00120012;
+ *((unsigned long *)&__m128i_result[0]) = 0x001200120017004c;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0xbfd10d0d7b6b6b73;
+ *((unsigned long *)&__m128i_op1[0]) = 0xc5c534920000c4ed;
+ *((unsigned long *)&__m128i_result[1]) = 0xbfd10d0d7b6b6b73;
+ *((unsigned long *)&__m128i_result[0]) = 0xc5c534920000c4ed;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x000aa822a79308f6;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000000084d12ce;
+ *((unsigned long *)&__m128i_op1[1]) = 0x000aa822a79308f6;
+ *((unsigned long *)&__m128i_op1[0]) = 0x03aa558e1d37b5a1;
+ *((unsigned long *)&__m128i_result[1]) = 0x00155044ffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0x03aa558e2584c86f;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x021b7d24c9678a35;
+ *((unsigned long *)&__m128i_op0[0]) = 0x030298a6a1030a49;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x021b7d24c9678a35;
+ *((unsigned long *)&__m128i_result[0]) = 0x030298a6a1030a49;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x00007a8000000480;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000485000004cc;
+ *((unsigned long *)&__m128i_op1[1]) = 0x00007a8000000480;
+ *((unsigned long *)&__m128i_op1[0]) = 0x00000485000004cc;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000f50000000900;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000090a00000998;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x004eff6200d2ff76;
+ *((unsigned long *)&__m128i_op1[0]) = 0xff70002800be00a0;
+ *((unsigned long *)&__m128i_result[1]) = 0x004eff6200d2ff76;
+ *((unsigned long *)&__m128i_result[0]) = 0xff70002800be00a0;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ return 0;
+}
--
2.33.0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,37 @@
From f07b91862055533d779fbf76c12cb7c0ae75b53d Mon Sep 17 00:00:00 2001
From: Xiaolong Chen <chenxiaolong@loongson.cn>
Date: Mon, 11 Sep 2023 09:35:24 +0800
Subject: [PATCH 076/124] LoongArch: Add tests of -mstrict-align option.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/strict-align.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/testsuite/gcc.target/loongarch/strict-align.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/loongarch/strict-align.c
diff --git a/gcc/testsuite/gcc.target/loongarch/strict-align.c b/gcc/testsuite/gcc.target/loongarch/strict-align.c
new file mode 100644
index 000000000..040d84958
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/strict-align.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mstrict-align -mlasx" } */
+/* { dg-final { scan-assembler-not "vfadd.s" } } */
+
+void
+foo (float *restrict x, float *restrict y)
+{
+ x[0] = x[0] + y[0];
+ x[1] = x[1] + y[1];
+ x[2] = x[2] + y[2];
+ x[3] = x[3] + y[3];
+}
--
2.33.0

View File

@ -0,0 +1,131 @@
From aebd03c944312be767f03d129eeebc0c4cdf5b4a Mon Sep 17 00:00:00 2001
From: Xiaolong Chen <chenxiaolong@loongson.cn>
Date: Mon, 11 Sep 2023 09:36:35 +0800
Subject: [PATCH 077/124] LoongArch: Add testsuite framework for Loongson
SX/ASX.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/vector/loongarch-vector.exp: New test.
* gcc.target/loongarch/vector/simd_correctness_check.h: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
.../loongarch/vector/loongarch-vector.exp | 42 +++++++++++++++
.../loongarch/vector/simd_correctness_check.h | 54 +++++++++++++++++++
2 files changed, 96 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
new file mode 100644
index 000000000..2c37aa91d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
@@ -0,0 +1,42 @@
+#Copyright(C) 2023 Free Software Foundation, Inc.
+
+#This program is free software; you can redistribute it and / or modify
+#it under the terms of the GNU General Public License as published by
+#the Free Software Foundation; either version 3 of the License, or
+#(at your option) any later version.
+#
+#This program is distributed in the hope that it will be useful,
+#but WITHOUT ANY WARRANTY; without even the implied warranty of
+#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
+#GNU General Public License for more details.
+#
+#You should have received a copy of the GNU General Public License
+#along with GCC; see the file COPYING3.If not see
+# <http: //www.gnu.org/licenses/>.
+
+#GCC testsuite that uses the `dg.exp' driver.
+
+#Exit immediately if this isn't a LoongArch target.
+if ![istarget loongarch*-*-*] then {
+ return
+}
+
+#Load support procs.
+load_lib gcc-dg.exp
+
+#If a testcase doesn't have special options, use these.
+global DEFAULT_CFLAGS
+if ![info exists DEFAULT_CFLAGS] then {
+ set DEFAULT_CFLAGS " "
+}
+
+#Initialize `dg'.
+dg-init
+
+#Main loop.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lsx/*.\[cS\]]] \
+ " -mlsx" $DEFAULT_CFLAGS
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lasx/*.\[cS\]]] \
+ " -mlasx" $DEFAULT_CFLAGS
+# All done.
+dg-finish
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
new file mode 100644
index 000000000..eb7fbd59c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
@@ -0,0 +1,54 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define ASSERTEQ_64(line, ref, res) \
+ do \
+ { \
+ int fail = 0; \
+ for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \
+ { \
+ long *temp_ref = &ref[i], *temp_res = &res[i]; \
+ if (abs (*temp_ref - *temp_res) > 0) \
+ { \
+ printf (" error: %s at line %ld , expected " #ref \
+ "[%ld]:0x%lx, got: 0x%lx\n", \
+ __FILE__, line, i, *temp_ref, *temp_res); \
+ fail = 1; \
+ } \
+ } \
+ if (fail == 1) \
+ abort (); \
+ } \
+ while (0)
+
+#define ASSERTEQ_32(line, ref, res) \
+ do \
+ { \
+ int fail = 0; \
+ for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \
+ { \
+ int *temp_ref = &ref[i], *temp_res = &res[i]; \
+ if (abs (*temp_ref - *temp_res) > 0) \
+ { \
+ printf (" error: %s at line %ld , expected " #ref \
+ "[%ld]:0x%x, got: 0x%x\n", \
+ __FILE__, line, i, *temp_ref, *temp_res); \
+ fail = 1; \
+ } \
+ } \
+ if (fail == 1) \
+ abort (); \
+ } \
+ while (0)
+
+#define ASSERTEQ_int(line, ref, res) \
+ do \
+ { \
+ if (ref != res) \
+ { \
+ printf (" error: %s at line %ld , expected %d, got %d\n", __FILE__, \
+ line, ref, res); \
+ } \
+ } \
+ while (0)
--
2.33.0

View File

@ -0,0 +1,53 @@
From a4bf17e87a965ed7f2bb1d2921fb9dd820c79a96 Mon Sep 17 00:00:00 2001
From: Yang Yujie <yangyujie@loongson.cn>
Date: Thu, 7 Sep 2023 14:50:10 +0800
Subject: [PATCH 069/124] LoongArch: Adjust C++ multilib header layout.
For LoongArch, the toplevel library build is always aliased to
one of the multilib variants. This patch installs it with the
actual MULTISUBDIR (instead of ".") so that the headers can be
reached by the compiler.
This patch is an update of
https://gcc.gnu.org/pipermail/gcc-patches/2023-September/629435.html
libstdc++-v3/ChangeLog:
* configure.host: Register t-loongarch in tmake_file.
* config/cpu/loongarch/t-loongarch: New file. Manually refresh
MULTISUBDIR with $(shell $(CXX) --print-multi-directory).
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
libstdc++-v3/config/cpu/loongarch/t-loongarch | 1 +
libstdc++-v3/configure.host | 5 ++++-
2 files changed, 5 insertions(+), 1 deletion(-)
create mode 100644 libstdc++-v3/config/cpu/loongarch/t-loongarch
diff --git a/libstdc++-v3/config/cpu/loongarch/t-loongarch b/libstdc++-v3/config/cpu/loongarch/t-loongarch
new file mode 100644
index 000000000..adfc8ebb9
--- /dev/null
+++ b/libstdc++-v3/config/cpu/loongarch/t-loongarch
@@ -0,0 +1 @@
+AM_MAKEFLAGS += " MULTISUBDIR=/$(shell $(CXX) --print-multi-directory)"
diff --git a/libstdc++-v3/configure.host b/libstdc++-v3/configure.host
index ec32980aa..592160e6d 100644
--- a/libstdc++-v3/configure.host
+++ b/libstdc++-v3/configure.host
@@ -315,7 +315,10 @@ esac
# Set any OS-dependent and CPU-dependent bits.
# THIS TABLE IS SORTED. KEEP IT THAT WAY.
case "${host}" in
- *-*-linux* | *-*-uclinux*)
+ loongarch*)
+ tmake_file="cpu/loongarch/t-loongarch"
+ ;;
+ *-*-linux* | *-*-uclinux*)
case "${host_cpu}" in
i[567]86)
abi_baseline_pair=i486-linux-gnu
--
2.33.0

View File

@ -0,0 +1,55 @@
From e82403e918e18fa8e8ecd0c9e26f2657cc814e12 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Wed, 24 Aug 2022 21:31:34 +0800
Subject: [PATCH 013/124] LoongArch: Avoid RTL flag check failure in
loongarch_classify_symbol
SYMBOL_REF_TLS_MODEL invokes SYMBOL_REF_FLAGS, and SYMBOL_REF_FLAGS
invokes RTL_FLAG_CHECK1 and aborts when RTL code is not SYMBOL_REF.
r13-1833 removed "gcc_assert (SYMBOL_REF_P (x))" before invoking
"SYMBOL_REF_TLS_MODEL (x)", indicating that it's now possible that "x"
is not a SYMBOL_REF. So we need to check if "x" is SYMBOL_REF first.
This fixes a test failure happening with r13-2173 with RTL flag
checking enabled:
pr106096.C:26:1: internal compiler error: RTL flag check:
SYMBOL_REF_FLAGS used with unexpected rtx code 'const' in
loongarch_classify_symbol
gcc/ChangeLog:
* config/loongarch/loongarch.cc (loongarch_classify_symbol):
Return early if the rtx is not SYMBOL_REF.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.cc | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 04c4ddaed..452aba9d4 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1633,14 +1633,13 @@ loongarch_rtx_constant_in_small_data_p (machine_mode mode)
static enum loongarch_symbol_type
loongarch_classify_symbol (const_rtx x)
{
- if (LABEL_REF_P (x))
+ if (!SYMBOL_REF_P (x))
return SYMBOL_PCREL;
if (SYMBOL_REF_TLS_MODEL (x))
return SYMBOL_TLS;
- if (SYMBOL_REF_P (x)
- && !loongarch_symbol_binds_local_p (x))
+ if (!loongarch_symbol_binds_local_p (x))
return SYMBOL_GOT_DISP;
return SYMBOL_PCREL;
--
2.33.0

View File

@ -0,0 +1,62 @@
From 7e759740048ee6f24c1055c32868fa21cabb4f75 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Wed, 7 Jun 2023 10:21:58 +0800
Subject: [PATCH 048/124] LoongArch: Avoid non-returning indirect jumps through
$ra [PR110136]
Micro-architecture unconditionally treats a "jr $ra" as "return from subroutine",
hence doing "jr $ra" would interfere with both subroutine return prediction and
the more general indirect branch prediction.
Therefore, a problem like PR110136 can cause a significant increase in branch error
prediction rate and affect performance. The same problem exists with "indirect_jump".
gcc/ChangeLog:
PR target/110136
* config/loongarch/loongarch.md: Modify the register constraints for template
"jumptable" and "indirect_jump" from "r" to "e".
Co-authored-by: Andrew Pinski <apinski@marvell.com>
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.md | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index b23248c33..c79951c1d 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -2895,6 +2895,10 @@
}
[(set_attr "type" "branch")])
+;; Micro-architecture unconditionally treats a "jr $ra" as "return from subroutine",
+;; non-returning indirect jumps through $ra would interfere with both subroutine
+;; return prediction and the more general indirect branch prediction.
+
(define_expand "indirect_jump"
[(set (pc) (match_operand 0 "register_operand"))]
""
@@ -2905,7 +2909,7 @@
})
(define_insn "@indirect_jump<mode>"
- [(set (pc) (match_operand:P 0 "register_operand" "r"))]
+ [(set (pc) (match_operand:P 0 "register_operand" "e"))]
""
"jr\t%0"
[(set_attr "type" "jump")
@@ -2928,7 +2932,7 @@
(define_insn "@tablejump<mode>"
[(set (pc)
- (match_operand:P 0 "register_operand" "r"))
+ (match_operand:P 0 "register_operand" "e"))
(use (label_ref (match_operand 1 "" "")))]
""
"jr\t%0"
--
2.33.0

View File

@ -0,0 +1,41 @@
From 59824f1062d77d0e02ea82d47415bf95c235de87 Mon Sep 17 00:00:00 2001
From: chenxiaolong <chenxl04200420@163.com>
Date: Thu, 15 Jun 2023 02:46:24 +0000
Subject: [PATCH 046/124] LoongArch: Change the default value of
LARCH_CALL_RATIO to 6.
During the regression testing of the LoongArch architecture GCC, it was found
that the tests in the pr90883.C file failed. The problem was modulated and
found that the error was caused by setting the macro LARCH_CALL_RATIO to a too
large value. Combined with the actual LoongArch architecture, the different
thresholds for meeting the test conditions were tested using the engineering method
(SPEC CPU 2006), and the results showed that its optimal threshold should be set
to 6.
gcc/ChangeLog:
* config/loongarch/loongarch.h (LARCH_CALL_RATIO): Modify the value
of macro LARCH_CALL_RATIO on LoongArch to make it perform optimally.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index 44ebadfaa..0e35d4dec 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -1073,7 +1073,7 @@ typedef struct {
/* The base cost of a memcpy call, for MOVE_RATIO and friends. These
values were determined experimentally by benchmarking with CSiBE.
*/
-#define LARCH_CALL_RATIO 8
+#define LARCH_CALL_RATIO 6
/* Any loop-based implementation of cpymemsi will have at least
LARCH_MAX_MOVE_OPS_PER_LOOP_ITER memory-to-memory
--
2.33.0

View File

@ -0,0 +1,69 @@
From 7e843ed8da168a05eb04eee0b14cbe681bf798fe Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Wed, 13 Sep 2023 11:01:34 +0800
Subject: [PATCH 123/124] LoongArch: Change the value of branch_cost from 2 to
6.
gcc/ChangeLog:
* config/loongarch/loongarch-def.c: Modify the default value of
branch_cost.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/cmov_ii.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch-def.c | 4 ++--
gcc/testsuite/gcc.target/loongarch/cmov_ii.c | 15 +++++++++++++++
2 files changed, 17 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/cmov_ii.c
diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
index d29d5f001..eeb32dbf6 100644
--- a/gcc/config/loongarch/loongarch-def.c
+++ b/gcc/config/loongarch/loongarch-def.c
@@ -85,7 +85,7 @@ loongarch_cpu_align[N_TUNE_TYPES] = {
.int_mult_di = COSTS_N_INSNS (1), \
.int_div_si = COSTS_N_INSNS (4), \
.int_div_di = COSTS_N_INSNS (6), \
- .branch_cost = 2, \
+ .branch_cost = 6, \
.memory_latency = 4
/* The following properties cannot be looked up directly using "cpucfg".
@@ -118,7 +118,7 @@ loongarch_rtx_cost_optimize_size = {
.int_mult_di = 4,
.int_div_si = 4,
.int_div_di = 4,
- .branch_cost = 2,
+ .branch_cost = 6,
.memory_latency = 4,
};
diff --git a/gcc/testsuite/gcc.target/loongarch/cmov_ii.c b/gcc/testsuite/gcc.target/loongarch/cmov_ii.c
new file mode 100644
index 000000000..21b468e8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/cmov_ii.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler "test:.*xor.*maskeqz.*masknez.*or.*" } } */
+
+extern void foo_ii (int *, int *, int *, int *);
+
+int
+test (void)
+{
+ int a, b;
+ int c, d, out;
+ foo_ii (&a, &b, &c, &d);
+ out = a == b ? c : d;
+ return out;
+}
--
2.33.0

View File

@ -0,0 +1,49 @@
From 6e9265e571a63deb2584704a0b088a6d67ec8af5 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Mon, 20 Feb 2023 16:47:11 +0800
Subject: [PATCH 037/124] LoongArch: Change the value of macro
TRY_EMPTY_VM_SPACE from 0x8000000000 to 0x1000000000.
The PCH mechanism first tries to map the .gch file to the virtual memory
space pointed to by TRY_EMPTY_VM_SPACE during the compilation process.
The original value of TRY_EMPTY_VM_SPACE macro is 0x8000000000,
but like la464 only has 40 bits of virtual address space, this value
just exceeds the address range.
If we want to support chips with less than 40 bits virtual addresses,
then the value of this macro needs to be set small. I think setting
this value small will increase the probability of virtual address
mapping failure. And the purpose of pch is to make compilation faster,
but I think we rarely compile on embedded systems. So this situation
may not be within our consideration.
So change the value of this macro to 0x1000000000.
gcc/ChangeLog:
* config/host-linux.cc (TRY_EMPTY_VM_SPACE): Modify the value of
the macro to 0x1000000000.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/host-linux.cc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/config/host-linux.cc b/gcc/config/host-linux.cc
index 817d3c087..d93cfc064 100644
--- a/gcc/config/host-linux.cc
+++ b/gcc/config/host-linux.cc
@@ -99,7 +99,7 @@
#elif defined(__riscv) && defined (__LP64__)
# define TRY_EMPTY_VM_SPACE 0x1000000000
#elif defined(__loongarch__) && defined(__LP64__)
-# define TRY_EMPTY_VM_SPACE 0x8000000000
+# define TRY_EMPTY_VM_SPACE 0x1000000000
#else
# define TRY_EMPTY_VM_SPACE 0
#endif
--
2.33.0

View File

@ -0,0 +1,139 @@
From 05c1df09c70cd0ed48f0644890f69a0128b17a98 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Fri, 29 Jul 2022 09:44:52 +0800
Subject: [PATCH 008/124] LoongArch: Define the macro
ASM_PREFERRED_EH_DATA_FORMAT by checking the assembler's support for eh_frame
encoding.
.eh_frame DW_EH_PE_pcrel encoding format is not supported by gas <= 2.39.
Check if the assembler support DW_EH_PE_PCREL encoding and define .eh_frame
encoding type.
gcc/ChangeLog:
* config.in: Regenerate.
* config/loongarch/loongarch.h (ASM_PREFERRED_EH_DATA_FORMAT):
Select the value of the macro definition according to whether
HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT is defined.
* configure: Regenerate.
* configure.ac: Reinstate HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config.in | 8 +++++++-
gcc/config/loongarch/loongarch.h | 5 +++++
gcc/configure | 34 ++++++++++++++++++++++++++++++++
gcc/configure.ac | 8 ++++++++
4 files changed, 54 insertions(+), 1 deletion(-)
diff --git a/gcc/config.in b/gcc/config.in
index 64c27c9cf..67ce422f2 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -404,13 +404,19 @@
#endif
+/* Define if your assembler supports eh_frame pcrel encoding. */
+#ifndef USED_FOR_TARGET
+#undef HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT
+#endif
+
+
/* Define if your assembler supports the R_PPC64_ENTRY relocation. */
#ifndef USED_FOR_TARGET
#undef HAVE_AS_ENTRY_MARKERS
#endif
-/* Define if your assembler supports explicit relocations. */
+/* Define if your assembler supports explicit relocation. */
#ifndef USED_FOR_TARGET
#undef HAVE_AS_EXPLICIT_RELOCS
#endif
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index 12f209047..a52a81adf 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -1130,8 +1130,13 @@ struct GTY (()) machine_function
};
#endif
+#ifdef HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
+ (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4)
+#else
#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
(((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_absptr)
+#endif
/* Do emit .note.GNU-stack by default. */
#ifndef NEED_INDICATE_EXEC_STACK
diff --git a/gcc/configure b/gcc/configure
index 840eddc7c..3788e240a 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -28857,6 +28857,40 @@ if test $gcc_cv_as_loongarch_explicit_relocs = yes; then
$as_echo "#define HAVE_AS_EXPLICIT_RELOCS 1" >>confdefs.h
+fi
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for eh_frame pcrel encoding support" >&5
+$as_echo_n "checking assembler for eh_frame pcrel encoding support... " >&6; }
+if ${gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support=no
+ if test x$gcc_cv_as != x; then
+ $as_echo '.cfi_startproc
+ .cfi_personality 0x9b,a
+ .cfi_lsda 0x1b,b
+ .cfi_endproc' > conftest.s
+ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }
+ then
+ gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support=yes
+ else
+ echo "configure: failed program was" >&5
+ cat conftest.s >&5
+ fi
+ rm -f conftest.o conftest.s
+ fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support" >&5
+$as_echo "$gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support" >&6; }
+if test $gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support = yes; then
+
+$as_echo "#define HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT 1" >>confdefs.h
+
fi
;;
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 975c852c6..1c376e0d4 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -5324,6 +5324,14 @@ x:
[a:pcalau12i $t0,%pc_hi20(a)],,
[AC_DEFINE(HAVE_AS_EXPLICIT_RELOCS, 1,
[Define if your assembler supports explicit relocation.])])
+ gcc_GAS_CHECK_FEATURE([eh_frame pcrel encoding support],
+ gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support,,
+ [.cfi_startproc
+ .cfi_personality 0x9b,a
+ .cfi_lsda 0x1b,b
+ .cfi_endproc],,
+ [AC_DEFINE(HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT, 1,
+ [Define if your assembler supports eh_frame pcrel encoding.])])
;;
s390*-*-*)
gcc_GAS_CHECK_FEATURE([.gnu_attribute support],
--
2.33.0

View File

@ -0,0 +1,34 @@
From 2e19311d1bf4f932f5e67f6866123b895b12c97f Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Fri, 13 Jan 2023 09:01:12 +0100
Subject: [PATCH 035/124] LoongArch: Don't add crtfastmath.o for -shared
Don't add crtfastmath.o for -shared to avoid altering the FP
environment when loading a shared library.
PR target/55522
* config/loongarch/gnu-user.h (GNU_USER_TARGET_MATHFILE_SPEC):
Don't add crtfastmath.o for -shared.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/gnu-user.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
index c5b1afe53..1dc6add62 100644
--- a/gcc/config/loongarch/gnu-user.h
+++ b/gcc/config/loongarch/gnu-user.h
@@ -49,7 +49,7 @@ along with GCC; see the file COPYING3. If not see
/* Similar to standard Linux, but adding -ffast-math support. */
#undef GNU_USER_TARGET_MATHFILE_SPEC
#define GNU_USER_TARGET_MATHFILE_SPEC \
- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
+ "%{Ofast|ffast-math|funsafe-math-optimizations:%{!shared:crtfastmath.o%s}}"
#undef LIB_SPEC
#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC
--
2.33.0

View File

@ -0,0 +1,71 @@
From 0369836718ffb25ac64c135e748f409302068a56 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Mon, 28 Aug 2023 11:30:21 +0800
Subject: [PATCH 052/124] LoongArch: Enable '-free' starting at -O2.
gcc/ChangeLog:
* common/config/loongarch/loongarch-common.cc:
Enable '-free' on O2 and above.
* doc/invoke.texi: Modify the description information
of the '-free' compilation option and add the LoongArch
description.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/sign-extend.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
.../config/loongarch/loongarch-common.cc | 1 +
.../gcc.target/loongarch/sign-extend.c | 25 +++++++++++++++++++
2 files changed, 26 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend.c
diff --git a/gcc/common/config/loongarch/loongarch-common.cc b/gcc/common/config/loongarch/loongarch-common.cc
index f8b4660fa..309fcb280 100644
--- a/gcc/common/config/loongarch/loongarch-common.cc
+++ b/gcc/common/config/loongarch/loongarch-common.cc
@@ -35,6 +35,7 @@ static const struct default_options loongarch_option_optimization_table[] =
{
{ OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
{ OPT_LEVELS_NONE, 0, NULL, 0 }
};
diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend.c b/gcc/testsuite/gcc.target/loongarch/sign-extend.c
new file mode 100644
index 000000000..3f339d06b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/sign-extend.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2" } */
+/* { dg-final { scan-assembler-times "slli.w" 1 } } */
+
+extern int PL_savestack_ix;
+extern int PL_regsize;
+extern int PL_savestack_max;
+void Perl_savestack_grow_cnt (int need);
+extern void Perl_croak (char *);
+
+int
+S_regcppush(int parenfloor)
+{
+ int retval = PL_savestack_ix;
+ int paren_elems_to_push = (PL_regsize - parenfloor) * 4;
+ int p;
+
+ if (paren_elems_to_push < 0)
+ Perl_croak ("panic: paren_elems_to_push < 0");
+
+ if (PL_savestack_ix + (paren_elems_to_push + 6) > PL_savestack_max)
+ Perl_savestack_grow_cnt (paren_elems_to_push + 6);
+
+ return retval;
+}
--
2.33.0

View File

@ -0,0 +1,33 @@
From a9f72e237d5c176e4ef8ba03a8b4ee5c5daa25fb Mon Sep 17 00:00:00 2001
From: Guo Jie <guojie@loongson.cn>
Date: Fri, 8 Sep 2023 10:00:21 +0800
Subject: [PATCH 071/124] LoongArch: Enable -fsched-pressure by default at -O1
and higher.
gcc/ChangeLog:
* common/config/loongarch/loongarch-common.cc:
(default_options loongarch_option_optimization_table):
Default to -fsched-pressure.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/common/config/loongarch/loongarch-common.cc | 1 +
1 file changed, 1 insertion(+)
diff --git a/gcc/common/config/loongarch/loongarch-common.cc b/gcc/common/config/loongarch/loongarch-common.cc
index 309fcb280..c8bc5718d 100644
--- a/gcc/common/config/loongarch/loongarch-common.cc
+++ b/gcc/common/config/loongarch/loongarch-common.cc
@@ -36,6 +36,7 @@ static const struct default_options loongarch_option_optimization_table[] =
{ OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 },
{ OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
+ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
{ OPT_LEVELS_NONE, 0, NULL, 0 }
};
--
2.33.0

View File

@ -0,0 +1,309 @@
From e86c9ece7ae922fe80017ba2ffe22f6267531682 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sun, 23 Apr 2023 20:52:22 +0800
Subject: [PATCH 045/124] LoongArch: Enable shrink wrapping
This commit implements the target macros for shrink wrapping of function
prologues/epilogues shrink wrapping on LoongArch.
Bootstrapped and regtested on loongarch64-linux-gnu. I don't have an
access to SPEC CPU so I hope the reviewer can perform a benchmark to see
if there is real benefit.
gcc/ChangeLog:
* config/loongarch/loongarch.h (struct machine_function): Add
reg_is_wrapped_separately array for register wrapping
information.
* config/loongarch/loongarch.cc
(loongarch_get_separate_components): New function.
(loongarch_components_for_bb): Likewise.
(loongarch_disqualify_components): Likewise.
(loongarch_process_components): Likewise.
(loongarch_emit_prologue_components): Likewise.
(loongarch_emit_epilogue_components): Likewise.
(loongarch_set_handled_components): Likewise.
(TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS): Define.
(TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB): Likewise.
(TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS): Likewise.
(TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS): Likewise.
(TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS): Likewise.
(TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS): Likewise.
(loongarch_for_each_saved_reg): Skip registers that are wrapped
separately.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/shrink-wrap.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.cc | 179 +++++++++++++++++-
gcc/config/loongarch/loongarch.h | 2 +
.../gcc.target/loongarch/shrink-wrap.c | 19 ++
3 files changed, 197 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index d3c6f22ad..4c0f393b6 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3. If not see
#include "builtins.h"
#include "rtl-iter.h"
#include "opts.h"
+#include "function-abi.h"
/* This file should be included last. */
#include "target-def.h"
@@ -1014,19 +1015,23 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
{
- loongarch_save_restore_reg (word_mode, regno, offset, fn);
+ if (!cfun->machine->reg_is_wrapped_separately[regno])
+ loongarch_save_restore_reg (word_mode, regno, offset, fn);
+
offset -= UNITS_PER_WORD;
}
/* This loop must iterate over the same space as its companion in
loongarch_compute_frame_info. */
offset = cfun->machine->frame.fp_sp_offset - sp_offset;
+ machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
+
for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
{
- machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
+ if (!cfun->machine->reg_is_wrapped_separately[regno])
+ loongarch_save_restore_reg (word_mode, regno, offset, fn);
- loongarch_save_restore_reg (mode, regno, offset, fn);
offset -= GET_MODE_SIZE (mode);
}
}
@@ -6630,6 +6635,151 @@ loongarch_asan_shadow_offset (void)
return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
}
+static sbitmap
+loongarch_get_separate_components (void)
+{
+ HOST_WIDE_INT offset;
+ sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
+ bitmap_clear (components);
+ offset = cfun->machine->frame.gp_sp_offset;
+
+ /* The stack should be aligned to 16-bytes boundary, so we can make the use
+ of ldptr instructions. */
+ gcc_assert (offset % UNITS_PER_WORD == 0);
+
+ for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+ if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
+ {
+ /* We can wrap general registers saved at [sp, sp + 32768) using the
+ ldptr/stptr instructions. For large offsets a pseudo register
+ might be needed which cannot be created during the shrink
+ wrapping pass.
+
+ TODO: This may need a revise when we add LA32 as ldptr.w is not
+ guaranteed available by the manual. */
+ if (offset < 32768)
+ bitmap_set_bit (components, regno);
+
+ offset -= UNITS_PER_WORD;
+ }
+
+ offset = cfun->machine->frame.fp_sp_offset;
+ for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+ if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
+ {
+ /* We can only wrap FP registers with imm12 offsets. For large
+ offsets a pseudo register might be needed which cannot be
+ created during the shrink wrapping pass. */
+ if (IMM12_OPERAND (offset))
+ bitmap_set_bit (components, regno);
+
+ offset -= UNITS_PER_FPREG;
+ }
+
+ /* Don't mess with the hard frame pointer. */
+ if (frame_pointer_needed)
+ bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
+
+ bitmap_clear_bit (components, RETURN_ADDR_REGNUM);
+
+ return components;
+}
+
+static sbitmap
+loongarch_components_for_bb (basic_block bb)
+{
+ /* Registers are used in a bb if they are in the IN, GEN, or KILL sets. */
+ auto_bitmap used;
+ bitmap_copy (used, DF_LIVE_IN (bb));
+ bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->gen);
+ bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->kill);
+
+ sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
+ bitmap_clear (components);
+
+ function_abi_aggregator callee_abis;
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ if (CALL_P (insn))
+ callee_abis.note_callee_abi (insn_callee_abi (insn));
+
+ HARD_REG_SET extra_caller_saves =
+ callee_abis.caller_save_regs (*crtl->abi);
+
+ for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+ if (!fixed_regs[regno]
+ && !crtl->abi->clobbers_full_reg_p (regno)
+ && (TEST_HARD_REG_BIT (extra_caller_saves, regno) ||
+ bitmap_bit_p (used, regno)))
+ bitmap_set_bit (components, regno);
+
+ for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+ if (!fixed_regs[regno]
+ && !crtl->abi->clobbers_full_reg_p (regno)
+ && (TEST_HARD_REG_BIT (extra_caller_saves, regno) ||
+ bitmap_bit_p (used, regno)))
+ bitmap_set_bit (components, regno);
+
+ return components;
+}
+
+static void
+loongarch_disqualify_components (sbitmap, edge, sbitmap, bool)
+{
+ /* Do nothing. */
+}
+
+static void
+loongarch_process_components (sbitmap components, loongarch_save_restore_fn fn)
+{
+ HOST_WIDE_INT offset = cfun->machine->frame.gp_sp_offset;
+
+ for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+ if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
+ {
+ if (bitmap_bit_p (components, regno))
+ loongarch_save_restore_reg (word_mode, regno, offset, fn);
+
+ offset -= UNITS_PER_WORD;
+ }
+
+ offset = cfun->machine->frame.fp_sp_offset;
+ machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
+
+ for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+ if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
+ {
+ if (bitmap_bit_p (components, regno))
+ loongarch_save_restore_reg (mode, regno, offset, fn);
+
+ offset -= UNITS_PER_FPREG;
+ }
+}
+
+static void
+loongarch_emit_prologue_components (sbitmap components)
+{
+ loongarch_process_components (components, loongarch_save_reg);
+}
+
+static void
+loongarch_emit_epilogue_components (sbitmap components)
+{
+ loongarch_process_components (components, loongarch_restore_reg);
+}
+
+static void
+loongarch_set_handled_components (sbitmap components)
+{
+ for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
+ if (bitmap_bit_p (components, regno))
+ cfun->machine->reg_is_wrapped_separately[regno] = true;
+
+ for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
+ if (bitmap_bit_p (components, regno))
+ cfun->machine->reg_is_wrapped_separately[regno] = true;
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -6827,6 +6977,29 @@ loongarch_asan_shadow_offset (void)
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET loongarch_asan_shadow_offset
+#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
+#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
+ loongarch_get_separate_components
+
+#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
+#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB loongarch_components_for_bb
+
+#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
+#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
+ loongarch_disqualify_components
+
+#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
+ loongarch_emit_prologue_components
+
+#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
+ loongarch_emit_epilogue_components
+
+#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
+#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
+ loongarch_set_handled_components
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-loongarch.h"
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index af24bfa01..44ebadfaa 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -1147,6 +1147,8 @@ struct GTY (()) machine_function
/* The current frame information, calculated by loongarch_compute_frame_info.
*/
struct loongarch_frame_info frame;
+
+ bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];
};
#endif
diff --git a/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c b/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
new file mode 100644
index 000000000..1431536c5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fshrink-wrap" } */
+
+/* We should not save anything before checking the value of x. */
+/* { dg-final { scan-assembler-not "st(ptr)?\\\.\[dw\].*b(eq|ne)z" } } */
+
+int
+foo (int x)
+{
+ __asm__ ("nop" :);
+ if (x)
+ {
+ __asm__ ("" ::: "s0", "s1");
+ return x;
+ }
+
+ __asm__ ("" ::: "s2", "s3");
+ return 0;
+}
--
2.33.0

View File

@ -0,0 +1,43 @@
From 3db61acfbaa773568fad2bc31d950c6d9b3729b0 Mon Sep 17 00:00:00 2001
From: Peng Fan <fanpeng@loongson.cn>
Date: Wed, 19 Apr 2023 16:23:42 +0800
Subject: [PATCH 044/124] LoongArch: Fix MUSL_DYNAMIC_LINKER
The system based on musl has no '/lib64', so change it.
https://wiki.musl-libc.org/guidelines-for-distributions.html,
"Multilib/multi-arch" section of this introduces it.
gcc/
* config/loongarch/gnu-user.h (MUSL_DYNAMIC_LINKER): Redefine.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Suggested-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/gnu-user.h | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
index 1dc6add62..44e4f2575 100644
--- a/gcc/config/loongarch/gnu-user.h
+++ b/gcc/config/loongarch/gnu-user.h
@@ -33,9 +33,14 @@ along with GCC; see the file COPYING3. If not see
#define GLIBC_DYNAMIC_LINKER \
"/lib" ABI_GRLEN_SPEC "/ld-linux-loongarch-" ABI_SPEC ".so.1"
+#define MUSL_ABI_SPEC \
+ "%{mabi=lp64d:-lp64d}" \
+ "%{mabi=lp64f:-lp64f}" \
+ "%{mabi=lp64s:-lp64s}"
+
#undef MUSL_DYNAMIC_LINKER
#define MUSL_DYNAMIC_LINKER \
- "/lib" ABI_GRLEN_SPEC "/ld-musl-loongarch-" ABI_SPEC ".so.1"
+ "/lib/ld-musl-loongarch" ABI_GRLEN_SPEC MUSL_ABI_SPEC ".so.1"
#undef GNU_USER_TARGET_LINK_SPEC
#define GNU_USER_TARGET_LINK_SPEC \
--
2.33.0

View File

@ -0,0 +1,43 @@
From 7c8fc6b414dc1718e71e0d05c7a78498e06eb499 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Thu, 29 Jun 2023 19:30:59 +0800
Subject: [PATCH 053/124] LoongArch: Fix bug in loongarch_emit_stack_tie
[PR110484].
Which may result in implicit references to $fp when frame_pointer_needed is false,
causing regs_ever_live[$fp] to be true when $fp is not explicitly used,
resulting in $fp being used as the target replacement register in the rnreg pass.
The bug originates from SPEC2017 541.leela_r(-flto).
gcc/ChangeLog:
PR target/110484
* config/loongarch/loongarch.cc (loongarch_emit_stack_tie): Use the
frame_pointer_needed to determine whether to use the $fp register.
Co-authored-by: Guo Jie <guojie@loongson.cn>
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.cc | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index caacfa8a3..7b48e3216 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -1109,7 +1109,9 @@ loongarch_first_stack_step (struct loongarch_frame_info *frame)
static void
loongarch_emit_stack_tie (void)
{
- emit_insn (gen_stack_tie (Pmode, stack_pointer_rtx, hard_frame_pointer_rtx));
+ emit_insn (gen_stack_tie (Pmode, stack_pointer_rtx,
+ frame_pointer_needed ? hard_frame_pointer_rtx
+ : stack_pointer_rtx));
}
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
--
2.33.0

View File

@ -0,0 +1,123 @@
From df1df2e7b7e27bd9fba77f572d74d833aff4a202 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Mon, 11 Sep 2023 16:20:29 +0800
Subject: [PATCH 122/124] LoongArch: Fix bug of '<optab>di3_fake'.
PR target/111334
gcc/ChangeLog:
* config/loongarch/loongarch.md: Fix bug of '<optab>di3_fake'.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/pr111334.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.md | 20 ++++++----
gcc/testsuite/gcc.target/loongarch/pr111334.c | 39 +++++++++++++++++++
2 files changed, 52 insertions(+), 7 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/pr111334.c
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 264cd325c..7746116e6 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -72,6 +72,9 @@
UNSPEC_LUI_H_HI12
UNSPEC_TLS_LOW
+ ;; Fake div.w[u] mod.w[u]
+ UNSPEC_FAKE_ANY_DIV
+
UNSPEC_SIBCALL_VALUE_MULTIPLE_INTERNAL_1
UNSPEC_CALL_VALUE_MULTIPLE_INTERNAL_1
])
@@ -900,7 +903,7 @@
(match_operand:GPR 2 "register_operand")))]
""
{
- if (GET_MODE (operands[0]) == SImode)
+ if (GET_MODE (operands[0]) == SImode && TARGET_64BIT)
{
rtx reg1 = gen_reg_rtx (DImode);
rtx reg2 = gen_reg_rtx (DImode);
@@ -920,9 +923,9 @@
})
(define_insn "*<optab><mode>3"
- [(set (match_operand:GPR 0 "register_operand" "=r,&r,&r")
- (any_div:GPR (match_operand:GPR 1 "register_operand" "r,r,0")
- (match_operand:GPR 2 "register_operand" "r,r,r")))]
+ [(set (match_operand:X 0 "register_operand" "=r,&r,&r")
+ (any_div:X (match_operand:X 1 "register_operand" "r,r,0")
+ (match_operand:X 2 "register_operand" "r,r,r")))]
""
{
return loongarch_output_division ("<insn>.<d><u>\t%0,%1,%2", operands);
@@ -938,9 +941,12 @@
(define_insn "<optab>di3_fake"
[(set (match_operand:DI 0 "register_operand" "=r,&r,&r")
(sign_extend:DI
- (any_div:SI (match_operand:DI 1 "register_operand" "r,r,0")
- (match_operand:DI 2 "register_operand" "r,r,r"))))]
- ""
+ (unspec:SI
+ [(subreg:SI
+ (any_div:DI (match_operand:DI 1 "register_operand" "r,r,0")
+ (match_operand:DI 2 "register_operand" "r,r,r")) 0)]
+ UNSPEC_FAKE_ANY_DIV)))]
+ "TARGET_64BIT"
{
return loongarch_output_division ("<insn>.w<u>\t%0,%1,%2", operands);
}
diff --git a/gcc/testsuite/gcc.target/loongarch/pr111334.c b/gcc/testsuite/gcc.target/loongarch/pr111334.c
new file mode 100644
index 000000000..47366afcb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr111334.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned
+util_next_power_of_two (unsigned x)
+{
+ return (1 << __builtin_clz (x - 1));
+}
+
+extern int create_vec_from_array (void);
+
+struct ac_shader_args {
+ struct {
+ unsigned char offset;
+ unsigned char size;
+ } args[384];
+};
+
+struct isel_context {
+ const struct ac_shader_args* args;
+ int arg_temps[384];
+};
+
+
+void
+add_startpgm (struct isel_context* ctx, unsigned short arg_count)
+{
+
+ for (unsigned i = 0, arg = 0; i < arg_count; i++)
+ {
+ unsigned size = ctx->args->args[i].size;
+ unsigned reg = ctx->args->args[i].offset;
+
+ if (reg % ( 4 < util_next_power_of_two (size)
+ ? 4 : util_next_power_of_two (size)))
+ ctx->arg_temps[i] = create_vec_from_array ();
+ }
+}
+
--
2.33.0

View File

@ -0,0 +1,69 @@
From a70fe51d9813d490a89cbc8da1ae4b040bf8b37e Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Wed, 7 Sep 2022 11:25:45 +0800
Subject: [PATCH 017/124] LoongArch: Fix pr106828 by define hook
TARGET_ASAN_SHADOW_OFFSET in loongarch backend [PR106828].
gcc/ChangeLog:
PR target/106828
* config/loongarch/loongarch.cc (loongarch_asan_shadow_offset): New.
(TARGET_ASAN_SHADOW_OFFSET): New.
gcc/testsuite/ChangeLog:
PR target/106828
* g++.target/loongarch/pr106828.C: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.cc | 13 +++++++++++++
gcc/testsuite/g++.target/loongarch/pr106828.C | 4 ++++
2 files changed, 17 insertions(+)
create mode 100644 gcc/testsuite/g++.target/loongarch/pr106828.C
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index c9187bf81..98c0e26cd 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -6466,6 +6466,16 @@ loongarch_use_anchors_for_symbol_p (const_rtx symbol)
return default_use_anchors_for_symbol_p (symbol);
}
+/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
+
+static unsigned HOST_WIDE_INT
+loongarch_asan_shadow_offset (void)
+{
+ /* We only have libsanitizer support for LOONGARCH64 at present.
+ This value is taken from the file libsanitizer/asan/asan_mappint.h. */
+ return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -6660,6 +6670,9 @@ loongarch_use_anchors_for_symbol_p (const_rtx symbol)
#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
#define TARGET_USE_ANCHORS_FOR_SYMBOL_P loongarch_use_anchors_for_symbol_p
+#undef TARGET_ASAN_SHADOW_OFFSET
+#define TARGET_ASAN_SHADOW_OFFSET loongarch_asan_shadow_offset
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-loongarch.h"
diff --git a/gcc/testsuite/g++.target/loongarch/pr106828.C b/gcc/testsuite/g++.target/loongarch/pr106828.C
new file mode 100644
index 000000000..190c1db71
--- /dev/null
+++ b/gcc/testsuite/g++.target/loongarch/pr106828.C
@@ -0,0 +1,4 @@
+/* { dg-do-preprocess } */
+/* { dg-options "-mabi=lp64d -fsanitize=address" } */
+
+/* Tests whether the compiler supports compile option '-fsanitize=address'. */
--
2.33.0

View File

@ -0,0 +1,31 @@
From 8e5c9f349877af07dde4804974d47625c1292956 Mon Sep 17 00:00:00 2001
From: Yang Yujie <yangyujie@loongson.cn>
Date: Wed, 6 Sep 2023 17:57:47 +0800
Subject: [PATCH 070/124] LoongArch: Fix unintentional bash-ism in r14-3665.
gcc/ChangeLog:
* config.gcc: remove non-POSIX syntax "<<<".
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config.gcc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 19f584344..57e724080 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -5263,7 +5263,7 @@ case "${target}" in
if test x${parse_state} = x"abi-base"; then
# Base ABI type
case ${component} in
- lp64d | lp64f | lp64s) elem_tmp="ABI_BASE_$(tr a-z A-Z <<< ${component}),";;
+ lp64d | lp64f | lp64s) elem_tmp="ABI_BASE_$(echo ${component} | tr a-z A-Z),";;
*)
echo "Unknown base ABI \"${component}\" in --with-multilib-list." 1>&2
exit 1
--
2.33.0

View File

@ -0,0 +1,34 @@
From 8de6f5e1aad2a1ff85ff3a4b732055d625c61139 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Tue, 5 Sep 2023 20:02:51 +0800
Subject: [PATCH 067/124] LoongArch: Fix unintentionally breakage in r14-3665
Fix a build failure with no system assembler or system old assembler.
gcc/ChangeLog:
* config/loongarch/loongarch-opts.h (HAVE_AS_EXPLICIT_RELOCS):
Define to 0 if not defined yet.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch-opts.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
index e3f9b6f99..0d148e43b 100644
--- a/gcc/config/loongarch/loongarch-opts.h
+++ b/gcc/config/loongarch/loongarch-opts.h
@@ -93,4 +93,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
while -m[no]-memcpy imposes a global constraint. */
#define TARGET_DO_OPTIMIZE_BLOCK_MOVE_P loongarch_do_optimize_block_move_p()
+#ifndef HAVE_AS_EXPLICIT_RELOCS
+#define HAVE_AS_EXPLICIT_RELOCS 0
+#endif
+
#endif /* LOONGARCH_OPTS_H */
--
2.33.0

View File

@ -0,0 +1,33 @@
From 78896e68f50164af7827e8da01a7220764d1e296 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sat, 9 Sep 2023 16:18:06 +0800
Subject: [PATCH 075/124] LoongArch: Fix up memcpy-vec-3.c test case
The generic code will split 16-byte copy into two 8-byte copies, so the
vector code wouldn't be used even if -mno-strict-align. This
contradicted with the purpose of this test case.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/memcpy-vec-3.c: Increase the amount of
copied bytes to 32.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c b/gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c
index 233ed2150..db2ea510b 100644
--- a/gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c
+++ b/gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c
@@ -3,4 +3,4 @@
/* { dg-final { scan-assembler-not "vst" } } */
extern char a[], b[];
-void test() { __builtin_memcpy(a, b, 16); }
+void test() { __builtin_memcpy(a, b, 32); }
--
2.33.0

View File

@ -0,0 +1,43 @@
From 80ed9ab39d9b1b08ad9d054f16d65b2a249a89e5 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Wed, 12 Oct 2022 11:02:11 +0800
Subject: [PATCH 022/124] LoongArch: Fixed a bug in the loongarch architecture
of libitm package.
Add a soft floating point condition to the register recovery part of the code.
libitm/ChangeLog:
* config/loongarch/sjlj.S: Add a soft floating point condition to the
register recovery part of the code.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
libitm/config/loongarch/sjlj.S | 3 +++
1 file changed, 3 insertions(+)
diff --git a/libitm/config/loongarch/sjlj.S b/libitm/config/loongarch/sjlj.S
index a5f9fadde..f896e400e 100644
--- a/libitm/config/loongarch/sjlj.S
+++ b/libitm/config/loongarch/sjlj.S
@@ -104,6 +104,8 @@ GTM_longjmp:
GPR_L $s7, $r5, 10*SZ_GPR
GPR_L $s8, $r5, 11*SZ_GPR
+#if !defined(__loongarch_soft_float)
+ /* Callee-saved scratch FPRs (f24-f31) */
FPR_L $f24, $r5, 12*SZ_GPR + 0*SZ_FPR
FPR_L $f25, $r5, 12*SZ_GPR + 1*SZ_FPR
FPR_L $f26, $r5, 12*SZ_GPR + 2*SZ_FPR
@@ -112,6 +114,7 @@ GTM_longjmp:
FPR_L $f29, $r5, 12*SZ_GPR + 5*SZ_FPR
FPR_L $f30, $r5, 12*SZ_GPR + 6*SZ_FPR
FPR_L $f31, $r5, 12*SZ_GPR + 7*SZ_FPR
+#endif
GPR_L $r7, $r5, 2*SZ_GPR
GPR_L $fp, $r5, 0*SZ_GPR
--
2.33.0

View File

@ -0,0 +1,182 @@
From 49a63dbaf3b4296f0b1f8a0e11790cc3455aeec7 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Wed, 18 Jan 2023 11:06:56 +0800
Subject: [PATCH 034/124] LoongArch: Fixed a compilation failure with '%c' in
inline assembly [PR107731].
Co-authored-by: Yang Yujie <yangyujie@loongson.cn>
PR target/107731
gcc/ChangeLog:
* config/loongarch/loongarch.cc (loongarch_classify_address):
Add precessint for CONST_INT.
(loongarch_print_operand_reloc): Operand modifier 'c' is supported.
(loongarch_print_operand): Increase the processing of '%c'.
* doc/extend.texi: Adds documents for LoongArch operand modifiers.
And port the public operand modifiers information to this document.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/tst-asm-const.c: Moved to...
* gcc.target/loongarch/pr107731.c: ...here.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.cc | 14 +++++
gcc/doc/extend.texi | 51 +++++++++++++++++--
.../loongarch/{tst-asm-const.c => pr107731.c} | 6 +--
3 files changed, 64 insertions(+), 7 deletions(-)
rename gcc/testsuite/gcc.target/loongarch/{tst-asm-const.c => pr107731.c} (78%)
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index e59edc4cd..1a4686f03 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2074,6 +2074,11 @@ loongarch_classify_address (struct loongarch_address_info *info, rtx x,
return (loongarch_valid_base_register_p (info->reg, mode, strict_p)
&& loongarch_valid_lo_sum_p (info->symbol_type, mode,
info->offset));
+ case CONST_INT:
+ /* Small-integer addresses don't occur very often, but they
+ are legitimate if $r0 is a valid base register. */
+ info->type = ADDRESS_CONST_INT;
+ return IMM12_OPERAND (INTVAL (x));
default:
return false;
@@ -4932,6 +4937,7 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
'A' Print a _DB suffix if the memory model requires a release.
'b' Print the address of a memory operand, without offset.
+ 'c' Print an integer.
'C' Print the integer branch condition for comparison OP.
'd' Print CONST_INT OP in decimal.
'F' Print the FPU branch condition for comparison OP.
@@ -4978,6 +4984,14 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
fputs ("_db", file);
break;
+ case 'c':
+ if (CONST_INT_P (op))
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op));
+ else
+ output_operand_lossage ("unsupported operand for code '%c'", letter);
+
+ break;
+
case 'C':
loongarch_print_int_branch_condition (file, code, letter);
break;
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index da2840c23..3c101ca89 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -10414,8 +10414,10 @@ ensures that modifying @var{a} does not affect the address referenced by
is undefined if @var{a} is modified before using @var{b}.
@code{asm} supports operand modifiers on operands (for example @samp{%k2}
-instead of simply @samp{%2}). Typically these qualifiers are hardware
-dependent. The list of supported modifiers for x86 is found at
+instead of simply @samp{%2}). @ref{GenericOperandmodifiers,
+Generic Operand modifiers} lists the modifiers that are available
+on all targets. Other modifiers are hardware dependent.
+For example, the list of supported modifiers for x86 is found at
@ref{x86Operandmodifiers,x86 Operand modifiers}.
If the C code that follows the @code{asm} makes no use of any of the output
@@ -10683,8 +10685,10 @@ optimizers may discard the @code{asm} statement as unneeded
(see @ref{Volatile}).
@code{asm} supports operand modifiers on operands (for example @samp{%k2}
-instead of simply @samp{%2}). Typically these qualifiers are hardware
-dependent. The list of supported modifiers for x86 is found at
+instead of simply @samp{%2}). @ref{GenericOperandmodifiers,
+Generic Operand modifiers} lists the modifiers that are available
+on all targets. Other modifiers are hardware dependent.
+For example, the list of supported modifiers for x86 is found at
@ref{x86Operandmodifiers,x86 Operand modifiers}.
In this example using the fictitious @code{combine} instruction, the
@@ -11036,6 +11040,30 @@ lab:
@}
@end example
+@anchor{GenericOperandmodifiers}
+@subsubsection Generic Operand Modifiers
+@noindent
+The following table shows the modifiers supported by all targets and their effects:
+
+@multitable {Modifier} {Description} {Example}
+@headitem Modifier @tab Description @tab Example
+@item @code{c}
+@tab Require a constant operand and print the constant expression with no punctuation.
+@tab @code{%c0}
+@item @code{n}
+@tab Like @samp{%c} except that the value of the constant is negated before printing.
+@tab @code{%n0}
+@item @code{a}
+@tab Substitute a memory reference, with the actual operand treated as the address.
+This may be useful when outputting a ``load address'' instruction, because
+often the assembler syntax for such an instruction requires you to write the
+operand as if it were a memory reference.
+@tab @code{%a0}
+@item @code{l}
+@tab Print the label name with no punctuation.
+@tab @code{%l0}
+@end multitable
+
@anchor{x86Operandmodifiers}
@subsubsection x86 Operand Modifiers
@@ -11386,6 +11414,21 @@ constant. Used to select the specified bit position.
@item @code{x} @tab Equivialent to @code{X}, but only for pointers.
@end multitable
+@anchor{loongarchOperandmodifiers}
+@subsubsection LoongArch Operand Modifiers
+
+The list below describes the supported modifiers and their effects for LoongArch.
+
+@multitable @columnfractions .10 .90
+@headitem Modifier @tab Description
+@item @code{d} @tab Same as @code{c}.
+@item @code{i} @tab Print the character ''@code{i}'' if the operand is not a register.
+@item @code{m} @tab Same as @code{c}, but the printed value is @code{operand - 1}.
+@item @code{X} @tab Print a constant integer operand in hexadecimal.
+@item @code{z} @tab Print the operand in its unmodified form, followed by a comma.
+@end multitable
+
+
@lowersections
@include md.texi
@raisesections
diff --git a/gcc/testsuite/gcc.target/loongarch/tst-asm-const.c b/gcc/testsuite/gcc.target/loongarch/pr107731.c
similarity index 78%
rename from gcc/testsuite/gcc.target/loongarch/tst-asm-const.c
rename to gcc/testsuite/gcc.target/loongarch/pr107731.c
index 2e04b99e3..80d84c48c 100644
--- a/gcc/testsuite/gcc.target/loongarch/tst-asm-const.c
+++ b/gcc/testsuite/gcc.target/loongarch/pr107731.c
@@ -1,13 +1,13 @@
-/* Test asm const. */
/* { dg-do compile } */
/* { dg-final { scan-assembler-times "foo:.*\\.long 1061109567.*\\.long 52" 1 } } */
+
int foo ()
{
__asm__ volatile (
"foo:"
"\n\t"
- ".long %a0\n\t"
- ".long %a1\n\t"
+ ".long %c0\n\t"
+ ".long %c1\n\t"
:
:"i"(0x3f3f3f3f), "i"(52)
:
--
2.33.0

View File

@ -0,0 +1,33 @@
From cbb5f181544e35b119fee4ed150bec24eee7179c Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Wed, 28 Sep 2022 16:35:06 +0800
Subject: [PATCH 020/124] LoongArch: Fixed a typo in the comment information of
the function loongarch_asan_shadow_offset.
gcc/ChangeLog:
* config/loongarch/loongarch.cc (loongarch_asan_shadow_offset):
Fixed typo in "asan_mapping.h".
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.cc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 98c0e26cd..e9ba3374e 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -6472,7 +6472,7 @@ static unsigned HOST_WIDE_INT
loongarch_asan_shadow_offset (void)
{
/* We only have libsanitizer support for LOONGARCH64 at present.
- This value is taken from the file libsanitizer/asan/asan_mappint.h. */
+ This value is taken from the file libsanitizer/asan/asan_mapping.h. */
return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
}
--
2.33.0

View File

@ -0,0 +1,196 @@
From 9311c0f56086e38fe5e9bf4bbfc2e37d0f18347c Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Fri, 3 Feb 2023 17:06:06 +0800
Subject: [PATCH 036/124] LoongArch: Generate bytepick.[wd] for suitable bit
operation pattern
We can use bytepick.[wd] for
a << (8 * x) | b >> (8 * (sizeof(a) - x))
while a and b are uint32_t or uint64_t. This is useful for some cases,
for example:
https://sourceware.org/pipermail/libc-alpha/2023-February/145203.html
gcc/ChangeLog:
* config/loongarch/loongarch.md (bytepick_w_ashift_amount):
New define_int_iterator.
(bytepick_d_ashift_amount): Likewise.
(bytepick_imm): New define_int_attr.
(bytepick_w_lshiftrt_amount): Likewise.
(bytepick_d_lshiftrt_amount): Likewise.
(bytepick_w_<bytepick_imm>): New define_insn template.
(bytepick_w_<bytepick_imm>_extend): Likewise.
(bytepick_d_<bytepick_imm>): Likewise.
(bytepick_w): Remove unused define_insn.
(bytepick_d): Likewise.
(UNSPEC_BYTEPICK_W): Remove unused unspec.
(UNSPEC_BYTEPICK_D): Likewise.
* config/loongarch/predicates.md (const_0_to_3_operand):
Remove unused define_predicate.
(const_0_to_7_operand): Likewise.
gcc/testsuite/ChangeLog:
* g++.target/loongarch/bytepick.C: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.md | 60 ++++++++++++++-----
gcc/config/loongarch/predicates.md | 8 ---
gcc/testsuite/g++.target/loongarch/bytepick.C | 32 ++++++++++
3 files changed, 77 insertions(+), 23 deletions(-)
create mode 100644 gcc/testsuite/g++.target/loongarch/bytepick.C
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index f61db66d5..833b94753 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -48,8 +48,6 @@
UNSPEC_EH_RETURN
;; Bit operation
- UNSPEC_BYTEPICK_W
- UNSPEC_BYTEPICK_D
UNSPEC_BITREV_4B
UNSPEC_BITREV_8B
@@ -544,6 +542,27 @@
(UNSPEC_FTINTRM "0")
(UNSPEC_FTINTRP "0")])
+;; Iterator and attributes for bytepick.d
+(define_int_iterator bytepick_w_ashift_amount [8 16 24])
+(define_int_attr bytepick_w_lshiftrt_amount [(8 "24")
+ (16 "16")
+ (24 "8")])
+(define_int_iterator bytepick_d_ashift_amount [8 16 24 32 40 48 56])
+(define_int_attr bytepick_d_lshiftrt_amount [(8 "56")
+ (16 "48")
+ (24 "40")
+ (32 "32")
+ (40 "24")
+ (48 "16")
+ (56 "8")])
+(define_int_attr bytepick_imm [(8 "1")
+ (16 "2")
+ (24 "3")
+ (32 "4")
+ (40 "5")
+ (48 "6")
+ (56 "7")])
+
;;
;; ....................
;;
@@ -3364,24 +3383,35 @@
[(set_attr "type" "unknown")
(set_attr "mode" "<MODE>")])
-(define_insn "bytepick_w"
+(define_insn "bytepick_w_<bytepick_imm>"
[(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(match_operand:SI 1 "register_operand" "r")
- (match_operand:SI 2 "register_operand" "r")
- (match_operand:SI 3 "const_0_to_3_operand" "n")]
- UNSPEC_BYTEPICK_W))]
+ (ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r")
+ (const_int <bytepick_w_lshiftrt_amount>))
+ (ashift (match_operand:SI 2 "register_operand" "r")
+ (const_int bytepick_w_ashift_amount))))]
""
- "bytepick.w\t%0,%1,%2,%z3"
+ "bytepick.w\t%0,%1,%2,<bytepick_imm>"
[(set_attr "mode" "SI")])
-(define_insn "bytepick_d"
+(define_insn "bytepick_w_<bytepick_imm>_extend"
[(set (match_operand:DI 0 "register_operand" "=r")
- (unspec:DI [(match_operand:DI 1 "register_operand" "r")
- (match_operand:DI 2 "register_operand" "r")
- (match_operand:DI 3 "const_0_to_7_operand" "n")]
- UNSPEC_BYTEPICK_D))]
- ""
- "bytepick.d\t%0,%1,%2,%z3"
+ (sign_extend:DI
+ (ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r")
+ (const_int <bytepick_w_lshiftrt_amount>))
+ (ashift (match_operand:SI 2 "register_operand" "r")
+ (const_int bytepick_w_ashift_amount)))))]
+ "TARGET_64BIT"
+ "bytepick.w\t%0,%1,%2,<bytepick_imm>"
+ [(set_attr "mode" "SI")])
+
+(define_insn "bytepick_d_<bytepick_imm>"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (ior:DI (lshiftrt (match_operand:DI 1 "register_operand" "r")
+ (const_int <bytepick_d_lshiftrt_amount>))
+ (ashift (match_operand:DI 2 "register_operand" "r")
+ (const_int bytepick_d_ashift_amount))))]
+ "TARGET_64BIT"
+ "bytepick.d\t%0,%1,%2,<bytepick_imm>"
[(set_attr "mode" "DI")])
(define_insn "bitrev_4b"
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
index 58c3dc226..3c32b2987 100644
--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
@@ -91,14 +91,6 @@
(ior (match_operand 0 "const_1_operand")
(match_operand 0 "register_operand")))
-(define_predicate "const_0_to_3_operand"
- (and (match_code "const_int")
- (match_test "IN_RANGE (INTVAL (op), 0, 3)")))
-
-(define_predicate "const_0_to_7_operand"
- (and (match_code "const_int")
- (match_test "IN_RANGE (INTVAL (op), 0, 7)")))
-
(define_predicate "lu52i_mask_operand"
(and (match_code "const_int")
(match_test "UINTVAL (op) == 0xfffffffffffff")))
diff --git a/gcc/testsuite/g++.target/loongarch/bytepick.C b/gcc/testsuite/g++.target/loongarch/bytepick.C
new file mode 100644
index 000000000..a39e2fa65
--- /dev/null
+++ b/gcc/testsuite/g++.target/loongarch/bytepick.C
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d" } */
+/* { dg-final { scan-assembler-times "bytepick.w\t\\\$r4,\\\$r5,\\\$r4" 3 } } */
+/* { dg-final { scan-assembler-times "bytepick.d\t\\\$r4,\\\$r5,\\\$r4" 7 } } */
+/* { dg-final { scan-assembler-not "slli.w" } } */
+
+template <class T, int offs>
+T
+merge (T a, T b)
+{
+ return a << offs | b >> (8 * sizeof (T) - offs);
+}
+
+using u32 = __UINT32_TYPE__;
+using u64 = __UINT64_TYPE__;
+using i64 = __INT64_TYPE__;
+
+template u32 merge<u32, 8> (u32, u32);
+template u32 merge<u32, 16> (u32, u32);
+template u32 merge<u32, 24> (u32, u32);
+
+template u64 merge<u64, 8> (u64, u64);
+template u64 merge<u64, 16> (u64, u64);
+template u64 merge<u64, 24> (u64, u64);
+template u64 merge<u64, 32> (u64, u64);
+template u64 merge<u64, 40> (u64, u64);
+template u64 merge<u64, 48> (u64, u64);
+template u64 merge<u64, 56> (u64, u64);
+
+/* we cannot use bytepick for the following cases */
+template i64 merge<i64, 8> (i64, i64);
+template u64 merge<u64, 42> (u64, u64);
--
2.33.0

View File

@ -0,0 +1,71 @@
From a96dee6ba3c916f9a4329b196a0c5a1652fe294f Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Thu, 18 Aug 2022 09:57:14 +0800
Subject: [PATCH 010/124] LoongArch: Get __tls_get_addr address through got
table when disable plt.
Fix bug, ICE with tls gd/ld var with -fno-plt.
gcc/ChangeLog:
* config/loongarch/loongarch.cc (loongarch_call_tls_get_addr):
Get __tls_get_addr address through got table when disable plt.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/tls-gd-noplt.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.cc | 14 ++++++++++++--
gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c | 12 ++++++++++++
2 files changed, 24 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 1b5af2c7d..76bf55ea4 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2448,8 +2448,18 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
gcc_unreachable ();
}
- insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol,
- const0_rtx));
+ if (flag_plt)
+ insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol,
+ const0_rtx));
+ else
+ {
+ rtx dest = gen_reg_rtx (Pmode);
+ rtx high = gen_reg_rtx (Pmode);
+ loongarch_emit_move (high, gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
+ emit_insn (gen_ld_from_got (Pmode, dest, high, loongarch_tls_symbol));
+ insn = emit_call_insn (gen_call_value_internal (v0, dest, const0_rtx));
+ }
+
RTL_CONST_CALL_P (insn) = 1;
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
insn = get_insns ();
diff --git a/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c b/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c
new file mode 100644
index 000000000..32a0acf9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-plt -mcmodel=normal" } */
+/* { dg-final { scan-assembler "pcalau12i\t.*%got_pc_hi20\\(__tls_get_addr\\)" } } */
+
+__attribute__ ((tls_model ("global-dynamic"))) __thread int a;
+
+void
+test (void)
+{
+ a = 10;
+}
+
--
2.33.0

View File

@ -0,0 +1,204 @@
From 12ab9eae9e8a5b83c778182f15c6216bcbc3dc36 Mon Sep 17 00:00:00 2001
From: chenxiaolong <chenxiaolong@loongson.cn>
Date: Fri, 1 Sep 2023 11:22:42 +0800
Subject: [PATCH 054/124] LoongArch: Implement 128-bit floating point functions
in gcc.
During implementation, float128_type_node is bound with the type "__float128"
so that the compiler can correctly identify the type of the function. The
"q" suffix is associated with the "f128" function, which makes GCC more
flexible to support different user input cases, implementing functions such
as __builtin_{huge_valq, infq, fabsq, copysignq, nanq, nansq}.
gcc/ChangeLog:
* config/loongarch/loongarch-builtins.cc (loongarch_init_builtins):
Associate the __float128 type to float128_type_node so that it can
be recognized by the compiler.
* config/loongarch/loongarch-c.cc (loongarch_cpu_cpp_builtins):
Add the flag "FLOAT128_TYPE" to gcc and associate a function
with the suffix "q" to "f128".
* doc/extend.texi:Added support for 128-bit floating-point functions on
the LoongArch architecture.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/math-float-128.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch-builtins.cc | 5 ++
gcc/config/loongarch/loongarch-c.cc | 11 +++
gcc/doc/extend.texi | 20 ++++-
.../gcc.target/loongarch/math-float-128.c | 81 +++++++++++++++++++
4 files changed, 114 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/math-float-128.c
diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
index 64fe11168..cb0ea1664 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -256,6 +256,11 @@ loongarch_init_builtins (void)
unsigned int i;
tree type;
+ /* Register the type float128_type_node as a built-in type and
+ give it an alias "__float128". */
+ (*lang_hooks.types.register_builtin_type) (float128_type_node,
+ "__float128");
+
/* Iterate through all of the bdesc arrays, initializing all of the
builtin functions. */
for (i = 0; i < ARRAY_SIZE (loongarch_builtins); i++)
diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc
index d6e3e19f0..f779a7355 100644
--- a/gcc/config/loongarch/loongarch-c.cc
+++ b/gcc/config/loongarch/loongarch-c.cc
@@ -99,6 +99,17 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile)
else
builtin_define ("__loongarch_frlen=0");
+ /* Add support for FLOAT128_TYPE on the LoongArch architecture. */
+ builtin_define ("__FLOAT128_TYPE__");
+
+ /* Map the old _Float128 'q' builtins into the new 'f128' builtins. */
+ builtin_define ("__builtin_fabsq=__builtin_fabsf128");
+ builtin_define ("__builtin_copysignq=__builtin_copysignf128");
+ builtin_define ("__builtin_nanq=__builtin_nanf128");
+ builtin_define ("__builtin_nansq=__builtin_nansf128");
+ builtin_define ("__builtin_infq=__builtin_inff128");
+ builtin_define ("__builtin_huge_valq=__builtin_huge_valf128");
+
/* Native Data Sizes. */
builtin_define_with_int_value ("_LOONGARCH_SZINT", INT_TYPE_SIZE);
builtin_define_with_int_value ("_LOONGARCH_SZLONG", LONG_TYPE_SIZE);
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 1d1bac255..bb19d0f27 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -1085,10 +1085,10 @@ types.
As an extension, GNU C and GNU C++ support additional floating
types, which are not supported by all targets.
@itemize @bullet
-@item @code{__float128} is available on i386, x86_64, IA-64, and
-hppa HP-UX, as well as on PowerPC GNU/Linux targets that enable
+@item @code{__float128} is available on i386, x86_64, IA-64, LoongArch
+and hppa HP-UX, as well as on PowerPC GNU/Linux targets that enable
the vector scalar (VSX) instruction set. @code{__float128} supports
-the 128-bit floating type. On i386, x86_64, PowerPC, and IA-64
+the 128-bit floating type. On i386, x86_64, PowerPC, LoongArch and IA-64,
other than HP-UX, @code{__float128} is an alias for @code{_Float128}.
On hppa and IA-64 HP-UX, @code{__float128} is an alias for @code{long
double}.
@@ -16257,6 +16257,20 @@ function you need to include @code{larchintrin.h}.
void __break (imm0_32767)
@end smallexample
+Additional built-in functions are available for LoongArch family
+processors to efficiently use 128-bit floating-point (__float128)
+values.
+
+The following are the basic built-in functions supported.
+@smallexample
+__float128 __builtin_fabsq (__float128);
+__float128 __builtin_copysignq (__float128, __float128);
+__float128 __builtin_infq (void);
+__float128 __builtin_huge_valq (void);
+__float128 __builtin_nanq (void);
+__float128 __builtin_nansq (void);
+@end smallexample
+
@node MIPS DSP Built-in Functions
@subsection MIPS DSP Built-in Functions
diff --git a/gcc/testsuite/gcc.target/loongarch/math-float-128.c b/gcc/testsuite/gcc.target/loongarch/math-float-128.c
new file mode 100644
index 000000000..387566a57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/math-float-128.c
@@ -0,0 +1,81 @@
+/* { dg-do compile } */
+/* { dg-options " -march=loongarch64 -O2 " } */
+/* { dg-final { scan-assembler-not "my_fabsq2:.*\\bl\t%plt\\(__builtin_fabsq\\).*my_fabsq2" } } */
+/* { dg-final { scan-assembler-not "my_copysignq2:.*\\bl\t%plt\\(__builtin_copysignq\\).*my_copysignq2" } } */
+/* { dg-final { scan-assembler-not "my_infq2:.*\\bl\t%plt\\(__builtin_infq\\).*my_infq2" } } */
+/* { dg-final { scan-assembler-not "my_huge_valq2:.*\\bl\t%plt\\(__builtin_huge_valq\\).*my_huge_valq2" } } */
+/* { dg-final { scan-assembler-not "my_nanq2:.*\\bl\t%plt\\(__builtin_nanq\\).*my_nanq2" } } */
+/* { dg-final { scan-assembler-not "my_nansq2:.*\\bl\t%plt\\(__builtin_nansq\\).*my_nansq2" } } */
+
+__float128
+my_fabsq1 (__float128 a)
+{
+ return __builtin_fabsq (a);
+}
+
+_Float128
+my_fabsq2 (_Float128 a)
+{
+ return __builtin_fabsq (a);
+}
+
+__float128
+my_copysignq1 (__float128 a, __float128 b)
+{
+ return __builtin_copysignq (a, b);
+}
+
+_Float128
+my_copysignq2 (_Float128 a, _Float128 b)
+{
+ return __builtin_copysignq (a, b);
+}
+
+__float128
+my_infq1 (void)
+{
+ return __builtin_infq ();
+}
+
+_Float128
+my_infq2 (void)
+{
+ return __builtin_infq ();
+}
+
+__float128
+my_huge_valq1 (void)
+{
+ return __builtin_huge_valq ();
+}
+
+_Float128
+my_huge_valq2 (void)
+{
+ return __builtin_huge_valq ();
+}
+
+__float128
+my_nanq1 (void)
+{
+ return __builtin_nanq ("");
+}
+
+_Float128
+my_nanq2 (void)
+{
+ return __builtin_nanq ("");
+}
+
+__float128
+my_nansq1 (void)
+{
+ return __builtin_nansq ("");
+}
+
+_Float128
+my_nansq2 (void)
+{
+ return __builtin_nansq ("");
+}
+
--
2.33.0

View File

@ -0,0 +1,83 @@
From 4075f299ca6a5d15fdb46f877cbe11b7166a19ff Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Wed, 29 Mar 2023 01:36:09 +0800
Subject: [PATCH 042/124] LoongArch: Improve GAR store for va_list
LoongArch backend used to save all GARs for a function with variable
arguments. But sometimes a function only accepts variable arguments for
a purpose like C++ function overloading. For example, POSIX defines
open() as:
int open(const char *path, int oflag, ...);
But only two forms are actually used:
int open(const char *pathname, int flags);
int open(const char *pathname, int flags, mode_t mode);
So it's obviously a waste to save all 8 GARs in open(). We can use the
cfun->va_list_gpr_size field set by the stdarg pass to only save the
GARs necessary to be saved.
If the va_list escapes (for example, in fprintf() we pass it to
vfprintf()), stdarg would set cfun->va_list_gpr_size to 255 so we
don't need a special case.
With this patch, only one GAR ($a2/$r6) is saved in open(). Ideally
even this stack store should be omitted too, but doing so is not trivial
and AFAIK there are no compilers (for any target) performing the "ideal"
optimization here, see https://godbolt.org/z/n1YqWq9c9.
Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk
(GCC 14 or now)?
gcc/ChangeLog:
* config/loongarch/loongarch.cc
(loongarch_setup_incoming_varargs): Don't save more GARs than
cfun->va_list_gpr_size / UNITS_PER_WORD.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/va_arg.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/testsuite/gcc.target/loongarch/va_arg.c | 24 +++++++++++++++++++++
1 file changed, 24 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/loongarch/va_arg.c
diff --git a/gcc/testsuite/gcc.target/loongarch/va_arg.c b/gcc/testsuite/gcc.target/loongarch/va_arg.c
new file mode 100644
index 000000000..980c96d0e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/va_arg.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/* Technically we shouldn't save any register for this function: it should be
+ compiled as if it accepts 3 named arguments. But AFAIK no compilers can
+ achieve this "perfect" optimization now, so just ensure we are using the
+ knowledge provided by stdarg pass and we won't save GARs impossible to be
+ accessed with __builtin_va_arg () when the va_list does not escape. */
+
+/* { dg-final { scan-assembler-not "st.*r7" } } */
+
+int
+test (int a0, ...)
+{
+ void *arg;
+ int a1, a2;
+
+ __builtin_va_start (arg, a0);
+ a1 = __builtin_va_arg (arg, int);
+ a2 = __builtin_va_arg (arg, int);
+ __builtin_va_end (arg);
+
+ return a0 + a1 + a2;
+}
--
2.33.0

View File

@ -0,0 +1,339 @@
From 33fff578e7df7aa7e236efc6c9c85c595918d86a Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Wed, 12 Apr 2023 11:45:48 +0000
Subject: [PATCH 043/124] LoongArch: Improve cpymemsi expansion [PR109465]
We'd been generating really bad block move sequences which is recently
complained by kernel developers who tried __builtin_memcpy. To improve
it:
1. Take the advantage of -mno-strict-align. When it is set, set mode
size to UNITS_PER_WORD regardless of the alignment.
2. Half the mode size when (block size) % (mode size) != 0, instead of
falling back to ld.bu/st.b at once.
3. Limit the length of block move sequence considering the number of
instructions, not the size of block. When -mstrict-align is set and
the block is not aligned, the old size limit for straight-line
implementation (64 bytes) was definitely too large (we don't have 64
registers anyway).
Change since v1: add a comment about the calculation of num_reg.
gcc/ChangeLog:
PR target/109465
* config/loongarch/loongarch-protos.h
(loongarch_expand_block_move): Add a parameter as alignment RTX.
* config/loongarch/loongarch.h:
(LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER): Remove.
(LARCH_MAX_MOVE_BYTES_STRAIGHT): Remove.
(LARCH_MAX_MOVE_OPS_PER_LOOP_ITER): Define.
(LARCH_MAX_MOVE_OPS_STRAIGHT): Define.
(MOVE_RATIO): Use LARCH_MAX_MOVE_OPS_PER_LOOP_ITER instead of
LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER.
* config/loongarch/loongarch.cc (loongarch_expand_block_move):
Take the alignment from the parameter, but set it to
UNITS_PER_WORD if !TARGET_STRICT_ALIGN. Limit the length of
straight-line implementation with LARCH_MAX_MOVE_OPS_STRAIGHT
instead of LARCH_MAX_MOVE_BYTES_STRAIGHT.
(loongarch_block_move_straight): When there are left-over bytes,
half the mode size instead of falling back to byte mode at once.
(loongarch_block_move_loop): Limit the length of loop body with
LARCH_MAX_MOVE_OPS_PER_LOOP_ITER instead of
LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER.
* config/loongarch/loongarch.md (cpymemsi): Pass the alignment
to loongarch_expand_block_move.
gcc/testsuite/ChangeLog:
PR target/109465
* gcc.target/loongarch/pr109465-1.c: New test.
* gcc.target/loongarch/pr109465-2.c: New test.
* gcc.target/loongarch/pr109465-3.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch-protos.h | 2 +-
gcc/config/loongarch/loongarch.cc | 95 +++++++++++--------
gcc/config/loongarch/loongarch.h | 10 +-
gcc/config/loongarch/loongarch.md | 3 +-
.../gcc.target/loongarch/pr109465-1.c | 9 ++
.../gcc.target/loongarch/pr109465-2.c | 9 ++
.../gcc.target/loongarch/pr109465-3.c | 12 +++
7 files changed, 91 insertions(+), 49 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/pr109465-1.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/pr109465-2.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/pr109465-3.c
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
index 0a9b47722..3ac3b5e19 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -95,7 +95,7 @@ extern void loongarch_expand_conditional_trap (rtx);
#endif
extern void loongarch_set_return_address (rtx, rtx);
extern bool loongarch_move_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int);
-extern bool loongarch_expand_block_move (rtx, rtx, rtx);
+extern bool loongarch_expand_block_move (rtx, rtx, rtx, rtx);
extern bool loongarch_do_optimize_block_move_p (void);
extern bool loongarch_expand_ext_as_unaligned_load (rtx, rtx, HOST_WIDE_INT,
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 233dddbac..d3c6f22ad 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4456,41 +4456,46 @@ loongarch_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
Assume that the areas do not overlap. */
static void
-loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length)
+loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length,
+ HOST_WIDE_INT delta)
{
- HOST_WIDE_INT offset, delta;
- unsigned HOST_WIDE_INT bits;
+ HOST_WIDE_INT offs, delta_cur;
int i;
machine_mode mode;
rtx *regs;
- bits = MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest)));
-
- mode = int_mode_for_size (bits, 0).require ();
- delta = bits / BITS_PER_UNIT;
+ /* Calculate how many registers we'll need for the block move.
+ We'll emit length / delta move operations with delta as the size
+ first. Then we may still have length % delta bytes not copied.
+ We handle these remaining bytes by move operations with smaller
+ (halfed) sizes. For example, if length = 21 and delta = 8, we'll
+ emit two ld.d/st.d pairs, one ld.w/st.w pair, and one ld.b/st.b
+ pair. For each load/store pair we use a dedicated register to keep
+ the pipeline as populated as possible. */
+ HOST_WIDE_INT num_reg = length / delta;
+ for (delta_cur = delta / 2; delta_cur != 0; delta_cur /= 2)
+ num_reg += !!(length & delta_cur);
/* Allocate a buffer for the temporary registers. */
- regs = XALLOCAVEC (rtx, length / delta);
+ regs = XALLOCAVEC (rtx, num_reg);
- /* Load as many BITS-sized chunks as possible. Use a normal load if
- the source has enough alignment, otherwise use left/right pairs. */
- for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+ for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2)
{
- regs[i] = gen_reg_rtx (mode);
- loongarch_emit_move (regs[i], adjust_address (src, mode, offset));
- }
+ mode = int_mode_for_size (delta_cur * BITS_PER_UNIT, 0).require ();
- for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
- loongarch_emit_move (adjust_address (dest, mode, offset), regs[i]);
+ for (; offs + delta_cur <= length; offs += delta_cur, i++)
+ {
+ regs[i] = gen_reg_rtx (mode);
+ loongarch_emit_move (regs[i], adjust_address (src, mode, offs));
+ }
+ }
- /* Mop up any left-over bytes. */
- if (offset < length)
+ for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2)
{
- src = adjust_address (src, BLKmode, offset);
- dest = adjust_address (dest, BLKmode, offset);
- move_by_pieces (dest, src, length - offset,
- MIN (MEM_ALIGN (src), MEM_ALIGN (dest)),
- (enum memop_ret) 0);
+ mode = int_mode_for_size (delta_cur * BITS_PER_UNIT, 0).require ();
+
+ for (; offs + delta_cur <= length; offs += delta_cur, i++)
+ loongarch_emit_move (adjust_address (dest, mode, offs), regs[i]);
}
}
@@ -4520,10 +4525,11 @@ loongarch_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
static void
loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
- HOST_WIDE_INT bytes_per_iter)
+ HOST_WIDE_INT align)
{
rtx_code_label *label;
rtx src_reg, dest_reg, final_src, test;
+ HOST_WIDE_INT bytes_per_iter = align * LARCH_MAX_MOVE_OPS_PER_LOOP_ITER;
HOST_WIDE_INT leftover;
leftover = length % bytes_per_iter;
@@ -4543,7 +4549,7 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
emit_label (label);
/* Emit the loop body. */
- loongarch_block_move_straight (dest, src, bytes_per_iter);
+ loongarch_block_move_straight (dest, src, bytes_per_iter, align);
/* Move on to the next block. */
loongarch_emit_move (src_reg,
@@ -4560,7 +4566,7 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
/* Mop up any left-over bytes. */
if (leftover)
- loongarch_block_move_straight (dest, src, leftover);
+ loongarch_block_move_straight (dest, src, leftover, align);
else
/* Temporary fix for PR79150. */
emit_insn (gen_nop ());
@@ -4570,25 +4576,32 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
memory reference SRC to memory reference DEST. */
bool
-loongarch_expand_block_move (rtx dest, rtx src, rtx length)
+loongarch_expand_block_move (rtx dest, rtx src, rtx r_length, rtx r_align)
{
- int max_move_bytes = LARCH_MAX_MOVE_BYTES_STRAIGHT;
+ if (!CONST_INT_P (r_length))
+ return false;
+
+ HOST_WIDE_INT length = INTVAL (r_length);
+ if (length > loongarch_max_inline_memcpy_size)
+ return false;
+
+ HOST_WIDE_INT align = INTVAL (r_align);
+
+ if (!TARGET_STRICT_ALIGN || align > UNITS_PER_WORD)
+ align = UNITS_PER_WORD;
- if (CONST_INT_P (length)
- && INTVAL (length) <= loongarch_max_inline_memcpy_size)
+ if (length <= align * LARCH_MAX_MOVE_OPS_STRAIGHT)
{
- if (INTVAL (length) <= max_move_bytes)
- {
- loongarch_block_move_straight (dest, src, INTVAL (length));
- return true;
- }
- else if (optimize)
- {
- loongarch_block_move_loop (dest, src, INTVAL (length),
- LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER);
- return true;
- }
+ loongarch_block_move_straight (dest, src, length, align);
+ return true;
+ }
+
+ if (optimize)
+ {
+ loongarch_block_move_loop (dest, src, length, align);
+ return true;
}
+
return false;
}
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index 9d3cd9ca0..af24bfa01 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -1062,13 +1062,13 @@ typedef struct {
/* The maximum number of bytes that can be copied by one iteration of
a cpymemsi loop; see loongarch_block_move_loop. */
-#define LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER (UNITS_PER_WORD * 4)
+#define LARCH_MAX_MOVE_OPS_PER_LOOP_ITER 4
/* The maximum number of bytes that can be copied by a straight-line
implementation of cpymemsi; see loongarch_block_move_straight. We want
to make sure that any loop-based implementation will iterate at
least twice. */
-#define LARCH_MAX_MOVE_BYTES_STRAIGHT (LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER * 2)
+#define LARCH_MAX_MOVE_OPS_STRAIGHT (LARCH_MAX_MOVE_OPS_PER_LOOP_ITER * 2)
/* The base cost of a memcpy call, for MOVE_RATIO and friends. These
values were determined experimentally by benchmarking with CSiBE.
@@ -1076,7 +1076,7 @@ typedef struct {
#define LARCH_CALL_RATIO 8
/* Any loop-based implementation of cpymemsi will have at least
- LARCH_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD memory-to-memory
+ LARCH_MAX_MOVE_OPS_PER_LOOP_ITER memory-to-memory
moves, so allow individual copies of fewer elements.
When cpymemsi is not available, use a value approximating
@@ -1087,9 +1087,7 @@ typedef struct {
value of LARCH_CALL_RATIO to take that into account. */
#define MOVE_RATIO(speed) \
- (HAVE_cpymemsi \
- ? LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER / UNITS_PER_WORD \
- : CLEAR_RATIO (speed) / 2)
+ (HAVE_cpymemsi ? LARCH_MAX_MOVE_OPS_PER_LOOP_ITER : CLEAR_RATIO (speed) / 2)
/* For CLEAR_RATIO, when optimizing for size, give a better estimate
of the length of a memset call, but use the default otherwise. */
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index b2f7c7f78..b23248c33 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -2488,7 +2488,8 @@
""
{
if (TARGET_DO_OPTIMIZE_BLOCK_MOVE_P
- && loongarch_expand_block_move (operands[0], operands[1], operands[2]))
+ && loongarch_expand_block_move (operands[0], operands[1],
+ operands[2], operands[3]))
DONE;
else
FAIL;
diff --git a/gcc/testsuite/gcc.target/loongarch/pr109465-1.c b/gcc/testsuite/gcc.target/loongarch/pr109465-1.c
new file mode 100644
index 000000000..4cd35d139
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr109465-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d -mno-strict-align" } */
+/* { dg-final { scan-assembler-times "st\\.d|stptr\\.d" 1 } } */
+/* { dg-final { scan-assembler-times "st\\.w|stptr\\.w" 1 } } */
+/* { dg-final { scan-assembler-times "st\\.h" 1 } } */
+/* { dg-final { scan-assembler-times "st\\.b" 1 } } */
+
+extern char a[], b[];
+void test() { __builtin_memcpy(a, b, 15); }
diff --git a/gcc/testsuite/gcc.target/loongarch/pr109465-2.c b/gcc/testsuite/gcc.target/loongarch/pr109465-2.c
new file mode 100644
index 000000000..703eb951c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr109465-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d -mstrict-align" } */
+/* { dg-final { scan-assembler-times "st\\.d|stptr\\.d" 1 } } */
+/* { dg-final { scan-assembler-times "st\\.w|stptr\\.w" 1 } } */
+/* { dg-final { scan-assembler-times "st\\.h" 1 } } */
+/* { dg-final { scan-assembler-times "st\\.b" 1 } } */
+
+extern long a[], b[];
+void test() { __builtin_memcpy(a, b, 15); }
diff --git a/gcc/testsuite/gcc.target/loongarch/pr109465-3.c b/gcc/testsuite/gcc.target/loongarch/pr109465-3.c
new file mode 100644
index 000000000..d6a80659b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr109465-3.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d -mstrict-align" } */
+
+/* Three loop iterations each contains 4 st.b, and 3 st.b after the loop */
+/* { dg-final { scan-assembler-times "st\\.b" 7 } } */
+
+/* { dg-final { scan-assembler-not "st\\.h" } } */
+/* { dg-final { scan-assembler-not "st\\.w|stptr\\.w" } } */
+/* { dg-final { scan-assembler-not "st\\.d|stptr\\.d" } } */
+
+extern char a[], b[];
+void test() { __builtin_memcpy(a, b, 15); }
--
2.33.0

View File

@ -0,0 +1,291 @@
From 7f9f1dd3c87cffeab58150997e22e8fff707646b Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Mon, 26 Sep 2022 09:42:51 +0800
Subject: [PATCH 019/124] LoongArch: Libitm add LoongArch support.
Co-Authored-By: Yang Yujie <yangyujie@loongson.cn>
libitm/ChangeLog:
* configure.tgt: Add loongarch support.
* config/loongarch/asm.h: New file.
* config/loongarch/sjlj.S: New file.
* config/loongarch/target.h: New file.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
libitm/config/loongarch/asm.h | 54 +++++++++++++
libitm/config/loongarch/sjlj.S | 127 +++++++++++++++++++++++++++++++
libitm/config/loongarch/target.h | 50 ++++++++++++
libitm/configure.tgt | 2 +
4 files changed, 233 insertions(+)
create mode 100644 libitm/config/loongarch/asm.h
create mode 100644 libitm/config/loongarch/sjlj.S
create mode 100644 libitm/config/loongarch/target.h
diff --git a/libitm/config/loongarch/asm.h b/libitm/config/loongarch/asm.h
new file mode 100644
index 000000000..a8e3304bb
--- /dev/null
+++ b/libitm/config/loongarch/asm.h
@@ -0,0 +1,54 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Loongson Co. Ltd.
+
+ This file is part of the GNU Transactional Memory Library (libitm).
+
+ Libitm is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _LA_ASM_H
+#define _LA_ASM_H
+
+#if defined(__loongarch_lp64)
+# define GPR_L ld.d
+# define GPR_S st.d
+# define SZ_GPR 8
+# define ADDSP(si) addi.d $sp, $sp, si
+#elif defined(__loongarch64_ilp32)
+# define GPR_L ld.w
+# define GPR_S st.w
+# define SZ_GPR 4
+# define ADDSP(si) addi.w $sp, $sp, si
+#else
+# error Unsupported GPR size (must be 64-bit or 32-bit).
+#endif
+
+#if defined(__loongarch_double_float)
+# define FPR_L fld.d
+# define FPR_S fst.d
+# define SZ_FPR 8
+#elif defined(__loongarch_single_float)
+# define FPR_L fld.s
+# define FPR_S fst.s
+# define SZ_FPR 4
+#else
+# define SZ_FPR 0
+#endif
+
+#endif /* _LA_ASM_H */
diff --git a/libitm/config/loongarch/sjlj.S b/libitm/config/loongarch/sjlj.S
new file mode 100644
index 000000000..a5f9fadde
--- /dev/null
+++ b/libitm/config/loongarch/sjlj.S
@@ -0,0 +1,127 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Loongson Co. Ltd.
+
+ This file is part of the GNU Transactional Memory Library (libitm).
+
+ Libitm is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "asmcfi.h"
+#include "asm.h"
+
+ .text
+ .align 2
+ .global _ITM_beginTransaction
+ .type _ITM_beginTransaction, @function
+
+_ITM_beginTransaction:
+ cfi_startproc
+ move $r5, $sp
+ ADDSP(-(12*SZ_GPR+8*SZ_FPR))
+ cfi_adjust_cfa_offset(12*SZ_GPR+8*SZ_FPR)
+
+ /* Frame Pointer */
+ GPR_S $fp, $sp, 0*SZ_GPR
+ cfi_rel_offset(22, 0)
+
+ /* Return Address */
+ GPR_S $r1, $sp, 1*SZ_GPR
+ cfi_rel_offset(1, SZ_GPR)
+
+ /* Caller's $sp */
+ GPR_S $r5, $sp, 2*SZ_GPR
+
+ /* Callee-saved scratch GPRs (r23-r31) */
+ GPR_S $s0, $sp, 3*SZ_GPR
+ GPR_S $s1, $sp, 4*SZ_GPR
+ GPR_S $s2, $sp, 5*SZ_GPR
+ GPR_S $s3, $sp, 6*SZ_GPR
+ GPR_S $s4, $sp, 7*SZ_GPR
+ GPR_S $s5, $sp, 8*SZ_GPR
+ GPR_S $s6, $sp, 9*SZ_GPR
+ GPR_S $s7, $sp, 10*SZ_GPR
+ GPR_S $s8, $sp, 11*SZ_GPR
+
+#if !defined(__loongarch_soft_float)
+ /* Callee-saved scratch FPRs (f24-f31) */
+ FPR_S $f24, $sp, 12*SZ_GPR + 0*SZ_FPR
+ FPR_S $f25, $sp, 12*SZ_GPR + 1*SZ_FPR
+ FPR_S $f26, $sp, 12*SZ_GPR + 2*SZ_FPR
+ FPR_S $f27, $sp, 12*SZ_GPR + 3*SZ_FPR
+ FPR_S $f28, $sp, 12*SZ_GPR + 4*SZ_FPR
+ FPR_S $f29, $sp, 12*SZ_GPR + 5*SZ_FPR
+ FPR_S $f30, $sp, 12*SZ_GPR + 6*SZ_FPR
+ FPR_S $f31, $sp, 12*SZ_GPR + 7*SZ_FPR
+#endif
+ move $fp, $sp
+
+ /* Invoke GTM_begin_transaction with the struct we've just built. */
+ move $r5, $sp
+ bl %plt(GTM_begin_transaction)
+
+ /* Return. (no call-saved scratch reg needs to be restored here) */
+ GPR_L $fp, $sp, 0*SZ_GPR
+ cfi_restore(22)
+ GPR_L $r1, $sp, 1*SZ_GPR
+ cfi_restore(1)
+
+ ADDSP(12*SZ_GPR+8*SZ_FPR)
+ cfi_adjust_cfa_offset(-(12*SZ_GPR+8*SZ_FPR))
+
+ jr $r1
+ cfi_endproc
+ .size _ITM_beginTransaction, . - _ITM_beginTransaction
+
+ .align 2
+ .global GTM_longjmp
+ .hidden GTM_longjmp
+ .type GTM_longjmp, @function
+
+GTM_longjmp:
+ cfi_startproc
+ GPR_L $s0, $r5, 3*SZ_GPR
+ GPR_L $s1, $r5, 4*SZ_GPR
+ GPR_L $s2, $r5, 5*SZ_GPR
+ GPR_L $s3, $r5, 6*SZ_GPR
+ GPR_L $s4, $r5, 7*SZ_GPR
+ GPR_L $s5, $r5, 8*SZ_GPR
+ GPR_L $s6, $r5, 9*SZ_GPR
+ GPR_L $s7, $r5, 10*SZ_GPR
+ GPR_L $s8, $r5, 11*SZ_GPR
+
+ FPR_L $f24, $r5, 12*SZ_GPR + 0*SZ_FPR
+ FPR_L $f25, $r5, 12*SZ_GPR + 1*SZ_FPR
+ FPR_L $f26, $r5, 12*SZ_GPR + 2*SZ_FPR
+ FPR_L $f27, $r5, 12*SZ_GPR + 3*SZ_FPR
+ FPR_L $f28, $r5, 12*SZ_GPR + 4*SZ_FPR
+ FPR_L $f29, $r5, 12*SZ_GPR + 5*SZ_FPR
+ FPR_L $f30, $r5, 12*SZ_GPR + 6*SZ_FPR
+ FPR_L $f31, $r5, 12*SZ_GPR + 7*SZ_FPR
+
+ GPR_L $r7, $r5, 2*SZ_GPR
+ GPR_L $fp, $r5, 0*SZ_GPR
+ GPR_L $r1, $r5, 1*SZ_GPR
+ cfi_def_cfa(5, 0)
+ move $sp, $r7
+ jr $r1
+ cfi_endproc
+ .size GTM_longjmp, . - GTM_longjmp
+
+#ifdef __linux__
+.section .note.GNU-stack, "", @progbits
+#endif
diff --git a/libitm/config/loongarch/target.h b/libitm/config/loongarch/target.h
new file mode 100644
index 000000000..0c5cf3ada
--- /dev/null
+++ b/libitm/config/loongarch/target.h
@@ -0,0 +1,50 @@
+/* Copyright (C) 2022 Free Software Foundation, Inc.
+ Contributed by Loongson Co. Ltd.
+
+ This file is part of the GNU Transactional Memory Library (libitm).
+
+ Libitm is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+namespace GTM HIDDEN {
+
+typedef struct gtm_jmpbuf
+ {
+ long int fp; /* Frame Pointer: r22 */
+ long int pc; /* Return Address: r1 */
+ void *cfa; /* CFA: r3 */
+ long int gpr[9]; /* Callee-saved scratch GPRs: r23(s0)-r31(s8) */
+
+ /* Callee-saved scratch FPRs: f24-f31 */
+#if defined(__loongarch_double_float)
+ double fpr[8];
+#elif defined(__loongarch_single_float)
+ float fpr[8];
+#endif
+ } gtm_jmpbuf;
+
+#define HW_CACHELINE_SIZE 128
+
+static inline void
+cpu_relax (void)
+{
+ __asm__ volatile ("" : : : "memory");
+}
+
+} // namespace GTM
diff --git a/libitm/configure.tgt b/libitm/configure.tgt
index 06e90973e..4c0e78cff 100644
--- a/libitm/configure.tgt
+++ b/libitm/configure.tgt
@@ -80,6 +80,8 @@ EOF
ARCH=x86
;;
+ loongarch*) ARCH=loongarch ;;
+
sh*) ARCH=sh ;;
sparc)
--
2.33.0

View File

@ -0,0 +1,37 @@
From 83d6cfbbdc41766af9d7941d00204cc0f26ff40c Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Tue, 26 Jul 2022 21:03:52 +0800
Subject: [PATCH 005/124] LoongArch: Modify the output message string of the
warning.
Fix bug for "error: spurious trailing punctuation sequence '.' in format [-Werror=format-diag]".
gcc/ChangeLog:
* config/loongarch/loongarch-opts.cc: Modify the output message string
of the warning.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch-opts.cc | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
index fc477bfd4..3f70943de 100644
--- a/gcc/config/loongarch/loongarch-opts.cc
+++ b/gcc/config/loongarch/loongarch-opts.cc
@@ -378,8 +378,8 @@ fallback:
t.cmodel = constrained.cmodel ? opt_cmodel : CMODEL_NORMAL;
if (t.cmodel != CMODEL_NORMAL)
{
- warning (0, "%qs is not supported, now cmodel is set to 'normal'.",
- loongarch_cmodel_strings[t.cmodel]);
+ warning (0, "%qs is not supported, now cmodel is set to %qs",
+ loongarch_cmodel_strings[t.cmodel], "normal");
t.cmodel = CMODEL_NORMAL;
}
--
2.33.0

View File

@ -0,0 +1,445 @@
From a31baa1e437fa4acedfaf03db91c1d6e5ce78013 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sun, 2 Apr 2023 21:37:49 +0800
Subject: [PATCH 041/124] LoongArch: Optimize additions with immediates
1. Use addu16i.d for TARGET_64BIT and suitable immediates.
2. Split one addition with immediate into two addu16i.d or addi.{d/w}
instructions if possible. This can avoid using a temp register w/o
increase the count of instructions.
Inspired by https://reviews.llvm.org/D143710 and
https://reviews.llvm.org/D147222.
Bootstrapped and regtested on loongarch64-linux-gnu. Ok for GCC 14?
gcc/ChangeLog:
* config/loongarch/loongarch-protos.h
(loongarch_addu16i_imm12_operand_p): New function prototype.
(loongarch_split_plus_constant): Likewise.
* config/loongarch/loongarch.cc
(loongarch_addu16i_imm12_operand_p): New function.
(loongarch_split_plus_constant): Likewise.
* config/loongarch/loongarch.h (ADDU16I_OPERAND): New macro.
(DUAL_IMM12_OPERAND): Likewise.
(DUAL_ADDU16I_OPERAND): Likewise.
* config/loongarch/constraints.md (La, Lb, Lc, Ld, Le): New
constraint.
* config/loongarch/predicates.md (const_dual_imm12_operand): New
predicate.
(const_addu16i_operand): Likewise.
(const_addu16i_imm12_di_operand): Likewise.
(const_addu16i_imm12_si_operand): Likewise.
(plus_di_operand): Likewise.
(plus_si_operand): Likewise.
(plus_si_extend_operand): Likewise.
* config/loongarch/loongarch.md (add<mode>3): Convert to
define_insn_and_split. Use plus_<mode>_operand predicate
instead of arith_operand. Add alternatives for La, Lb, Lc, Ld,
and Le constraints.
(*addsi3_extended): Convert to define_insn_and_split. Use
plus_si_extend_operand instead of arith_operand. Add
alternatives for La and Le alternatives.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/add-const.c: New test.
* gcc.target/loongarch/stack-check-cfa-1.c: Adjust for stack
frame size change.
* gcc.target/loongarch/stack-check-cfa-2.c: Likewise.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/constraints.md | 46 ++++++++++++-
gcc/config/loongarch/loongarch-protos.h | 2 +
gcc/config/loongarch/loongarch.cc | 44 +++++++++++++
gcc/config/loongarch/loongarch.h | 19 ++++++
gcc/config/loongarch/loongarch.md | 66 +++++++++++++++----
gcc/config/loongarch/predicates.md | 36 ++++++++++
.../gcc.target/loongarch/add-const.c | 45 +++++++++++++
.../gcc.target/loongarch/stack-check-cfa-1.c | 2 +-
.../gcc.target/loongarch/stack-check-cfa-2.c | 2 +-
9 files changed, 246 insertions(+), 16 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/add-const.c
diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
index 46f7f63ae..25f3cda35 100644
--- a/gcc/config/loongarch/constraints.md
+++ b/gcc/config/loongarch/constraints.md
@@ -60,7 +60,22 @@
;; "I" "A signed 12-bit constant (for arithmetic instructions)."
;; "J" "Integer zero."
;; "K" "An unsigned 12-bit constant (for logic instructions)."
-;; "L" <-----unused
+;; "L" -
+;; "La"
+;; "A signed constant in [-4096, 2048) or (2047, 4094]."
+;; "Lb"
+;; "A signed 32-bit constant and low 16-bit is zero, which can be
+;; added onto a register with addu16i.d. It matches nothing if
+;; the addu16i.d instruction is not available."
+;; "Lc"
+;; "A signed 64-bit constant can be expressed as Lb + I, but not a
+;; single Lb or I."
+;; "Ld"
+;; "A signed 64-bit constant can be expressed as Lb + Lb, but not a
+;; single Lb."
+;; "Le"
+;; "A signed 32-bit constant can be expressed as Lb + I, but not a
+;; single Lb or I."
;; "M" <-----unused
;; "N" <-----unused
;; "O" <-----unused
@@ -170,6 +185,35 @@
(and (match_code "const_int")
(match_test "IMM12_OPERAND_UNSIGNED (ival)")))
+(define_constraint "La"
+ "A signed constant in [-4096, 2048) or (2047, 4094]."
+ (and (match_code "const_int")
+ (match_test "DUAL_IMM12_OPERAND (ival)")))
+
+(define_constraint "Lb"
+ "A signed 32-bit constant and low 16-bit is zero, which can be added
+ onto a register with addu16i.d."
+ (and (match_code "const_int")
+ (match_test "ADDU16I_OPERAND (ival)")))
+
+(define_constraint "Lc"
+ "A signed 64-bit constant can be expressed as Lb + I, but not a single Lb
+ or I."
+ (and (match_code "const_int")
+ (match_test "loongarch_addu16i_imm12_operand_p (ival, DImode)")))
+
+(define_constraint "Ld"
+ "A signed 64-bit constant can be expressed as Lb + Lb, but not a single
+ Lb."
+ (and (match_code "const_int")
+ (match_test "DUAL_ADDU16I_OPERAND (ival)")))
+
+(define_constraint "Le"
+ "A signed 32-bit constant can be expressed as Lb + I, but not a single Lb
+ or I."
+ (and (match_code "const_int")
+ (match_test "loongarch_addu16i_imm12_operand_p (ival, SImode)")))
+
(define_constraint "Yd"
"@internal
A constant @code{move_operand} that can be safely loaded using
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
index 77b221724..0a9b47722 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -83,6 +83,8 @@ extern rtx loongarch_legitimize_call_address (rtx);
extern rtx loongarch_subword (rtx, bool);
extern bool loongarch_split_move_p (rtx, rtx);
extern void loongarch_split_move (rtx, rtx, rtx);
+extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
+extern void loongarch_split_plus_constant (rtx *, machine_mode);
extern const char *loongarch_output_move (rtx, rtx);
extern bool loongarch_cfun_has_cprestore_slot_p (void);
#ifdef RTX_CODE
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 1a4686f03..233dddbac 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -3753,6 +3753,50 @@ loongarch_split_move (rtx dest, rtx src, rtx insn_)
}
}
+/* Check if adding an integer constant value for a specific mode can be
+ performed with an addu16i.d instruction and an addi.{w/d}
+ instruction. */
+
+bool
+loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT value, machine_mode mode)
+{
+ /* Not necessary, but avoid unnecessary calculation if !TARGET_64BIT. */
+ if (!TARGET_64BIT)
+ return false;
+
+ if ((value & 0xffff) == 0)
+ return false;
+
+ if (IMM12_OPERAND (value))
+ return false;
+
+ value = (value & ~HWIT_UC_0xFFF) + ((value & 0x800) << 1);
+ return ADDU16I_OPERAND (trunc_int_for_mode (value, mode));
+}
+
+/* Split one integer constant op[0] into two (op[1] and op[2]) for constant
+ plus operation in a specific mode. The splitted constants can be added
+ onto a register with a single instruction (addi.{d/w} or addu16i.d). */
+
+void
+loongarch_split_plus_constant (rtx *op, machine_mode mode)
+{
+ HOST_WIDE_INT v = INTVAL (op[0]), a;
+
+ if (DUAL_IMM12_OPERAND (v))
+ a = (v > 0 ? 2047 : -2048);
+ else if (loongarch_addu16i_imm12_operand_p (v, mode))
+ a = (v & ~HWIT_UC_0xFFF) + ((v & 0x800) << 1);
+ else if (mode == DImode && DUAL_ADDU16I_OPERAND (v))
+ a = (v > 0 ? 0x7fff : -0x8000) << 16;
+ else
+ gcc_unreachable ();
+
+ op[1] = gen_int_mode (a, mode);
+ v = v - (unsigned HOST_WIDE_INT) a;
+ op[2] = gen_int_mode (v, mode);
+}
+
/* Return true if a move from SRC to DEST in INSN should be split. */
static bool
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index c6e37b1b4..9d3cd9ca0 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -612,6 +612,25 @@ enum reg_class
#define CONST_LOW_PART(VALUE) ((VALUE) - CONST_HIGH_PART (VALUE))
+/* True if VALUE can be added onto a register with one addu16i.d
+ instruction. */
+
+#define ADDU16I_OPERAND(VALUE) \
+ (TARGET_64BIT && (((VALUE) & 0xffff) == 0 \
+ && IMM16_OPERAND ((HOST_WIDE_INT) (VALUE) / 65536)))
+
+/* True if VALUE can be added onto a register with two addi.{d/w}
+ instructions, but not one addi.{d/w} instruction. */
+#define DUAL_IMM12_OPERAND(VALUE) \
+ (IN_RANGE ((VALUE), -4096, 4094) && !IMM12_OPERAND (VALUE))
+
+/* True if VALUE can be added onto a register with two addu16i.d
+ instruction, but not one addu16i.d instruction. */
+#define DUAL_ADDU16I_OPERAND(VALUE) \
+ (TARGET_64BIT && (((VALUE) & 0xffff) == 0 \
+ && !ADDU16I_OPERAND (VALUE) \
+ && IN_RANGE ((VALUE) / 65536, -0x10000, 0xfffe)))
+
#define IMM12_INT(X) IMM12_OPERAND (INTVAL (X))
#define IMM12_INT_UNSIGNED(X) IMM12_OPERAND_UNSIGNED (INTVAL (X))
#define LU12I_INT(X) LU12I_OPERAND (INTVAL (X))
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 833b94753..b2f7c7f78 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -598,24 +598,64 @@
[(set_attr "type" "fadd")
(set_attr "mode" "<UNITMODE>")])
-(define_insn "add<mode>3"
- [(set (match_operand:GPR 0 "register_operand" "=r,r")
- (plus:GPR (match_operand:GPR 1 "register_operand" "r,r")
- (match_operand:GPR 2 "arith_operand" "r,I")))]
+(define_insn_and_split "add<mode>3"
+ [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r")
+ (plus:GPR (match_operand:GPR 1 "register_operand" "r,r,r,r,r,r,r")
+ (match_operand:GPR 2 "plus_<mode>_operand"
+ "r,I,La,Lb,Lc,Ld,Le")))]
""
- "add%i2.<d>\t%0,%1,%2";
+ "@
+ add.<d>\t%0,%1,%2
+ addi.<d>\t%0,%1,%2
+ #
+ * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \
+ return \"addu16i.d\t%0,%1,%2\";
+ #
+ #
+ #"
+ "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
+ && !ADDU16I_OPERAND (INTVAL (operands[2]))"
+ [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3)))
+ (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))]
+ {
+ loongarch_split_plus_constant (&operands[2], <MODE>mode);
+ }
[(set_attr "alu_type" "add")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*addsi3_extended"
- [(set (match_operand:DI 0 "register_operand" "=r,r")
+ (set_attr "mode" "<MODE>")
+ (set_attr "insn_count" "1,1,2,1,2,2,2")
+ (set (attr "enabled")
+ (cond
+ [(match_test "<MODE>mode != DImode && which_alternative == 4")
+ (const_string "no")
+ (match_test "<MODE>mode != DImode && which_alternative == 5")
+ (const_string "no")
+ (match_test "<MODE>mode != SImode && which_alternative == 6")
+ (const_string "no")]
+ (const_string "yes")))])
+
+(define_insn_and_split "*addsi3_extended"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
(sign_extend:DI
- (plus:SI (match_operand:SI 1 "register_operand" "r,r")
- (match_operand:SI 2 "arith_operand" "r,I"))))]
+ (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r")
+ (match_operand:SI 2 "plus_si_extend_operand"
+ "r,I,La,Le"))))]
"TARGET_64BIT"
- "add%i2.w\t%0,%1,%2"
+ "@
+ add.w\t%0,%1,%2
+ addi.w\t%0,%1,%2
+ #
+ #"
+ "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2])"
+ [(set (subreg:SI (match_dup 0) 0) (plus:SI (match_dup 1) (match_dup 3)))
+ (set (match_dup 0)
+ (sign_extend:DI (plus:SI (subreg:SI (match_dup 0) 0)
+ (match_dup 4))))]
+ {
+ loongarch_split_plus_constant (&operands[2], SImode);
+ }
[(set_attr "alu_type" "add")
- (set_attr "mode" "SI")])
+ (set_attr "mode" "SI")
+ (set_attr "insn_count" "1,1,2,2")])
;;
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
index 3c32b2987..4966d5569 100644
--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
@@ -39,14 +39,50 @@
(and (match_code "const_int")
(match_test "IMM12_OPERAND (INTVAL (op))")))
+(define_predicate "const_dual_imm12_operand"
+ (and (match_code "const_int")
+ (match_test "DUAL_IMM12_OPERAND (INTVAL (op))")))
+
(define_predicate "const_imm16_operand"
(and (match_code "const_int")
(match_test "IMM16_OPERAND (INTVAL (op))")))
+(define_predicate "const_addu16i_operand"
+ (and (match_code "const_int")
+ (match_test "ADDU16I_OPERAND (INTVAL (op))")))
+
+(define_predicate "const_addu16i_imm12_di_operand"
+ (and (match_code "const_int")
+ (match_test "loongarch_addu16i_imm12_operand_p (INTVAL (op), DImode)")))
+
+(define_predicate "const_addu16i_imm12_si_operand"
+ (and (match_code "const_int")
+ (match_test "loongarch_addu16i_imm12_operand_p (INTVAL (op), SImode)")))
+
+(define_predicate "const_dual_addu16i_operand"
+ (and (match_code "const_int")
+ (match_test "DUAL_ADDU16I_OPERAND (INTVAL (op))")))
+
(define_predicate "arith_operand"
(ior (match_operand 0 "const_arith_operand")
(match_operand 0 "register_operand")))
+(define_predicate "plus_di_operand"
+ (ior (match_operand 0 "arith_operand")
+ (match_operand 0 "const_dual_imm12_operand")
+ (match_operand 0 "const_addu16i_operand")
+ (match_operand 0 "const_addu16i_imm12_di_operand")
+ (match_operand 0 "const_dual_addu16i_operand")))
+
+(define_predicate "plus_si_extend_operand"
+ (ior (match_operand 0 "arith_operand")
+ (match_operand 0 "const_dual_imm12_operand")
+ (match_operand 0 "const_addu16i_imm12_si_operand")))
+
+(define_predicate "plus_si_operand"
+ (ior (match_operand 0 "plus_si_extend_operand")
+ (match_operand 0 "const_addu16i_operand")))
+
(define_predicate "const_immalsl_operand"
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 1, 4)")))
diff --git a/gcc/testsuite/gcc.target/loongarch/add-const.c b/gcc/testsuite/gcc.target/loongarch/add-const.c
new file mode 100644
index 000000000..7b6a7cb92
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/add-const.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mabi=lp64d" } */
+
+/* None of these functions should load the const operand into a temp
+ register. */
+
+/* { dg-final { scan-assembler-not "add\\.[dw]" } } */
+
+unsigned long f01 (unsigned long x) { return x + 1; }
+unsigned long f02 (unsigned long x) { return x - 1; }
+unsigned long f03 (unsigned long x) { return x + 2047; }
+unsigned long f04 (unsigned long x) { return x + 4094; }
+unsigned long f05 (unsigned long x) { return x - 2048; }
+unsigned long f06 (unsigned long x) { return x - 4096; }
+unsigned long f07 (unsigned long x) { return x + 0x7fff0000; }
+unsigned long f08 (unsigned long x) { return x - 0x80000000l; }
+unsigned long f09 (unsigned long x) { return x + 0x7fff0000l * 2; }
+unsigned long f10 (unsigned long x) { return x - 0x80000000l * 2; }
+unsigned long f11 (unsigned long x) { return x + 0x7fff0000 + 0x1; }
+unsigned long f12 (unsigned long x) { return x + 0x7fff0000 - 0x1; }
+unsigned long f13 (unsigned long x) { return x + 0x7fff0000 + 0x7ff; }
+unsigned long f14 (unsigned long x) { return x + 0x7fff0000 - 0x800; }
+unsigned long f15 (unsigned long x) { return x - 0x80000000l - 1; }
+unsigned long f16 (unsigned long x) { return x - 0x80000000l + 1; }
+unsigned long f17 (unsigned long x) { return x - 0x80000000l - 0x800; }
+unsigned long f18 (unsigned long x) { return x - 0x80000000l + 0x7ff; }
+
+unsigned int g01 (unsigned int x) { return x + 1; }
+unsigned int g02 (unsigned int x) { return x - 1; }
+unsigned int g03 (unsigned int x) { return x + 2047; }
+unsigned int g04 (unsigned int x) { return x + 4094; }
+unsigned int g05 (unsigned int x) { return x - 2048; }
+unsigned int g06 (unsigned int x) { return x - 4096; }
+unsigned int g07 (unsigned int x) { return x + 0x7fff0000; }
+unsigned int g08 (unsigned int x) { return x - 0x80000000l; }
+unsigned int g09 (unsigned int x) { return x + 0x7fff0000l * 2; }
+unsigned int g10 (unsigned int x) { return x - 0x80000000l * 2; }
+unsigned int g11 (unsigned int x) { return x + 0x7fff0000 + 0x1; }
+unsigned int g12 (unsigned int x) { return x + 0x7fff0000 - 0x1; }
+unsigned int g13 (unsigned int x) { return x + 0x7fff0000 + 0x7ff; }
+unsigned int g14 (unsigned int x) { return x + 0x7fff0000 - 0x800; }
+unsigned int g15 (unsigned int x) { return x - 0x80000000l - 1; }
+unsigned int g16 (unsigned int x) { return x - 0x80000000l + 1; }
+unsigned int g17 (unsigned int x) { return x - 0x80000000l - 0x800; }
+unsigned int g18 (unsigned int x) { return x - 0x80000000l + 0x7ff; }
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
index 3533fe7b6..cd72154f4 100644
--- a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
@@ -6,7 +6,7 @@
#define SIZE 128*1024
#include "stack-check-prologue.h"
-/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 131088} 1 } } */
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 131072} 1 } } */
/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1 } } */
/* Checks that the CFA notes are correct for every sp adjustment. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
index e5e711105..3e5ca05b2 100644
--- a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
@@ -6,7 +6,7 @@
#define SIZE 1280*1024 + 512
#include "stack-check-prologue.h"
-/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 1311248} 1 } } */
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 1311232} 1 } } */
/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1 } } */
/* Checks that the CFA notes are correct for every sp adjustment. */
--
2.33.0

View File

@ -0,0 +1,338 @@
From b533b615ae47b97d51eeb83e1a63f7c72407430f Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Thu, 17 Nov 2022 17:08:36 +0800
Subject: [PATCH 032/124] LoongArch: Optimize immediate load.
The immediate number is split in the Split pass, not in the expand pass.
Because loop2_invariant pass will extract the instructions that do not change
in the loop out of the loop, some instructions will not meet the extraction
conditions if the machine performs immediate decomposition while expand pass,
so the immediate decomposition will be transferred to the split process.
gcc/ChangeLog:
* config/loongarch/loongarch.cc (enum loongarch_load_imm_method):
Remove the member METHOD_INSV that is not currently used.
(struct loongarch_integer_op): Define a new member curr_value,
that records the value of the number stored in the destination
register immediately after the current instruction has run.
(loongarch_build_integer): Assign a value to the curr_value member variable.
(loongarch_move_integer): Adds information for the immediate load instruction.
* config/loongarch/loongarch.md (*movdi_32bit): Redefine as define_insn_and_split.
(*movdi_64bit): Likewise.
(*movsi_internal): Likewise.
(*movhi_internal): Likewise.
* config/loongarch/predicates.md: Return true as long as it is CONST_INT, ensure
that the immediate number is not optimized by decomposition during expand
optimization loop.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/imm-load.c: New test.
* gcc.target/loongarch/imm-load1.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.cc | 62 ++++++++++---------
gcc/config/loongarch/loongarch.md | 44 +++++++++++--
gcc/config/loongarch/predicates.md | 2 +-
gcc/testsuite/gcc.target/loongarch/imm-load.c | 10 +++
.../gcc.target/loongarch/imm-load1.c | 26 ++++++++
5 files changed, 110 insertions(+), 34 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/imm-load.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/imm-load1.c
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 622c9435b..f45a49f90 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -139,22 +139,21 @@ struct loongarch_address_info
METHOD_LU52I:
Load 52-63 bit of the immediate number.
-
- METHOD_INSV:
- immediate like 0xfff00000fffffxxx
- */
+*/
enum loongarch_load_imm_method
{
METHOD_NORMAL,
METHOD_LU32I,
- METHOD_LU52I,
- METHOD_INSV
+ METHOD_LU52I
};
struct loongarch_integer_op
{
enum rtx_code code;
HOST_WIDE_INT value;
+ /* Represent the result of the immediate count of the load instruction at
+ each step. */
+ HOST_WIDE_INT curr_value;
enum loongarch_load_imm_method method;
};
@@ -1474,24 +1473,27 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
{
/* The value of the lower 32 bit be loaded with one instruction.
lu12i.w. */
- codes[0].code = UNKNOWN;
- codes[0].method = METHOD_NORMAL;
- codes[0].value = low_part;
+ codes[cost].code = UNKNOWN;
+ codes[cost].method = METHOD_NORMAL;
+ codes[cost].value = low_part;
+ codes[cost].curr_value = low_part;
cost++;
}
else
{
/* lu12i.w + ior. */
- codes[0].code = UNKNOWN;
- codes[0].method = METHOD_NORMAL;
- codes[0].value = low_part & ~(IMM_REACH - 1);
+ codes[cost].code = UNKNOWN;
+ codes[cost].method = METHOD_NORMAL;
+ codes[cost].value = low_part & ~(IMM_REACH - 1);
+ codes[cost].curr_value = codes[cost].value;
cost++;
HOST_WIDE_INT iorv = low_part & (IMM_REACH - 1);
if (iorv != 0)
{
- codes[1].code = IOR;
- codes[1].method = METHOD_NORMAL;
- codes[1].value = iorv;
+ codes[cost].code = IOR;
+ codes[cost].method = METHOD_NORMAL;
+ codes[cost].value = iorv;
+ codes[cost].curr_value = low_part;
cost++;
}
}
@@ -1514,11 +1516,14 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
{
codes[cost].method = METHOD_LU52I;
codes[cost].value = value & LU52I_B;
+ codes[cost].curr_value = value;
return cost + 1;
}
codes[cost].method = METHOD_LU32I;
codes[cost].value = (value & LU32I_B) | (sign51 ? LU52I_B : 0);
+ codes[cost].curr_value = (value & 0xfffffffffffff)
+ | (sign51 ? LU52I_B : 0);
cost++;
/* Determine whether the 52-61 bits are sign-extended from the low order,
@@ -1527,6 +1532,7 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
{
codes[cost].method = METHOD_LU52I;
codes[cost].value = value & LU52I_B;
+ codes[cost].curr_value = value;
cost++;
}
}
@@ -2910,6 +2916,9 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
else
x = force_reg (mode, x);
+ set_unique_reg_note (get_last_insn (), REG_EQUAL,
+ GEN_INT (codes[i-1].curr_value));
+
switch (codes[i].method)
{
case METHOD_NORMAL:
@@ -2917,22 +2926,17 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
GEN_INT (codes[i].value));
break;
case METHOD_LU32I:
- emit_insn (
- gen_rtx_SET (x,
- gen_rtx_IOR (DImode,
- gen_rtx_ZERO_EXTEND (
- DImode, gen_rtx_SUBREG (SImode, x, 0)),
- GEN_INT (codes[i].value))));
+ gcc_assert (mode == DImode);
+ x = gen_rtx_IOR (DImode,
+ gen_rtx_ZERO_EXTEND (DImode,
+ gen_rtx_SUBREG (SImode, x, 0)),
+ GEN_INT (codes[i].value));
break;
case METHOD_LU52I:
- emit_insn (gen_lu52i_d (x, x, GEN_INT (0xfffffffffffff),
- GEN_INT (codes[i].value)));
- break;
- case METHOD_INSV:
- emit_insn (
- gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, x, GEN_INT (20),
- GEN_INT (32)),
- gen_rtx_REG (DImode, 0)));
+ gcc_assert (mode == DImode);
+ x = gen_rtx_IOR (DImode,
+ gen_rtx_AND (DImode, x, GEN_INT (0xfffffffffffff)),
+ GEN_INT (codes[i].value));
break;
default:
gcc_unreachable ();
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 2fda53819..f61db66d5 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1718,23 +1718,41 @@
DONE;
})
-(define_insn "*movdi_32bit"
+(define_insn_and_split "*movdi_32bit"
[(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m")
(match_operand:DI 1 "move_operand" "r,i,w,r,*J*r,*m,*f,*f"))]
"!TARGET_64BIT
&& (register_operand (operands[0], DImode)
|| reg_or_0_operand (operands[1], DImode))"
{ return loongarch_output_move (operands[0], operands[1]); }
+ "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO
+ (operands[0]))"
+ [(const_int 0)]
+ "
+{
+ loongarch_move_integer (operands[0], operands[0], INTVAL (operands[1]));
+ DONE;
+}
+ "
[(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore")
(set_attr "mode" "DI")])
-(define_insn "*movdi_64bit"
+(define_insn_and_split "*movdi_64bit"
[(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m")
(match_operand:DI 1 "move_operand" "r,Yd,w,rJ,*r*J,*m,*f,*f"))]
"TARGET_64BIT
&& (register_operand (operands[0], DImode)
|| reg_or_0_operand (operands[1], DImode))"
{ return loongarch_output_move (operands[0], operands[1]); }
+ "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO
+ (operands[0]))"
+ [(const_int 0)]
+ "
+{
+ loongarch_move_integer (operands[0], operands[0], INTVAL (operands[1]));
+ DONE;
+}
+ "
[(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore")
(set_attr "mode" "DI")])
@@ -1749,12 +1767,21 @@
DONE;
})
-(define_insn "*movsi_internal"
+(define_insn_and_split "*movsi_internal"
[(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m,*r,*z")
(match_operand:SI 1 "move_operand" "r,Yd,w,rJ,*r*J,*m,*f,*f,*z,*r"))]
"(register_operand (operands[0], SImode)
|| reg_or_0_operand (operands[1], SImode))"
{ return loongarch_output_move (operands[0], operands[1]); }
+ "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO
+ (operands[0]))"
+ [(const_int 0)]
+ "
+{
+ loongarch_move_integer (operands[0], operands[0], INTVAL (operands[1]));
+ DONE;
+}
+ "
[(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore,mftg,mgtf")
(set_attr "mode" "SI")])
@@ -1774,12 +1801,21 @@
DONE;
})
-(define_insn "*movhi_internal"
+(define_insn_and_split "*movhi_internal"
[(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,r,m,r,k")
(match_operand:HI 1 "move_operand" "r,Yd,I,m,rJ,k,rJ"))]
"(register_operand (operands[0], HImode)
|| reg_or_0_operand (operands[1], HImode))"
{ return loongarch_output_move (operands[0], operands[1]); }
+ "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO
+ (operands[0]))"
+ [(const_int 0)]
+ "
+{
+ loongarch_move_integer (operands[0], operands[0], INTVAL (operands[1]));
+ DONE;
+}
+ "
[(set_attr "move_type" "move,const,const,load,store,load,store")
(set_attr "mode" "HI")])
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
index 8bd0c1376..58c3dc226 100644
--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
@@ -226,7 +226,7 @@
switch (GET_CODE (op))
{
case CONST_INT:
- return !splittable_const_int_operand (op, mode);
+ return true;
case CONST:
case SYMBOL_REF:
diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load.c b/gcc/testsuite/gcc.target/loongarch/imm-load.c
new file mode 100644
index 000000000..c04ca3399
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/imm-load.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2 -fdump-rtl-split1" } */
+
+long int
+test (void)
+{
+ return 0x1234567890abcdef;
+}
+/* { dg-final { scan-rtl-dump-times "scanning new insn with uid" 6 "split1" } } */
+
diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load1.c b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
new file mode 100644
index 000000000..2ff029712
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2" } */
+/* { dg-final { scan-assembler "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
+
+
+extern long long b[10];
+static inline long long
+repeat_bytes (void)
+{
+ long long r = 0x0101010101010101;
+
+ return r;
+}
+
+static inline long long
+highbit_mask (long long m)
+{
+ return m & repeat_bytes ();
+}
+
+void test(long long *a)
+{
+ for (int i = 0; i < 10; i++)
+ b[i] = highbit_mask (a[i]);
+
+}
--
2.33.0

View File

@ -0,0 +1,114 @@
From c6d4efda0fa064dfe1d3cc1b9abf8051a82cd74f Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Sat, 2 Sep 2023 10:59:55 +0800
Subject: [PATCH 056/124] LoongArch: Optimize switch with sign-extended index.
The patch refers to the submission of RISCV
7bbce9b50302959286381d9177818642bceaf301.
gcc/ChangeLog:
* config/loongarch/loongarch.cc (loongarch_extend_comparands):
In unsigned QImode test, check for sign extended subreg and/or
constant operands, and do a sign extension in that case.
* config/loongarch/loongarch.md (TARGET_64BIT): Define
template cbranchqi4.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/switch-qi.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.cc | 16 +++++++++++++---
gcc/config/loongarch/loongarch.md | 10 +++++++---
gcc/testsuite/gcc.target/loongarch/switch-qi.c | 16 ++++++++++++++++
3 files changed, 36 insertions(+), 6 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/switch-qi.c
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 7b48e3216..41d344b82 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4221,11 +4221,21 @@ loongarch_allocate_fcc (machine_mode mode)
static void
loongarch_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
{
- /* Comparisons consider all XLEN bits, so extend sub-XLEN values. */
+ /* Comparisons consider all GRLEN bits, so extend sub-GRLEN values. */
if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)))
{
- /* TODO: checkout It is more profitable to zero-extend QImode values. */
- if (unsigned_condition (code) == code && GET_MODE (*op0) == QImode)
+ /* It is more profitable to zero-extend QImode values. But not if the
+ first operand has already been sign-extended, and the second one is
+ is a constant or has already been sign-extended also. */
+ if (unsigned_condition (code) == code
+ && (GET_MODE (*op0) == QImode
+ && ! (GET_CODE (*op0) == SUBREG
+ && SUBREG_PROMOTED_VAR_P (*op0)
+ && SUBREG_PROMOTED_SIGNED_P (*op0)
+ && (CONST_INT_P (*op1)
+ || (GET_CODE (*op1) == SUBREG
+ && SUBREG_PROMOTED_VAR_P (*op1)
+ && SUBREG_PROMOTED_SIGNED_P (*op1))))))
{
*op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0);
if (CONST_INT_P (*op1))
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index cf7441e0b..a5e9352ca 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -357,7 +357,7 @@
;; pointer-sized quantities. Exactly one of the two alternatives will match.
(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
-;; Likewise, but for XLEN-sized quantities.
+;; Likewise, but for GRLEN-sized quantities.
(define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")])
;; 64-bit modes for which we provide move patterns.
@@ -2733,11 +2733,15 @@
[(set_attr "type" "branch")])
+;; Branches operate on GRLEN-sized quantities, but for LoongArch64 we accept
+;; QImode values so we can force zero-extension.
+(define_mode_iterator BR [(QI "TARGET_64BIT") SI (DI "TARGET_64BIT")])
+
(define_expand "cbranch<mode>4"
[(set (pc)
(if_then_else (match_operator 0 "comparison_operator"
- [(match_operand:GPR 1 "register_operand")
- (match_operand:GPR 2 "nonmemory_operand")])
+ [(match_operand:BR 1 "register_operand")
+ (match_operand:BR 2 "nonmemory_operand")])
(label_ref (match_operand 3 ""))
(pc)))]
""
diff --git a/gcc/testsuite/gcc.target/loongarch/switch-qi.c b/gcc/testsuite/gcc.target/loongarch/switch-qi.c
new file mode 100644
index 000000000..dd192fd49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/switch-qi.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-march=loongarch64 -mabi=lp64d" } */
+/* { dg-final { scan-assembler-not "bstrpick" } } */
+
+/* Test for loongarch_extend_comparands patch. */
+extern void asdf (int);
+void
+foo (signed char x) {
+ switch (x) {
+ case 0: asdf (10); break;
+ case 1: asdf (11); break;
+ case 2: asdf (12); break;
+ case 3: asdf (13); break;
+ case 4: asdf (14); break;
+ }
+}
--
2.33.0

View File

@ -0,0 +1,810 @@
From d3615b555d6885dba298f7b339740be11cb65a8f Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Tue, 29 Nov 2022 16:06:12 +0800
Subject: [PATCH 033/124] LoongArch: Optimize the implementation of stack
check.
The old stack check was performed before the stack was dropped,
which would cause the detection tool to report a memory leak.
The current stack check scheme is as follows:
'-fstack-clash-protection':
1. When the frame->total_size is smaller than the guard page size,
the stack is dropped according to the original scheme, and there
is no need to perform stack detection in the prologue.
2. When frame->total_size is greater than or equal to guard page size,
the first step to drop the stack is to drop the space required by
the caller-save registers. This space needs to save the caller-save
registers, so an implicit stack check is performed.
So just need to check the rest of the stack space.
'-fstack-check':
There is no one-time stack drop and then page-by-page detection as
described in the document. It is also the same as
'-fstack-clash-protection', which is detected immediately after page drop.
It is judged that when frame->total_size is not 0, only the size required
to save the s register is dropped for the first stack down.
The test cases are referenced from aarch64.
gcc/ChangeLog:
* config/loongarch/linux.h (STACK_CHECK_MOVING_SP):
Define this macro to 1.
* config/loongarch/loongarch.cc (STACK_CLASH_PROTECTION_GUARD_SIZE):
Size of guard page.
(loongarch_first_stack_step): Return the size of the first drop stack
according to whether stack checking is performed.
(loongarch_emit_probe_stack_range): Adjust the method of stack checking in prologue.
(loongarch_output_probe_stack_range): Delete useless code.
(loongarch_expand_prologue): Adjust the method of stack checking in prologue.
(loongarch_option_override_internal): Enforce that interval is the same
size as size so the mid-end does the right thing.
* config/loongarch/loongarch.h (STACK_CLASH_MAX_UNROLL_PAGES):
New macro decide whether to loop stack detection.
gcc/testsuite/ChangeLog:
* lib/target-supports.exp:
* gcc.target/loongarch/stack-check-alloca-1.c: New test.
* gcc.target/loongarch/stack-check-alloca-2.c: New test.
* gcc.target/loongarch/stack-check-alloca-3.c: New test.
* gcc.target/loongarch/stack-check-alloca-4.c: New test.
* gcc.target/loongarch/stack-check-alloca-5.c: New test.
* gcc.target/loongarch/stack-check-alloca-6.c: New test.
* gcc.target/loongarch/stack-check-alloca.h: New test.
* gcc.target/loongarch/stack-check-cfa-1.c: New test.
* gcc.target/loongarch/stack-check-cfa-2.c: New test.
* gcc.target/loongarch/stack-check-prologue-1.c: New test.
* gcc.target/loongarch/stack-check-prologue-2.c: New test.
* gcc.target/loongarch/stack-check-prologue-3.c: New test.
* gcc.target/loongarch/stack-check-prologue-4.c: New test.
* gcc.target/loongarch/stack-check-prologue-5.c: New test.
* gcc.target/loongarch/stack-check-prologue-6.c: New test.
* gcc.target/loongarch/stack-check-prologue-7.c: New test.
* gcc.target/loongarch/stack-check-prologue.h: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/linux.h | 3 +
gcc/config/loongarch/loongarch.cc | 248 +++++++++++-------
gcc/config/loongarch/loongarch.h | 4 +
.../loongarch/stack-check-alloca-1.c | 15 ++
.../loongarch/stack-check-alloca-2.c | 12 +
.../loongarch/stack-check-alloca-3.c | 12 +
.../loongarch/stack-check-alloca-4.c | 12 +
.../loongarch/stack-check-alloca-5.c | 13 +
.../loongarch/stack-check-alloca-6.c | 13 +
.../gcc.target/loongarch/stack-check-alloca.h | 15 ++
.../gcc.target/loongarch/stack-check-cfa-1.c | 12 +
.../gcc.target/loongarch/stack-check-cfa-2.c | 12 +
.../loongarch/stack-check-prologue-1.c | 11 +
.../loongarch/stack-check-prologue-2.c | 11 +
.../loongarch/stack-check-prologue-3.c | 11 +
.../loongarch/stack-check-prologue-4.c | 11 +
.../loongarch/stack-check-prologue-5.c | 12 +
.../loongarch/stack-check-prologue-6.c | 11 +
.../loongarch/stack-check-prologue-7.c | 12 +
.../loongarch/stack-check-prologue.h | 5 +
gcc/testsuite/lib/target-supports.exp | 7 +-
21 files changed, 361 insertions(+), 101 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-2.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-3.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-4.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-5.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-6.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca.h
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-1.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-2.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-3.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-4.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-5.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-6.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-7.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue.h
diff --git a/gcc/config/loongarch/linux.h b/gcc/config/loongarch/linux.h
index 110d0fab9..00039ac18 100644
--- a/gcc/config/loongarch/linux.h
+++ b/gcc/config/loongarch/linux.h
@@ -48,3 +48,6 @@ along with GCC; see the file COPYING3. If not see
#define STACK_CHECK_PROTECT (TARGET_64BIT ? 16 * 1024 : 12 * 1024)
#define TARGET_ASM_FILE_END file_end_indicate_exec_stack
+
+/* The stack pointer needs to be moved while checking the stack. */
+#define STACK_CHECK_MOVING_SP 1
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index f45a49f90..e59edc4cd 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -257,6 +257,10 @@ const char *const
loongarch_fp_conditions[16]= {LARCH_FP_CONDITIONS (STRINGIFY)};
#undef STRINGIFY
+/* Size of guard page. */
+#define STACK_CLASH_PROTECTION_GUARD_SIZE \
+ (1 << param_stack_clash_protection_guard_size)
+
/* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at
least PARM_BOUNDARY bits of alignment, but will be given anything up
to PREFERRED_STACK_BOUNDARY bits if the type requires it. */
@@ -1069,11 +1073,20 @@ loongarch_restore_reg (rtx reg, rtx mem)
static HOST_WIDE_INT
loongarch_first_stack_step (struct loongarch_frame_info *frame)
{
+ HOST_WIDE_INT min_first_step
+ = LARCH_STACK_ALIGN (frame->total_size - frame->fp_sp_offset);
+
+ /* When stack checking is required, if the sum of frame->total_size
+ and stack_check_protect is greater than stack clash protection guard
+ size, then return min_first_step. */
+ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+ || (flag_stack_clash_protection
+ && frame->total_size > STACK_CLASH_PROTECTION_GUARD_SIZE))
+ return min_first_step;
+
if (IMM12_OPERAND (frame->total_size))
return frame->total_size;
- HOST_WIDE_INT min_first_step
- = LARCH_STACK_ALIGN (frame->total_size - frame->fp_sp_offset);
HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
HOST_WIDE_INT min_second_step = frame->total_size - max_first_step;
gcc_assert (min_first_step <= max_first_step);
@@ -1106,103 +1119,109 @@ loongarch_emit_stack_tie (void)
static void
loongarch_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
- /* See if we have a constant small number of probes to generate. If so,
- that's the easy case. */
- if ((TARGET_64BIT && (first + size <= 32768))
- || (!TARGET_64BIT && (first + size <= 2048)))
- {
- HOST_WIDE_INT i;
+ HOST_WIDE_INT rounded_size;
+ HOST_WIDE_INT interval;
- /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
- it exceeds SIZE. If only one probe is needed, this will not
- generate any code. Then probe at FIRST + SIZE. */
- for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
- -(first + i)));
+ if (flag_stack_clash_protection)
+ interval = STACK_CLASH_PROTECTION_GUARD_SIZE;
+ else
+ interval = PROBE_INTERVAL;
- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
- -(first + size)));
- }
+ rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode);
+ rtx r14 = LARCH_PROLOGUE_TEMP3 (Pmode);
- /* Otherwise, do the same as above, but in a loop. Note that we must be
- extra careful with variables wrapping around because we might be at
- the very top (or the very bottom) of the address space and we have
- to be able to handle this case properly; in particular, we use an
- equality test for the loop condition. */
- else
- {
- HOST_WIDE_INT rounded_size;
- rtx r13 = LARCH_PROLOGUE_TEMP (Pmode);
- rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode);
- rtx r14 = LARCH_PROLOGUE_TEMP3 (Pmode);
+ size = size + first;
- /* Sanity check for the addressing mode we're going to use. */
- gcc_assert (first <= 16384);
+ /* Sanity check for the addressing mode we're going to use. */
+ gcc_assert (first <= 16384);
+ /* Step 1: round SIZE to the previous multiple of the interval. */
- /* Step 1: round SIZE to the previous multiple of the interval. */
+ rounded_size = ROUND_DOWN (size, interval);
- rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
+ /* Step 2: compute initial and final value of the loop counter. */
- /* TEST_ADDR = SP + FIRST */
- if (first != 0)
- {
- emit_move_insn (r14, GEN_INT (first));
- emit_insn (gen_rtx_SET (r13, gen_rtx_MINUS (Pmode,
- stack_pointer_rtx,
- r14)));
- }
- else
- emit_move_insn (r13, stack_pointer_rtx);
+ emit_move_insn (r14, GEN_INT (interval));
+
+ /* If rounded_size is zero, it means that the space requested by
+ the local variable is less than the interval, and there is no
+ need to display and detect the allocated space. */
+ if (rounded_size != 0)
+ {
+ /* Step 3: the loop
+
+ do
+ {
+ TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
+ probe at TEST_ADDR
+ }
+ while (TEST_ADDR != LAST_ADDR)
- /* Step 2: compute initial and final value of the loop counter. */
+ probes at FIRST + N * PROBE_INTERVAL for values of N from 1
+ until it is equal to ROUNDED_SIZE. */
- emit_move_insn (r14, GEN_INT (PROBE_INTERVAL));
- /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
- if (rounded_size == 0)
- emit_move_insn (r12, r13);
+ if (rounded_size <= STACK_CLASH_MAX_UNROLL_PAGES * interval)
+ {
+ for (HOST_WIDE_INT i = 0; i < rounded_size; i += interval)
+ {
+ emit_insn (gen_rtx_SET (stack_pointer_rtx,
+ gen_rtx_MINUS (Pmode,
+ stack_pointer_rtx,
+ r14)));
+ emit_move_insn (gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ const0_rtx)),
+ const0_rtx);
+ emit_insn (gen_blockage ());
+ }
+ dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
+ }
else
{
emit_move_insn (r12, GEN_INT (rounded_size));
- emit_insn (gen_rtx_SET (r12, gen_rtx_MINUS (Pmode, r13, r12)));
- /* Step 3: the loop
-
- do
- {
- TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
- probe at TEST_ADDR
- }
- while (TEST_ADDR != LAST_ADDR)
-
- probes at FIRST + N * PROBE_INTERVAL for values of N from 1
- until it is equal to ROUNDED_SIZE. */
-
- emit_insn (gen_probe_stack_range (Pmode, r13, r13, r12, r14));
+ emit_insn (gen_rtx_SET (r12,
+ gen_rtx_MINUS (Pmode,
+ stack_pointer_rtx,
+ r12)));
+
+ emit_insn (gen_probe_stack_range (Pmode, stack_pointer_rtx,
+ stack_pointer_rtx, r12, r14));
+ emit_insn (gen_blockage ());
+ dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
}
+ }
+ else
+ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
+
- /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
- that SIZE is equal to ROUNDED_SIZE. */
+ /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
+ that SIZE is equal to ROUNDED_SIZE. */
- if (size != rounded_size)
+ if (size != rounded_size)
+ {
+ if (size - rounded_size >= 2048)
{
- if (TARGET_64BIT)
- emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
- else
- {
- HOST_WIDE_INT i;
- for (i = 2048; i < (size - rounded_size); i += 2048)
- {
- emit_stack_probe (plus_constant (Pmode, r12, -i));
- emit_insn (gen_rtx_SET (r12,
- plus_constant (Pmode, r12, -2048)));
- }
- rtx r1 = plus_constant (Pmode, r12,
- -(size - rounded_size - i + 2048));
- emit_stack_probe (r1);
- }
+ emit_move_insn (r14, GEN_INT (size - rounded_size));
+ emit_insn (gen_rtx_SET (stack_pointer_rtx,
+ gen_rtx_MINUS (Pmode,
+ stack_pointer_rtx,
+ r14)));
}
+ else
+ emit_insn (gen_rtx_SET (stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx,
+ GEN_INT (rounded_size - size))));
}
+ if (first)
+ {
+ emit_move_insn (r12, GEN_INT (first));
+ emit_insn (gen_rtx_SET (stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode,
+ stack_pointer_rtx, r12)));
+ }
/* Make sure nothing is scheduled before we are done. */
emit_insn (gen_blockage ());
}
@@ -1223,7 +1242,6 @@ loongarch_output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3)
/* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
xops[0] = reg1;
- xops[1] = GEN_INT (-PROBE_INTERVAL);
xops[2] = reg3;
if (TARGET_64BIT)
output_asm_insn ("sub.d\t%0,%0,%2", xops);
@@ -1249,28 +1267,11 @@ loongarch_expand_prologue (void)
{
struct loongarch_frame_info *frame = &cfun->machine->frame;
HOST_WIDE_INT size = frame->total_size;
- HOST_WIDE_INT tmp;
rtx insn;
if (flag_stack_usage_info)
current_function_static_stack_size = size;
- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
- || flag_stack_clash_protection)
- {
- if (crtl->is_leaf && !cfun->calls_alloca)
- {
- if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
- {
- tmp = size - get_stack_check_protect ();
- loongarch_emit_probe_stack_range (get_stack_check_protect (),
- tmp);
- }
- }
- else if (size > 0)
- loongarch_emit_probe_stack_range (get_stack_check_protect (), size);
- }
-
/* Save the registers. */
if ((frame->mask | frame->fmask) != 0)
{
@@ -1283,7 +1284,6 @@ loongarch_expand_prologue (void)
loongarch_for_each_saved_reg (size, loongarch_save_reg);
}
-
/* Set up the frame pointer, if we're using one. */
if (frame_pointer_needed)
{
@@ -1294,7 +1294,45 @@ loongarch_expand_prologue (void)
loongarch_emit_stack_tie ();
}
- /* Allocate the rest of the frame. */
+ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
+ || flag_stack_clash_protection)
+ {
+ HOST_WIDE_INT first = get_stack_check_protect ();
+
+ if (frame->total_size == 0)
+ {
+ /* do nothing. */
+ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
+ return;
+ }
+
+ if (crtl->is_leaf && !cfun->calls_alloca)
+ {
+ HOST_WIDE_INT interval;
+
+ if (flag_stack_clash_protection)
+ interval = STACK_CLASH_PROTECTION_GUARD_SIZE;
+ else
+ interval = PROBE_INTERVAL;
+
+ if (size > interval && size > first)
+ loongarch_emit_probe_stack_range (first, size - first);
+ else
+ loongarch_emit_probe_stack_range (first, size);
+ }
+ else
+ loongarch_emit_probe_stack_range (first, size);
+
+ if (size > 0)
+ {
+ /* Describe the effect of the previous instructions. */
+ insn = plus_constant (Pmode, stack_pointer_rtx, -size);
+ insn = gen_rtx_SET (stack_pointer_rtx, insn);
+ loongarch_set_frame_expr (insn);
+ }
+ return;
+ }
+
if (size > 0)
{
if (IMM12_OPERAND (-size))
@@ -1305,7 +1343,8 @@ loongarch_expand_prologue (void)
}
else
{
- loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (-size));
+ loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode),
+ GEN_INT (-size));
emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
LARCH_PROLOGUE_TEMP (Pmode)));
@@ -6162,6 +6201,15 @@ loongarch_option_override_internal (struct gcc_options *opts)
gcc_unreachable ();
}
+ /* Validate the guard size. */
+ int guard_size = param_stack_clash_protection_guard_size;
+
+ /* Enforce that interval is the same size as size so the mid-end does the
+ right thing. */
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ param_stack_clash_protection_probe_interval,
+ guard_size);
+
loongarch_init_print_operand_punct ();
/* Set up array to map GCC register number to debug register number.
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index a52a81adf..392597943 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -668,6 +668,10 @@ enum reg_class
#define STACK_BOUNDARY (TARGET_ABI_LP64 ? 128 : 64)
+/* This value controls how many pages we manually unroll the loop for when
+ generating stack clash probes. */
+#define STACK_CLASH_MAX_UNROLL_PAGES 4
+
/* Symbolic macros for the registers used to return integer and floating
point values. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c
new file mode 100644
index 000000000..6ee589c4b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-require-effective-target alloca } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE y
+#include "stack-check-alloca.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r\d{1,2},-8} 1 } } */
+/* { dg-final { scan-assembler-times {stx\.d\t\$r0,\$r3,\$r12} 1 } } */
+
+/* Dynamic alloca, expect loop, and 1 probes with top at sp.
+ 1st probe is inside the loop for the full guard-size allocations, second
+ probe is for the case where residual is zero. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-2.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-2.c
new file mode 100644
index 000000000..8deaa5873
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-require-effective-target alloca } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 0
+#include "stack-check-alloca.h"
+
+/* { dg-final { scan-assembler-not {stp*t*r*\.d\t\$r0,\$r3,4088} } } */
+
+/* Alloca of 0 should emit no probes, boundary condition. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-3.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-3.c
new file mode 100644
index 000000000..e326ba9a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-3.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-require-effective-target alloca } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 100
+#include "stack-check-alloca.h"
+
+/* { dg-final { scan-assembler-times {st\.d\t\$r0,\$r3,104} 1 } } */
+
+/* Alloca is less than guard-size, 1 probe at the top of the new allocation. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-4.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-4.c
new file mode 100644
index 000000000..b9f7572de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-4.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-require-effective-target alloca } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 64 * 1024
+#include "stack-check-alloca.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r\d{1,2},-8} 1 } } */
+
+/* Alloca is exactly one guard-size, 1 probe expected at top. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-5.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-5.c
new file mode 100644
index 000000000..0ff6e493f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-5.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-require-effective-target alloca } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 65 * 1024
+#include "stack-check-alloca.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r\d{1,2},-8} 1 } } */
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,1016} 1 } } */
+
+/* Alloca is more than one guard-page. 2 probes expected. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-6.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-6.c
new file mode 100644
index 000000000..c5cf74fcb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-6.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-require-effective-target alloca } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 127 * 64 * 1024
+#include "stack-check-alloca.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r\d{1,2},-8} 1 } } */
+
+/* Large alloca of a constant amount which is a multiple of a guard-size.
+ Loop expected with top probe. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca.h b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca.h
new file mode 100644
index 000000000..8c75f6c0f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca.h
@@ -0,0 +1,15 @@
+
+/* Avoid inclusion of alloca.h, unavailable on some systems. */
+#define alloca __builtin_alloca
+
+__attribute__((noinline, noipa))
+void g (char* ptr, int y)
+{
+ ptr[y] = '\0';
+}
+
+void f_caller (int y)
+{
+ char* pStr = alloca(SIZE);
+ g (pStr, y);
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
new file mode 100644
index 000000000..f0c6877fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -funwind-tables" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 128*1024
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 131088} 1 } } */
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1 } } */
+
+/* Checks that the CFA notes are correct for every sp adjustment. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
new file mode 100644
index 000000000..c6e07bc56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -funwind-tables" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 1280*1024 + 512
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 1311248} 1 } } */
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1 } } */
+
+/* Checks that the CFA notes are correct for every sp adjustment. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-1.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-1.c
new file mode 100644
index 000000000..351bc1f61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 128
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,0} 0 } } */
+
+/* SIZE is smaller than guard-size so no probe expected. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-2.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-2.c
new file mode 100644
index 000000000..6bba659a3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 63 * 1024
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*.d\t\$r0,\$r3,0} 0 } } */
+
+/* SIZE is smaller than guard-size so no probe expected. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-3.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-3.c
new file mode 100644
index 000000000..164956c37
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-3.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 64 * 1024
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,0} 1 } } */
+
+/* SIZE is equal to guard-size, 1 probe expected, boundary condition. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-4.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-4.c
new file mode 100644
index 000000000..f53da6b0d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 65 * 1024
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,0} 1 } } */
+
+/* SIZE is more than guard-size 1 probe expected. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-5.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-5.c
new file mode 100644
index 000000000..c092317ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-5.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 127 * 1024
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,0} 1 } } */
+
+/* SIZE is more than 1x guard-size and remainder small than guard-size,
+ 1 probe expected, unrolled, no loop. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-6.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-6.c
new file mode 100644
index 000000000..70a2f53f6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-6.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 128 * 1024
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,0} 2 } } */
+
+/* SIZE is more than 2x guard-size and no remainder, unrolled, no loop. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-7.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-7.c
new file mode 100644
index 000000000..e2df89acc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-7.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
+/* { dg-require-effective-target supports_stack_clash_protection } */
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
+
+#define SIZE 6 * 64 * 1024
+#include "stack-check-prologue.h"
+
+/* { dg-final { scan-assembler-times {stp*t*r*.d\t\$r0,\$r3,0} 1 } } */
+
+/* SIZE is more than 4x guard-size and no remainder, 1 probe expected in a loop
+ and no residual probe. */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue.h b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue.h
new file mode 100644
index 000000000..b7e06aedb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue.h
@@ -0,0 +1,5 @@
+int f_test (int x)
+{
+ char arr[SIZE];
+ return arr[x];
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index c858bd93b..3a326ea1c 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -11292,7 +11292,8 @@ proc check_effective_target_supports_stack_clash_protection { } {
if { [istarget x86_64-*-*] || [istarget i?86-*-*]
|| [istarget powerpc*-*-*] || [istarget rs6000*-*-*]
- || [istarget aarch64*-**] || [istarget s390*-*-*] } {
+ || [istarget aarch64*-**] || [istarget s390*-*-*]
+ || [istarget loongarch64*-**] } {
return 1
}
return 0
@@ -11343,6 +11344,10 @@ proc check_effective_target_caller_implicit_probes { } {
return 1;
}
+ if { [istarget loongarch64*-*-*] } {
+ return 1;
+ }
+
return 0
}
--
2.33.0

View File

@ -0,0 +1,232 @@
From aa1dc79c9a5ff3df241a94cbfb1c857cfa89c686 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Tue, 5 Sep 2023 11:09:03 +0800
Subject: [PATCH 074/124] LoongArch: Optimized multiply instruction generation.
1. Can generate mulh.w[u] instruction.
2. Can generate mulw.d.wu instruction.
gcc/ChangeLog:
* config/loongarch/loongarch.md (mulsidi3_64bit):
Field unsigned extension support.
(<u>muldi3_highpart): Modify template name.
(<u>mulsi3_highpart): Likewise.
(<u>mulsidi3_64bit): Field unsigned extension support.
(<su>muldi3_highpart): Modify muldi3_highpart to
smuldi3_highpart.
(<su>mulsi3_highpart): Modify mulsi3_highpart to
smulsi3_highpart.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/mulw_d_wu.c: New test.
* gcc.target/loongarch/smuldi3_highpart.c: New test.
* gcc.target/loongarch/smulsi3_highpart.c: New test.
* gcc.target/loongarch/umulsi3_highpart.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.md | 66 ++++++++++++-------
.../gcc.target/loongarch/mulw_d_wu.c | 9 +++
.../gcc.target/loongarch/smuldi3_highpart.c | 13 ++++
.../gcc.target/loongarch/smulsi3_highpart.c | 15 +++++
.../gcc.target/loongarch/umulsi3_highpart.c | 14 ++++
5 files changed, 94 insertions(+), 23 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 11c18bf15..264cd325c 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -750,15 +750,6 @@
[(set_attr "type" "imul")
(set_attr "mode" "<MODE>")])
-(define_insn "mulsidi3_64bit"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
- (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
- "TARGET_64BIT"
- "mulw.d.w\t%0,%1,%2"
- [(set_attr "type" "imul")
- (set_attr "mode" "DI")])
-
(define_insn "*mulsi3_extended"
[(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI
@@ -787,14 +778,14 @@
emit_insn (gen_muldi3 (low, operands[1], operands[2]));
rtx high = gen_reg_rtx (DImode);
- emit_insn (gen_<u>muldi3_highpart (high, operands[1], operands[2]));
+ emit_insn (gen_<su>muldi3_highpart (high, operands[1], operands[2]));
emit_move_insn (gen_lowpart (DImode, operands[0]), low);
emit_move_insn (gen_highpart (DImode, operands[0]), high);
DONE;
})
-(define_insn "<u>muldi3_highpart"
+(define_insn "<su>muldi3_highpart"
[(set (match_operand:DI 0 "register_operand" "=r")
(truncate:DI
(lshiftrt:TI
@@ -809,22 +800,34 @@
(set_attr "mode" "DI")])
(define_expand "<u>mulsidi3"
- [(set (match_operand:DI 0 "register_operand" "=r")
+ [(set (match_operand:DI 0 "register_operand")
(mult:DI (any_extend:DI
- (match_operand:SI 1 "register_operand" " r"))
+ (match_operand:SI 1 "register_operand"))
(any_extend:DI
- (match_operand:SI 2 "register_operand" " r"))))]
- "!TARGET_64BIT"
+ (match_operand:SI 2 "register_operand"))))]
+ ""
{
- rtx temp = gen_reg_rtx (SImode);
- emit_insn (gen_mulsi3 (temp, operands[1], operands[2]));
- emit_insn (gen_<u>mulsi3_highpart (loongarch_subword (operands[0], true),
- operands[1], operands[2]));
- emit_insn (gen_movsi (loongarch_subword (operands[0], false), temp));
- DONE;
+ if (!TARGET_64BIT)
+ {
+ rtx temp = gen_reg_rtx (SImode);
+ emit_insn (gen_mulsi3 (temp, operands[1], operands[2]));
+ emit_insn (gen_<su>mulsi3_highpart (loongarch_subword (operands[0], true),
+ operands[1], operands[2]));
+ emit_insn (gen_movsi (loongarch_subword (operands[0], false), temp));
+ DONE;
+ }
})
-(define_insn "<u>mulsi3_highpart"
+(define_insn "<u>mulsidi3_64bit"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand" "r"))
+ (any_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
+ "TARGET_64BIT"
+ "mulw.d.w<u>\t%0,%1,%2"
+ [(set_attr "type" "imul")
+ (set_attr "mode" "DI")])
+
+(define_insn "<su>mulsi3_highpart"
[(set (match_operand:SI 0 "register_operand" "=r")
(truncate:SI
(lshiftrt:DI
@@ -833,11 +836,28 @@
(any_extend:DI
(match_operand:SI 2 "register_operand" " r")))
(const_int 32))))]
- "!TARGET_64BIT"
+ ""
"mulh.w<u>\t%0,%1,%2"
[(set_attr "type" "imul")
(set_attr "mode" "SI")])
+;; Under the LoongArch architecture, the mulh.w[u] instruction performs
+;; sign extension by default, so the sign extension instruction can be
+;; eliminated.
+(define_peephole
+ [(set (match_operand:SI 0 "register_operand")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI (any_extend:DI
+ (match_operand:SI 1 "register_operand"))
+ (any_extend:DI
+ (match_operand:SI 2 "register_operand")))
+ (const_int 32))))
+ (set (match_operand:DI 3 "register_operand")
+ (sign_extend:DI (match_dup 0)))]
+ "TARGET_64BIT && REGNO (operands[0]) == REGNO (operands[3])"
+ "mulh.w<u>\t%0,%1,%2")
+
;;
;; ....................
;;
diff --git a/gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c b/gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c
new file mode 100644
index 000000000..16163d667
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d" } */
+/* { dg-final { scan-assembler "mulw.d.wu" } } */
+
+__attribute__((noipa, noinline)) unsigned long
+f(unsigned long a, unsigned long b)
+{
+ return (unsigned long)(unsigned int)a * (unsigned long)(unsigned int)b;
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c b/gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c
new file mode 100644
index 000000000..6f5c686ca
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2 -fdump-rtl-expand-all" } */
+
+typedef int TI __attribute ((mode(TI)));
+typedef int DI __attribute__((mode(DI)));
+
+DI
+test (DI x, DI y)
+{
+ return ((TI)x * y) >> 64;
+}
+
+/* { dg-final { scan-rtl-dump "highparttmp" "expand" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c b/gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c
new file mode 100644
index 000000000..c4dbf8afc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-expand-all" } */
+
+typedef unsigned int DI __attribute__((mode(DI)));
+typedef unsigned int SI __attribute__((mode(SI)));
+
+SI
+f (SI x, SI y)
+{
+ return ((DI) x * y) >> 32;
+}
+
+/* { dg-final { scan-rtl-dump "highparttmp" "expand" } } */
+/* { dg-final { scan-assembler "mulh\\.w" } } */
+/* { dg-final { scan-assembler-not "slli\\.w" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c b/gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c
new file mode 100644
index 000000000..e208803e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef unsigned int DI __attribute__((mode(DI)));
+typedef unsigned int SI __attribute__((mode(SI)));
+
+SI
+f (SI x, SI y)
+{
+ return ((DI) x * y) >> 32;
+}
+
+/* { dg-final { scan-assembler "mulh\\.wu" } } */
+/* { dg-final { scan-assembler-not "slli\\.w" } } */
--
2.33.0

View File

@ -0,0 +1,44 @@
From aa2d9e0e1dc4bf0b612618cf0e3fcea514f92f95 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Tue, 13 Sep 2022 23:21:39 +0800
Subject: [PATCH 018/124] LoongArch: Prepare static PIE support
Static PIE allows us to extend the ASLR to cover static executables and
it's not too difficult to support it. On GCC side, we just pass a group
of options to the linker, like other ports with static PIE support.
The real implementation of static PIE (rcrt1.o) will be added into Glibc
later.
gcc/ChangeLog:
* config/loongarch/gnu-user.h (GNU_USER_TARGET_LINK_SPEC): For
-static-pie, pass -static -pie --no-dynamic-linker -z text to
the linker, and do not pass --dynamic-linker.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/gnu-user.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
index 664dc9206..c5b1afe53 100644
--- a/gcc/config/loongarch/gnu-user.h
+++ b/gcc/config/loongarch/gnu-user.h
@@ -40,8 +40,10 @@ along with GCC; see the file COPYING3. If not see
#undef GNU_USER_TARGET_LINK_SPEC
#define GNU_USER_TARGET_LINK_SPEC \
"%{G*} %{shared} -m " GNU_USER_LINK_EMULATION \
- "%{!shared: %{static} %{!static: %{rdynamic:-export-dynamic} " \
- "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}"
+ "%{!shared: %{static} " \
+ "%{!static: %{!static-pie: %{rdynamic:-export-dynamic} " \
+ "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} " \
+ "%{static-pie: -static -pie --no-dynamic-linker -z text}}"
/* Similar to standard Linux, but adding -ffast-math support. */
--
2.33.0

View File

@ -0,0 +1,100 @@
From b065c84206cdf463a377ca28f719dae7acbed0f7 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Tue, 16 Aug 2022 15:34:36 +0800
Subject: [PATCH 009/124] LoongArch: Provide fmin/fmax RTL pattern
We already had smin/smax RTL pattern using fmin/fmax instruction. But
for smin/smax, it's unspecified what will happen if either operand is
NaN. So we would generate calls to libc fmin/fmax functions with
-fno-finite-math-only (the default for all optimization levels expect
-Ofast).
But, LoongArch fmin/fmax instruction is IEEE-754-2008 conformant so we
can also use the instruction for fmin/fmax pattern and avoid the library
function call.
gcc/ChangeLog:
* config/loongarch/loongarch.md (fmax<mode>3): New RTL pattern.
(fmin<mode>3): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/fmax-fmin.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.md | 18 +++++++++++
.../gcc.target/loongarch/fmax-fmin.c | 30 +++++++++++++++++++
2 files changed, 48 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/loongarch/fmax-fmin.c
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 6b6df22a5..8e8868de9 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1023,6 +1023,24 @@
[(set_attr "type" "fmove")
(set_attr "mode" "<MODE>")])
+(define_insn "fmax<mode>3"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (smax:ANYF (match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:ANYF 2 "register_operand" "f")))]
+ ""
+ "fmax.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fmove")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "fmin<mode>3"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (smin:ANYF (match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:ANYF 2 "register_operand" "f")))]
+ ""
+ "fmin.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fmove")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "smaxa<mode>3"
[(set (match_operand:ANYF 0 "register_operand" "=f")
(if_then_else:ANYF
diff --git a/gcc/testsuite/gcc.target/loongarch/fmax-fmin.c b/gcc/testsuite/gcc.target/loongarch/fmax-fmin.c
new file mode 100644
index 000000000..92cf8a150
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/fmax-fmin.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-mdouble-float -fno-finite-math-only" } */
+/* { dg-final { scan-assembler "fmin\\.s" } } */
+/* { dg-final { scan-assembler "fmin\\.d" } } */
+/* { dg-final { scan-assembler "fmax\\.s" } } */
+/* { dg-final { scan-assembler "fmax\\.d" } } */
+
+double
+_fmax(double a, double b)
+{
+ return __builtin_fmax(a, b);
+}
+
+float
+_fmaxf(float a, float b)
+{
+ return __builtin_fmaxf(a, b);
+}
+
+double
+_fmin(double a, double b)
+{
+ return __builtin_fmin(a, b);
+}
+
+float
+_fminf(float a, float b)
+{
+ return __builtin_fminf(a, b);
+}
--
2.33.0

View File

@ -0,0 +1,180 @@
From fbe6421c5600a151dbae96d18db2fd31aca2fe7c Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Thu, 24 Aug 2023 16:44:56 +0800
Subject: [PATCH 051/124] LoongArch: Remove redundant sign extension
instructions caused by SLT instructions.
Since the SLT instruction does not distinguish between 64-bit operations and 32-bit
operations under the 64-bit LoongArch architecture, if the operand of slt is SImode,
the sign extension of the operand needs to be displayed.
But similar to the test case below, the sign extension is redundant:
extern int src1, src2, src3;
int
test (void)
{
int data1 = src1 + src2;
int data2 = src1 + src3;
return data1 > data2 ? data1 : data2;
}
Assembly code before optimization:
...
add.w $r4,$r4,$r14
add.w $r13,$r13,$r14
slli.w $r12,$r4,0
slli.w $r14,$r13,0
slt $r12,$r12,$r14
masknez $r4,$r4,$r12
maskeqz $r12,$r13,$r12
or $r4,$r4,$r12
slli.w $r4,$r4,0
...
After optimization:
...
add.w $r12,$r12,$r14
add.w $r13,$r13,$r14
slt $r4,$r12,$r13
masknez $r12,$r12,$r4
maskeqz $r4,$r13,$r4
or $r4,$r12,$r4
...
Similar to this test example, the two operands of SLT are obtained by the
addition operation, and add.w implicitly sign-extends, so the two operands
of SLT do not require sign-extend.
gcc/ChangeLog:
* config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
Optimize the function implementation.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/slt-sign-extend.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.cc | 53 +++++++++++++++++--
.../gcc.target/loongarch/slt-sign-extend.c | 14 +++++
2 files changed, 63 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index f14de5cce..caacfa8a3 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4380,14 +4380,30 @@ loongarch_expand_conditional_move (rtx *operands)
enum rtx_code code = GET_CODE (operands[1]);
rtx op0 = XEXP (operands[1], 0);
rtx op1 = XEXP (operands[1], 1);
+ rtx op0_extend = op0;
+ rtx op1_extend = op1;
+
+ /* Record whether operands[2] and operands[3] modes are promoted to word_mode. */
+ bool promote_p = false;
+ machine_mode mode = GET_MODE (operands[0]);
if (FLOAT_MODE_P (GET_MODE (op1)))
loongarch_emit_float_compare (&code, &op0, &op1);
else
{
+ if ((REGNO (op0) == REGNO (operands[2])
+ || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
+ && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
+ {
+ mode = word_mode;
+ promote_p = true;
+ }
+
loongarch_extend_comparands (code, &op0, &op1);
op0 = force_reg (word_mode, op0);
+ op0_extend = op0;
+ op1_extend = force_reg (word_mode, op1);
if (code == EQ || code == NE)
{
@@ -4414,23 +4430,52 @@ loongarch_expand_conditional_move (rtx *operands)
&& register_operand (operands[2], VOIDmode)
&& register_operand (operands[3], VOIDmode))
{
- machine_mode mode = GET_MODE (operands[0]);
+ rtx op2 = operands[2];
+ rtx op3 = operands[3];
+
+ if (promote_p)
+ {
+ if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
+ op2 = op0_extend;
+ else
+ {
+ loongarch_extend_comparands (code, &op2, &const0_rtx);
+ op2 = force_reg (mode, op2);
+ }
+
+ if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
+ op3 = op1_extend;
+ else
+ {
+ loongarch_extend_comparands (code, &op3, &const0_rtx);
+ op3 = force_reg (mode, op3);
+ }
+ }
+
rtx temp = gen_reg_rtx (mode);
rtx temp2 = gen_reg_rtx (mode);
emit_insn (gen_rtx_SET (temp,
gen_rtx_IF_THEN_ELSE (mode, cond,
- operands[2], const0_rtx)));
+ op2, const0_rtx)));
/* Flip the test for the second operand. */
cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1);
emit_insn (gen_rtx_SET (temp2,
gen_rtx_IF_THEN_ELSE (mode, cond,
- operands[3], const0_rtx)));
+ op3, const0_rtx)));
/* Merge the two results, at least one is guaranteed to be zero. */
- emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
+ if (promote_p)
+ {
+ rtx temp3 = gen_reg_rtx (mode);
+ emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
+ temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
+ loongarch_emit_move (operands[0], temp3);
+ }
+ else
+ emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
}
else
emit_insn (gen_rtx_SET (operands[0],
diff --git a/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
new file mode 100644
index 000000000..ea6b28b7c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O2" } */
+/* { dg-final { scan-assembler-not "slli.w" } } */
+
+extern int src1, src2, src3;
+
+int
+test (void)
+{
+ int data1 = src1 + src2;
+ int data2 = src1 + src3;
+
+ return data1 > data2 ? data1 : data2;
+}
--
2.33.0

View File

@ -0,0 +1,36 @@
From 297b8c5770ad85bf468526602e28aff8a66dc01a Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Thu, 13 Apr 2023 19:24:38 +0800
Subject: [PATCH 040/124] LoongArch: Remove the definition of the macro
LOGICAL_OP_NON_SHORT_CIRCUIT under the architecture and use the default
definition instead.
In some cases, setting this macro as the default can reduce the number of conditional
branch instructions.
gcc/ChangeLog:
* config/loongarch/loongarch.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Remove the macro
definition.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index 392597943..c6e37b1b4 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -836,7 +836,6 @@ typedef struct {
1 is the default; other values are interpreted relative to that. */
#define BRANCH_COST(speed_p, predictable_p) loongarch_branch_cost
-#define LOGICAL_OP_NON_SHORT_CIRCUIT 0
/* Return the asm template for a conditional branch instruction.
OPCODE is the opcode's mnemonic and OPERANDS is the asm template for
--
2.33.0

View File

@ -0,0 +1,65 @@
From 7584716b03b13c06b8bb9956b9f49e0cfc29c6b3 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sun, 6 Nov 2022 20:41:38 +0800
Subject: [PATCH 027/124] LoongArch: Rename frint_<fmt> to rint<mode>2
Use standard name so __builtin_rint{,f} can be expanded to one
instruction.
gcc/ChangeLog:
* config/loongarch/loongarch.md (frint_<fmt>): Rename to ..
(rint<mode>2): .. this.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/frint.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.md | 4 ++--
gcc/testsuite/gcc.target/loongarch/frint.c | 16 ++++++++++++++++
2 files changed, 18 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/frint.c
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index bda34d0f3..a14ab14ac 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -2012,8 +2012,8 @@
[(set_attr "type" "move")]
)
-;; Convert floating-point numbers to integers
-(define_insn "frint_<fmt>"
+;; Round floating-point numbers to integers
+(define_insn "rint<mode>2"
[(set (match_operand:ANYF 0 "register_operand" "=f")
(unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
UNSPEC_FRINT))]
diff --git a/gcc/testsuite/gcc.target/loongarch/frint.c b/gcc/testsuite/gcc.target/loongarch/frint.c
new file mode 100644
index 000000000..3ee6a8f97
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/frint.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mdouble-float" } */
+/* { dg-final { scan-assembler "frint\\.s" } } */
+/* { dg-final { scan-assembler "frint\\.d" } } */
+
+double
+my_rint (double a)
+{
+ return __builtin_rint (a);
+}
+
+float
+my_rintf (float a)
+{
+ return __builtin_rintf (a);
+}
--
2.33.0

View File

@ -0,0 +1,113 @@
From 129d96b9ab5a2445d0fc2c3f7b72baa0453bd93f Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Wed, 14 Jun 2023 08:24:05 +0800
Subject: [PATCH 047/124] LoongArch: Set default alignment for functions and
labels with -mtune
The LA464 micro-architecture is sensitive to alignment of code. The
Loongson team has benchmarked various combinations of function, the
results [1] show that 16-byte label alignment together with 32-byte
function alignment gives best results in terms of SPEC score.
Add a mtune-based table-driven mechanism to set the default of
-falign-{functions,labels}. As LA464 is the first (and the only for
now) uarch supported by GCC, the same setting is also used for
the "generic" -mtune=loongarch64. In the future we may set different
settings for LA{2,3,6}64 once we add the support for them.
Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk?
gcc/ChangeLog:
* config/loongarch/loongarch-tune.h (loongarch_align): New
struct.
* config/loongarch/loongarch-def.h (loongarch_cpu_align): New
array.
* config/loongarch/loongarch-def.c (loongarch_cpu_align): Define
the array.
* config/loongarch/loongarch.cc
(loongarch_option_override_internal): Set the value of
-falign-functions= if -falign-functions is enabled but no value
is given. Likewise for -falign-labels=.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch-def.c | 12 ++++++++++++
gcc/config/loongarch/loongarch-def.h | 1 +
gcc/config/loongarch/loongarch-tune.h | 8 ++++++++
gcc/config/loongarch/loongarch.cc | 6 ++++++
4 files changed, 27 insertions(+)
diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
index 80ab10a52..74d422ce0 100644
--- a/gcc/config/loongarch/loongarch-def.c
+++ b/gcc/config/loongarch/loongarch-def.c
@@ -72,6 +72,18 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
},
};
+struct loongarch_align
+loongarch_cpu_align[N_TUNE_TYPES] = {
+ [CPU_LOONGARCH64] = {
+ .function = "32",
+ .label = "16",
+ },
+ [CPU_LA464] = {
+ .function = "32",
+ .label = "16",
+ },
+};
+
/* The following properties cannot be looked up directly using "cpucfg".
So it is necessary to provide a default value for "unknown native"
tune targets (i.e. -mtune=native while PRID does not correspond to
diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
index b5985f070..eb87a79a5 100644
--- a/gcc/config/loongarch/loongarch-def.h
+++ b/gcc/config/loongarch/loongarch-def.h
@@ -144,6 +144,7 @@ extern int loongarch_cpu_issue_rate[];
extern int loongarch_cpu_multipass_dfa_lookahead[];
extern struct loongarch_cache loongarch_cpu_cache[];
+extern struct loongarch_align loongarch_cpu_align[];
extern struct loongarch_rtx_cost_data loongarch_cpu_rtx_cost_data[];
#ifdef __cplusplus
diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
index 8e3eb2947..d961963f0 100644
--- a/gcc/config/loongarch/loongarch-tune.h
+++ b/gcc/config/loongarch/loongarch-tune.h
@@ -48,4 +48,12 @@ struct loongarch_cache {
int simultaneous_prefetches; /* number of parallel prefetch */
};
+/* Alignment for functions and labels for best performance. For new uarchs
+ the value should be measured via benchmarking. See the documentation for
+ -falign-functions and -falign-labels in invoke.texi for the format. */
+struct loongarch_align {
+ const char *function; /* default value for -falign-functions */
+ const char *label; /* default value for -falign-labels */
+};
+
#endif /* LOONGARCH_TUNE_H */
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 4c0f393b6..f14de5cce 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -6246,6 +6246,12 @@ loongarch_option_override_internal (struct gcc_options *opts)
&& !opts->x_optimize_size)
opts->x_flag_prefetch_loop_arrays = 1;
+ if (opts->x_flag_align_functions && !opts->x_str_align_functions)
+ opts->x_str_align_functions = loongarch_cpu_align[LARCH_ACTUAL_TUNE].function;
+
+ if (opts->x_flag_align_labels && !opts->x_str_align_labels)
+ opts->x_str_align_labels = loongarch_cpu_align[LARCH_ACTUAL_TUNE].label;
+
if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
error ("%qs cannot be used for compiling a shared library",
"-mdirect-extern-access");
--
2.33.0

View File

@ -0,0 +1,37 @@
From f4a0248c80fedff3a6841407ff95b732dfbb93a1 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Fri, 8 Sep 2023 00:29:57 +0800
Subject: [PATCH 073/124] LoongArch: Slightly simplify
loongarch_block_move_straight
gcc/ChangeLog:
* config/loongarch/loongarch.cc (loongarch_block_move_straight):
Check precondition (delta must be a power of 2) and use
popcount_hwi instead of a homebrew loop.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.cc | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index baa5c2354..baa9831aa 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -5221,9 +5221,8 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length,
emit two ld.d/st.d pairs, one ld.w/st.w pair, and one ld.b/st.b
pair. For each load/store pair we use a dedicated register to keep
the pipeline as populated as possible. */
- HOST_WIDE_INT num_reg = length / delta;
- for (delta_cur = delta / 2; delta_cur != 0; delta_cur /= 2)
- num_reg += !!(length & delta_cur);
+ gcc_assert (pow2p_hwi (delta));
+ HOST_WIDE_INT num_reg = length / delta + popcount_hwi (length % delta);
/* Allocate a buffer for the temporary registers. */
regs = XALLOCAVEC (rtx, num_reg);
--
2.33.0

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More