!400 LoongArch: Sync patch from gcc upstream
From: @ticat-fp Reviewed-by: @li-yancheng Signed-off-by: @li-yancheng
This commit is contained in:
commit
1dc8dd9fd8
59
Libvtv-Add-loongarch-support.patch
Normal file
59
Libvtv-Add-loongarch-support.patch
Normal file
@ -0,0 +1,59 @@
|
||||
From 62ea18c632200edbbf46b4e957bc4d997f1c66f0 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Tue, 27 Sep 2022 15:28:43 +0800
|
||||
Subject: [PATCH 024/124] Libvtv: Add loongarch support.
|
||||
|
||||
The loongarch64 specification permits page sizes of 4KiB, 16KiB and 64KiB,
|
||||
but only 16KiB pages are supported for now.
|
||||
|
||||
Co-Authored-By: qijingwen <qijingwen@loongson.cn>
|
||||
|
||||
include/ChangeLog:
|
||||
|
||||
* vtv-change-permission.h (defined): Determines whether the macro
|
||||
__loongarch_lp64 is defined
|
||||
(VTV_PAGE_SIZE): Set VTV_PAGE_SIZE to 16KiB for loongarch64.
|
||||
|
||||
libvtv/ChangeLog:
|
||||
|
||||
* configure.tgt: Add loongarch support.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
include/vtv-change-permission.h | 4 ++++
|
||||
libvtv/configure.tgt | 3 +++
|
||||
2 files changed, 7 insertions(+)
|
||||
|
||||
diff --git a/include/vtv-change-permission.h b/include/vtv-change-permission.h
|
||||
index 70bdad92b..e7b9294a0 100644
|
||||
--- a/include/vtv-change-permission.h
|
||||
+++ b/include/vtv-change-permission.h
|
||||
@@ -48,6 +48,10 @@ extern void __VLTChangePermission (int);
|
||||
#else
|
||||
#if defined(__sun__) && defined(__svr4__) && defined(__sparc__)
|
||||
#define VTV_PAGE_SIZE 8192
|
||||
+#elif defined(__loongarch_lp64)
|
||||
+/* The page size is configurable by the kernel to be 4, 16 or 64 KiB.
|
||||
+ For now, only the default page size of 16KiB is supported. */
|
||||
+#define VTV_PAGE_SIZE 16384
|
||||
#else
|
||||
#define VTV_PAGE_SIZE 4096
|
||||
#endif
|
||||
diff --git a/libvtv/configure.tgt b/libvtv/configure.tgt
|
||||
index aa2a3f675..6cdd1e97a 100644
|
||||
--- a/libvtv/configure.tgt
|
||||
+++ b/libvtv/configure.tgt
|
||||
@@ -50,6 +50,9 @@ case "${target}" in
|
||||
;;
|
||||
x86_64-*-darwin[1]* | i?86-*-darwin[1]*)
|
||||
;;
|
||||
+ loongarch*-*-linux*)
|
||||
+ VTV_SUPPORTED=yes
|
||||
+ ;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
--
|
||||
2.33.0
|
||||
|
||||
8376
LoongArch-Add-Loongson-ASX-base-instruction-support.patch
Normal file
8376
LoongArch-Add-Loongson-ASX-base-instruction-support.patch
Normal file
File diff suppressed because it is too large
Load Diff
7458
LoongArch-Add-Loongson-ASX-directive-builtin-functio.patch
Normal file
7458
LoongArch-Add-Loongson-ASX-directive-builtin-functio.patch
Normal file
File diff suppressed because it is too large
Load Diff
8433
LoongArch-Add-Loongson-SX-base-instruction-support.patch
Normal file
8433
LoongArch-Add-Loongson-SX-base-instruction-support.patch
Normal file
File diff suppressed because it is too large
Load Diff
7549
LoongArch-Add-Loongson-SX-directive-builtin-function.patch
Normal file
7549
LoongArch-Add-Loongson-SX-directive-builtin-function.patch
Normal file
File diff suppressed because it is too large
Load Diff
166
LoongArch-Add-built-in-functions-description-of-Loon.patch
Normal file
166
LoongArch-Add-built-in-functions-description-of-Loon.patch
Normal file
@ -0,0 +1,166 @@
|
||||
From 7cfe6e057045ac794afbe9097b1b211c0e1ea723 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Thu, 6 Apr 2023 16:02:07 +0800
|
||||
Subject: [PATCH 039/124] LoongArch: Add built-in functions description of
|
||||
LoongArch Base instruction set instructions.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* doc/extend.texi: Add section for LoongArch Base Built-in functions.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/doc/extend.texi | 129 ++++++++++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 129 insertions(+)
|
||||
|
||||
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||||
index 3c101ca89..1d1bac255 100644
|
||||
--- a/gcc/doc/extend.texi
|
||||
+++ b/gcc/doc/extend.texi
|
||||
@@ -14678,6 +14678,7 @@ instructions, but allow the compiler to schedule those calls.
|
||||
* Blackfin Built-in Functions::
|
||||
* BPF Built-in Functions::
|
||||
* FR-V Built-in Functions::
|
||||
+* LoongArch Base Built-in Functions::
|
||||
* MIPS DSP Built-in Functions::
|
||||
* MIPS Paired-Single Support::
|
||||
* MIPS Loongson Built-in Functions::
|
||||
@@ -16128,6 +16129,134 @@ Use the @code{nldub} instruction to load the contents of address @var{x}
|
||||
into the data cache. The instruction is issued in slot I1@.
|
||||
@end table
|
||||
|
||||
+@node LoongArch Base Built-in Functions
|
||||
+@subsection LoongArch Base Built-in Functions
|
||||
+
|
||||
+These built-in functions are available for LoongArch.
|
||||
+
|
||||
+Data Type Description:
|
||||
+@itemize
|
||||
+@item @code{imm0_31}, a compile-time constant in range 0 to 31;
|
||||
+@item @code{imm0_16383}, a compile-time constant in range 0 to 16383;
|
||||
+@item @code{imm0_32767}, a compile-time constant in range 0 to 32767;
|
||||
+@item @code{imm_n2048_2047}, a compile-time constant in range -2048 to 2047;
|
||||
+@end itemize
|
||||
+
|
||||
+The intrinsics provided are listed below:
|
||||
+@smallexample
|
||||
+ unsigned int __builtin_loongarch_movfcsr2gr (imm0_31)
|
||||
+ void __builtin_loongarch_movgr2fcsr (imm0_31, unsigned int)
|
||||
+ void __builtin_loongarch_cacop_d (imm0_31, unsigned long int, imm_n2048_2047)
|
||||
+ unsigned int __builtin_loongarch_cpucfg (unsigned int)
|
||||
+ void __builtin_loongarch_asrtle_d (long int, long int)
|
||||
+ void __builtin_loongarch_asrtgt_d (long int, long int)
|
||||
+ long int __builtin_loongarch_lddir_d (long int, imm0_31)
|
||||
+ void __builtin_loongarch_ldpte_d (long int, imm0_31)
|
||||
+
|
||||
+ int __builtin_loongarch_crc_w_b_w (char, int)
|
||||
+ int __builtin_loongarch_crc_w_h_w (short, int)
|
||||
+ int __builtin_loongarch_crc_w_w_w (int, int)
|
||||
+ int __builtin_loongarch_crc_w_d_w (long int, int)
|
||||
+ int __builtin_loongarch_crcc_w_b_w (char, int)
|
||||
+ int __builtin_loongarch_crcc_w_h_w (short, int)
|
||||
+ int __builtin_loongarch_crcc_w_w_w (int, int)
|
||||
+ int __builtin_loongarch_crcc_w_d_w (long int, int)
|
||||
+
|
||||
+ unsigned int __builtin_loongarch_csrrd_w (imm0_16383)
|
||||
+ unsigned int __builtin_loongarch_csrwr_w (unsigned int, imm0_16383)
|
||||
+ unsigned int __builtin_loongarch_csrxchg_w (unsigned int, unsigned int, imm0_16383)
|
||||
+ unsigned long int __builtin_loongarch_csrrd_d (imm0_16383)
|
||||
+ unsigned long int __builtin_loongarch_csrwr_d (unsigned long int, imm0_16383)
|
||||
+ unsigned long int __builtin_loongarch_csrxchg_d (unsigned long int, unsigned long int, imm0_16383)
|
||||
+
|
||||
+ unsigned char __builtin_loongarch_iocsrrd_b (unsigned int)
|
||||
+ unsigned short __builtin_loongarch_iocsrrd_h (unsigned int)
|
||||
+ unsigned int __builtin_loongarch_iocsrrd_w (unsigned int)
|
||||
+ unsigned long int __builtin_loongarch_iocsrrd_d (unsigned int)
|
||||
+ void __builtin_loongarch_iocsrwr_b (unsigned char, unsigned int)
|
||||
+ void __builtin_loongarch_iocsrwr_h (unsigned short, unsigned int)
|
||||
+ void __builtin_loongarch_iocsrwr_w (unsigned int, unsigned int)
|
||||
+ void __builtin_loongarch_iocsrwr_d (unsigned long int, unsigned int)
|
||||
+
|
||||
+ void __builtin_loongarch_dbar (imm0_32767)
|
||||
+ void __builtin_loongarch_ibar (imm0_32767)
|
||||
+
|
||||
+ void __builtin_loongarch_syscall (imm0_32767)
|
||||
+ void __builtin_loongarch_break (imm0_32767)
|
||||
+@end smallexample
|
||||
+
|
||||
+@emph{Note:} The control registers are divided into 32-bit and 64-bit
|
||||
+registers, but the access instructions do not distinguish between them, so
|
||||
+GCC renames the control instructions when implementing intrinsics.
|
||||
+
|
||||
+Taking the csrrd instruction as an example, the built-in functions are implemented as follows:
|
||||
+@smallexample
|
||||
+ __builtin_loongarch_csrrd_w // When reading the 32-bit control register use.
|
||||
+ __builtin_loongarch_csrrd_d // When reading the 64-bit control register use.
|
||||
+@end smallexample
|
||||
+
|
||||
+For convenience, the built-in functions are wrapped; the wrapper
|
||||
+functions and the types @code{__drdtime_t} and @code{__rdtime_t} are
|
||||
+defined in @code{larchintrin.h}. To call any of the following
|
||||
+functions, you need to include @code{larchintrin.h}.
|
||||
+
|
||||
+@smallexample
|
||||
+ typedef struct drdtime@{
|
||||
+ unsigned long dvalue;
|
||||
+ unsigned long dtimeid;
|
||||
+ @} __drdtime_t;
|
||||
+
|
||||
+ typedef struct rdtime@{
|
||||
+ unsigned int value;
|
||||
+ unsigned int timeid;
|
||||
+ @} __rdtime_t;
|
||||
+@end smallexample
|
||||
+
|
||||
+@smallexample
|
||||
+ __drdtime_t __rdtime_d (void)
|
||||
+ __rdtime_t __rdtimel_w (void)
|
||||
+ __rdtime_t __rdtimeh_w (void)
|
||||
+ unsigned int __movfcsr2gr (imm0_31)
|
||||
+ void __movgr2fcsr (imm0_31, unsigned int)
|
||||
+ void __cacop_d (imm0_31, unsigned long, imm_n2048_2047)
|
||||
+ unsigned int __cpucfg (unsigned int)
|
||||
+ void __asrtle_d (long int, long int)
|
||||
+ void __asrtgt_d (long int, long int)
|
||||
+ long int __lddir_d (long int, imm0_31)
|
||||
+ void __ldpte_d (long int, imm0_31)
|
||||
+
|
||||
+ int __crc_w_b_w (char, int)
|
||||
+ int __crc_w_h_w (short, int)
|
||||
+ int __crc_w_w_w (int, int)
|
||||
+ int __crc_w_d_w (long int, int)
|
||||
+ int __crcc_w_b_w (char, int)
|
||||
+ int __crcc_w_h_w (short, int)
|
||||
+ int __crcc_w_w_w (int, int)
|
||||
+ int __crcc_w_d_w (long int, int)
|
||||
+
|
||||
+ unsigned int __csrrd_w (imm0_16383)
|
||||
+ unsigned int __csrwr_w (unsigned int, imm0_16383)
|
||||
+ unsigned int __csrxchg_w (unsigned int, unsigned int, imm0_16383)
|
||||
+ unsigned long __csrrd_d (imm0_16383)
|
||||
+ unsigned long __csrwr_d (unsigned long, imm0_16383)
|
||||
+ unsigned long __csrxchg_d (unsigned long, unsigned long, imm0_16383)
|
||||
+
|
||||
+ unsigned char __iocsrrd_b (unsigned int)
|
||||
+ unsigned short __iocsrrd_h (unsigned int)
|
||||
+ unsigned int __iocsrrd_w (unsigned int)
|
||||
+ unsigned long __iocsrrd_d (unsigned int)
|
||||
+ void __iocsrwr_b (unsigned char, unsigned int)
|
||||
+ void __iocsrwr_h (unsigned short, unsigned int)
|
||||
+ void __iocsrwr_w (unsigned int, unsigned int)
|
||||
+ void __iocsrwr_d (unsigned long, unsigned int)
|
||||
+
|
||||
+ void __dbar (imm0_32767)
|
||||
+ void __ibar (imm0_32767)
|
||||
+
|
||||
+ void __syscall (imm0_32767)
|
||||
+ void __break (imm0_32767)
|
||||
+@end smallexample
|
||||
+
|
||||
@node MIPS DSP Built-in Functions
|
||||
@subsection MIPS DSP Built-in Functions
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
107
LoongArch-Add-fcopysign-instructions.patch
Normal file
107
LoongArch-Add-fcopysign-instructions.patch
Normal file
@ -0,0 +1,107 @@
|
||||
From 41a4945886631a1b2898ae957389d5db18a07141 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Fri, 4 Nov 2022 15:12:22 +0800
|
||||
Subject: [PATCH 025/124] LoongArch: Add fcopysign instructions
|
||||
|
||||
Add fcopysign.{s,d} with the names copysign{sf,df}3 so GCC will expand
|
||||
__builtin_copysign{f,} to a single instruction.
|
||||
|
||||
Link: https://sourceware.org/pipermail/libc-alpha/2022-November/143177.html
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.md (UNSPEC_FCOPYSIGN): New unspec.
|
||||
(type): Add fcopysign.
|
||||
(copysign<mode>3): New instruction template.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/fcopysign.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.md | 22 ++++++++++++++++++-
|
||||
.../gcc.target/loongarch/fcopysign.c | 16 ++++++++++++++
|
||||
2 files changed, 37 insertions(+), 1 deletion(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/fcopysign.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index 214b14bdd..bda34d0f3 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -37,6 +37,7 @@
|
||||
UNSPEC_FCLASS
|
||||
UNSPEC_FMAX
|
||||
UNSPEC_FMIN
|
||||
+ UNSPEC_FCOPYSIGN
|
||||
|
||||
;; Override return address for exception handling.
|
||||
UNSPEC_EH_RETURN
|
||||
@@ -214,6 +215,7 @@
|
||||
;; fabs floating point absolute value
|
||||
;; fneg floating point negation
|
||||
;; fcmp floating point compare
|
||||
+;; fcopysign floating point copysign
|
||||
;; fcvt floating point convert
|
||||
;; fsqrt floating point square root
|
||||
;; frsqrt floating point reciprocal square root
|
||||
@@ -226,7 +228,7 @@
|
||||
"unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
|
||||
prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
|
||||
shift,slt,signext,clz,trap,imul,idiv,move,
|
||||
- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcvt,fsqrt,
|
||||
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fsqrt,
|
||||
frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
|
||||
(cond [(eq_attr "jirl" "!unset") (const_string "call")
|
||||
(eq_attr "got" "load") (const_string "load")
|
||||
@@ -976,6 +978,24 @@
|
||||
(set_attr "mode" "<UNITMODE>")])
|
||||
|
||||
;;
|
||||
+;; ....................
|
||||
+;;
|
||||
+;; FLOATING POINT COPYSIGN
|
||||
+;;
|
||||
+;; ....................
|
||||
+
|
||||
+(define_insn "copysign<mode>3"
|
||||
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
|
||||
+ (match_operand:ANYF 2 "register_operand" "f")]
|
||||
+ UNSPEC_FCOPYSIGN))]
|
||||
+ "TARGET_HARD_FLOAT"
|
||||
+ "fcopysign.<fmt>\t%0,%1,%2"
|
||||
+ [(set_attr "type" "fcopysign")
|
||||
+ (set_attr "mode" "<UNITMODE>")])
|
||||
+
|
||||
+
|
||||
+;;
|
||||
;; ...................
|
||||
;;
|
||||
;; Count leading zeroes.
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/fcopysign.c b/gcc/testsuite/gcc.target/loongarch/fcopysign.c
|
||||
new file mode 100644
|
||||
index 000000000..058ba2cf5
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/fcopysign.c
|
||||
@@ -0,0 +1,16 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mdouble-float" } */
|
||||
+/* { dg-final { scan-assembler "fcopysign\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "fcopysign\\.d" } } */
|
||||
+
|
||||
+double
|
||||
+my_copysign (double a, double b)
|
||||
+{
|
||||
+ return __builtin_copysign (a, b);
|
||||
+}
|
||||
+
|
||||
+float
|
||||
+my_copysignf (float a, float b)
|
||||
+{
|
||||
+ return __builtin_copysignf (a, b);
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
123
LoongArch-Add-flogb.-s-d-instructions-and-expand-log.patch
Normal file
123
LoongArch-Add-flogb.-s-d-instructions-and-expand-log.patch
Normal file
@ -0,0 +1,123 @@
|
||||
From 2ae587a86bba31b91a127e353c31c9f861ff5326 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Tue, 8 Nov 2022 13:42:20 +0800
|
||||
Subject: [PATCH 030/124] LoongArch: Add flogb.{s,d} instructions and expand
|
||||
logb{sf,df}2
|
||||
|
||||
On LoongArch, flogb instructions extract the exponent of a non-negative
|
||||
floating point value, but produce NaN for negative values. So we need
|
||||
to add a fabs instruction when we expand logb.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.md (UNSPEC_FLOGB): New unspec.
|
||||
(type): Add flogb.
|
||||
(logb_non_negative<mode>2): New instruction template.
|
||||
(logb<mode>2): New define_expand.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/flogb.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.md | 35 ++++++++++++++++++++--
|
||||
gcc/testsuite/gcc.target/loongarch/flogb.c | 18 +++++++++++
|
||||
2 files changed, 51 insertions(+), 2 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/flogb.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index c141c9add..682ab9617 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -42,6 +42,7 @@
|
||||
UNSPEC_FTINTRM
|
||||
UNSPEC_FTINTRP
|
||||
UNSPEC_FSCALEB
|
||||
+ UNSPEC_FLOGB
|
||||
|
||||
;; Override return address for exception handling.
|
||||
UNSPEC_EH_RETURN
|
||||
@@ -217,6 +218,7 @@
|
||||
;; fdiv floating point divide
|
||||
;; frdiv floating point reciprocal divide
|
||||
;; fabs floating point absolute value
|
||||
+;; flogb floating point exponent extract
|
||||
;; fneg floating point negation
|
||||
;; fcmp floating point compare
|
||||
;; fcopysign floating point copysign
|
||||
@@ -233,8 +235,8 @@
|
||||
"unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
|
||||
prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
|
||||
shift,slt,signext,clz,trap,imul,idiv,move,
|
||||
- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fscaleb,
|
||||
- fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
|
||||
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,flogb,fneg,fcmp,fcopysign,fcvt,
|
||||
+ fscaleb,fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
|
||||
(cond [(eq_attr "jirl" "!unset") (const_string "call")
|
||||
(eq_attr "got" "load") (const_string "load")
|
||||
|
||||
@@ -1039,6 +1041,35 @@
|
||||
(set_attr "mode" "<UNITMODE>")])
|
||||
|
||||
;;
|
||||
+;; ....................
|
||||
+;;
|
||||
+;; FLOATING POINT EXPONENT EXTRACT
|
||||
+;;
|
||||
+;; ....................
|
||||
+
|
||||
+(define_insn "logb_non_negative<mode>2"
|
||||
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
|
||||
+ UNSPEC_FLOGB))]
|
||||
+ "TARGET_HARD_FLOAT"
|
||||
+ "flogb.<fmt>\t%0,%1"
|
||||
+ [(set_attr "type" "flogb")
|
||||
+ (set_attr "mode" "<UNITMODE>")])
|
||||
+
|
||||
+(define_expand "logb<mode>2"
|
||||
+ [(set (match_operand:ANYF 0 "register_operand")
|
||||
+ (unspec:ANYF [(abs:ANYF (match_operand:ANYF 1 "register_operand"))]
|
||||
+ UNSPEC_FLOGB))]
|
||||
+ "TARGET_HARD_FLOAT"
|
||||
+{
|
||||
+ rtx tmp = gen_reg_rtx (<MODE>mode);
|
||||
+
|
||||
+ emit_insn (gen_abs<mode>2 (tmp, operands[1]));
|
||||
+ emit_insn (gen_logb_non_negative<mode>2 (operands[0], tmp));
|
||||
+ DONE;
|
||||
+})
|
||||
+
|
||||
+;;
|
||||
;; ...................
|
||||
;;
|
||||
;; Count leading zeroes.
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/flogb.c b/gcc/testsuite/gcc.target/loongarch/flogb.c
|
||||
new file mode 100644
|
||||
index 000000000..1daefe54e
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/flogb.c
|
||||
@@ -0,0 +1,18 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mdouble-float -fno-math-errno" } */
|
||||
+/* { dg-final { scan-assembler "fabs\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "fabs\\.d" } } */
|
||||
+/* { dg-final { scan-assembler "flogb\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "flogb\\.d" } } */
|
||||
+
|
||||
+double
|
||||
+my_logb (double a)
|
||||
+{
|
||||
+ return __builtin_logb (a);
|
||||
+}
|
||||
+
|
||||
+float
|
||||
+my_logbf (float a)
|
||||
+{
|
||||
+ return __builtin_logbf (a);
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
155
LoongArch-Add-fscaleb.-s-d-instructions-as-ldexp-sf-.patch
Normal file
155
LoongArch-Add-fscaleb.-s-d-instructions-as-ldexp-sf-.patch
Normal file
@ -0,0 +1,155 @@
|
||||
From e3d69a3b7a4e00e8bba88b8b4abaa1c17bc083d5 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Tue, 8 Nov 2022 12:14:35 +0800
|
||||
Subject: [PATCH 029/124] LoongArch: Add fscaleb.{s,d} instructions as
|
||||
ldexp{sf,df}3
|
||||
|
||||
This allows optimizing __builtin_ldexp{,f} and __builtin_scalbn{,f} with
|
||||
-fno-math-errno.
|
||||
|
||||
IMODE is added because we can't hard code SI for operand 2: fscaleb.d
|
||||
instruction always takes the high half of both source registers into
|
||||
account. See my_ldexp_long in the test case.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.md (UNSPEC_FSCALEB): New unspec.
|
||||
(type): Add fscaleb.
|
||||
(IMODE): New mode attr.
|
||||
(ldexp<mode>3): New instruction template.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/fscaleb.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.md | 26 ++++++++++-
|
||||
gcc/testsuite/gcc.target/loongarch/fscaleb.c | 48 ++++++++++++++++++++
|
||||
2 files changed, 72 insertions(+), 2 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/fscaleb.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index eb127c346..c141c9add 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -41,6 +41,7 @@
|
||||
UNSPEC_FTINT
|
||||
UNSPEC_FTINTRM
|
||||
UNSPEC_FTINTRP
|
||||
+ UNSPEC_FSCALEB
|
||||
|
||||
;; Override return address for exception handling.
|
||||
UNSPEC_EH_RETURN
|
||||
@@ -220,6 +221,7 @@
|
||||
;; fcmp floating point compare
|
||||
;; fcopysign floating point copysign
|
||||
;; fcvt floating point convert
|
||||
+;; fscaleb floating point scale
|
||||
;; fsqrt floating point square root
|
||||
;; frsqrt floating point reciprocal square root
|
||||
;; multi multiword sequence (or user asm statements)
|
||||
@@ -231,8 +233,8 @@
|
||||
"unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
|
||||
prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
|
||||
shift,slt,signext,clz,trap,imul,idiv,move,
|
||||
- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fsqrt,
|
||||
- frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
|
||||
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fscaleb,
|
||||
+ fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
|
||||
(cond [(eq_attr "jirl" "!unset") (const_string "call")
|
||||
(eq_attr "got" "load") (const_string "load")
|
||||
|
||||
@@ -418,6 +420,10 @@
|
||||
;; the controlling mode.
|
||||
(define_mode_attr HALFMODE [(DF "SI") (DI "SI") (TF "DI")])
|
||||
|
||||
+;; This attribute gives the integer mode that has the same size as a
|
||||
+;; floating-point mode.
|
||||
+(define_mode_attr IMODE [(SF "SI") (DF "DI")])
|
||||
+
|
||||
;; This code iterator allows signed and unsigned widening multiplications
|
||||
;; to use the same template.
|
||||
(define_code_iterator any_extend [sign_extend zero_extend])
|
||||
@@ -1014,7 +1020,23 @@
|
||||
"fcopysign.<fmt>\t%0,%1,%2"
|
||||
[(set_attr "type" "fcopysign")
|
||||
(set_attr "mode" "<UNITMODE>")])
|
||||
+
|
||||
+;;
|
||||
+;; ....................
|
||||
+;;
|
||||
+;; FLOATING POINT SCALE
|
||||
+;;
|
||||
+;; ....................
|
||||
|
||||
+(define_insn "ldexp<mode>3"
|
||||
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
|
||||
+ (match_operand:<IMODE> 2 "register_operand" "f")]
|
||||
+ UNSPEC_FSCALEB))]
|
||||
+ "TARGET_HARD_FLOAT"
|
||||
+ "fscaleb.<fmt>\t%0,%1,%2"
|
||||
+ [(set_attr "type" "fscaleb")
|
||||
+ (set_attr "mode" "<UNITMODE>")])
|
||||
|
||||
;;
|
||||
;; ...................
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/fscaleb.c b/gcc/testsuite/gcc.target/loongarch/fscaleb.c
|
||||
new file mode 100644
|
||||
index 000000000..f18470fbb
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/fscaleb.c
|
||||
@@ -0,0 +1,48 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno" } */
|
||||
+/* { dg-final { scan-assembler-times "fscaleb\\.s" 3 } } */
|
||||
+/* { dg-final { scan-assembler-times "fscaleb\\.d" 4 } } */
|
||||
+/* { dg-final { scan-assembler-times "slli\\.w" 1 } } */
|
||||
+
|
||||
+double
|
||||
+my_scalbln (double a, long b)
|
||||
+{
|
||||
+ return __builtin_scalbln (a, b);
|
||||
+}
|
||||
+
|
||||
+double
|
||||
+my_scalbn (double a, int b)
|
||||
+{
|
||||
+ return __builtin_scalbn (a, b);
|
||||
+}
|
||||
+
|
||||
+double
|
||||
+my_ldexp (double a, int b)
|
||||
+{
|
||||
+ return __builtin_ldexp (a, b);
|
||||
+}
|
||||
+
|
||||
+float
|
||||
+my_scalblnf (float a, long b)
|
||||
+{
|
||||
+ return __builtin_scalblnf (a, b);
|
||||
+}
|
||||
+
|
||||
+float
|
||||
+my_scalbnf (float a, int b)
|
||||
+{
|
||||
+ return __builtin_scalbnf (a, b);
|
||||
+}
|
||||
+
|
||||
+float
|
||||
+my_ldexpf (float a, int b)
|
||||
+{
|
||||
+ return __builtin_ldexpf (a, b);
|
||||
+}
|
||||
+
|
||||
+/* b must be sign-extended */
|
||||
+double
|
||||
+my_ldexp_long (double a, long b)
|
||||
+{
|
||||
+ return __builtin_ldexp (a, b);
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
220
LoongArch-Add-ftint-rm-rp-.-w-l-.-s-d-instructions.patch
Normal file
220
LoongArch-Add-ftint-rm-rp-.-w-l-.-s-d-instructions.patch
Normal file
@ -0,0 +1,220 @@
|
||||
From 76d599c6d8f9cf78b51cd76a7ca8fbe11e2cda2b Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Sun, 6 Nov 2022 23:16:49 +0800
|
||||
Subject: [PATCH 028/124] LoongArch: Add ftint{,rm,rp}.{w,l}.{s,d} instructions
|
||||
|
||||
This allows optimizing the following builtins when -fno-math-errno is used:
|
||||
|
||||
- __builtin_lrint{,f}
|
||||
- __builtin_lfloor{,f}
|
||||
- __builtin_lceil{,f}
|
||||
|
||||
Inspired by
|
||||
https://gcc.gnu.org/pipermail/gcc-patches/2022-November/605287.html.
|
||||
|
||||
ANYFI is added so the compiler won't try ftint.l.s if -mfpu=32. If we
|
||||
simply used GPR here an ICE would be triggered with __builtin_lrintf
|
||||
and -mfpu=32.
|
||||
|
||||
ftint{rm,rp} instructions may raise inexact exception, so they can't be
|
||||
used if -ftrapping-math -fno-fp-int-builtin-inexact.
|
||||
|
||||
Note that the .w.{s,d} variants are not tested because we don't support
|
||||
ILP32 for now.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.md (UNSPEC_FTINT): New unspec.
|
||||
(UNSPEC_FTINTRM): Likewise.
|
||||
(UNSPEC_FTINTRP): Likewise.
|
||||
(LRINT): New define_int_iterator.
|
||||
(lrint_pattern): New define_int_attr.
|
||||
(lrint_submenmonic): Likewise.
|
||||
(lrint_allow_inexact): Likewise.
|
||||
(ANYFI): New define_mode_iterator.
|
||||
(lrint<ANYF><ANYFI>): New instruction template.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/ftint.c: New test.
|
||||
* gcc.target/loongarch/ftint-no-inexact.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.md | 34 ++++++++++++++
|
||||
.../gcc.target/loongarch/ftint-no-inexact.c | 44 +++++++++++++++++++
|
||||
gcc/testsuite/gcc.target/loongarch/ftint.c | 44 +++++++++++++++++++
|
||||
3 files changed, 122 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/ftint.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index a14ab14ac..eb127c346 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -38,6 +38,9 @@
|
||||
UNSPEC_FMAX
|
||||
UNSPEC_FMIN
|
||||
UNSPEC_FCOPYSIGN
|
||||
+ UNSPEC_FTINT
|
||||
+ UNSPEC_FTINTRM
|
||||
+ UNSPEC_FTINTRP
|
||||
|
||||
;; Override return address for exception handling.
|
||||
UNSPEC_EH_RETURN
|
||||
@@ -374,6 +377,11 @@
|
||||
(define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT")
|
||||
(DF "TARGET_DOUBLE_FLOAT")])
|
||||
|
||||
+;; Iterator for fixed-point modes which can be held by a hardware
|
||||
+;; floating-point register.
|
||||
+(define_mode_iterator ANYFI [(SI "TARGET_HARD_FLOAT")
|
||||
+ (DI "TARGET_DOUBLE_FLOAT")])
|
||||
+
|
||||
;; A mode for which moves involving FPRs may need to be split.
|
||||
(define_mode_iterator SPLITF
|
||||
[(DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
|
||||
@@ -515,6 +523,19 @@
|
||||
(define_code_attr sel [(eq "masknez") (ne "maskeqz")])
|
||||
(define_code_attr selinv [(eq "maskeqz") (ne "masknez")])
|
||||
|
||||
+;; Iterator and attributes for floating-point to fixed-point conversion
|
||||
+;; instructions.
|
||||
+(define_int_iterator LRINT [UNSPEC_FTINT UNSPEC_FTINTRM UNSPEC_FTINTRP])
|
||||
+(define_int_attr lrint_pattern [(UNSPEC_FTINT "lrint")
|
||||
+ (UNSPEC_FTINTRM "lfloor")
|
||||
+ (UNSPEC_FTINTRP "lceil")])
|
||||
+(define_int_attr lrint_submenmonic [(UNSPEC_FTINT "")
|
||||
+ (UNSPEC_FTINTRM "rm")
|
||||
+ (UNSPEC_FTINTRP "rp")])
|
||||
+(define_int_attr lrint_allow_inexact [(UNSPEC_FTINT "1")
|
||||
+ (UNSPEC_FTINTRM "0")
|
||||
+ (UNSPEC_FTINTRP "0")])
|
||||
+
|
||||
;;
|
||||
;; ....................
|
||||
;;
|
||||
@@ -2022,6 +2043,19 @@
|
||||
[(set_attr "type" "fcvt")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
+;; Convert floating-point numbers to integers
|
||||
+(define_insn "<lrint_pattern><ANYF:mode><ANYFI:mode>2"
|
||||
+ [(set (match_operand:ANYFI 0 "register_operand" "=f")
|
||||
+ (unspec:ANYFI [(match_operand:ANYF 1 "register_operand" "f")]
|
||||
+ LRINT))]
|
||||
+ "TARGET_HARD_FLOAT &&
|
||||
+ (<lrint_allow_inexact>
|
||||
+ || flag_fp_int_builtin_inexact
|
||||
+ || !flag_trapping_math)"
|
||||
+ "ftint<lrint_submenmonic>.<ANYFI:ifmt>.<ANYF:fmt> %0,%1"
|
||||
+ [(set_attr "type" "fcvt")
|
||||
+ (set_attr "mode" "<ANYF:MODE>")])
|
||||
+
|
||||
;; Load the low word of operand 0 with operand 1.
|
||||
(define_insn "load_low<mode>"
|
||||
[(set (match_operand:SPLITF 0 "register_operand" "=f,f")
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c
|
||||
new file mode 100644
|
||||
index 000000000..88b83a9c0
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c
|
||||
@@ -0,0 +1,44 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact" } */
|
||||
+/* { dg-final { scan-assembler "ftint\\.l\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "ftint\\.l\\.d" } } */
|
||||
+/* { dg-final { scan-assembler-not "ftintrm\\.l\\.s" } } */
|
||||
+/* { dg-final { scan-assembler-not "ftintrm\\.l\\.d" } } */
|
||||
+/* { dg-final { scan-assembler-not "ftintrp\\.l\\.s" } } */
|
||||
+/* { dg-final { scan-assembler-not "ftintrp\\.l\\.d" } } */
|
||||
+
|
||||
+long
|
||||
+my_lrint (double a)
|
||||
+{
|
||||
+ return __builtin_lrint (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lrintf (float a)
|
||||
+{
|
||||
+ return __builtin_lrintf (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lfloor (double a)
|
||||
+{
|
||||
+ return __builtin_lfloor (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lfloorf (float a)
|
||||
+{
|
||||
+ return __builtin_lfloorf (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lceil (double a)
|
||||
+{
|
||||
+ return __builtin_lceil (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lceilf (float a)
|
||||
+{
|
||||
+ return __builtin_lceilf (a);
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/ftint.c b/gcc/testsuite/gcc.target/loongarch/ftint.c
|
||||
new file mode 100644
|
||||
index 000000000..7a326a454
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/ftint.c
|
||||
@@ -0,0 +1,44 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact" } */
|
||||
+/* { dg-final { scan-assembler "ftint\\.l\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "ftint\\.l\\.d" } } */
|
||||
+/* { dg-final { scan-assembler "ftintrm\\.l\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "ftintrm\\.l\\.d" } } */
|
||||
+/* { dg-final { scan-assembler "ftintrp\\.l\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "ftintrp\\.l\\.d" } } */
|
||||
+
|
||||
+long
|
||||
+my_lrint (double a)
|
||||
+{
|
||||
+ return __builtin_lrint (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lrintf (float a)
|
||||
+{
|
||||
+ return __builtin_lrintf (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lfloor (double a)
|
||||
+{
|
||||
+ return __builtin_lfloor (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lfloorf (float a)
|
||||
+{
|
||||
+ return __builtin_lfloorf (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lceil (double a)
|
||||
+{
|
||||
+ return __builtin_lceil (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lceilf (float a)
|
||||
+{
|
||||
+ return __builtin_lceilf (a);
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
1051
LoongArch-Add-new-code-model-medium.patch
Normal file
1051
LoongArch-Add-new-code-model-medium.patch
Normal file
File diff suppressed because it is too large
Load Diff
158
LoongArch-Add-prefetch-instructions.patch
Normal file
158
LoongArch-Add-prefetch-instructions.patch
Normal file
@ -0,0 +1,158 @@
|
||||
From 52a41006c2e8141a42de93ffcc2c040e034244b2 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Wed, 16 Nov 2022 09:25:14 +0800
|
||||
Subject: [PATCH 031/124] LoongArch: Add prefetch instructions.
|
||||
|
||||
Enable sw prefetching at -O3 and higher.
|
||||
|
||||
Co-Authored-By: xujiahao <xujiahao@loongson.cn>
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/constraints.md (ZD): New constraint.
|
||||
* config/loongarch/loongarch-def.c: Initialize the number of parallel prefetches.
|
||||
* config/loongarch/loongarch-tune.h (struct loongarch_cache):
|
||||
Define the number of parallel prefetches.
|
||||
* config/loongarch/loongarch.cc (loongarch_option_override_internal):
|
||||
Set up parameters to be used in prefetching algorithm.
|
||||
* config/loongarch/loongarch.md (prefetch): New template.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/constraints.md | 10 ++++++++++
|
||||
gcc/config/loongarch/loongarch-def.c | 2 ++
|
||||
gcc/config/loongarch/loongarch-tune.h | 1 +
|
||||
gcc/config/loongarch/loongarch.cc | 28 +++++++++++++++++++++++++++
|
||||
gcc/config/loongarch/loongarch.md | 14 ++++++++++++++
|
||||
5 files changed, 55 insertions(+)
|
||||
|
||||
diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
|
||||
index 43cb7b5f0..46f7f63ae 100644
|
||||
--- a/gcc/config/loongarch/constraints.md
|
||||
+++ b/gcc/config/loongarch/constraints.md
|
||||
@@ -86,6 +86,10 @@
|
||||
;; "ZB"
|
||||
;; "An address that is held in a general-purpose register.
|
||||
;; The offset is zero"
|
||||
+;; "ZD"
|
||||
+;; "An address operand whose address is formed by a base register
|
||||
+;; and offset that is suitable for use in instructions with the same
|
||||
+;; addressing mode as @code{preld}."
|
||||
;; "<" "Matches a pre-dec or post-dec operand." (Global non-architectural)
|
||||
;; ">" "Matches a pre-inc or post-inc operand." (Global non-architectural)
|
||||
|
||||
@@ -190,3 +194,9 @@
|
||||
The offset is zero"
|
||||
(and (match_code "mem")
|
||||
(match_test "REG_P (XEXP (op, 0))")))
|
||||
+
|
||||
+(define_address_constraint "ZD"
|
||||
+ "An address operand whose address is formed by a base register
|
||||
+ and offset that is suitable for use in instructions with the same
|
||||
+ addressing mode as @code{preld}."
|
||||
+ (match_test "loongarch_12bit_offset_address_p (op, mode)"))
|
||||
diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
|
||||
index cbf995d81..80ab10a52 100644
|
||||
--- a/gcc/config/loongarch/loongarch-def.c
|
||||
+++ b/gcc/config/loongarch/loongarch-def.c
|
||||
@@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
|
||||
.l1d_line_size = 64,
|
||||
.l1d_size = 64,
|
||||
.l2d_size = 256,
|
||||
+ .simultaneous_prefetches = 4,
|
||||
},
|
||||
[CPU_LA464] = {
|
||||
.l1d_line_size = 64,
|
||||
.l1d_size = 64,
|
||||
.l2d_size = 256,
|
||||
+ .simultaneous_prefetches = 4,
|
||||
},
|
||||
};
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
|
||||
index 6f3530f5c..8e3eb2947 100644
|
||||
--- a/gcc/config/loongarch/loongarch-tune.h
|
||||
+++ b/gcc/config/loongarch/loongarch-tune.h
|
||||
@@ -45,6 +45,7 @@ struct loongarch_cache {
|
||||
int l1d_line_size; /* bytes */
|
||||
int l1d_size; /* KiB */
|
||||
int l2d_size; /* kiB */
|
||||
+ int simultaneous_prefetches; /* number of parallel prefetch */
|
||||
};
|
||||
|
||||
#endif /* LOONGARCH_TUNE_H */
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index d552b162a..622c9435b 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "context.h"
|
||||
#include "builtins.h"
|
||||
#include "rtl-iter.h"
|
||||
+#include "opts.h"
|
||||
|
||||
/* This file should be included last. */
|
||||
#include "target-def.h"
|
||||
@@ -6099,6 +6100,33 @@ loongarch_option_override_internal (struct gcc_options *opts)
|
||||
if (loongarch_branch_cost == 0)
|
||||
loongarch_branch_cost = loongarch_cost->branch_cost;
|
||||
|
||||
+ /* Set up parameters to be used in prefetching algorithm. */
|
||||
+ int simultaneous_prefetches
|
||||
+ = loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches;
|
||||
+
|
||||
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
|
||||
+ param_simultaneous_prefetches,
|
||||
+ simultaneous_prefetches);
|
||||
+
|
||||
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
|
||||
+ param_l1_cache_line_size,
|
||||
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size);
|
||||
+
|
||||
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
|
||||
+ param_l1_cache_size,
|
||||
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size);
|
||||
+
|
||||
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
|
||||
+ param_l2_cache_size,
|
||||
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size);
|
||||
+
|
||||
+
|
||||
+ /* Enable sw prefetching at -O3 and higher. */
|
||||
+ if (opts->x_flag_prefetch_loop_arrays < 0
|
||||
+ && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
|
||||
+ && !opts->x_optimize_size)
|
||||
+ opts->x_flag_prefetch_loop_arrays = 1;
|
||||
+
|
||||
if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
|
||||
error ("%qs cannot be used for compiling a shared library",
|
||||
"-mdirect-extern-access");
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index 682ab9617..2fda53819 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -3282,6 +3282,20 @@
|
||||
;; ....................
|
||||
;;
|
||||
|
||||
+(define_insn "prefetch"
|
||||
+ [(prefetch (match_operand 0 "address_operand" "ZD")
|
||||
+ (match_operand 1 "const_int_operand" "n")
|
||||
+ (match_operand 2 "const_int_operand" "n"))]
|
||||
+ ""
|
||||
+{
|
||||
+ switch (INTVAL (operands[1]))
|
||||
+ {
|
||||
+ case 0: return "preld\t0,%a0";
|
||||
+ case 1: return "preld\t8,%a0";
|
||||
+ default: gcc_unreachable ();
|
||||
+ }
|
||||
+})
|
||||
+
|
||||
(define_insn "nop"
|
||||
[(const_int 0)]
|
||||
""
|
||||
--
|
||||
2.33.0
|
||||
|
||||
794
LoongArch-Add-support-code-model-extreme.patch
Normal file
794
LoongArch-Add-support-code-model-extreme.patch
Normal file
@ -0,0 +1,794 @@
|
||||
From b1c92fb9dab678e4c9c23fa77185011494d145b9 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Thu, 18 Aug 2022 17:26:13 +0800
|
||||
Subject: [PATCH 011/124] LoongArch: Add support code model extreme.
|
||||
|
||||
Use five instructions to calculate a signed 64-bit offset relative to the pc.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch-opts.cc: Allow cmodel to be extreme.
|
||||
* config/loongarch/loongarch.cc (loongarch_call_tls_get_addr):
|
||||
Add extreme support for TLS GD and LD types.
|
||||
(loongarch_legitimize_tls_address): Add extreme support for TLS LE
|
||||
and IE.
|
||||
(loongarch_split_symbol): When compiling with -mcmodel=extreme,
|
||||
the symbol address will be obtained through five instructions.
|
||||
(loongarch_print_operand_reloc): Add support.
|
||||
(loongarch_print_operand): Add support.
|
||||
(loongarch_print_operand_address): Add support.
|
||||
(loongarch_option_override_internal): Set '-mcmodel=extreme' option
|
||||
incompatible with '-mno-explicit-relocs'.
|
||||
* config/loongarch/loongarch.md (@lui_l_hi20<mode>):
|
||||
Load bits 12-31 of the data into a register.
|
||||
(lui_h_lo20): Load bits 32-51 of the data and keep bits 0-31 of
|
||||
the source register.
|
||||
(lui_h_hi12): Load bits 52-63 of the data and keep bits 0-51 of
|
||||
the source register.
|
||||
* config/loongarch/predicates.md: Symbols need to be decomposed
|
||||
when defining the macro TARGET_CMODEL_EXTREME
|
||||
* doc/invoke.texi: Modify the description information of cmodel in the document.
|
||||
Document -W[no-]extreme-plt.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/func-call-1.c: Add option '-mcmodel=normal'.
|
||||
* gcc.target/loongarch/func-call-2.c: Likewise.
|
||||
* gcc.target/loongarch/func-call-3.c: Likewise.
|
||||
* gcc.target/loongarch/func-call-4.c: Likewise.
|
||||
* gcc.target/loongarch/func-call-5.c: Likewise.
|
||||
* gcc.target/loongarch/func-call-6.c: Likewise.
|
||||
* gcc.target/loongarch/func-call-7.c: Likewise.
|
||||
* gcc.target/loongarch/func-call-8.c: Likewise.
|
||||
* gcc.target/loongarch/relocs-symbol-noaddend.c: Likewise.
|
||||
* gcc.target/loongarch/func-call-extreme-1.c: New test.
|
||||
* gcc.target/loongarch/func-call-extreme-2.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch-opts.cc | 3 +-
|
||||
gcc/config/loongarch/loongarch.cc | 222 +++++++++++++++---
|
||||
gcc/config/loongarch/loongarch.md | 34 ++-
|
||||
gcc/config/loongarch/predicates.md | 9 +-
|
||||
gcc/doc/invoke.texi | 50 +---
|
||||
.../gcc.target/loongarch/func-call-1.c | 2 +-
|
||||
.../gcc.target/loongarch/func-call-2.c | 2 +-
|
||||
.../gcc.target/loongarch/func-call-3.c | 2 +-
|
||||
.../gcc.target/loongarch/func-call-4.c | 2 +-
|
||||
.../gcc.target/loongarch/func-call-5.c | 2 +-
|
||||
.../gcc.target/loongarch/func-call-6.c | 2 +-
|
||||
.../gcc.target/loongarch/func-call-7.c | 2 +-
|
||||
.../gcc.target/loongarch/func-call-8.c | 2 +-
|
||||
.../loongarch/func-call-extreme-1.c | 32 +++
|
||||
.../loongarch/func-call-extreme-2.c | 32 +++
|
||||
.../loongarch/relocs-symbol-noaddend.c | 2 +-
|
||||
16 files changed, 318 insertions(+), 82 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
|
||||
index 3f70943de..2ae89f234 100644
|
||||
--- a/gcc/config/loongarch/loongarch-opts.cc
|
||||
+++ b/gcc/config/loongarch/loongarch-opts.cc
|
||||
@@ -376,14 +376,13 @@ fallback:
|
||||
|
||||
/* 5. Target code model */
|
||||
t.cmodel = constrained.cmodel ? opt_cmodel : CMODEL_NORMAL;
|
||||
- if (t.cmodel != CMODEL_NORMAL)
|
||||
+ if (t.cmodel != CMODEL_NORMAL && t.cmodel != CMODEL_EXTREME)
|
||||
{
|
||||
warning (0, "%qs is not supported, now cmodel is set to %qs",
|
||||
loongarch_cmodel_strings[t.cmodel], "normal");
|
||||
t.cmodel = CMODEL_NORMAL;
|
||||
}
|
||||
|
||||
-
|
||||
/* Cleanup and return. */
|
||||
obstack_free (&msg_obstack, NULL);
|
||||
*target = t;
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index 76bf55ea4..1a33f668f 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -2436,7 +2436,19 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
|
||||
/* Split tls symbol to high and low. */
|
||||
rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc));
|
||||
high = loongarch_force_temporary (tmp, high);
|
||||
- emit_insn (gen_tls_low (Pmode, a0, high, loc));
|
||||
+
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ {
|
||||
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
|
||||
+
|
||||
+ rtx tmp1 = gen_reg_rtx (Pmode);
|
||||
+ emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc));
|
||||
+ emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc));
|
||||
+ emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loc));
|
||||
+ emit_move_insn (a0, gen_rtx_PLUS (Pmode, high, tmp1));
|
||||
+ }
|
||||
+ else
|
||||
+ emit_insn (gen_tls_low (Pmode, a0, high, loc));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -2449,14 +2461,44 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
|
||||
}
|
||||
|
||||
if (flag_plt)
|
||||
- insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol,
|
||||
+ insn = emit_call_insn (gen_call_value_internal (v0,
|
||||
+ loongarch_tls_symbol,
|
||||
const0_rtx));
|
||||
else
|
||||
{
|
||||
rtx dest = gen_reg_rtx (Pmode);
|
||||
- rtx high = gen_reg_rtx (Pmode);
|
||||
- loongarch_emit_move (high, gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
|
||||
- emit_insn (gen_ld_from_got (Pmode, dest, high, loongarch_tls_symbol));
|
||||
+
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ {
|
||||
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
|
||||
+
|
||||
+ rtx tmp1 = gen_reg_rtx (Pmode);
|
||||
+ rtx high = gen_reg_rtx (Pmode);
|
||||
+
|
||||
+ loongarch_emit_move (high,
|
||||
+ gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
|
||||
+ loongarch_emit_move (tmp1, gen_rtx_LO_SUM (Pmode,
|
||||
+ gen_rtx_REG (Pmode, 0),
|
||||
+ loongarch_tls_symbol));
|
||||
+ emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol));
|
||||
+ emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol));
|
||||
+ loongarch_emit_move (dest,
|
||||
+ gen_rtx_MEM (Pmode,
|
||||
+ gen_rtx_PLUS (Pmode, high, tmp1)));
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if (TARGET_EXPLICIT_RELOCS)
|
||||
+ {
|
||||
+ rtx high = gen_reg_rtx (Pmode);
|
||||
+ loongarch_emit_move (high,
|
||||
+ gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
|
||||
+ emit_insn (gen_ld_from_got (Pmode, dest, high,
|
||||
+ loongarch_tls_symbol));
|
||||
+ }
|
||||
+ else
|
||||
+ loongarch_emit_move (dest, loongarch_tls_symbol);
|
||||
+ }
|
||||
insn = emit_call_insn (gen_call_value_internal (v0, dest, const0_rtx));
|
||||
}
|
||||
|
||||
@@ -2508,7 +2550,23 @@ loongarch_legitimize_tls_address (rtx loc)
|
||||
tmp3 = gen_reg_rtx (Pmode);
|
||||
rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
|
||||
high = loongarch_force_temporary (tmp3, high);
|
||||
- emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
|
||||
+
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ {
|
||||
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
|
||||
+
|
||||
+ rtx tmp3 = gen_reg_rtx (Pmode);
|
||||
+ emit_insn (gen_tls_low (Pmode, tmp3,
|
||||
+ gen_rtx_REG (Pmode, 0), tmp2));
|
||||
+ emit_insn (gen_lui_h_lo20 (tmp3, tmp3, tmp2));
|
||||
+ emit_insn (gen_lui_h_hi12 (tmp3, tmp3, tmp2));
|
||||
+ emit_move_insn (tmp1,
|
||||
+ gen_rtx_MEM (Pmode,
|
||||
+ gen_rtx_PLUS (Pmode,
|
||||
+ high, tmp3)));
|
||||
+ }
|
||||
+ else
|
||||
+ emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
|
||||
}
|
||||
else
|
||||
emit_insn (loongarch_got_load_tls_ie (tmp1, loc));
|
||||
@@ -2530,11 +2588,18 @@ loongarch_legitimize_tls_address (rtx loc)
|
||||
rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
|
||||
high = loongarch_force_temporary (tmp3, high);
|
||||
emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2));
|
||||
+
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ {
|
||||
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
|
||||
+
|
||||
+ emit_insn (gen_lui_h_lo20 (tmp1, tmp1, tmp2));
|
||||
+ emit_insn (gen_lui_h_hi12 (tmp1, tmp1, tmp2));
|
||||
+ }
|
||||
}
|
||||
else
|
||||
emit_insn (loongarch_got_load_tls_le (tmp1, loc));
|
||||
emit_insn (gen_add3_insn (dest, tmp1, tp));
|
||||
-
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -2603,7 +2668,6 @@ bool
|
||||
loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
|
||||
{
|
||||
enum loongarch_symbol_type symbol_type;
|
||||
- rtx high;
|
||||
|
||||
/* If build with '-mno-explicit-relocs', don't split symbol. */
|
||||
if (!TARGET_EXPLICIT_RELOCS)
|
||||
@@ -2615,6 +2679,8 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
|
||||
|| !loongarch_split_symbol_type (symbol_type))
|
||||
return false;
|
||||
|
||||
+ rtx high, temp1 = NULL;
|
||||
+
|
||||
if (temp == NULL)
|
||||
temp = gen_reg_rtx (Pmode);
|
||||
|
||||
@@ -2622,20 +2688,42 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
|
||||
high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
|
||||
high = loongarch_force_temporary (temp, high);
|
||||
|
||||
+ if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
|
||||
+ {
|
||||
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
|
||||
+
|
||||
+ temp1 = gen_reg_rtx (Pmode);
|
||||
+ emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0),
|
||||
+ addr));
|
||||
+ emit_insn (gen_lui_h_lo20 (temp1, temp1, addr));
|
||||
+ emit_insn (gen_lui_h_hi12 (temp1, temp1, addr));
|
||||
+ }
|
||||
+
|
||||
if (low_out)
|
||||
switch (symbol_type)
|
||||
{
|
||||
case SYMBOL_PCREL:
|
||||
- *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
|
||||
- break;
|
||||
+ {
|
||||
+ if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
|
||||
+ *low_out = gen_rtx_PLUS (Pmode, high, temp1);
|
||||
+ else
|
||||
+ *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
|
||||
+ break;
|
||||
+ }
|
||||
|
||||
case SYMBOL_GOT_DISP:
|
||||
/* SYMBOL_GOT_DISP symbols are loaded from the GOT. */
|
||||
{
|
||||
- rtx low = gen_rtx_LO_SUM (Pmode, high, addr);
|
||||
- rtx mem = gen_rtx_MEM (Pmode, low);
|
||||
- *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
|
||||
- UNSPEC_LOAD_FROM_GOT);
|
||||
+ if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
|
||||
+ *low_out = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, high, temp1));
|
||||
+ else
|
||||
+ {
|
||||
+ rtx low = gen_rtx_LO_SUM (Pmode, high, addr);
|
||||
+ rtx mem = gen_rtx_MEM (Pmode, low);
|
||||
+ *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
|
||||
+ UNSPEC_LOAD_FROM_GOT);
|
||||
+ }
|
||||
+
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -4584,34 +4672,86 @@ loongarch_memmodel_needs_release_fence (enum memmodel model)
|
||||
in context CONTEXT. HI_RELOC indicates a high-part reloc. */
|
||||
|
||||
static void
|
||||
-loongarch_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
|
||||
+loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
|
||||
+ bool hi_reloc)
|
||||
{
|
||||
const char *reloc;
|
||||
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
|
||||
+
|
||||
switch (loongarch_classify_symbolic_expression (op))
|
||||
{
|
||||
case SYMBOL_PCREL:
|
||||
- reloc = hi_reloc ? "%pc_hi20" : "%pc_lo12";
|
||||
+ if (hi64_part)
|
||||
+ {
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ reloc = hi_reloc ? "%pc64_hi12" : "%pc64_lo20";
|
||||
+ else
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+ else
|
||||
+ reloc = hi_reloc ? "%pc_hi20" : "%pc_lo12";
|
||||
break;
|
||||
|
||||
case SYMBOL_GOT_DISP:
|
||||
- reloc = hi_reloc ? "%got_pc_hi20" : "%got_pc_lo12";
|
||||
+ if (hi64_part)
|
||||
+ {
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
|
||||
+ else
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+ else
|
||||
+ reloc = hi_reloc ? "%got_pc_hi20" : "%got_pc_lo12";
|
||||
break;
|
||||
|
||||
case SYMBOL_TLS_IE:
|
||||
- reloc = hi_reloc ? "%ie_pc_hi20" : "%ie_pc_lo12";
|
||||
+ if (hi64_part)
|
||||
+ {
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ reloc = hi_reloc ? "%ie64_pc_hi12" : "%ie64_pc_lo20";
|
||||
+ else
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+ else
|
||||
+ reloc = hi_reloc ? "%ie_pc_hi20" : "%ie_pc_lo12";
|
||||
break;
|
||||
|
||||
case SYMBOL_TLS_LE:
|
||||
- reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
|
||||
+ if (hi64_part)
|
||||
+ {
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ reloc = hi_reloc ? "%le64_hi12" : "%le64_lo20";
|
||||
+ else
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+ else
|
||||
+ reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
|
||||
break;
|
||||
|
||||
case SYMBOL_TLSGD:
|
||||
- reloc = hi_reloc ? "%gd_pc_hi20" : "%got_pc_lo12";
|
||||
+ if (hi64_part)
|
||||
+ {
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
|
||||
+ else
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+ else
|
||||
+ reloc = hi_reloc ? "%gd_pc_hi20" : "%got_pc_lo12";
|
||||
break;
|
||||
|
||||
case SYMBOL_TLSLDM:
|
||||
- reloc = hi_reloc ? "%ld_pc_hi20" : "%got_pc_lo12";
|
||||
+ if (hi64_part)
|
||||
+ {
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
|
||||
+ else
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+ else
|
||||
+ reloc = hi_reloc ? "%ld_pc_hi20" : "%got_pc_lo12";
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -4637,6 +4777,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
|
||||
'L' Print the low-part relocation associated with OP.
|
||||
'm' Print one less than CONST_INT OP in decimal.
|
||||
'N' Print the inverse of the integer branch condition for comparison OP.
|
||||
+ 'r' Print address 12-31bit relocation associated with OP.
|
||||
+ 'R' Print address 32-51bit relocation associated with OP.
|
||||
'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
|
||||
'z' for (eq:?I ...), 'n' for (ne:?I ...).
|
||||
't' Like 'T', but with the EQ/NE cases reversed
|
||||
@@ -4694,7 +4836,13 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
|
||||
case 'h':
|
||||
if (code == HIGH)
|
||||
op = XEXP (op, 0);
|
||||
- loongarch_print_operand_reloc (file, op, true /* hi_reloc */);
|
||||
+ loongarch_print_operand_reloc (file, op, false /* hi64_part */,
|
||||
+ true /* hi_reloc */);
|
||||
+ break;
|
||||
+
|
||||
+ case 'H':
|
||||
+ loongarch_print_operand_reloc (file, op, true /* hi64_part */,
|
||||
+ true /* hi_reloc */);
|
||||
break;
|
||||
|
||||
case 'i':
|
||||
@@ -4703,7 +4851,8 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
|
||||
break;
|
||||
|
||||
case 'L':
|
||||
- loongarch_print_operand_reloc (file, op, false /* lo_reloc */);
|
||||
+ loongarch_print_operand_reloc (file, op, false /* hi64_part*/,
|
||||
+ false /* lo_reloc */);
|
||||
break;
|
||||
|
||||
case 'm':
|
||||
@@ -4718,6 +4867,16 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
|
||||
letter);
|
||||
break;
|
||||
|
||||
+ case 'r':
|
||||
+ loongarch_print_operand_reloc (file, op, false /* hi64_part */,
|
||||
+ true /* lo_reloc */);
|
||||
+ break;
|
||||
+
|
||||
+ case 'R':
|
||||
+ loongarch_print_operand_reloc (file, op, true /* hi64_part */,
|
||||
+ false /* lo_reloc */);
|
||||
+ break;
|
||||
+
|
||||
case 't':
|
||||
case 'T':
|
||||
{
|
||||
@@ -4848,7 +5007,8 @@ loongarch_print_operand_address (FILE *file, machine_mode /* mode */, rtx x)
|
||||
|
||||
case ADDRESS_LO_SUM:
|
||||
fprintf (file, "%s,", reg_names[REGNO (addr.reg)]);
|
||||
- loongarch_print_operand_reloc (file, addr.offset, false /* hi_reloc */);
|
||||
+ loongarch_print_operand_reloc (file, addr.offset, false /* hi64_part */,
|
||||
+ false /* hi_reloc */);
|
||||
return;
|
||||
|
||||
case ADDRESS_CONST_INT:
|
||||
@@ -5821,13 +5981,21 @@ loongarch_option_override_internal (struct gcc_options *opts)
|
||||
|
||||
switch (la_target.cmodel)
|
||||
{
|
||||
- case CMODEL_TINY_STATIC:
|
||||
case CMODEL_EXTREME:
|
||||
+ if (!TARGET_EXPLICIT_RELOCS)
|
||||
+ error ("code model %qs needs %s",
|
||||
+ "extreme", "-mexplicit-relocs");
|
||||
+
|
||||
if (opts->x_flag_plt)
|
||||
- error ("code model %qs and %qs not support %s mode",
|
||||
- "tiny-static", "extreme", "plt");
|
||||
+ {
|
||||
+ if (global_options_set.x_flag_plt)
|
||||
+ error ("code model %qs is not compatible with %s",
|
||||
+ "extreme", "-fplt");
|
||||
+ opts->x_flag_plt = 0;
|
||||
+ }
|
||||
break;
|
||||
|
||||
+ case CMODEL_TINY_STATIC:
|
||||
case CMODEL_NORMAL:
|
||||
case CMODEL_TINY:
|
||||
case CMODEL_LARGE:
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index 8e8868de9..8fc10444c 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -60,6 +60,9 @@
|
||||
|
||||
UNSPEC_LOAD_FROM_GOT
|
||||
UNSPEC_ORI_L_LO12
|
||||
+ UNSPEC_LUI_L_HI20
|
||||
+ UNSPEC_LUI_H_LO20
|
||||
+ UNSPEC_LUI_H_HI12
|
||||
UNSPEC_TLS_LOW
|
||||
])
|
||||
|
||||
@@ -1934,16 +1937,45 @@
|
||||
[(set_attr "type" "move")]
|
||||
)
|
||||
|
||||
+(define_insn "@lui_l_hi20<mode>"
|
||||
+ [(set (match_operand:P 0 "register_operand" "=r")
|
||||
+ (unspec:P [(match_operand:P 1 "symbolic_operand")]
|
||||
+ UNSPEC_LUI_L_HI20))]
|
||||
+ ""
|
||||
+ "lu12i.w\t%0,%r1"
|
||||
+ [(set_attr "type" "move")]
|
||||
+)
|
||||
+
|
||||
(define_insn "@ori_l_lo12<mode>"
|
||||
[(set (match_operand:P 0 "register_operand" "=r")
|
||||
(unspec:P [(match_operand:P 1 "register_operand" "r")
|
||||
- (match_operand:P 2 "symbolic_operand")]
|
||||
+ (match_operand:P 2 "symbolic_operand")]
|
||||
UNSPEC_ORI_L_LO12))]
|
||||
""
|
||||
"ori\t%0,%1,%L2"
|
||||
[(set_attr "type" "move")]
|
||||
)
|
||||
|
||||
+(define_insn "lui_h_lo20"
|
||||
+ [(set (match_operand:DI 0 "register_operand" "=r")
|
||||
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
|
||||
+ (match_operand:DI 2 "symbolic_operand")]
|
||||
+ UNSPEC_LUI_H_LO20))]
|
||||
+ "TARGET_64BIT"
|
||||
+ "lu32i.d\t%0,%R2"
|
||||
+ [(set_attr "type" "move")]
|
||||
+)
|
||||
+
|
||||
+(define_insn "lui_h_hi12"
|
||||
+ [(set (match_operand:DI 0 "register_operand" "=r")
|
||||
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
|
||||
+ (match_operand:DI 2 "symbolic_operand")]
|
||||
+ UNSPEC_LUI_H_HI12))]
|
||||
+ "TARGET_64BIT"
|
||||
+ "lu52i.d\t%0,%1,%H2"
|
||||
+ [(set_attr "type" "move")]
|
||||
+)
|
||||
+
|
||||
;; Convert floating-point numbers to integers
|
||||
(define_insn "frint_<fmt>"
|
||||
[(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
|
||||
index cd3528c7c..e38c6fbdd 100644
|
||||
--- a/gcc/config/loongarch/predicates.md
|
||||
+++ b/gcc/config/loongarch/predicates.md
|
||||
@@ -111,7 +111,7 @@
|
||||
(match_code "const,symbol_ref,label_ref")
|
||||
{
|
||||
/* Split symbol to high and low if return false.
|
||||
- If defined TARGET_CMODEL_LARGE, all symbol would be splited,
|
||||
+ If defined TARGET_CMODEL_EXTREME, all symbol would be splited,
|
||||
else if offset is not zero, the symbol would be splited. */
|
||||
|
||||
enum loongarch_symbol_type symbol_type;
|
||||
@@ -126,10 +126,13 @@
|
||||
switch (symbol_type)
|
||||
{
|
||||
case SYMBOL_PCREL:
|
||||
- return 1;
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ return false;
|
||||
+ else
|
||||
+ return 1;
|
||||
|
||||
case SYMBOL_GOT_DISP:
|
||||
- if (TARGET_CMODEL_LARGE || !flag_plt)
|
||||
+ if (TARGET_CMODEL_EXTREME || !flag_plt)
|
||||
return false;
|
||||
else
|
||||
return 1;
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index 1de2b2bd4..c4f83e62a 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -1006,6 +1006,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-mcond-move-float -mno-cond-move-float @gol
|
||||
-memcpy -mno-memcpy -mstrict-align -mno-strict-align @gol
|
||||
-mmax-inline-memcpy-size=@var{n} @gol
|
||||
+-mexplicit-relocs -mno-explicit-relocs @gol
|
||||
-mcmodel=@var{code-model}}
|
||||
|
||||
@emph{M32R/D Options}
|
||||
@@ -24617,50 +24618,19 @@ less than or equal to @var{n} bytes. The default value of @var{n} is 1024.
|
||||
@item -mcmodel=@var{code-model}
|
||||
Set the code model to one of:
|
||||
@table @samp
|
||||
-@item tiny-static
|
||||
-@itemize @bullet
|
||||
-@item
|
||||
-local symbol and global strong symbol: The data section must be within +/-2MiB addressing space.
|
||||
-The text section must be within +/-128MiB addressing space.
|
||||
-@item
|
||||
-global weak symbol: The got table must be within +/-2GiB addressing space.
|
||||
-@end itemize
|
||||
-
|
||||
-@item tiny
|
||||
-@itemize @bullet
|
||||
-@item
|
||||
-local symbol: The data section must be within +/-2MiB addressing space.
|
||||
-The text section must be within +/-128MiB
|
||||
-addressing space.
|
||||
-@item
|
||||
-global symbol: The got table must be within +/-2GiB addressing space.
|
||||
-@end itemize
|
||||
+@item tiny-static (Not implemented yet)
|
||||
+@item tiny (Not implemented yet)
|
||||
|
||||
@item normal
|
||||
-@itemize @bullet
|
||||
-@item
|
||||
-local symbol: The data section must be within +/-2GiB addressing space.
|
||||
-The text section must be within +/-128MiB addressing space.
|
||||
-@item
|
||||
-global symbol: The got table must be within +/-2GiB addressing space.
|
||||
-@end itemize
|
||||
+The text segment must be within 128MB addressing space. The data segment must
|
||||
+be within 2GB addressing space.
|
||||
|
||||
-@item large
|
||||
-@itemize @bullet
|
||||
-@item
|
||||
-local symbol: The data section must be within +/-2GiB addressing space.
|
||||
-The text section must be within +/-128GiB addressing space.
|
||||
-@item
|
||||
-global symbol: The got table must be within +/-2GiB addressing space.
|
||||
-@end itemize
|
||||
+@item large (Not implemented yet)
|
||||
|
||||
-@item extreme(Not implemented yet)
|
||||
-@itemize @bullet
|
||||
-@item
|
||||
-local symbol: The data and text section must be within +/-8EiB addressing space.
|
||||
-@item
|
||||
-global symbol: The data got table must be within +/-8EiB addressing space.
|
||||
-@end itemize
|
||||
+@item extreme
|
||||
+This mode does not limit the size of the code segment and data segment.
|
||||
+The @option{-mcmodel=extreme} option is incompatible with @option{-fplt} and
|
||||
+@option{-mno-explicit-relocs}.
|
||||
@end table
|
||||
The default code model is @code{normal}.
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-1.c
|
||||
index 01b8ea23f..76bf11b0c 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/func-call-1.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-1.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mno-explicit-relocs" } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mno-explicit-relocs -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
|
||||
/* { dg-final { scan-assembler "test1:.*bl\t%plt\\(f\\)\n" } } */
|
||||
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-2.c b/gcc/testsuite/gcc.target/loongarch/func-call-2.c
|
||||
index 4565baaec..4b468fef8 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/func-call-2.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-2.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mno-explicit-relocs" } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mno-explicit-relocs -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
|
||||
/* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
|
||||
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-3.c
|
||||
index 4f669a029..dd3a4882d 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/func-call-3.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-3.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mno-explicit-relocs" } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mno-explicit-relocs -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */
|
||||
/* { dg-final { scan-assembler "test1:.*la\.global\t.*f\n\tjirl" } } */
|
||||
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-4.c
|
||||
index 943adb640..f8158ec34 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/func-call-4.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-4.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mno-explicit-relocs" } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mno-explicit-relocs -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */
|
||||
/* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
|
||||
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-5.c b/gcc/testsuite/gcc.target/loongarch/func-call-5.c
|
||||
index 2c2a1c8a1..37994af43 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/func-call-5.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-5.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mexplicit-relocs" } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mexplicit-relocs -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
|
||||
/* { dg-final { scan-assembler "test1:.*bl\t%plt\\(f\\)\n" } } */
|
||||
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-6.c b/gcc/testsuite/gcc.target/loongarch/func-call-6.c
|
||||
index 4b0e4266e..8e366e376 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/func-call-6.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-6.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mexplicit-relocs" } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mexplicit-relocs -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
|
||||
/* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
|
||||
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-7.c b/gcc/testsuite/gcc.target/loongarch/func-call-7.c
|
||||
index 51792711f..4177c3d96 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/func-call-7.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-7.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs" } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */
|
||||
/* { dg-final { scan-assembler "test1:.*pcalau12i\t.*%got_pc_hi20\\(f\\)\n\tld\.d\t.*%got_pc_lo12\\(f\\)\n\tjirl" } } */
|
||||
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-8.c b/gcc/testsuite/gcc.target/loongarch/func-call-8.c
|
||||
index 330140d88..4254eaa16 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/func-call-8.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-8.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs" } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */
|
||||
/* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
|
||||
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c
|
||||
new file mode 100644
|
||||
index 000000000..db1e0f853
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c
|
||||
@@ -0,0 +1,32 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */
|
||||
+/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
|
||||
+/* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
|
||||
+/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
|
||||
+
|
||||
+extern void g (void);
|
||||
+void
|
||||
+f (void)
|
||||
+{}
|
||||
+
|
||||
+static void
|
||||
+l (void)
|
||||
+{}
|
||||
+
|
||||
+void
|
||||
+test (void)
|
||||
+{
|
||||
+ g ();
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+test1 (void)
|
||||
+{
|
||||
+ f ();
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+test2 (void)
|
||||
+{
|
||||
+ l ();
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c
|
||||
new file mode 100644
|
||||
index 000000000..21bf81ae8
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c
|
||||
@@ -0,0 +1,32 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */
|
||||
+/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
|
||||
+/* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
|
||||
+/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
|
||||
+
|
||||
+extern void g (void);
|
||||
+void
|
||||
+f (void)
|
||||
+{}
|
||||
+
|
||||
+static void
|
||||
+l (void)
|
||||
+{}
|
||||
+
|
||||
+void
|
||||
+test (void)
|
||||
+{
|
||||
+ g ();
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+test1 (void)
|
||||
+{
|
||||
+ f ();
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+test2 (void)
|
||||
+{
|
||||
+ l ();
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c b/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c
|
||||
index bfcc9bc33..3ec8bd229 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -mexplicit-relocs -fno-pic -O2" } */
|
||||
+/* { dg-options "-mabi=lp64d -mexplicit-relocs -fno-pic -O2 -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "pcalau12i.*%pc_hi20\\(\.LANCHOR0\\)\n" } } */
|
||||
/* { dg-final { scan-assembler "addi\.d.*%pc_lo12\\(\.LANCHOR0\\)\n" } } */
|
||||
/* { dg-final { scan-assembler "ldptr.d\t\\\$r4,.*,0\n" } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
4485
LoongArch-Add-tests-for-ASX-builtin-functions.patch
Normal file
4485
LoongArch-Add-tests-for-ASX-builtin-functions.patch
Normal file
File diff suppressed because it is too large
Load Diff
5363
LoongArch-Add-tests-for-ASX-vector-comparison-and-se.patch
Normal file
5363
LoongArch-Add-tests-for-ASX-vector-comparison-and-se.patch
Normal file
File diff suppressed because it is too large
Load Diff
7291
LoongArch-Add-tests-for-ASX-vector-floating-point-co.patch
Normal file
7291
LoongArch-Add-tests-for-ASX-vector-floating-point-co.patch
Normal file
File diff suppressed because it is too large
Load Diff
5614
LoongArch-Add-tests-for-ASX-vector-floating-point-op.patch
Normal file
5614
LoongArch-Add-tests-for-ASX-vector-floating-point-op.patch
Normal file
File diff suppressed because it is too large
Load Diff
4566
LoongArch-Add-tests-for-ASX-vector-subtraction-instr.patch
Normal file
4566
LoongArch-Add-tests-for-ASX-vector-subtraction-instr.patch
Normal file
File diff suppressed because it is too large
Load Diff
5595
LoongArch-Add-tests-for-ASX-vector-xvabsd-xvavg-xvav.patch
Normal file
5595
LoongArch-Add-tests-for-ASX-vector-xvabsd-xvavg-xvav.patch
Normal file
File diff suppressed because it is too large
Load Diff
6368
LoongArch-Add-tests-for-ASX-vector-xvadd-xvadda-xvad.patch
Normal file
6368
LoongArch-Add-tests-for-ASX-vector-xvadd-xvadda-xvad.patch
Normal file
File diff suppressed because it is too large
Load Diff
1854
LoongArch-Add-tests-for-ASX-vector-xvand-xvandi-xvan.patch
Normal file
1854
LoongArch-Add-tests-for-ASX-vector-xvand-xvandi-xvan.patch
Normal file
File diff suppressed because it is too large
Load Diff
5057
LoongArch-Add-tests-for-ASX-vector-xvbitclr-xvbitclr.patch
Normal file
5057
LoongArch-Add-tests-for-ASX-vector-xvbitclr-xvbitclr.patch
Normal file
File diff suppressed because it is too large
Load Diff
4600
LoongArch-Add-tests-for-ASX-vector-xvext2xv-xvexth-x.patch
Normal file
4600
LoongArch-Add-tests-for-ASX-vector-xvext2xv-xvexth-x.patch
Normal file
File diff suppressed because it is too large
Load Diff
4737
LoongArch-Add-tests-for-ASX-vector-xvextl-xvsra-xvsr.patch
Normal file
4737
LoongArch-Add-tests-for-ASX-vector-xvextl-xvsra-xvsr.patch
Normal file
File diff suppressed because it is too large
Load Diff
4510
LoongArch-Add-tests-for-ASX-vector-xvfcmp-caf-ceq-cl.patch
Normal file
4510
LoongArch-Add-tests-for-ASX-vector-xvfcmp-caf-ceq-cl.patch
Normal file
File diff suppressed because it is too large
Load Diff
4824
LoongArch-Add-tests-for-ASX-vector-xvfcmp-saf-seq-sl.patch
Normal file
4824
LoongArch-Add-tests-for-ASX-vector-xvfcmp-saf-seq-sl.patch
Normal file
File diff suppressed because it is too large
Load Diff
4991
LoongArch-Add-tests-for-ASX-vector-xvfnmadd-xvfrstp-.patch
Normal file
4991
LoongArch-Add-tests-for-ASX-vector-xvfnmadd-xvfrstp-.patch
Normal file
File diff suppressed because it is too large
Load Diff
6930
LoongArch-Add-tests-for-ASX-vector-xvhadd-xvhaddw-xv.patch
Normal file
6930
LoongArch-Add-tests-for-ASX-vector-xvhadd-xvhaddw-xv.patch
Normal file
File diff suppressed because it is too large
Load Diff
2735
LoongArch-Add-tests-for-ASX-vector-xvldi-xvmskgez-xv.patch
Normal file
2735
LoongArch-Add-tests-for-ASX-vector-xvldi-xvmskgez-xv.patch
Normal file
File diff suppressed because it is too large
Load Diff
4124
LoongArch-Add-tests-for-ASX-vector-xvmax-xvmaxi-xvmi.patch
Normal file
4124
LoongArch-Add-tests-for-ASX-vector-xvmax-xvmaxi-xvmi.patch
Normal file
File diff suppressed because it is too large
Load Diff
5766
LoongArch-Add-tests-for-ASX-vector-xvmul-xvmod-xvdiv.patch
Normal file
5766
LoongArch-Add-tests-for-ASX-vector-xvmul-xvmod-xvdiv.patch
Normal file
File diff suppressed because it is too large
Load Diff
5364
LoongArch-Add-tests-for-ASX-vector-xvpackev-xvpackod.patch
Normal file
5364
LoongArch-Add-tests-for-ASX-vector-xvpackev-xvpackod.patch
Normal file
File diff suppressed because it is too large
Load Diff
5611
LoongArch-Add-tests-for-ASX-vector-xvsll-xvsrl-instr.patch
Normal file
5611
LoongArch-Add-tests-for-ASX-vector-xvsll-xvsrl-instr.patch
Normal file
File diff suppressed because it is too large
Load Diff
4258
LoongArch-Add-tests-for-ASX-vector-xvssran-xvssrani-.patch
Normal file
4258
LoongArch-Add-tests-for-ASX-vector-xvssran-xvssrani-.patch
Normal file
File diff suppressed because it is too large
Load Diff
4123
LoongArch-Add-tests-for-ASX-vector-xvssrln-xvssrlni-.patch
Normal file
4123
LoongArch-Add-tests-for-ASX-vector-xvssrln-xvssrlni-.patch
Normal file
File diff suppressed because it is too large
Load Diff
65
LoongArch-Add-tests-for-ASX-xvldrepl-xvstelm-instruc.patch
Normal file
65
LoongArch-Add-tests-for-ASX-xvldrepl-xvstelm-instruc.patch
Normal file
@ -0,0 +1,65 @@
|
||||
From 2ef90d604d7bae207d5b2067b4ce38d04d4835be Mon Sep 17 00:00:00 2001
|
||||
From: Xiaolong Chen <chenxiaolong@loongson.cn>
|
||||
Date: Tue, 12 Sep 2023 16:00:48 +0800
|
||||
Subject: [PATCH 110/124] LoongArch: Add tests for ASX xvldrepl/xvstelm
|
||||
instruction generation.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c: New test.
|
||||
* gcc.target/loongarch/vector/lasx/lasx-xvstelm.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
.../loongarch/vector/lasx/lasx-xvldrepl.c | 16 ++++++++++++++++
|
||||
.../loongarch/vector/lasx/lasx-xvstelm.c | 14 ++++++++++++++
|
||||
2 files changed, 30 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
|
||||
new file mode 100644
|
||||
index 000000000..105567951
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
|
||||
@@ -0,0 +1,16 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O3 -mlasx" } */
|
||||
+/* { dg-final { scan-assembler-times "xvldrepl.w" 2} } */
|
||||
+
|
||||
+#define N 258
|
||||
+
|
||||
+float a[N], b[N], c[N];
|
||||
+
|
||||
+void
|
||||
+test ()
|
||||
+{
|
||||
+ for (int i = 0; i < 256; i++)
|
||||
+ {
|
||||
+ a[i] = c[0] * b[i] + c[1];
|
||||
+ }
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
|
||||
new file mode 100644
|
||||
index 000000000..1a7b0e86f
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
|
||||
@@ -0,0 +1,14 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O3 -mlasx" } */
|
||||
+/* { dg-final { scan-assembler-times "xvstelm.w" 8} } */
|
||||
+
|
||||
+#define LEN 256
|
||||
+
|
||||
+float a[LEN], b[LEN], c[LEN];
|
||||
+
|
||||
+void
|
||||
+test ()
|
||||
+{
|
||||
+ for (int i = 0; i < LEN; i += 2)
|
||||
+ a[i] = b[i] + c[i];
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
4354
LoongArch-Add-tests-for-Loongson-SX-builtin-function.patch
Normal file
4354
LoongArch-Add-tests-for-Loongson-SX-builtin-function.patch
Normal file
File diff suppressed because it is too large
Load Diff
7181
LoongArch-Add-tests-for-SX-vector-addition-instructi.patch
Normal file
7181
LoongArch-Add-tests-for-SX-vector-addition-instructi.patch
Normal file
File diff suppressed because it is too large
Load Diff
715
LoongArch-Add-tests-for-SX-vector-addition-vsadd-ins.patch
Normal file
715
LoongArch-Add-tests-for-SX-vector-addition-vsadd-ins.patch
Normal file
@ -0,0 +1,715 @@
|
||||
From 243656b5b87a3125c2a885d11f022a79cca98b39 Mon Sep 17 00:00:00 2001
|
||||
From: Xiaolong Chen <chenxiaolong@loongson.cn>
|
||||
Date: Mon, 11 Sep 2023 10:07:24 +0800
|
||||
Subject: [PATCH 082/124] LoongArch: Add tests for SX vector addition vsadd
|
||||
instructions.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c: New test.
|
||||
* gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
.../loongarch/vector/lsx/lsx-vsadd-1.c | 335 +++++++++++++++++
|
||||
.../loongarch/vector/lsx/lsx-vsadd-2.c | 345 ++++++++++++++++++
|
||||
2 files changed, 680 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
|
||||
new file mode 100644
|
||||
index 000000000..1bc27c983
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
|
||||
@@ -0,0 +1,335 @@
|
||||
+/* { dg-do run } */
|
||||
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
|
||||
+#include "../simd_correctness_check.h"
|
||||
+#include <lsxintrin.h>
|
||||
+
|
||||
+int
|
||||
+main ()
|
||||
+{
|
||||
+ __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
|
||||
+ __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
|
||||
+ __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
|
||||
+
|
||||
+ int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
|
||||
+ long int long_op0, long_op1, long_op2, lont_out, lont_result;
|
||||
+ long int long_int_out, long_int_result;
|
||||
+ unsigned int unsigned_int_out, unsigned_int_result;
|
||||
+ unsigned long int unsigned_long_int_out, unsigned_long_int_result;
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x00000000ffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x00000000ffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x00000000ffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x00000000ffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xfefefefefefefefe;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffff3c992b2e;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffff730f;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffff3c992b2e;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffff730f;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x00007fff00007fff;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x000000002bfd9461;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x00007fff00007fff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x000000002bfd9461;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x00d3012acc56f9bb;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000001021;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x00d3012acc56f9bb;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000001021;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000001000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000001000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000001000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000001000;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x80808080806b000b;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x80808080806b000b;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0xffffffffff01ff01;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x3c600000ff800000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xfffffffffffffffe;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x3c5fffffff7fffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xfffefffeff00feff;
|
||||
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x00ff00ff00ff00ff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x00ff00ff00ff00ff;
|
||||
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000000ffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x00000000ffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x3ff0000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x40f3fa0000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x3ff0000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x40f3fa0000000000;
|
||||
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000008a0000008a;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000008900000009;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x63637687636316bb;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x6363636363636363;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x6363771163631745;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x636363ec6363636c;
|
||||
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000004;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000004;
|
||||
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000080000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000080000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000080000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000080000000;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xfffffffffefefe6a;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000000c2bac2c2;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x00000001fffffffe;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x00000000fefefe68;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x00000000c2bac2c2;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x027c027c000027c0;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x001ffff0003ffff0;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x000fffefffefffef;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x001ffff0003ffff0;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x028c026bfff027af;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0007000000040000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0003000000010000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0007000000040000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0003000000010000;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x3f8000003f800000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x3f8000003f800000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x3fffff0000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x3fffff0000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x7f7fff003f800000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x7f7fff003f800000;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000820202020;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00fe01fc0005fff4;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000003a24;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x003dbe88077c78c1;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000820205a44;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x013bc084078278b5;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000001;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000140001;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000001;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000140001;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x67eb85afb2ebb000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0xc8847ef6ed3f2000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000100000001;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x67eb85b0b2ebb001;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xc8847ef6ed3f2000;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffff00000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0xffff000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000014eb54ab;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x14eb6a002a406a00;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffff14eb54ab;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x14ea6a002a406a00;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000004;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000004;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xce9035c49ffff570;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000004;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xce9035c49ffff574;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000010;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000010;
|
||||
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x000000000000000d;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000400;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x000000000000040d;
|
||||
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000001300000013;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000001300000013;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000001300000013;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000001300000013;
|
||||
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000100000100;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000100000100;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000100000100;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x00000001000000ff;
|
||||
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000300000001;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000100010001;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xfffffffffffffffa;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xfffffffffffffffa;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x00000002fffffffb;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x000000010000fffb;
|
||||
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c
|
||||
new file mode 100644
|
||||
index 000000000..67d189991
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c
|
||||
@@ -0,0 +1,345 @@
|
||||
+/* { dg-do run } */
|
||||
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
|
||||
+#include "../simd_correctness_check.h"
|
||||
+#include <lsxintrin.h>
|
||||
+
|
||||
+int
|
||||
+main ()
|
||||
+{
|
||||
+ __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
|
||||
+ __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
|
||||
+ __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
|
||||
+
|
||||
+ int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
|
||||
+ long int long_op0, long_op1, long_op2, lont_out, lont_result;
|
||||
+ long int long_int_out, long_int_result;
|
||||
+ unsigned int unsigned_int_out, unsigned_int_result;
|
||||
+ unsigned long int unsigned_long_int_out, unsigned_long_int_result;
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x10f917d72d3d01e4;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x203e16d116de012b;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x10f917d72d3d01e4;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x203e16d116de012b;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xfffebd06fffe820c;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x7fff7ffe7fff3506;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xfffebd06fffe820c;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x7fff7ffe7fff3506;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffff0cffffff18;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xfefffefffeff6a0c;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x4f804f804f804f80;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x4f804f804f804f80;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xfffff60ca7104649;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0xfffff790a15db63d;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000001;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000001;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xfffff60ca710464a;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xfffff790a15db63e;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xfffffffffffffffe;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffff46;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x00fe000100cf005f;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x7fff7fff7fff7fff;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x5f675e96e29a5a60;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x7fff7fff7fff7fff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x5fff5e97e2ff5abf;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xfefffefffefffeff;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000001000100010;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0001000100010058;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0001001100110068;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x7fffffff7fffffff;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x7fffffff7fffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x7fff010181010102;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x7fffffff81010102;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xfeffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xfeffffffffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000ebd20000714f;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00012c8a0000a58a;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffb81a6f70;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000d48eaa1a2;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffb81ae0bf;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x00012c9748eaffff;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0177fff0fffffff0;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000000011ff8bc;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000200;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000200;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000200;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000200;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000001;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000001;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000d0000000d;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x8006000000040000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x8002000000000007;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x8006000000040000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x8002000d00000014;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000014;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000014;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ int_out = __lsx_vpickve2gr_h (__m128i_op0, 0x1);
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000600007fff;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000008ffffa209;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000600007fff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x00000008ffffa209;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x636363633f3e47c1;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x41f8e080f1ef4eaa;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x00000807bf0a1f80;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x00000800ecedee68;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x63636b6afe486741;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x41f8e880ffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000ebd20000714f;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00012c8a0000a58a;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000ebd20000714f;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x00012c8a0000a58a;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000ffff0000e29e;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x000259140000ffff;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xfffffffeffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xfffffffeffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0c03e17edd781b11;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x342caf9be55700b5;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x00040003ff83ff84;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x00040003ff4dffca;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0c07e181ffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x3430af9effffffff;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x00000000ffa8ff9f;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000ffffffabff99;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x000100000002007d;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0001000000020001;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x00010000ffab001c;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0001ffffffadff9a;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0800080008000800;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0800080008000800;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0800080008000800;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0800080008000800;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000001;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x76f424887fffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xc110000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xc00d060000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xc110000000000001;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffff7fffffff;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x000000000000002f;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000029;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xfbfbfb17fbfb38ea;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xfbfb47fbfbfb0404;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xfbfbfb17fbfb3919;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xfbfb47fbfbfb042d;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x8080808080808081;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x00000000ffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x80808080ffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x00123fff00120012;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0012001200120012;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x000000000005003a;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x00123fff00120012;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x001200120017004c;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xbfd10d0d7b6b6b73;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xc5c534920000c4ed;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xbfd10d0d7b6b6b73;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xc5c534920000c4ed;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x000aa822a79308f6;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000000084d12ce;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x000aa822a79308f6;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x03aa558e1d37b5a1;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x00155044ffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x03aa558e2584c86f;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x021b7d24c9678a35;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x030298a6a1030a49;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x021b7d24c9678a35;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x030298a6a1030a49;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x00007a8000000480;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000485000004cc;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x00007a8000000480;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x00000485000004cc;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000f50000000900;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000090a00000998;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x004eff6200d2ff76;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xff70002800be00a0;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x004eff6200d2ff76;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xff70002800be00a0;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
2928
LoongArch-Add-tests-for-SX-vector-floating-point-ari.patch
Normal file
2928
LoongArch-Add-tests-for-SX-vector-floating-point-ari.patch
Normal file
File diff suppressed because it is too large
Load Diff
4316
LoongArch-Add-tests-for-SX-vector-floating-point-ins.patch
Normal file
4316
LoongArch-Add-tests-for-SX-vector-floating-point-ins.patch
Normal file
File diff suppressed because it is too large
Load Diff
5411
LoongArch-Add-tests-for-SX-vector-handling-and-shuff.patch
Normal file
5411
LoongArch-Add-tests-for-SX-vector-handling-and-shuff.patch
Normal file
File diff suppressed because it is too large
Load Diff
4150
LoongArch-Add-tests-for-SX-vector-subtraction-instru.patch
Normal file
4150
LoongArch-Add-tests-for-SX-vector-subtraction-instru.patch
Normal file
File diff suppressed because it is too large
Load Diff
1710
LoongArch-Add-tests-for-SX-vector-vabsd-vmskgez-vmsk.patch
Normal file
1710
LoongArch-Add-tests-for-SX-vector-vabsd-vmskgez-vmsk.patch
Normal file
File diff suppressed because it is too large
Load Diff
1209
LoongArch-Add-tests-for-SX-vector-vand-vandi-vandn-v.patch
Normal file
1209
LoongArch-Add-tests-for-SX-vector-vand-vandi-vandn-v.patch
Normal file
File diff suppressed because it is too large
Load Diff
1375
LoongArch-Add-tests-for-SX-vector-vavg-vavgr-instruc.patch
Normal file
1375
LoongArch-Add-tests-for-SX-vector-vavg-vavgr-instruc.patch
Normal file
File diff suppressed because it is too large
Load Diff
3324
LoongArch-Add-tests-for-SX-vector-vbitclr-vbitclri-v.patch
Normal file
3324
LoongArch-Add-tests-for-SX-vector-vbitclr-vbitclri-v.patch
Normal file
File diff suppressed because it is too large
Load Diff
1114
LoongArch-Add-tests-for-SX-vector-vdiv-vmod-instruct.patch
Normal file
1114
LoongArch-Add-tests-for-SX-vector-vdiv-vmod-instruct.patch
Normal file
File diff suppressed because it is too large
Load Diff
1664
LoongArch-Add-tests-for-SX-vector-vexth-vextl-vldi-v.patch
Normal file
1664
LoongArch-Add-tests-for-SX-vector-vexth-vextl-vldi-v.patch
Normal file
File diff suppressed because it is too large
Load Diff
5295
LoongArch-Add-tests-for-SX-vector-vfcmp-instructions.patch
Normal file
5295
LoongArch-Add-tests-for-SX-vector-vfcmp-instructions.patch
Normal file
File diff suppressed because it is too large
Load Diff
1412
LoongArch-Add-tests-for-SX-vector-vfmadd-vfnmadd-vld.patch
Normal file
1412
LoongArch-Add-tests-for-SX-vector-vfmadd-vfnmadd-vld.patch
Normal file
File diff suppressed because it is too large
Load Diff
3926
LoongArch-Add-tests-for-SX-vector-vfrstp-vfrstpi-vse.patch
Normal file
3926
LoongArch-Add-tests-for-SX-vector-vfrstp-vfrstpi-vse.patch
Normal file
File diff suppressed because it is too large
Load Diff
2578
LoongArch-Add-tests-for-SX-vector-vmax-vmaxi-vmin-vm.patch
Normal file
2578
LoongArch-Add-tests-for-SX-vector-vmax-vmaxi-vmin-vm.patch
Normal file
File diff suppressed because it is too large
Load Diff
3173
LoongArch-Add-tests-for-SX-vector-vrotr-vrotri-vsra-.patch
Normal file
3173
LoongArch-Add-tests-for-SX-vector-vrotr-vrotri-vsra-.patch
Normal file
File diff suppressed because it is too large
Load Diff
4023
LoongArch-Add-tests-for-SX-vector-vsll-vslli-vsrl-vs.patch
Normal file
4023
LoongArch-Add-tests-for-SX-vector-vsll-vslli-vsrl-vs.patch
Normal file
File diff suppressed because it is too large
Load Diff
4954
LoongArch-Add-tests-for-SX-vector-vssran-vssrani-vss.patch
Normal file
4954
LoongArch-Add-tests-for-SX-vector-vssran-vssrani-vss.patch
Normal file
File diff suppressed because it is too large
Load Diff
2990
LoongArch-Add-tests-for-the-SX-vector-multiplication.patch
Normal file
2990
LoongArch-Add-tests-for-the-SX-vector-multiplication.patch
Normal file
File diff suppressed because it is too large
Load Diff
37
LoongArch-Add-tests-of-mstrict-align-option.patch
Normal file
37
LoongArch-Add-tests-of-mstrict-align-option.patch
Normal file
@ -0,0 +1,37 @@
|
||||
From f07b91862055533d779fbf76c12cb7c0ae75b53d Mon Sep 17 00:00:00 2001
|
||||
From: Xiaolong Chen <chenxiaolong@loongson.cn>
|
||||
Date: Mon, 11 Sep 2023 09:35:24 +0800
|
||||
Subject: [PATCH 076/124] LoongArch: Add tests of -mstrict-align option.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/strict-align.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/testsuite/gcc.target/loongarch/strict-align.c | 12 ++++++++++++
|
||||
1 file changed, 12 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/strict-align.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/strict-align.c b/gcc/testsuite/gcc.target/loongarch/strict-align.c
|
||||
new file mode 100644
|
||||
index 000000000..040d84958
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/strict-align.c
|
||||
@@ -0,0 +1,12 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-Ofast -mstrict-align -mlasx" } */
|
||||
+/* { dg-final { scan-assembler-not "vfadd.s" } } */
|
||||
+
|
||||
+void
|
||||
+foo (float *restrict x, float *restrict y)
|
||||
+{
|
||||
+ x[0] = x[0] + y[0];
|
||||
+ x[1] = x[1] + y[1];
|
||||
+ x[2] = x[2] + y[2];
|
||||
+ x[3] = x[3] + y[3];
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
131
LoongArch-Add-testsuite-framework-for-Loongson-SX-AS.patch
Normal file
131
LoongArch-Add-testsuite-framework-for-Loongson-SX-AS.patch
Normal file
@ -0,0 +1,131 @@
|
||||
From aebd03c944312be767f03d129eeebc0c4cdf5b4a Mon Sep 17 00:00:00 2001
|
||||
From: Xiaolong Chen <chenxiaolong@loongson.cn>
|
||||
Date: Mon, 11 Sep 2023 09:36:35 +0800
|
||||
Subject: [PATCH 077/124] LoongArch: Add testsuite framework for Loongson
|
||||
SX/ASX.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/vector/loongarch-vector.exp: New test.
|
||||
* gcc.target/loongarch/vector/simd_correctness_check.h: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
.../loongarch/vector/loongarch-vector.exp | 42 +++++++++++++++
|
||||
.../loongarch/vector/simd_correctness_check.h | 54 +++++++++++++++++++
|
||||
2 files changed, 96 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
|
||||
new file mode 100644
|
||||
index 000000000..2c37aa91d
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
|
||||
@@ -0,0 +1,42 @@
|
||||
+#Copyright(C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+#This program is free software; you can redistribute it and / or modify
|
||||
+#it under the terms of the GNU General Public License as published by
|
||||
+#the Free Software Foundation; either version 3 of the License, or
|
||||
+#(at your option) any later version.
|
||||
+#
|
||||
+#This program is distributed in the hope that it will be useful,
|
||||
+#but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
|
||||
+#GNU General Public License for more details.
|
||||
+#
|
||||
+#You should have received a copy of the GNU General Public License
|
||||
+#along with GCC; see the file COPYING3.  If not see
|
||||
+# <http://www.gnu.org/licenses/>.
|
||||
+
|
||||
+#GCC testsuite that uses the `dg.exp' driver.
|
||||
+
|
||||
+#Exit immediately if this isn't a LoongArch target.
|
||||
+if ![istarget loongarch*-*-*] then {
|
||||
+ return
|
||||
+}
|
||||
+
|
||||
+#Load support procs.
|
||||
+load_lib gcc-dg.exp
|
||||
+
|
||||
+#If a testcase doesn't have special options, use these.
|
||||
+global DEFAULT_CFLAGS
|
||||
+if ![info exists DEFAULT_CFLAGS] then {
|
||||
+ set DEFAULT_CFLAGS " "
|
||||
+}
|
||||
+
|
||||
+#Initialize `dg'.
|
||||
+dg-init
|
||||
+
|
||||
+#Main loop.
|
||||
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lsx/*.\[cS\]]] \
|
||||
+ " -mlsx" $DEFAULT_CFLAGS
|
||||
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lasx/*.\[cS\]]] \
|
||||
+ " -mlasx" $DEFAULT_CFLAGS
|
||||
+# All done.
|
||||
+dg-finish
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
|
||||
new file mode 100644
|
||||
index 000000000..eb7fbd59c
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
|
||||
@@ -0,0 +1,54 @@
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <string.h>
|
||||
+
|
||||
+#define ASSERTEQ_64(line, ref, res) \
|
||||
+ do \
|
||||
+ { \
|
||||
+ int fail = 0; \
|
||||
+ for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \
|
||||
+ { \
|
||||
+ long *temp_ref = &ref[i], *temp_res = &res[i]; \
|
||||
+ if (abs (*temp_ref - *temp_res) > 0) \
|
||||
+ { \
|
||||
+ printf (" error: %s at line %ld , expected " #ref \
|
||||
+ "[%ld]:0x%lx, got: 0x%lx\n", \
|
||||
+ __FILE__, line, i, *temp_ref, *temp_res); \
|
||||
+ fail = 1; \
|
||||
+ } \
|
||||
+ } \
|
||||
+ if (fail == 1) \
|
||||
+ abort (); \
|
||||
+ } \
|
||||
+ while (0)
|
||||
+
|
||||
+#define ASSERTEQ_32(line, ref, res) \
|
||||
+ do \
|
||||
+ { \
|
||||
+ int fail = 0; \
|
||||
+ for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \
|
||||
+ { \
|
||||
+ int *temp_ref = &ref[i], *temp_res = &res[i]; \
|
||||
+ if (abs (*temp_ref - *temp_res) > 0) \
|
||||
+ { \
|
||||
+ printf (" error: %s at line %ld , expected " #ref \
|
||||
+ "[%ld]:0x%x, got: 0x%x\n", \
|
||||
+ __FILE__, line, i, *temp_ref, *temp_res); \
|
||||
+ fail = 1; \
|
||||
+ } \
|
||||
+ } \
|
||||
+ if (fail == 1) \
|
||||
+ abort (); \
|
||||
+ } \
|
||||
+ while (0)
|
||||
+
|
||||
+#define ASSERTEQ_int(line, ref, res) \
|
||||
+ do \
|
||||
+ { \
|
||||
+ if (ref != res) \
|
||||
+ { \
|
||||
+ printf (" error: %s at line %ld , expected %d, got %d\n", __FILE__, \
|
||||
+ line, ref, res); \
|
||||
+ } \
|
||||
+ } \
|
||||
+ while (0)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
53
LoongArch-Adjust-C-multilib-header-layout.patch
Normal file
53
LoongArch-Adjust-C-multilib-header-layout.patch
Normal file
@ -0,0 +1,53 @@
|
||||
From a4bf17e87a965ed7f2bb1d2921fb9dd820c79a96 Mon Sep 17 00:00:00 2001
|
||||
From: Yang Yujie <yangyujie@loongson.cn>
|
||||
Date: Thu, 7 Sep 2023 14:50:10 +0800
|
||||
Subject: [PATCH 069/124] LoongArch: Adjust C++ multilib header layout.
|
||||
|
||||
For LoongArch, the toplevel library build is always aliased to
|
||||
one of the multilib variants. This patch installs it with the
|
||||
actual MULTISUBDIR (instead of ".") so that the headers can be
|
||||
reached by the compiler.
|
||||
|
||||
This patch is an update of
|
||||
https://gcc.gnu.org/pipermail/gcc-patches/2023-September/629435.html
|
||||
|
||||
libstdc++-v3/ChangeLog:
|
||||
|
||||
* configure.host: Register t-loongarch in tmake_file.
|
||||
* config/cpu/loongarch/t-loongarch: New file. Manually refresh
|
||||
MULTISUBDIR with $(shell $(CXX) --print-multi-directory).
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
libstdc++-v3/config/cpu/loongarch/t-loongarch | 1 +
|
||||
libstdc++-v3/configure.host | 5 ++++-
|
||||
2 files changed, 5 insertions(+), 1 deletion(-)
|
||||
create mode 100644 libstdc++-v3/config/cpu/loongarch/t-loongarch
|
||||
|
||||
diff --git a/libstdc++-v3/config/cpu/loongarch/t-loongarch b/libstdc++-v3/config/cpu/loongarch/t-loongarch
|
||||
new file mode 100644
|
||||
index 000000000..adfc8ebb9
|
||||
--- /dev/null
|
||||
+++ b/libstdc++-v3/config/cpu/loongarch/t-loongarch
|
||||
@@ -0,0 +1 @@
|
||||
+AM_MAKEFLAGS += " MULTISUBDIR=/$(shell $(CXX) --print-multi-directory)"
|
||||
diff --git a/libstdc++-v3/configure.host b/libstdc++-v3/configure.host
|
||||
index ec32980aa..592160e6d 100644
|
||||
--- a/libstdc++-v3/configure.host
|
||||
+++ b/libstdc++-v3/configure.host
|
||||
@@ -315,7 +315,10 @@ esac
|
||||
# Set any OS-dependent and CPU-dependent bits.
|
||||
# THIS TABLE IS SORTED. KEEP IT THAT WAY.
|
||||
case "${host}" in
|
||||
- *-*-linux* | *-*-uclinux*)
|
||||
+ loongarch*)
|
||||
+ tmake_file="cpu/loongarch/t-loongarch"
|
||||
+ ;;
|
||||
+ *-*-linux* | *-*-uclinux*)
|
||||
case "${host_cpu}" in
|
||||
i[567]86)
|
||||
abi_baseline_pair=i486-linux-gnu
|
||||
--
|
||||
2.33.0
|
||||
|
||||
55
LoongArch-Avoid-RTL-flag-check-failure-in-loongarch_.patch
Normal file
55
LoongArch-Avoid-RTL-flag-check-failure-in-loongarch_.patch
Normal file
@ -0,0 +1,55 @@
|
||||
From e82403e918e18fa8e8ecd0c9e26f2657cc814e12 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Wed, 24 Aug 2022 21:31:34 +0800
|
||||
Subject: [PATCH 013/124] LoongArch: Avoid RTL flag check failure in
|
||||
loongarch_classify_symbol
|
||||
|
||||
SYMBOL_REF_TLS_MODEL invokes SYMBOL_REF_FLAGS, and SYMBOL_REF_FLAGS
|
||||
invokes RTL_FLAG_CHECK1 and aborts when RTL code is not SYMBOL_REF.
|
||||
|
||||
r13-1833 removed "gcc_assert (SYMBOL_REF_P (x))" before invoking
|
||||
"SYMBOL_REF_TLS_MODEL (x)", indicating that it's now possible that "x"
|
||||
is not a SYMBOL_REF. So we need to check if "x" is SYMBOL_REF first.
|
||||
|
||||
This fixes a test failure happening with r13-2173 with RTL flag
|
||||
checking enabled:
|
||||
|
||||
pr106096.C:26:1: internal compiler error: RTL flag check:
|
||||
SYMBOL_REF_FLAGS used with unexpected rtx code 'const' in
|
||||
loongarch_classify_symbol
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.cc (loongarch_classify_symbol):
|
||||
Return early if the rtx is not SYMBOL_REF.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.cc | 5 ++---
|
||||
1 file changed, 2 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index 04c4ddaed..452aba9d4 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -1633,14 +1633,13 @@ loongarch_rtx_constant_in_small_data_p (machine_mode mode)
|
||||
static enum loongarch_symbol_type
|
||||
loongarch_classify_symbol (const_rtx x)
|
||||
{
|
||||
- if (LABEL_REF_P (x))
|
||||
+ if (!SYMBOL_REF_P (x))
|
||||
return SYMBOL_PCREL;
|
||||
|
||||
if (SYMBOL_REF_TLS_MODEL (x))
|
||||
return SYMBOL_TLS;
|
||||
|
||||
- if (SYMBOL_REF_P (x)
|
||||
- && !loongarch_symbol_binds_local_p (x))
|
||||
+ if (!loongarch_symbol_binds_local_p (x))
|
||||
return SYMBOL_GOT_DISP;
|
||||
|
||||
return SYMBOL_PCREL;
|
||||
--
|
||||
2.33.0
|
||||
|
||||
62
LoongArch-Avoid-non-returning-indirect-jumps-through.patch
Normal file
62
LoongArch-Avoid-non-returning-indirect-jumps-through.patch
Normal file
@ -0,0 +1,62 @@
|
||||
From 7e759740048ee6f24c1055c32868fa21cabb4f75 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Wed, 7 Jun 2023 10:21:58 +0800
|
||||
Subject: [PATCH 048/124] LoongArch: Avoid non-returning indirect jumps through
|
||||
$ra [PR110136]
|
||||
|
||||
Micro-architecture unconditionally treats a "jr $ra" as "return from subroutine",
|
||||
hence doing "jr $ra" would interfere with both subroutine return prediction and
|
||||
the more general indirect branch prediction.
|
||||
|
||||
Therefore, a problem like PR110136 can cause a significant increase in the branch
|
||||
misprediction rate and hurt performance. The same problem exists with "indirect_jump".
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
PR target/110136
|
||||
* config/loongarch/loongarch.md: Modify the register constraints for template
|
||||
"jumptable" and "indirect_jump" from "r" to "e".
|
||||
|
||||
Co-authored-by: Andrew Pinski <apinski@marvell.com>
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.md | 8 ++++++--
|
||||
1 file changed, 6 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index b23248c33..c79951c1d 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -2895,6 +2895,10 @@
|
||||
}
|
||||
[(set_attr "type" "branch")])
|
||||
|
||||
+;; Micro-architecture unconditionally treats a "jr $ra" as "return from subroutine",
|
||||
+;; non-returning indirect jumps through $ra would interfere with both subroutine
|
||||
+;; return prediction and the more general indirect branch prediction.
|
||||
+
|
||||
(define_expand "indirect_jump"
|
||||
[(set (pc) (match_operand 0 "register_operand"))]
|
||||
""
|
||||
@@ -2905,7 +2909,7 @@
|
||||
})
|
||||
|
||||
(define_insn "@indirect_jump<mode>"
|
||||
- [(set (pc) (match_operand:P 0 "register_operand" "r"))]
|
||||
+ [(set (pc) (match_operand:P 0 "register_operand" "e"))]
|
||||
""
|
||||
"jr\t%0"
|
||||
[(set_attr "type" "jump")
|
||||
@@ -2928,7 +2932,7 @@
|
||||
|
||||
(define_insn "@tablejump<mode>"
|
||||
[(set (pc)
|
||||
- (match_operand:P 0 "register_operand" "r"))
|
||||
+ (match_operand:P 0 "register_operand" "e"))
|
||||
(use (label_ref (match_operand 1 "" "")))]
|
||||
""
|
||||
"jr\t%0"
|
||||
--
|
||||
2.33.0
|
||||
|
||||
41
LoongArch-Change-the-default-value-of-LARCH_CALL_RAT.patch
Normal file
41
LoongArch-Change-the-default-value-of-LARCH_CALL_RAT.patch
Normal file
@ -0,0 +1,41 @@
|
||||
From 59824f1062d77d0e02ea82d47415bf95c235de87 Mon Sep 17 00:00:00 2001
|
||||
From: chenxiaolong <chenxl04200420@163.com>
|
||||
Date: Thu, 15 Jun 2023 02:46:24 +0000
|
||||
Subject: [PATCH 046/124] LoongArch: Change the default value of
|
||||
LARCH_CALL_RATIO to 6.
|
||||
|
||||
During the regression testing of the LoongArch architecture GCC, it was found
|
||||
that the tests in the pr90883.C file failed. On investigation, it was
|
||||
found that the error was caused by setting the macro LARCH_CALL_RATIO to a too
|
||||
large value. Combined with the actual LoongArch architecture, the different
|
||||
thresholds for meeting the test conditions were tested using the engineering method
|
||||
(SPEC CPU 2006), and the results showed that its optimal threshold should be set
|
||||
to 6.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.h (LARCH_CALL_RATIO): Modify the value
|
||||
of macro LARCH_CALL_RATIO on LoongArch to make it perform optimally.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
|
||||
index 44ebadfaa..0e35d4dec 100644
|
||||
--- a/gcc/config/loongarch/loongarch.h
|
||||
+++ b/gcc/config/loongarch/loongarch.h
|
||||
@@ -1073,7 +1073,7 @@ typedef struct {
|
||||
/* The base cost of a memcpy call, for MOVE_RATIO and friends. These
|
||||
values were determined experimentally by benchmarking with CSiBE.
|
||||
*/
|
||||
-#define LARCH_CALL_RATIO 8
|
||||
+#define LARCH_CALL_RATIO 6
|
||||
|
||||
/* Any loop-based implementation of cpymemsi will have at least
|
||||
LARCH_MAX_MOVE_OPS_PER_LOOP_ITER memory-to-memory
|
||||
--
|
||||
2.33.0
|
||||
|
||||
69
LoongArch-Change-the-value-of-branch_cost-from-2-to-.patch
Normal file
69
LoongArch-Change-the-value-of-branch_cost-from-2-to-.patch
Normal file
@ -0,0 +1,69 @@
|
||||
From 7e843ed8da168a05eb04eee0b14cbe681bf798fe Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Wed, 13 Sep 2023 11:01:34 +0800
|
||||
Subject: [PATCH 123/124] LoongArch: Change the value of branch_cost from 2 to
|
||||
6.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch-def.c: Modify the default value of
|
||||
branch_cost.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/cmov_ii.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch-def.c | 4 ++--
|
||||
gcc/testsuite/gcc.target/loongarch/cmov_ii.c | 15 +++++++++++++++
|
||||
2 files changed, 17 insertions(+), 2 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/cmov_ii.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
|
||||
index d29d5f001..eeb32dbf6 100644
|
||||
--- a/gcc/config/loongarch/loongarch-def.c
|
||||
+++ b/gcc/config/loongarch/loongarch-def.c
|
||||
@@ -85,7 +85,7 @@ loongarch_cpu_align[N_TUNE_TYPES] = {
|
||||
.int_mult_di = COSTS_N_INSNS (1), \
|
||||
.int_div_si = COSTS_N_INSNS (4), \
|
||||
.int_div_di = COSTS_N_INSNS (6), \
|
||||
- .branch_cost = 2, \
|
||||
+ .branch_cost = 6, \
|
||||
.memory_latency = 4
|
||||
|
||||
/* The following properties cannot be looked up directly using "cpucfg".
|
||||
@@ -118,7 +118,7 @@ loongarch_rtx_cost_optimize_size = {
|
||||
.int_mult_di = 4,
|
||||
.int_div_si = 4,
|
||||
.int_div_di = 4,
|
||||
- .branch_cost = 2,
|
||||
+ .branch_cost = 6,
|
||||
.memory_latency = 4,
|
||||
};
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/cmov_ii.c b/gcc/testsuite/gcc.target/loongarch/cmov_ii.c
|
||||
new file mode 100644
|
||||
index 000000000..21b468e8a
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/cmov_ii.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2" } */
|
||||
+/* { dg-final { scan-assembler "test:.*xor.*maskeqz.*masknez.*or.*" } } */
|
||||
+
|
||||
+extern void foo_ii (int *, int *, int *, int *);
|
||||
+
|
||||
+int
|
||||
+test (void)
|
||||
+{
|
||||
+ int a, b;
|
||||
+ int c, d, out;
|
||||
+ foo_ii (&a, &b, &c, &d);
|
||||
+ out = a == b ? c : d;
|
||||
+ return out;
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
49
LoongArch-Change-the-value-of-macro-TRY_EMPTY_VM_SPA.patch
Normal file
49
LoongArch-Change-the-value-of-macro-TRY_EMPTY_VM_SPA.patch
Normal file
@ -0,0 +1,49 @@
|
||||
From 6e9265e571a63deb2584704a0b088a6d67ec8af5 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Mon, 20 Feb 2023 16:47:11 +0800
|
||||
Subject: [PATCH 037/124] LoongArch: Change the value of macro
|
||||
TRY_EMPTY_VM_SPACE from 0x8000000000 to 0x1000000000.
|
||||
|
||||
The PCH mechanism first tries to map the .gch file to the virtual memory
|
||||
space pointed to by TRY_EMPTY_VM_SPACE during the compilation process.
|
||||
|
||||
The original value of TRY_EMPTY_VM_SPACE macro is 0x8000000000,
|
||||
but a chip such as the la464 has only 40 bits of virtual address space, so this value
|
||||
just exceeds the address range.
|
||||
|
||||
If we want to support chips with less than 40 bits virtual addresses,
|
||||
then the value of this macro needs to be set small. I think setting
|
||||
this value small will increase the probability of virtual address
|
||||
mapping failure. And the purpose of pch is to make compilation faster,
|
||||
but I think we rarely compile on embedded systems. So this situation
|
||||
may not be within our consideration.
|
||||
|
||||
So change the value of this macro to 0x1000000000.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/host-linux.cc (TRY_EMPTY_VM_SPACE): Modify the value of
|
||||
the macro to 0x1000000000.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/host-linux.cc | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/config/host-linux.cc b/gcc/config/host-linux.cc
|
||||
index 817d3c087..d93cfc064 100644
|
||||
--- a/gcc/config/host-linux.cc
|
||||
+++ b/gcc/config/host-linux.cc
|
||||
@@ -99,7 +99,7 @@
|
||||
#elif defined(__riscv) && defined (__LP64__)
|
||||
# define TRY_EMPTY_VM_SPACE 0x1000000000
|
||||
#elif defined(__loongarch__) && defined(__LP64__)
|
||||
-# define TRY_EMPTY_VM_SPACE 0x8000000000
|
||||
+# define TRY_EMPTY_VM_SPACE 0x1000000000
|
||||
#else
|
||||
# define TRY_EMPTY_VM_SPACE 0
|
||||
#endif
|
||||
--
|
||||
2.33.0
|
||||
|
||||
139
LoongArch-Define-the-macro-ASM_PREFERRED_EH_DATA_FOR.patch
Normal file
139
LoongArch-Define-the-macro-ASM_PREFERRED_EH_DATA_FOR.patch
Normal file
@ -0,0 +1,139 @@
|
||||
From 05c1df09c70cd0ed48f0644890f69a0128b17a98 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Fri, 29 Jul 2022 09:44:52 +0800
|
||||
Subject: [PATCH 008/124] LoongArch: Define the macro
|
||||
ASM_PREFERRED_EH_DATA_FORMAT by checking the assembler's support for eh_frame
|
||||
encoding.
|
||||
|
||||
.eh_frame DW_EH_PE_pcrel encoding format is not supported by gas <= 2.39.
|
||||
Check if the assembler supports DW_EH_PE_pcrel encoding and define the .eh_frame
|
||||
encoding type.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config.in: Regenerate.
|
||||
* config/loongarch/loongarch.h (ASM_PREFERRED_EH_DATA_FORMAT):
|
||||
Select the value of the macro definition according to whether
|
||||
HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT is defined.
|
||||
* configure: Regenerate.
|
||||
* configure.ac: Reinstate HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config.in | 8 +++++++-
|
||||
gcc/config/loongarch/loongarch.h | 5 +++++
|
||||
gcc/configure | 34 ++++++++++++++++++++++++++++++++
|
||||
gcc/configure.ac | 8 ++++++++
|
||||
4 files changed, 54 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/config.in b/gcc/config.in
|
||||
index 64c27c9cf..67ce422f2 100644
|
||||
--- a/gcc/config.in
|
||||
+++ b/gcc/config.in
|
||||
@@ -404,13 +404,19 @@
|
||||
#endif
|
||||
|
||||
|
||||
+/* Define if your assembler supports eh_frame pcrel encoding. */
|
||||
+#ifndef USED_FOR_TARGET
|
||||
+#undef HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT
|
||||
+#endif
|
||||
+
|
||||
+
|
||||
/* Define if your assembler supports the R_PPC64_ENTRY relocation. */
|
||||
#ifndef USED_FOR_TARGET
|
||||
#undef HAVE_AS_ENTRY_MARKERS
|
||||
#endif
|
||||
|
||||
|
||||
-/* Define if your assembler supports explicit relocations. */
|
||||
+/* Define if your assembler supports explicit relocation. */
|
||||
#ifndef USED_FOR_TARGET
|
||||
#undef HAVE_AS_EXPLICIT_RELOCS
|
||||
#endif
|
||||
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
|
||||
index 12f209047..a52a81adf 100644
|
||||
--- a/gcc/config/loongarch/loongarch.h
|
||||
+++ b/gcc/config/loongarch/loongarch.h
|
||||
@@ -1130,8 +1130,13 @@ struct GTY (()) machine_function
|
||||
};
|
||||
#endif
|
||||
|
||||
+#ifdef HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT
|
||||
+#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
|
||||
+ (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4)
|
||||
+#else
|
||||
#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \
|
||||
(((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_absptr)
|
||||
+#endif
|
||||
|
||||
/* Do emit .note.GNU-stack by default. */
|
||||
#ifndef NEED_INDICATE_EXEC_STACK
|
||||
diff --git a/gcc/configure b/gcc/configure
|
||||
index 840eddc7c..3788e240a 100755
|
||||
--- a/gcc/configure
|
||||
+++ b/gcc/configure
|
||||
@@ -28857,6 +28857,40 @@ if test $gcc_cv_as_loongarch_explicit_relocs = yes; then
|
||||
|
||||
$as_echo "#define HAVE_AS_EXPLICIT_RELOCS 1" >>confdefs.h
|
||||
|
||||
+fi
|
||||
+
|
||||
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for eh_frame pcrel encoding support" >&5
|
||||
+$as_echo_n "checking assembler for eh_frame pcrel encoding support... " >&6; }
|
||||
+if ${gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support+:} false; then :
|
||||
+ $as_echo_n "(cached) " >&6
|
||||
+else
|
||||
+ gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support=no
|
||||
+ if test x$gcc_cv_as != x; then
|
||||
+ $as_echo '.cfi_startproc
|
||||
+ .cfi_personality 0x9b,a
|
||||
+ .cfi_lsda 0x1b,b
|
||||
+ .cfi_endproc' > conftest.s
|
||||
+ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5'
|
||||
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||||
+ (eval $ac_try) 2>&5
|
||||
+ ac_status=$?
|
||||
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||
+ test $ac_status = 0; }; }
|
||||
+ then
|
||||
+ gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support=yes
|
||||
+ else
|
||||
+ echo "configure: failed program was" >&5
|
||||
+ cat conftest.s >&5
|
||||
+ fi
|
||||
+ rm -f conftest.o conftest.s
|
||||
+ fi
|
||||
+fi
|
||||
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support" >&5
|
||||
+$as_echo "$gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support" >&6; }
|
||||
+if test $gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support = yes; then
|
||||
+
|
||||
+$as_echo "#define HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT 1" >>confdefs.h
|
||||
+
|
||||
fi
|
||||
|
||||
;;
|
||||
diff --git a/gcc/configure.ac b/gcc/configure.ac
|
||||
index 975c852c6..1c376e0d4 100644
|
||||
--- a/gcc/configure.ac
|
||||
+++ b/gcc/configure.ac
|
||||
@@ -5324,6 +5324,14 @@ x:
|
||||
[a:pcalau12i $t0,%pc_hi20(a)],,
|
||||
[AC_DEFINE(HAVE_AS_EXPLICIT_RELOCS, 1,
|
||||
[Define if your assembler supports explicit relocation.])])
|
||||
+ gcc_GAS_CHECK_FEATURE([eh_frame pcrel encoding support],
|
||||
+ gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support,,
|
||||
+ [.cfi_startproc
|
||||
+ .cfi_personality 0x9b,a
|
||||
+ .cfi_lsda 0x1b,b
|
||||
+ .cfi_endproc],,
|
||||
+ [AC_DEFINE(HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT, 1,
|
||||
+ [Define if your assembler supports eh_frame pcrel encoding.])])
|
||||
;;
|
||||
s390*-*-*)
|
||||
gcc_GAS_CHECK_FEATURE([.gnu_attribute support],
|
||||
--
|
||||
2.33.0
|
||||
|
||||
34
LoongArch-Don-t-add-crtfastmath.o-for-shared.patch
Normal file
34
LoongArch-Don-t-add-crtfastmath.o-for-shared.patch
Normal file
@ -0,0 +1,34 @@
|
||||
From 2e19311d1bf4f932f5e67f6866123b895b12c97f Mon Sep 17 00:00:00 2001
|
||||
From: Richard Biener <rguenther@suse.de>
|
||||
Date: Fri, 13 Jan 2023 09:01:12 +0100
|
||||
Subject: [PATCH 035/124] LoongArch: Don't add crtfastmath.o for -shared
|
||||
|
||||
Don't add crtfastmath.o for -shared to avoid altering the FP
|
||||
environment when loading a shared library.
|
||||
|
||||
PR target/55522
|
||||
* config/loongarch/gnu-user.h (GNU_USER_TARGET_MATHFILE_SPEC):
|
||||
Don't add crtfastmath.o for -shared.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/gnu-user.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
|
||||
index c5b1afe53..1dc6add62 100644
|
||||
--- a/gcc/config/loongarch/gnu-user.h
|
||||
+++ b/gcc/config/loongarch/gnu-user.h
|
||||
@@ -49,7 +49,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
/* Similar to standard Linux, but adding -ffast-math support. */
|
||||
#undef GNU_USER_TARGET_MATHFILE_SPEC
|
||||
#define GNU_USER_TARGET_MATHFILE_SPEC \
|
||||
- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
|
||||
+ "%{Ofast|ffast-math|funsafe-math-optimizations:%{!shared:crtfastmath.o%s}}"
|
||||
|
||||
#undef LIB_SPEC
|
||||
#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC
|
||||
--
|
||||
2.33.0
|
||||
|
||||
71
LoongArch-Enable-free-starting-at-O2.patch
Normal file
71
LoongArch-Enable-free-starting-at-O2.patch
Normal file
@ -0,0 +1,71 @@
|
||||
From 0369836718ffb25ac64c135e748f409302068a56 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Mon, 28 Aug 2023 11:30:21 +0800
|
||||
Subject: [PATCH 052/124] LoongArch: Enable '-free' starting at -O2.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/loongarch/loongarch-common.cc:
|
||||
Enable '-free' on O2 and above.
|
||||
* doc/invoke.texi: Modify the description information
|
||||
of the '-free' compilation option and add the LoongArch
|
||||
description.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/sign-extend.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
.../config/loongarch/loongarch-common.cc | 1 +
|
||||
.../gcc.target/loongarch/sign-extend.c | 25 +++++++++++++++++++
|
||||
2 files changed, 26 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/sign-extend.c
|
||||
|
||||
diff --git a/gcc/common/config/loongarch/loongarch-common.cc b/gcc/common/config/loongarch/loongarch-common.cc
|
||||
index f8b4660fa..309fcb280 100644
|
||||
--- a/gcc/common/config/loongarch/loongarch-common.cc
|
||||
+++ b/gcc/common/config/loongarch/loongarch-common.cc
|
||||
@@ -35,6 +35,7 @@ static const struct default_options loongarch_option_optimization_table[] =
|
||||
{
|
||||
{ OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 },
|
||||
{ OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
|
||||
+ { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
|
||||
{ OPT_LEVELS_NONE, 0, NULL, 0 }
|
||||
};
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/sign-extend.c b/gcc/testsuite/gcc.target/loongarch/sign-extend.c
|
||||
new file mode 100644
|
||||
index 000000000..3f339d06b
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/sign-extend.c
|
||||
@@ -0,0 +1,25 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mabi=lp64d -O2" } */
|
||||
+/* { dg-final { scan-assembler-times "slli.w" 1 } } */
|
||||
+
|
||||
+extern int PL_savestack_ix;
|
||||
+extern int PL_regsize;
|
||||
+extern int PL_savestack_max;
|
||||
+void Perl_savestack_grow_cnt (int need);
|
||||
+extern void Perl_croak (char *);
|
||||
+
|
||||
+int
|
||||
+S_regcppush(int parenfloor)
|
||||
+{
|
||||
+ int retval = PL_savestack_ix;
|
||||
+ int paren_elems_to_push = (PL_regsize - parenfloor) * 4;
|
||||
+ int p;
|
||||
+
|
||||
+ if (paren_elems_to_push < 0)
|
||||
+ Perl_croak ("panic: paren_elems_to_push < 0");
|
||||
+
|
||||
+ if (PL_savestack_ix + (paren_elems_to_push + 6) > PL_savestack_max)
|
||||
+ Perl_savestack_grow_cnt (paren_elems_to_push + 6);
|
||||
+
|
||||
+ return retval;
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
33
LoongArch-Enable-fsched-pressure-by-default-at-O1-an.patch
Normal file
33
LoongArch-Enable-fsched-pressure-by-default-at-O1-an.patch
Normal file
@ -0,0 +1,33 @@
|
||||
From a9f72e237d5c176e4ef8ba03a8b4ee5c5daa25fb Mon Sep 17 00:00:00 2001
|
||||
From: Guo Jie <guojie@loongson.cn>
|
||||
Date: Fri, 8 Sep 2023 10:00:21 +0800
|
||||
Subject: [PATCH 071/124] LoongArch: Enable -fsched-pressure by default at -O1
|
||||
and higher.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* common/config/loongarch/loongarch-common.cc:
|
||||
(default_options loongarch_option_optimization_table):
|
||||
Default to -fsched-pressure.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/common/config/loongarch/loongarch-common.cc | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/gcc/common/config/loongarch/loongarch-common.cc b/gcc/common/config/loongarch/loongarch-common.cc
|
||||
index 309fcb280..c8bc5718d 100644
|
||||
--- a/gcc/common/config/loongarch/loongarch-common.cc
|
||||
+++ b/gcc/common/config/loongarch/loongarch-common.cc
|
||||
@@ -36,6 +36,7 @@ static const struct default_options loongarch_option_optimization_table[] =
|
||||
{ OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 },
|
||||
{ OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
|
||||
+ { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
|
||||
{ OPT_LEVELS_NONE, 0, NULL, 0 }
|
||||
};
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
309
LoongArch-Enable-shrink-wrapping.patch
Normal file
309
LoongArch-Enable-shrink-wrapping.patch
Normal file
@ -0,0 +1,309 @@
|
||||
From e86c9ece7ae922fe80017ba2ffe22f6267531682 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Sun, 23 Apr 2023 20:52:22 +0800
|
||||
Subject: [PATCH 045/124] LoongArch: Enable shrink wrapping
|
||||
|
||||
This commit implements the target macros for shrink wrapping of function
|
||||
prologues/epilogues on LoongArch.
|
||||
|
||||
Bootstrapped and regtested on loongarch64-linux-gnu. I don't have
|
||||
access to SPEC CPU so I hope the reviewer can perform a benchmark to see
|
||||
if there is real benefit.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.h (struct machine_function): Add
|
||||
reg_is_wrapped_separately array for register wrapping
|
||||
information.
|
||||
* config/loongarch/loongarch.cc
|
||||
(loongarch_get_separate_components): New function.
|
||||
(loongarch_components_for_bb): Likewise.
|
||||
(loongarch_disqualify_components): Likewise.
|
||||
(loongarch_process_components): Likewise.
|
||||
(loongarch_emit_prologue_components): Likewise.
|
||||
(loongarch_emit_epilogue_components): Likewise.
|
||||
(loongarch_set_handled_components): Likewise.
|
||||
(TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS): Define.
|
||||
(TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB): Likewise.
|
||||
(TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS): Likewise.
|
||||
(TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS): Likewise.
|
||||
(TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS): Likewise.
|
||||
(TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS): Likewise.
|
||||
(loongarch_for_each_saved_reg): Skip registers that are wrapped
|
||||
separately.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/shrink-wrap.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.cc | 179 +++++++++++++++++-
|
||||
gcc/config/loongarch/loongarch.h | 2 +
|
||||
.../gcc.target/loongarch/shrink-wrap.c | 19 ++
|
||||
3 files changed, 197 insertions(+), 3 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index d3c6f22ad..4c0f393b6 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "builtins.h"
|
||||
#include "rtl-iter.h"
|
||||
#include "opts.h"
|
||||
+#include "function-abi.h"
|
||||
|
||||
/* This file should be included last. */
|
||||
#include "target-def.h"
|
||||
@@ -1014,19 +1015,23 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
|
||||
for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
|
||||
if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
|
||||
{
|
||||
- loongarch_save_restore_reg (word_mode, regno, offset, fn);
|
||||
+ if (!cfun->machine->reg_is_wrapped_separately[regno])
|
||||
+ loongarch_save_restore_reg (word_mode, regno, offset, fn);
|
||||
+
|
||||
offset -= UNITS_PER_WORD;
|
||||
}
|
||||
|
||||
/* This loop must iterate over the same space as its companion in
|
||||
loongarch_compute_frame_info. */
|
||||
offset = cfun->machine->frame.fp_sp_offset - sp_offset;
|
||||
+ machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
|
||||
+
|
||||
for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
|
||||
if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
|
||||
{
|
||||
- machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
|
||||
+ if (!cfun->machine->reg_is_wrapped_separately[regno])
|
||||
+ loongarch_save_restore_reg (word_mode, regno, offset, fn);
|
||||
|
||||
- loongarch_save_restore_reg (mode, regno, offset, fn);
|
||||
offset -= GET_MODE_SIZE (mode);
|
||||
}
|
||||
}
|
||||
@@ -6630,6 +6635,151 @@ loongarch_asan_shadow_offset (void)
|
||||
return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
|
||||
}
|
||||
|
||||
+static sbitmap
|
||||
+loongarch_get_separate_components (void)
|
||||
+{
|
||||
+ HOST_WIDE_INT offset;
|
||||
+ sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
|
||||
+ bitmap_clear (components);
|
||||
+ offset = cfun->machine->frame.gp_sp_offset;
|
||||
+
|
||||
+ /* The stack should be aligned to 16-bytes boundary, so we can make the use
|
||||
+ of ldptr instructions. */
|
||||
+ gcc_assert (offset % UNITS_PER_WORD == 0);
|
||||
+
|
||||
+ for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
|
||||
+ if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
|
||||
+ {
|
||||
+ /* We can wrap general registers saved at [sp, sp + 32768) using the
|
||||
+ ldptr/stptr instructions. For large offsets a pseudo register
|
||||
+ might be needed which cannot be created during the shrink
|
||||
+ wrapping pass.
|
||||
+
|
||||
+ TODO: This may need a revise when we add LA32 as ldptr.w is not
|
||||
+ guaranteed available by the manual. */
|
||||
+ if (offset < 32768)
|
||||
+ bitmap_set_bit (components, regno);
|
||||
+
|
||||
+ offset -= UNITS_PER_WORD;
|
||||
+ }
|
||||
+
|
||||
+ offset = cfun->machine->frame.fp_sp_offset;
|
||||
+ for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
|
||||
+ if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
|
||||
+ {
|
||||
+ /* We can only wrap FP registers with imm12 offsets. For large
|
||||
+ offsets a pseudo register might be needed which cannot be
|
||||
+ created during the shrink wrapping pass. */
|
||||
+ if (IMM12_OPERAND (offset))
|
||||
+ bitmap_set_bit (components, regno);
|
||||
+
|
||||
+ offset -= UNITS_PER_FPREG;
|
||||
+ }
|
||||
+
|
||||
+ /* Don't mess with the hard frame pointer. */
|
||||
+ if (frame_pointer_needed)
|
||||
+ bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
|
||||
+
|
||||
+ bitmap_clear_bit (components, RETURN_ADDR_REGNUM);
|
||||
+
|
||||
+ return components;
|
||||
+}
|
||||
+
|
||||
+static sbitmap
|
||||
+loongarch_components_for_bb (basic_block bb)
|
||||
+{
|
||||
+ /* Registers are used in a bb if they are in the IN, GEN, or KILL sets. */
|
||||
+ auto_bitmap used;
|
||||
+ bitmap_copy (used, DF_LIVE_IN (bb));
|
||||
+ bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->gen);
|
||||
+ bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->kill);
|
||||
+
|
||||
+ sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
|
||||
+ bitmap_clear (components);
|
||||
+
|
||||
+ function_abi_aggregator callee_abis;
|
||||
+ rtx_insn *insn;
|
||||
+ FOR_BB_INSNS (bb, insn)
|
||||
+ if (CALL_P (insn))
|
||||
+ callee_abis.note_callee_abi (insn_callee_abi (insn));
|
||||
+
|
||||
+ HARD_REG_SET extra_caller_saves =
|
||||
+ callee_abis.caller_save_regs (*crtl->abi);
|
||||
+
|
||||
+ for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
|
||||
+ if (!fixed_regs[regno]
|
||||
+ && !crtl->abi->clobbers_full_reg_p (regno)
|
||||
+ && (TEST_HARD_REG_BIT (extra_caller_saves, regno) ||
|
||||
+ bitmap_bit_p (used, regno)))
|
||||
+ bitmap_set_bit (components, regno);
|
||||
+
|
||||
+ for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
|
||||
+ if (!fixed_regs[regno]
|
||||
+ && !crtl->abi->clobbers_full_reg_p (regno)
|
||||
+ && (TEST_HARD_REG_BIT (extra_caller_saves, regno) ||
|
||||
+ bitmap_bit_p (used, regno)))
|
||||
+ bitmap_set_bit (components, regno);
|
||||
+
|
||||
+ return components;
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+loongarch_disqualify_components (sbitmap, edge, sbitmap, bool)
|
||||
+{
|
||||
+ /* Do nothing. */
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+loongarch_process_components (sbitmap components, loongarch_save_restore_fn fn)
|
||||
+{
|
||||
+ HOST_WIDE_INT offset = cfun->machine->frame.gp_sp_offset;
|
||||
+
|
||||
+ for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
|
||||
+ if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
|
||||
+ {
|
||||
+ if (bitmap_bit_p (components, regno))
|
||||
+ loongarch_save_restore_reg (word_mode, regno, offset, fn);
|
||||
+
|
||||
+ offset -= UNITS_PER_WORD;
|
||||
+ }
|
||||
+
|
||||
+ offset = cfun->machine->frame.fp_sp_offset;
|
||||
+ machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
|
||||
+
|
||||
+ for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
|
||||
+ if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
|
||||
+ {
|
||||
+ if (bitmap_bit_p (components, regno))
|
||||
+ loongarch_save_restore_reg (mode, regno, offset, fn);
|
||||
+
|
||||
+ offset -= UNITS_PER_FPREG;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+loongarch_emit_prologue_components (sbitmap components)
|
||||
+{
|
||||
+ loongarch_process_components (components, loongarch_save_reg);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+loongarch_emit_epilogue_components (sbitmap components)
|
||||
+{
|
||||
+ loongarch_process_components (components, loongarch_restore_reg);
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+loongarch_set_handled_components (sbitmap components)
|
||||
+{
|
||||
+ for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
|
||||
+ if (bitmap_bit_p (components, regno))
|
||||
+ cfun->machine->reg_is_wrapped_separately[regno] = true;
|
||||
+
|
||||
+ for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
|
||||
+ if (bitmap_bit_p (components, regno))
|
||||
+ cfun->machine->reg_is_wrapped_separately[regno] = true;
|
||||
+}
|
||||
+
|
||||
/* Initialize the GCC target structure. */
|
||||
#undef TARGET_ASM_ALIGNED_HI_OP
|
||||
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
|
||||
@@ -6827,6 +6977,29 @@ loongarch_asan_shadow_offset (void)
|
||||
#undef TARGET_ASAN_SHADOW_OFFSET
|
||||
#define TARGET_ASAN_SHADOW_OFFSET loongarch_asan_shadow_offset
|
||||
|
||||
+#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
|
||||
+#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
|
||||
+ loongarch_get_separate_components
|
||||
+
|
||||
+#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
|
||||
+#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB loongarch_components_for_bb
|
||||
+
|
||||
+#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
|
||||
+#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
|
||||
+ loongarch_disqualify_components
|
||||
+
|
||||
+#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
|
||||
+#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
|
||||
+ loongarch_emit_prologue_components
|
||||
+
|
||||
+#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
|
||||
+#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
|
||||
+ loongarch_emit_epilogue_components
|
||||
+
|
||||
+#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
|
||||
+#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
|
||||
+ loongarch_set_handled_components
|
||||
+
|
||||
struct gcc_target targetm = TARGET_INITIALIZER;
|
||||
|
||||
#include "gt-loongarch.h"
|
||||
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
|
||||
index af24bfa01..44ebadfaa 100644
|
||||
--- a/gcc/config/loongarch/loongarch.h
|
||||
+++ b/gcc/config/loongarch/loongarch.h
|
||||
@@ -1147,6 +1147,8 @@ struct GTY (()) machine_function
|
||||
/* The current frame information, calculated by loongarch_compute_frame_info.
|
||||
*/
|
||||
struct loongarch_frame_info frame;
|
||||
+
|
||||
+ bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];
|
||||
};
|
||||
#endif
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c b/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
|
||||
new file mode 100644
|
||||
index 000000000..1431536c5
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
|
||||
@@ -0,0 +1,19 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O -fshrink-wrap" } */
|
||||
+
|
||||
+/* We should not save anything before checking the value of x. */
|
||||
+/* { dg-final { scan-assembler-not "st(ptr)?\\\.\[dw\].*b(eq|ne)z" } } */
|
||||
+
|
||||
+int
|
||||
+foo (int x)
|
||||
+{
|
||||
+ __asm__ ("nop" :);
|
||||
+ if (x)
|
||||
+ {
|
||||
+ __asm__ ("" ::: "s0", "s1");
|
||||
+ return x;
|
||||
+ }
|
||||
+
|
||||
+ __asm__ ("" ::: "s2", "s3");
|
||||
+ return 0;
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
43
LoongArch-Fix-MUSL_DYNAMIC_LINKER.patch
Normal file
43
LoongArch-Fix-MUSL_DYNAMIC_LINKER.patch
Normal file
@ -0,0 +1,43 @@
|
||||
From 3db61acfbaa773568fad2bc31d950c6d9b3729b0 Mon Sep 17 00:00:00 2001
|
||||
From: Peng Fan <fanpeng@loongson.cn>
|
||||
Date: Wed, 19 Apr 2023 16:23:42 +0800
|
||||
Subject: [PATCH 044/124] LoongArch: Fix MUSL_DYNAMIC_LINKER
|
||||
|
||||
The system based on musl has no '/lib64', so change it.
|
||||
|
||||
https://wiki.musl-libc.org/guidelines-for-distributions.html,
|
||||
the "Multilib/multi-arch" section there explains this.
|
||||
|
||||
gcc/
|
||||
* config/loongarch/gnu-user.h (MUSL_DYNAMIC_LINKER): Redefine.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Suggested-by: Xi Ruoyao <xry111@xry111.site>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/gnu-user.h | 7 ++++++-
|
||||
1 file changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
|
||||
index 1dc6add62..44e4f2575 100644
|
||||
--- a/gcc/config/loongarch/gnu-user.h
|
||||
+++ b/gcc/config/loongarch/gnu-user.h
|
||||
@@ -33,9 +33,14 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define GLIBC_DYNAMIC_LINKER \
|
||||
"/lib" ABI_GRLEN_SPEC "/ld-linux-loongarch-" ABI_SPEC ".so.1"
|
||||
|
||||
+#define MUSL_ABI_SPEC \
|
||||
+ "%{mabi=lp64d:-lp64d}" \
|
||||
+ "%{mabi=lp64f:-lp64f}" \
|
||||
+ "%{mabi=lp64s:-lp64s}"
|
||||
+
|
||||
#undef MUSL_DYNAMIC_LINKER
|
||||
#define MUSL_DYNAMIC_LINKER \
|
||||
- "/lib" ABI_GRLEN_SPEC "/ld-musl-loongarch-" ABI_SPEC ".so.1"
|
||||
+ "/lib/ld-musl-loongarch" ABI_GRLEN_SPEC MUSL_ABI_SPEC ".so.1"
|
||||
|
||||
#undef GNU_USER_TARGET_LINK_SPEC
|
||||
#define GNU_USER_TARGET_LINK_SPEC \
|
||||
--
|
||||
2.33.0
|
||||
|
||||
43
LoongArch-Fix-bug-in-loongarch_emit_stack_tie-PR1104.patch
Normal file
43
LoongArch-Fix-bug-in-loongarch_emit_stack_tie-PR1104.patch
Normal file
@ -0,0 +1,43 @@
|
||||
From 7c8fc6b414dc1718e71e0d05c7a78498e06eb499 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Thu, 29 Jun 2023 19:30:59 +0800
|
||||
Subject: [PATCH 053/124] LoongArch: Fix bug in loongarch_emit_stack_tie
|
||||
[PR110484].
|
||||
|
||||
The old code may result in implicit references to $fp when frame_pointer_needed is false,
|
||||
causing regs_ever_live[$fp] to be true when $fp is not explicitly used,
|
||||
resulting in $fp being used as the target replacement register in the rnreg pass.
|
||||
|
||||
The bug originates from SPEC2017 541.leela_r(-flto).
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
PR target/110484
|
||||
* config/loongarch/loongarch.cc (loongarch_emit_stack_tie): Use the
|
||||
frame_pointer_needed to determine whether to use the $fp register.
|
||||
|
||||
Co-authored-by: Guo Jie <guojie@loongson.cn>
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.cc | 4 +++-
|
||||
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index caacfa8a3..7b48e3216 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -1109,7 +1109,9 @@ loongarch_first_stack_step (struct loongarch_frame_info *frame)
|
||||
static void
|
||||
loongarch_emit_stack_tie (void)
|
||||
{
|
||||
- emit_insn (gen_stack_tie (Pmode, stack_pointer_rtx, hard_frame_pointer_rtx));
|
||||
+ emit_insn (gen_stack_tie (Pmode, stack_pointer_rtx,
|
||||
+ frame_pointer_needed ? hard_frame_pointer_rtx
|
||||
+ : stack_pointer_rtx));
|
||||
}
|
||||
|
||||
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
123
LoongArch-Fix-bug-of-optab-di3_fake.patch
Normal file
123
LoongArch-Fix-bug-of-optab-di3_fake.patch
Normal file
@ -0,0 +1,123 @@
|
||||
From df1df2e7b7e27bd9fba77f572d74d833aff4a202 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Mon, 11 Sep 2023 16:20:29 +0800
|
||||
Subject: [PATCH 122/124] LoongArch: Fix bug of '<optab>di3_fake'.
|
||||
|
||||
PR target/111334
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.md: Fix bug of '<optab>di3_fake'.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/pr111334.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.md | 20 ++++++----
|
||||
gcc/testsuite/gcc.target/loongarch/pr111334.c | 39 +++++++++++++++++++
|
||||
2 files changed, 52 insertions(+), 7 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/pr111334.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index 264cd325c..7746116e6 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -72,6 +72,9 @@
|
||||
UNSPEC_LUI_H_HI12
|
||||
UNSPEC_TLS_LOW
|
||||
|
||||
+ ;; Fake div.w[u] mod.w[u]
|
||||
+ UNSPEC_FAKE_ANY_DIV
|
||||
+
|
||||
UNSPEC_SIBCALL_VALUE_MULTIPLE_INTERNAL_1
|
||||
UNSPEC_CALL_VALUE_MULTIPLE_INTERNAL_1
|
||||
])
|
||||
@@ -900,7 +903,7 @@
|
||||
(match_operand:GPR 2 "register_operand")))]
|
||||
""
|
||||
{
|
||||
- if (GET_MODE (operands[0]) == SImode)
|
||||
+ if (GET_MODE (operands[0]) == SImode && TARGET_64BIT)
|
||||
{
|
||||
rtx reg1 = gen_reg_rtx (DImode);
|
||||
rtx reg2 = gen_reg_rtx (DImode);
|
||||
@@ -920,9 +923,9 @@
|
||||
})
|
||||
|
||||
(define_insn "*<optab><mode>3"
|
||||
- [(set (match_operand:GPR 0 "register_operand" "=r,&r,&r")
|
||||
- (any_div:GPR (match_operand:GPR 1 "register_operand" "r,r,0")
|
||||
- (match_operand:GPR 2 "register_operand" "r,r,r")))]
|
||||
+ [(set (match_operand:X 0 "register_operand" "=r,&r,&r")
|
||||
+ (any_div:X (match_operand:X 1 "register_operand" "r,r,0")
|
||||
+ (match_operand:X 2 "register_operand" "r,r,r")))]
|
||||
""
|
||||
{
|
||||
return loongarch_output_division ("<insn>.<d><u>\t%0,%1,%2", operands);
|
||||
@@ -938,9 +941,12 @@
|
||||
(define_insn "<optab>di3_fake"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r,&r,&r")
|
||||
(sign_extend:DI
|
||||
- (any_div:SI (match_operand:DI 1 "register_operand" "r,r,0")
|
||||
- (match_operand:DI 2 "register_operand" "r,r,r"))))]
|
||||
- ""
|
||||
+ (unspec:SI
|
||||
+ [(subreg:SI
|
||||
+ (any_div:DI (match_operand:DI 1 "register_operand" "r,r,0")
|
||||
+ (match_operand:DI 2 "register_operand" "r,r,r")) 0)]
|
||||
+ UNSPEC_FAKE_ANY_DIV)))]
|
||||
+ "TARGET_64BIT"
|
||||
{
|
||||
return loongarch_output_division ("<insn>.w<u>\t%0,%1,%2", operands);
|
||||
}
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/pr111334.c b/gcc/testsuite/gcc.target/loongarch/pr111334.c
|
||||
new file mode 100644
|
||||
index 000000000..47366afcb
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/pr111334.c
|
||||
@@ -0,0 +1,39 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2" } */
|
||||
+
|
||||
+unsigned
|
||||
+util_next_power_of_two (unsigned x)
|
||||
+{
|
||||
+ return (1 << __builtin_clz (x - 1));
|
||||
+}
|
||||
+
|
||||
+extern int create_vec_from_array (void);
|
||||
+
|
||||
+struct ac_shader_args {
|
||||
+ struct {
|
||||
+ unsigned char offset;
|
||||
+ unsigned char size;
|
||||
+ } args[384];
|
||||
+};
|
||||
+
|
||||
+struct isel_context {
|
||||
+ const struct ac_shader_args* args;
|
||||
+ int arg_temps[384];
|
||||
+};
|
||||
+
|
||||
+
|
||||
+void
|
||||
+add_startpgm (struct isel_context* ctx, unsigned short arg_count)
|
||||
+{
|
||||
+
|
||||
+ for (unsigned i = 0, arg = 0; i < arg_count; i++)
|
||||
+ {
|
||||
+ unsigned size = ctx->args->args[i].size;
|
||||
+ unsigned reg = ctx->args->args[i].offset;
|
||||
+
|
||||
+ if (reg % ( 4 < util_next_power_of_two (size)
|
||||
+ ? 4 : util_next_power_of_two (size)))
|
||||
+ ctx->arg_temps[i] = create_vec_from_array ();
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
--
|
||||
2.33.0
|
||||
|
||||
69
LoongArch-Fix-pr106828-by-define-hook-TARGET_ASAN_SH.patch
Normal file
69
LoongArch-Fix-pr106828-by-define-hook-TARGET_ASAN_SH.patch
Normal file
@ -0,0 +1,69 @@
|
||||
From a70fe51d9813d490a89cbc8da1ae4b040bf8b37e Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Wed, 7 Sep 2022 11:25:45 +0800
|
||||
Subject: [PATCH 017/124] LoongArch: Fix pr106828 by define hook
|
||||
TARGET_ASAN_SHADOW_OFFSET in loongarch backend [PR106828].
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
PR target/106828
|
||||
* config/loongarch/loongarch.cc (loongarch_asan_shadow_offset): New.
|
||||
(TARGET_ASAN_SHADOW_OFFSET): New.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
PR target/106828
|
||||
* g++.target/loongarch/pr106828.C: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.cc | 13 +++++++++++++
|
||||
gcc/testsuite/g++.target/loongarch/pr106828.C | 4 ++++
|
||||
2 files changed, 17 insertions(+)
|
||||
create mode 100644 gcc/testsuite/g++.target/loongarch/pr106828.C
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index c9187bf81..98c0e26cd 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -6466,6 +6466,16 @@ loongarch_use_anchors_for_symbol_p (const_rtx symbol)
|
||||
return default_use_anchors_for_symbol_p (symbol);
|
||||
}
|
||||
|
||||
+/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
|
||||
+
|
||||
+static unsigned HOST_WIDE_INT
|
||||
+loongarch_asan_shadow_offset (void)
|
||||
+{
|
||||
+ /* We only have libsanitizer support for LOONGARCH64 at present.
|
||||
+ This value is taken from the file libsanitizer/asan/asan_mappint.h. */
|
||||
+ return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
|
||||
+}
|
||||
+
|
||||
/* Initialize the GCC target structure. */
|
||||
#undef TARGET_ASM_ALIGNED_HI_OP
|
||||
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
|
||||
@@ -6660,6 +6670,9 @@ loongarch_use_anchors_for_symbol_p (const_rtx symbol)
|
||||
#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
|
||||
#define TARGET_USE_ANCHORS_FOR_SYMBOL_P loongarch_use_anchors_for_symbol_p
|
||||
|
||||
+#undef TARGET_ASAN_SHADOW_OFFSET
|
||||
+#define TARGET_ASAN_SHADOW_OFFSET loongarch_asan_shadow_offset
|
||||
+
|
||||
struct gcc_target targetm = TARGET_INITIALIZER;
|
||||
|
||||
#include "gt-loongarch.h"
|
||||
diff --git a/gcc/testsuite/g++.target/loongarch/pr106828.C b/gcc/testsuite/g++.target/loongarch/pr106828.C
|
||||
new file mode 100644
|
||||
index 000000000..190c1db71
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/g++.target/loongarch/pr106828.C
|
||||
@@ -0,0 +1,4 @@
|
||||
+/* { dg-do-preprocess } */
|
||||
+/* { dg-options "-mabi=lp64d -fsanitize=address" } */
|
||||
+
|
||||
+/* Tests whether the compiler supports compile option '-fsanitize=address'. */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
31
LoongArch-Fix-unintentional-bash-ism-in-r14-3665.patch
Normal file
31
LoongArch-Fix-unintentional-bash-ism-in-r14-3665.patch
Normal file
@ -0,0 +1,31 @@
|
||||
From 8e5c9f349877af07dde4804974d47625c1292956 Mon Sep 17 00:00:00 2001
|
||||
From: Yang Yujie <yangyujie@loongson.cn>
|
||||
Date: Wed, 6 Sep 2023 17:57:47 +0800
|
||||
Subject: [PATCH 070/124] LoongArch: Fix unintentional bash-ism in r14-3665.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config.gcc: remove non-POSIX syntax "<<<".
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config.gcc | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/config.gcc b/gcc/config.gcc
|
||||
index 19f584344..57e724080 100644
|
||||
--- a/gcc/config.gcc
|
||||
+++ b/gcc/config.gcc
|
||||
@@ -5263,7 +5263,7 @@ case "${target}" in
|
||||
if test x${parse_state} = x"abi-base"; then
|
||||
# Base ABI type
|
||||
case ${component} in
|
||||
- lp64d | lp64f | lp64s) elem_tmp="ABI_BASE_$(tr a-z A-Z <<< ${component}),";;
|
||||
+ lp64d | lp64f | lp64s) elem_tmp="ABI_BASE_$(echo ${component} | tr a-z A-Z),";;
|
||||
*)
|
||||
echo "Unknown base ABI \"${component}\" in --with-multilib-list." 1>&2
|
||||
exit 1
|
||||
--
|
||||
2.33.0
|
||||
|
||||
34
LoongArch-Fix-unintentionally-breakage-in-r14-3665.patch
Normal file
34
LoongArch-Fix-unintentionally-breakage-in-r14-3665.patch
Normal file
@ -0,0 +1,34 @@
|
||||
From 8de6f5e1aad2a1ff85ff3a4b732055d625c61139 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Tue, 5 Sep 2023 20:02:51 +0800
|
||||
Subject: [PATCH 067/124] LoongArch: Fix unintentionally breakage in r14-3665
|
||||
|
||||
Fix a build failure when there is no system assembler or the system assembler is too old.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch-opts.h (HAVE_AS_EXPLICIT_RELOCS):
|
||||
Define to 0 if not defined yet.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch-opts.h | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
|
||||
index e3f9b6f99..0d148e43b 100644
|
||||
--- a/gcc/config/loongarch/loongarch-opts.h
|
||||
+++ b/gcc/config/loongarch/loongarch-opts.h
|
||||
@@ -93,4 +93,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
|
||||
while -m[no]-memcpy imposes a global constraint. */
|
||||
#define TARGET_DO_OPTIMIZE_BLOCK_MOVE_P loongarch_do_optimize_block_move_p()
|
||||
|
||||
+#ifndef HAVE_AS_EXPLICIT_RELOCS
|
||||
+#define HAVE_AS_EXPLICIT_RELOCS 0
|
||||
+#endif
|
||||
+
|
||||
#endif /* LOONGARCH_OPTS_H */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
33
LoongArch-Fix-up-memcpy-vec-3.c-test-case.patch
Normal file
33
LoongArch-Fix-up-memcpy-vec-3.c-test-case.patch
Normal file
@ -0,0 +1,33 @@
|
||||
From 78896e68f50164af7827e8da01a7220764d1e296 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Sat, 9 Sep 2023 16:18:06 +0800
|
||||
Subject: [PATCH 075/124] LoongArch: Fix up memcpy-vec-3.c test case
|
||||
|
||||
The generic code will split 16-byte copy into two 8-byte copies, so the
|
||||
vector code wouldn't be used even if -mno-strict-align. This
|
||||
contradicted the purpose of this test case.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/memcpy-vec-3.c: Increase the amount of
|
||||
copied bytes to 32.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c b/gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c
|
||||
index 233ed2150..db2ea510b 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c
|
||||
@@ -3,4 +3,4 @@
|
||||
/* { dg-final { scan-assembler-not "vst" } } */
|
||||
|
||||
extern char a[], b[];
|
||||
-void test() { __builtin_memcpy(a, b, 16); }
|
||||
+void test() { __builtin_memcpy(a, b, 32); }
|
||||
--
|
||||
2.33.0
|
||||
|
||||
43
LoongArch-Fixed-a-bug-in-the-loongarch-architecture-.patch
Normal file
43
LoongArch-Fixed-a-bug-in-the-loongarch-architecture-.patch
Normal file
@ -0,0 +1,43 @@
|
||||
From 80ed9ab39d9b1b08ad9d054f16d65b2a249a89e5 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Wed, 12 Oct 2022 11:02:11 +0800
|
||||
Subject: [PATCH 022/124] LoongArch: Fixed a bug in the loongarch architecture
|
||||
of libitm package.
|
||||
|
||||
Add a soft floating point condition to the register recovery part of the code.
|
||||
|
||||
libitm/ChangeLog:
|
||||
|
||||
* config/loongarch/sjlj.S: Add a soft floating point condition to the
|
||||
register recovery part of the code.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
libitm/config/loongarch/sjlj.S | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/libitm/config/loongarch/sjlj.S b/libitm/config/loongarch/sjlj.S
|
||||
index a5f9fadde..f896e400e 100644
|
||||
--- a/libitm/config/loongarch/sjlj.S
|
||||
+++ b/libitm/config/loongarch/sjlj.S
|
||||
@@ -104,6 +104,8 @@ GTM_longjmp:
|
||||
GPR_L $s7, $r5, 10*SZ_GPR
|
||||
GPR_L $s8, $r5, 11*SZ_GPR
|
||||
|
||||
+#if !defined(__loongarch_soft_float)
|
||||
+ /* Callee-saved scratch FPRs (f24-f31) */
|
||||
FPR_L $f24, $r5, 12*SZ_GPR + 0*SZ_FPR
|
||||
FPR_L $f25, $r5, 12*SZ_GPR + 1*SZ_FPR
|
||||
FPR_L $f26, $r5, 12*SZ_GPR + 2*SZ_FPR
|
||||
@@ -112,6 +114,7 @@ GTM_longjmp:
|
||||
FPR_L $f29, $r5, 12*SZ_GPR + 5*SZ_FPR
|
||||
FPR_L $f30, $r5, 12*SZ_GPR + 6*SZ_FPR
|
||||
FPR_L $f31, $r5, 12*SZ_GPR + 7*SZ_FPR
|
||||
+#endif
|
||||
|
||||
GPR_L $r7, $r5, 2*SZ_GPR
|
||||
GPR_L $fp, $r5, 0*SZ_GPR
|
||||
--
|
||||
2.33.0
|
||||
|
||||
182
LoongArch-Fixed-a-compilation-failure-with-c-in-inli.patch
Normal file
182
LoongArch-Fixed-a-compilation-failure-with-c-in-inli.patch
Normal file
@ -0,0 +1,182 @@
|
||||
From 49a63dbaf3b4296f0b1f8a0e11790cc3455aeec7 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Wed, 18 Jan 2023 11:06:56 +0800
|
||||
Subject: [PATCH 034/124] LoongArch: Fixed a compilation failure with '%c' in
|
||||
inline assembly [PR107731].
|
||||
|
||||
Co-authored-by: Yang Yujie <yangyujie@loongson.cn>
|
||||
|
||||
PR target/107731
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.cc (loongarch_classify_address):
|
||||
Add processing for CONST_INT.
|
||||
(loongarch_print_operand_reloc): Operand modifier 'c' is supported.
|
||||
(loongarch_print_operand): Increase the processing of '%c'.
|
||||
* doc/extend.texi: Adds documents for LoongArch operand modifiers.
|
||||
And port the public operand modifiers information to this document.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/tst-asm-const.c: Moved to...
|
||||
* gcc.target/loongarch/pr107731.c: ...here.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.cc | 14 +++++
|
||||
gcc/doc/extend.texi | 51 +++++++++++++++++--
|
||||
.../loongarch/{tst-asm-const.c => pr107731.c} | 6 +--
|
||||
3 files changed, 64 insertions(+), 7 deletions(-)
|
||||
rename gcc/testsuite/gcc.target/loongarch/{tst-asm-const.c => pr107731.c} (78%)
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index e59edc4cd..1a4686f03 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -2074,6 +2074,11 @@ loongarch_classify_address (struct loongarch_address_info *info, rtx x,
|
||||
return (loongarch_valid_base_register_p (info->reg, mode, strict_p)
|
||||
&& loongarch_valid_lo_sum_p (info->symbol_type, mode,
|
||||
info->offset));
|
||||
+ case CONST_INT:
|
||||
+ /* Small-integer addresses don't occur very often, but they
|
||||
+ are legitimate if $r0 is a valid base register. */
|
||||
+ info->type = ADDRESS_CONST_INT;
|
||||
+ return IMM12_OPERAND (INTVAL (x));
|
||||
|
||||
default:
|
||||
return false;
|
||||
@@ -4932,6 +4937,7 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
|
||||
|
||||
'A' Print a _DB suffix if the memory model requires a release.
|
||||
'b' Print the address of a memory operand, without offset.
|
||||
+ 'c' Print an integer.
|
||||
'C' Print the integer branch condition for comparison OP.
|
||||
'd' Print CONST_INT OP in decimal.
|
||||
'F' Print the FPU branch condition for comparison OP.
|
||||
@@ -4978,6 +4984,14 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
|
||||
fputs ("_db", file);
|
||||
break;
|
||||
|
||||
+ case 'c':
|
||||
+ if (CONST_INT_P (op))
|
||||
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (op));
|
||||
+ else
|
||||
+ output_operand_lossage ("unsupported operand for code '%c'", letter);
|
||||
+
|
||||
+ break;
|
||||
+
|
||||
case 'C':
|
||||
loongarch_print_int_branch_condition (file, code, letter);
|
||||
break;
|
||||
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||||
index da2840c23..3c101ca89 100644
|
||||
--- a/gcc/doc/extend.texi
|
||||
+++ b/gcc/doc/extend.texi
|
||||
@@ -10414,8 +10414,10 @@ ensures that modifying @var{a} does not affect the address referenced by
|
||||
is undefined if @var{a} is modified before using @var{b}.
|
||||
|
||||
@code{asm} supports operand modifiers on operands (for example @samp{%k2}
|
||||
-instead of simply @samp{%2}). Typically these qualifiers are hardware
|
||||
-dependent. The list of supported modifiers for x86 is found at
|
||||
+instead of simply @samp{%2}). @ref{GenericOperandmodifiers,
|
||||
+Generic Operand modifiers} lists the modifiers that are available
|
||||
+on all targets. Other modifiers are hardware dependent.
|
||||
+For example, the list of supported modifiers for x86 is found at
|
||||
@ref{x86Operandmodifiers,x86 Operand modifiers}.
|
||||
|
||||
If the C code that follows the @code{asm} makes no use of any of the output
|
||||
@@ -10683,8 +10685,10 @@ optimizers may discard the @code{asm} statement as unneeded
|
||||
(see @ref{Volatile}).
|
||||
|
||||
@code{asm} supports operand modifiers on operands (for example @samp{%k2}
|
||||
-instead of simply @samp{%2}). Typically these qualifiers are hardware
|
||||
-dependent. The list of supported modifiers for x86 is found at
|
||||
+instead of simply @samp{%2}). @ref{GenericOperandmodifiers,
|
||||
+Generic Operand modifiers} lists the modifiers that are available
|
||||
+on all targets. Other modifiers are hardware dependent.
|
||||
+For example, the list of supported modifiers for x86 is found at
|
||||
@ref{x86Operandmodifiers,x86 Operand modifiers}.
|
||||
|
||||
In this example using the fictitious @code{combine} instruction, the
|
||||
@@ -11036,6 +11040,30 @@ lab:
|
||||
@}
|
||||
@end example
|
||||
|
||||
+@anchor{GenericOperandmodifiers}
|
||||
+@subsubsection Generic Operand Modifiers
|
||||
+@noindent
|
||||
+The following table shows the modifiers supported by all targets and their effects:
|
||||
+
|
||||
+@multitable {Modifier} {Description} {Example}
|
||||
+@headitem Modifier @tab Description @tab Example
|
||||
+@item @code{c}
|
||||
+@tab Require a constant operand and print the constant expression with no punctuation.
|
||||
+@tab @code{%c0}
|
||||
+@item @code{n}
|
||||
+@tab Like @samp{%c} except that the value of the constant is negated before printing.
|
||||
+@tab @code{%n0}
|
||||
+@item @code{a}
|
||||
+@tab Substitute a memory reference, with the actual operand treated as the address.
|
||||
+This may be useful when outputting a ``load address'' instruction, because
|
||||
+often the assembler syntax for such an instruction requires you to write the
|
||||
+operand as if it were a memory reference.
|
||||
+@tab @code{%a0}
|
||||
+@item @code{l}
|
||||
+@tab Print the label name with no punctuation.
|
||||
+@tab @code{%l0}
|
||||
+@end multitable
|
||||
+
|
||||
@anchor{x86Operandmodifiers}
|
||||
@subsubsection x86 Operand Modifiers
|
||||
|
||||
@@ -11386,6 +11414,21 @@ constant. Used to select the specified bit position.
|
||||
@item @code{x} @tab Equivialent to @code{X}, but only for pointers.
|
||||
@end multitable
|
||||
|
||||
+@anchor{loongarchOperandmodifiers}
|
||||
+@subsubsection LoongArch Operand Modifiers
|
||||
+
|
||||
+The list below describes the supported modifiers and their effects for LoongArch.
|
||||
+
|
||||
+@multitable @columnfractions .10 .90
|
||||
+@headitem Modifier @tab Description
|
||||
+@item @code{d} @tab Same as @code{c}.
|
||||
+@item @code{i} @tab Print the character ''@code{i}'' if the operand is not a register.
|
||||
+@item @code{m} @tab Same as @code{c}, but the printed value is @code{operand - 1}.
|
||||
+@item @code{X} @tab Print a constant integer operand in hexadecimal.
|
||||
+@item @code{z} @tab Print the operand in its unmodified form, followed by a comma.
|
||||
+@end multitable
|
||||
+
|
||||
+
|
||||
@lowersections
|
||||
@include md.texi
|
||||
@raisesections
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/tst-asm-const.c b/gcc/testsuite/gcc.target/loongarch/pr107731.c
|
||||
similarity index 78%
|
||||
rename from gcc/testsuite/gcc.target/loongarch/tst-asm-const.c
|
||||
rename to gcc/testsuite/gcc.target/loongarch/pr107731.c
|
||||
index 2e04b99e3..80d84c48c 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/tst-asm-const.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/pr107731.c
|
||||
@@ -1,13 +1,13 @@
|
||||
-/* Test asm const. */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-final { scan-assembler-times "foo:.*\\.long 1061109567.*\\.long 52" 1 } } */
|
||||
+
|
||||
int foo ()
|
||||
{
|
||||
__asm__ volatile (
|
||||
"foo:"
|
||||
"\n\t"
|
||||
- ".long %a0\n\t"
|
||||
- ".long %a1\n\t"
|
||||
+ ".long %c0\n\t"
|
||||
+ ".long %c1\n\t"
|
||||
:
|
||||
:"i"(0x3f3f3f3f), "i"(52)
|
||||
:
|
||||
--
|
||||
2.33.0
|
||||
|
||||
33
LoongArch-Fixed-a-typo-in-the-comment-information-of.patch
Normal file
33
LoongArch-Fixed-a-typo-in-the-comment-information-of.patch
Normal file
@ -0,0 +1,33 @@
|
||||
From cbb5f181544e35b119fee4ed150bec24eee7179c Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Wed, 28 Sep 2022 16:35:06 +0800
|
||||
Subject: [PATCH 020/124] LoongArch: Fixed a typo in the comment information of
|
||||
the function loongarch_asan_shadow_offset.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.cc (loongarch_asan_shadow_offset):
|
||||
Fixed typo in "asan_mapping.h".
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.cc | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index 98c0e26cd..e9ba3374e 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -6472,7 +6472,7 @@ static unsigned HOST_WIDE_INT
|
||||
loongarch_asan_shadow_offset (void)
|
||||
{
|
||||
/* We only have libsanitizer support for LOONGARCH64 at present.
|
||||
- This value is taken from the file libsanitizer/asan/asan_mappint.h. */
|
||||
+ This value is taken from the file libsanitizer/asan/asan_mapping.h. */
|
||||
return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
|
||||
}
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
196
LoongArch-Generate-bytepick.-wd-for-suitable-bit-ope.patch
Normal file
196
LoongArch-Generate-bytepick.-wd-for-suitable-bit-ope.patch
Normal file
@ -0,0 +1,196 @@
|
||||
From 9311c0f56086e38fe5e9bf4bbfc2e37d0f18347c Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Fri, 3 Feb 2023 17:06:06 +0800
|
||||
Subject: [PATCH 036/124] LoongArch: Generate bytepick.[wd] for suitable bit
|
||||
operation pattern
|
||||
|
||||
We can use bytepick.[wd] for
|
||||
|
||||
a << (8 * x) | b >> (8 * (sizeof(a) - x))
|
||||
|
||||
where a and b are uint32_t or uint64_t. This is useful for some cases,
|
||||
for example:
|
||||
https://sourceware.org/pipermail/libc-alpha/2023-February/145203.html
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.md (bytepick_w_ashift_amount):
|
||||
New define_int_iterator.
|
||||
(bytepick_d_ashift_amount): Likewise.
|
||||
(bytepick_imm): New define_int_attr.
|
||||
(bytepick_w_lshiftrt_amount): Likewise.
|
||||
(bytepick_d_lshiftrt_amount): Likewise.
|
||||
(bytepick_w_<bytepick_imm>): New define_insn template.
|
||||
(bytepick_w_<bytepick_imm>_extend): Likewise.
|
||||
(bytepick_d_<bytepick_imm>): Likewise.
|
||||
(bytepick_w): Remove unused define_insn.
|
||||
(bytepick_d): Likewise.
|
||||
(UNSPEC_BYTEPICK_W): Remove unused unspec.
|
||||
(UNSPEC_BYTEPICK_D): Likewise.
|
||||
* config/loongarch/predicates.md (const_0_to_3_operand):
|
||||
Remove unused define_predicate.
|
||||
(const_0_to_7_operand): Likewise.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* g++.target/loongarch/bytepick.C: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.md | 60 ++++++++++++++-----
|
||||
gcc/config/loongarch/predicates.md | 8 ---
|
||||
gcc/testsuite/g++.target/loongarch/bytepick.C | 32 ++++++++++
|
||||
3 files changed, 77 insertions(+), 23 deletions(-)
|
||||
create mode 100644 gcc/testsuite/g++.target/loongarch/bytepick.C
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index f61db66d5..833b94753 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -48,8 +48,6 @@
|
||||
UNSPEC_EH_RETURN
|
||||
|
||||
;; Bit operation
|
||||
- UNSPEC_BYTEPICK_W
|
||||
- UNSPEC_BYTEPICK_D
|
||||
UNSPEC_BITREV_4B
|
||||
UNSPEC_BITREV_8B
|
||||
|
||||
@@ -544,6 +542,27 @@
|
||||
(UNSPEC_FTINTRM "0")
|
||||
(UNSPEC_FTINTRP "0")])
|
||||
|
||||
+;; Iterator and attributes for bytepick.d
|
||||
+(define_int_iterator bytepick_w_ashift_amount [8 16 24])
|
||||
+(define_int_attr bytepick_w_lshiftrt_amount [(8 "24")
|
||||
+ (16 "16")
|
||||
+ (24 "8")])
|
||||
+(define_int_iterator bytepick_d_ashift_amount [8 16 24 32 40 48 56])
|
||||
+(define_int_attr bytepick_d_lshiftrt_amount [(8 "56")
|
||||
+ (16 "48")
|
||||
+ (24 "40")
|
||||
+ (32 "32")
|
||||
+ (40 "24")
|
||||
+ (48 "16")
|
||||
+ (56 "8")])
|
||||
+(define_int_attr bytepick_imm [(8 "1")
|
||||
+ (16 "2")
|
||||
+ (24 "3")
|
||||
+ (32 "4")
|
||||
+ (40 "5")
|
||||
+ (48 "6")
|
||||
+ (56 "7")])
|
||||
+
|
||||
;;
|
||||
;; ....................
|
||||
;;
|
||||
@@ -3364,24 +3383,35 @@
|
||||
[(set_attr "type" "unknown")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
-(define_insn "bytepick_w"
|
||||
+(define_insn "bytepick_w_<bytepick_imm>"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
- (unspec:SI [(match_operand:SI 1 "register_operand" "r")
|
||||
- (match_operand:SI 2 "register_operand" "r")
|
||||
- (match_operand:SI 3 "const_0_to_3_operand" "n")]
|
||||
- UNSPEC_BYTEPICK_W))]
|
||||
+ (ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r")
|
||||
+ (const_int <bytepick_w_lshiftrt_amount>))
|
||||
+ (ashift (match_operand:SI 2 "register_operand" "r")
|
||||
+ (const_int bytepick_w_ashift_amount))))]
|
||||
""
|
||||
- "bytepick.w\t%0,%1,%2,%z3"
|
||||
+ "bytepick.w\t%0,%1,%2,<bytepick_imm>"
|
||||
[(set_attr "mode" "SI")])
|
||||
|
||||
-(define_insn "bytepick_d"
|
||||
+(define_insn "bytepick_w_<bytepick_imm>_extend"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
- (unspec:DI [(match_operand:DI 1 "register_operand" "r")
|
||||
- (match_operand:DI 2 "register_operand" "r")
|
||||
- (match_operand:DI 3 "const_0_to_7_operand" "n")]
|
||||
- UNSPEC_BYTEPICK_D))]
|
||||
- ""
|
||||
- "bytepick.d\t%0,%1,%2,%z3"
|
||||
+ (sign_extend:DI
|
||||
+ (ior:SI (lshiftrt (match_operand:SI 1 "register_operand" "r")
|
||||
+ (const_int <bytepick_w_lshiftrt_amount>))
|
||||
+ (ashift (match_operand:SI 2 "register_operand" "r")
|
||||
+ (const_int bytepick_w_ashift_amount)))))]
|
||||
+ "TARGET_64BIT"
|
||||
+ "bytepick.w\t%0,%1,%2,<bytepick_imm>"
|
||||
+ [(set_attr "mode" "SI")])
|
||||
+
|
||||
+(define_insn "bytepick_d_<bytepick_imm>"
|
||||
+ [(set (match_operand:DI 0 "register_operand" "=r")
|
||||
+ (ior:DI (lshiftrt (match_operand:DI 1 "register_operand" "r")
|
||||
+ (const_int <bytepick_d_lshiftrt_amount>))
|
||||
+ (ashift (match_operand:DI 2 "register_operand" "r")
|
||||
+ (const_int bytepick_d_ashift_amount))))]
|
||||
+ "TARGET_64BIT"
|
||||
+ "bytepick.d\t%0,%1,%2,<bytepick_imm>"
|
||||
[(set_attr "mode" "DI")])
|
||||
|
||||
(define_insn "bitrev_4b"
|
||||
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
|
||||
index 58c3dc226..3c32b2987 100644
|
||||
--- a/gcc/config/loongarch/predicates.md
|
||||
+++ b/gcc/config/loongarch/predicates.md
|
||||
@@ -91,14 +91,6 @@
|
||||
(ior (match_operand 0 "const_1_operand")
|
||||
(match_operand 0 "register_operand")))
|
||||
|
||||
-(define_predicate "const_0_to_3_operand"
|
||||
- (and (match_code "const_int")
|
||||
- (match_test "IN_RANGE (INTVAL (op), 0, 3)")))
|
||||
-
|
||||
-(define_predicate "const_0_to_7_operand"
|
||||
- (and (match_code "const_int")
|
||||
- (match_test "IN_RANGE (INTVAL (op), 0, 7)")))
|
||||
-
|
||||
(define_predicate "lu52i_mask_operand"
|
||||
(and (match_code "const_int")
|
||||
(match_test "UINTVAL (op) == 0xfffffffffffff")))
|
||||
diff --git a/gcc/testsuite/g++.target/loongarch/bytepick.C b/gcc/testsuite/g++.target/loongarch/bytepick.C
|
||||
new file mode 100644
|
||||
index 000000000..a39e2fa65
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/g++.target/loongarch/bytepick.C
|
||||
@@ -0,0 +1,32 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -mabi=lp64d" } */
|
||||
+/* { dg-final { scan-assembler-times "bytepick.w\t\\\$r4,\\\$r5,\\\$r4" 3 } } */
|
||||
+/* { dg-final { scan-assembler-times "bytepick.d\t\\\$r4,\\\$r5,\\\$r4" 7 } } */
|
||||
+/* { dg-final { scan-assembler-not "slli.w" } } */
|
||||
+
|
||||
+template <class T, int offs>
|
||||
+T
|
||||
+merge (T a, T b)
|
||||
+{
|
||||
+ return a << offs | b >> (8 * sizeof (T) - offs);
|
||||
+}
|
||||
+
|
||||
+using u32 = __UINT32_TYPE__;
|
||||
+using u64 = __UINT64_TYPE__;
|
||||
+using i64 = __INT64_TYPE__;
|
||||
+
|
||||
+template u32 merge<u32, 8> (u32, u32);
|
||||
+template u32 merge<u32, 16> (u32, u32);
|
||||
+template u32 merge<u32, 24> (u32, u32);
|
||||
+
|
||||
+template u64 merge<u64, 8> (u64, u64);
|
||||
+template u64 merge<u64, 16> (u64, u64);
|
||||
+template u64 merge<u64, 24> (u64, u64);
|
||||
+template u64 merge<u64, 32> (u64, u64);
|
||||
+template u64 merge<u64, 40> (u64, u64);
|
||||
+template u64 merge<u64, 48> (u64, u64);
|
||||
+template u64 merge<u64, 56> (u64, u64);
|
||||
+
|
||||
+/* we cannot use bytepick for the following cases */
|
||||
+template i64 merge<i64, 8> (i64, i64);
|
||||
+template u64 merge<u64, 42> (u64, u64);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
71
LoongArch-Get-__tls_get_addr-address-through-got-tab.patch
Normal file
71
LoongArch-Get-__tls_get_addr-address-through-got-tab.patch
Normal file
@ -0,0 +1,71 @@
|
||||
From a96dee6ba3c916f9a4329b196a0c5a1652fe294f Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Thu, 18 Aug 2022 09:57:14 +0800
|
||||
Subject: [PATCH 010/124] LoongArch: Get __tls_get_addr address through got
|
||||
table when disable plt.
|
||||
|
||||
Fix bug, ICE with tls gd/ld var with -fno-plt.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.cc (loongarch_call_tls_get_addr):
|
||||
Get __tls_get_addr address through got table when disable plt.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/tls-gd-noplt.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.cc | 14 ++++++++++++--
|
||||
gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c | 12 ++++++++++++
|
||||
2 files changed, 24 insertions(+), 2 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index 1b5af2c7d..76bf55ea4 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -2448,8 +2448,18 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
- insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol,
|
||||
- const0_rtx));
|
||||
+ if (flag_plt)
|
||||
+ insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol,
|
||||
+ const0_rtx));
|
||||
+ else
|
||||
+ {
|
||||
+ rtx dest = gen_reg_rtx (Pmode);
|
||||
+ rtx high = gen_reg_rtx (Pmode);
|
||||
+ loongarch_emit_move (high, gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
|
||||
+ emit_insn (gen_ld_from_got (Pmode, dest, high, loongarch_tls_symbol));
|
||||
+ insn = emit_call_insn (gen_call_value_internal (v0, dest, const0_rtx));
|
||||
+ }
|
||||
+
|
||||
RTL_CONST_CALL_P (insn) = 1;
|
||||
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
|
||||
insn = get_insns ();
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c b/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c
|
||||
new file mode 100644
|
||||
index 000000000..32a0acf9b
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/tls-gd-noplt.c
|
||||
@@ -0,0 +1,12 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fno-plt -mcmodel=normal" } */
|
||||
+/* { dg-final { scan-assembler "pcalau12i\t.*%got_pc_hi20\\(__tls_get_addr\\)" } } */
|
||||
+
|
||||
+__attribute__ ((tls_model ("global-dynamic"))) __thread int a;
|
||||
+
|
||||
+void
|
||||
+test (void)
|
||||
+{
|
||||
+ a = 10;
|
||||
+}
|
||||
+
|
||||
--
|
||||
2.33.0
|
||||
|
||||
204
LoongArch-Implement-128-bit-floating-point-functions.patch
Normal file
204
LoongArch-Implement-128-bit-floating-point-functions.patch
Normal file
@ -0,0 +1,204 @@
|
||||
From 12ab9eae9e8a5b83c778182f15c6216bcbc3dc36 Mon Sep 17 00:00:00 2001
|
||||
From: chenxiaolong <chenxiaolong@loongson.cn>
|
||||
Date: Fri, 1 Sep 2023 11:22:42 +0800
|
||||
Subject: [PATCH 054/124] LoongArch: Implement 128-bit floating point functions
|
||||
in gcc.
|
||||
|
||||
During implementation, float128_type_node is bound with the type "__float128"
|
||||
so that the compiler can correctly identify the type of the function. The
|
||||
"q" suffix is associated with the "f128" function, which makes GCC more
|
||||
flexible to support different user input cases, implementing functions such
|
||||
as __builtin_{huge_valq, infq, fabsq, copysignq, nanq, nansq}.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch-builtins.cc (loongarch_init_builtins):
|
||||
Associate the __float128 type to float128_type_node so that it can
|
||||
be recognized by the compiler.
|
||||
* config/loongarch/loongarch-c.cc (loongarch_cpu_cpp_builtins):
|
||||
Add the flag "FLOAT128_TYPE" to gcc and associate a function
|
||||
with the suffix "q" to "f128".
|
||||
* doc/extend.texi: Added support for 128-bit floating-point functions on
|
||||
the LoongArch architecture.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/math-float-128.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch-builtins.cc | 5 ++
|
||||
gcc/config/loongarch/loongarch-c.cc | 11 +++
|
||||
gcc/doc/extend.texi | 20 ++++-
|
||||
.../gcc.target/loongarch/math-float-128.c | 81 +++++++++++++++++++
|
||||
4 files changed, 114 insertions(+), 3 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/math-float-128.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
|
||||
index 64fe11168..cb0ea1664 100644
|
||||
--- a/gcc/config/loongarch/loongarch-builtins.cc
|
||||
+++ b/gcc/config/loongarch/loongarch-builtins.cc
|
||||
@@ -256,6 +256,11 @@ loongarch_init_builtins (void)
|
||||
unsigned int i;
|
||||
tree type;
|
||||
|
||||
+ /* Register the type float128_type_node as a built-in type and
|
||||
+ give it an alias "__float128". */
|
||||
+ (*lang_hooks.types.register_builtin_type) (float128_type_node,
|
||||
+ "__float128");
|
||||
+
|
||||
/* Iterate through all of the bdesc arrays, initializing all of the
|
||||
builtin functions. */
|
||||
for (i = 0; i < ARRAY_SIZE (loongarch_builtins); i++)
|
||||
diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc
|
||||
index d6e3e19f0..f779a7355 100644
|
||||
--- a/gcc/config/loongarch/loongarch-c.cc
|
||||
+++ b/gcc/config/loongarch/loongarch-c.cc
|
||||
@@ -99,6 +99,17 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile)
|
||||
else
|
||||
builtin_define ("__loongarch_frlen=0");
|
||||
|
||||
+ /* Add support for FLOAT128_TYPE on the LoongArch architecture. */
|
||||
+ builtin_define ("__FLOAT128_TYPE__");
|
||||
+
|
||||
+ /* Map the old _Float128 'q' builtins into the new 'f128' builtins. */
|
||||
+ builtin_define ("__builtin_fabsq=__builtin_fabsf128");
|
||||
+ builtin_define ("__builtin_copysignq=__builtin_copysignf128");
|
||||
+ builtin_define ("__builtin_nanq=__builtin_nanf128");
|
||||
+ builtin_define ("__builtin_nansq=__builtin_nansf128");
|
||||
+ builtin_define ("__builtin_infq=__builtin_inff128");
|
||||
+ builtin_define ("__builtin_huge_valq=__builtin_huge_valf128");
|
||||
+
|
||||
/* Native Data Sizes. */
|
||||
builtin_define_with_int_value ("_LOONGARCH_SZINT", INT_TYPE_SIZE);
|
||||
builtin_define_with_int_value ("_LOONGARCH_SZLONG", LONG_TYPE_SIZE);
|
||||
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||||
index 1d1bac255..bb19d0f27 100644
|
||||
--- a/gcc/doc/extend.texi
|
||||
+++ b/gcc/doc/extend.texi
|
||||
@@ -1085,10 +1085,10 @@ types.
|
||||
As an extension, GNU C and GNU C++ support additional floating
|
||||
types, which are not supported by all targets.
|
||||
@itemize @bullet
|
||||
-@item @code{__float128} is available on i386, x86_64, IA-64, and
|
||||
-hppa HP-UX, as well as on PowerPC GNU/Linux targets that enable
|
||||
+@item @code{__float128} is available on i386, x86_64, IA-64, LoongArch
|
||||
+and hppa HP-UX, as well as on PowerPC GNU/Linux targets that enable
|
||||
the vector scalar (VSX) instruction set. @code{__float128} supports
|
||||
-the 128-bit floating type. On i386, x86_64, PowerPC, and IA-64
|
||||
+the 128-bit floating type. On i386, x86_64, PowerPC, LoongArch and IA-64,
|
||||
other than HP-UX, @code{__float128} is an alias for @code{_Float128}.
|
||||
On hppa and IA-64 HP-UX, @code{__float128} is an alias for @code{long
|
||||
double}.
|
||||
@@ -16257,6 +16257,20 @@ function you need to include @code{larchintrin.h}.
|
||||
void __break (imm0_32767)
|
||||
@end smallexample
|
||||
|
||||
+Additional built-in functions are available for LoongArch family
|
||||
+processors to efficiently use 128-bit floating-point (__float128)
|
||||
+values.
|
||||
+
|
||||
+The following are the basic built-in functions supported.
|
||||
+@smallexample
|
||||
+__float128 __builtin_fabsq (__float128);
|
||||
+__float128 __builtin_copysignq (__float128, __float128);
|
||||
+__float128 __builtin_infq (void);
|
||||
+__float128 __builtin_huge_valq (void);
|
||||
+__float128 __builtin_nanq (void);
|
||||
+__float128 __builtin_nansq (void);
|
||||
+@end smallexample
|
||||
+
|
||||
@node MIPS DSP Built-in Functions
|
||||
@subsection MIPS DSP Built-in Functions
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/math-float-128.c b/gcc/testsuite/gcc.target/loongarch/math-float-128.c
|
||||
new file mode 100644
|
||||
index 000000000..387566a57
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/math-float-128.c
|
||||
@@ -0,0 +1,81 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options " -march=loongarch64 -O2 " } */
|
||||
+/* { dg-final { scan-assembler-not "my_fabsq2:.*\\bl\t%plt\\(__builtin_fabsq\\).*my_fabsq2" } } */
|
||||
+/* { dg-final { scan-assembler-not "my_copysignq2:.*\\bl\t%plt\\(__builtin_copysignq\\).*my_copysignq2" } } */
|
||||
+/* { dg-final { scan-assembler-not "my_infq2:.*\\bl\t%plt\\(__builtin_infq\\).*my_infq2" } } */
|
||||
+/* { dg-final { scan-assembler-not "my_huge_valq2:.*\\bl\t%plt\\(__builtin_huge_valq\\).*my_huge_valq2" } } */
|
||||
+/* { dg-final { scan-assembler-not "my_nanq2:.*\\bl\t%plt\\(__builtin_nanq\\).*my_nanq2" } } */
|
||||
+/* { dg-final { scan-assembler-not "my_nansq2:.*\\bl\t%plt\\(__builtin_nansq\\).*my_nansq2" } } */
|
||||
+
|
||||
+__float128
|
||||
+my_fabsq1 (__float128 a)
|
||||
+{
|
||||
+ return __builtin_fabsq (a);
|
||||
+}
|
||||
+
|
||||
+_Float128
|
||||
+my_fabsq2 (_Float128 a)
|
||||
+{
|
||||
+ return __builtin_fabsq (a);
|
||||
+}
|
||||
+
|
||||
+__float128
|
||||
+my_copysignq1 (__float128 a, __float128 b)
|
||||
+{
|
||||
+ return __builtin_copysignq (a, b);
|
||||
+}
|
||||
+
|
||||
+_Float128
|
||||
+my_copysignq2 (_Float128 a, _Float128 b)
|
||||
+{
|
||||
+ return __builtin_copysignq (a, b);
|
||||
+}
|
||||
+
|
||||
+__float128
|
||||
+my_infq1 (void)
|
||||
+{
|
||||
+ return __builtin_infq ();
|
||||
+}
|
||||
+
|
||||
+_Float128
|
||||
+my_infq2 (void)
|
||||
+{
|
||||
+ return __builtin_infq ();
|
||||
+}
|
||||
+
|
||||
+__float128
|
||||
+my_huge_valq1 (void)
|
||||
+{
|
||||
+ return __builtin_huge_valq ();
|
||||
+}
|
||||
+
|
||||
+_Float128
|
||||
+my_huge_valq2 (void)
|
||||
+{
|
||||
+ return __builtin_huge_valq ();
|
||||
+}
|
||||
+
|
||||
+__float128
|
||||
+my_nanq1 (void)
|
||||
+{
|
||||
+ return __builtin_nanq ("");
|
||||
+}
|
||||
+
|
||||
+_Float128
|
||||
+my_nanq2 (void)
|
||||
+{
|
||||
+ return __builtin_nanq ("");
|
||||
+}
|
||||
+
|
||||
+__float128
|
||||
+my_nansq1 (void)
|
||||
+{
|
||||
+ return __builtin_nansq ("");
|
||||
+}
|
||||
+
|
||||
+_Float128
|
||||
+my_nansq2 (void)
|
||||
+{
|
||||
+ return __builtin_nansq ("");
|
||||
+}
|
||||
+
|
||||
--
|
||||
2.33.0
|
||||
|
||||
83
LoongArch-Improve-GAR-store-for-va_list.patch
Normal file
83
LoongArch-Improve-GAR-store-for-va_list.patch
Normal file
@ -0,0 +1,83 @@
|
||||
From 4075f299ca6a5d15fdb46f877cbe11b7166a19ff Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Wed, 29 Mar 2023 01:36:09 +0800
|
||||
Subject: [PATCH 042/124] LoongArch: Improve GAR store for va_list
|
||||
|
||||
LoongArch backend used to save all GARs for a function with variable
|
||||
arguments. But sometimes a function only accepts variable arguments for
|
||||
a purpose like C++ function overloading. For example, POSIX defines
|
||||
open() as:
|
||||
|
||||
int open(const char *path, int oflag, ...);
|
||||
|
||||
But only two forms are actually used:
|
||||
|
||||
int open(const char *pathname, int flags);
|
||||
int open(const char *pathname, int flags, mode_t mode);
|
||||
|
||||
So it's obviously a waste to save all 8 GARs in open(). We can use the
|
||||
cfun->va_list_gpr_size field set by the stdarg pass to only save the
|
||||
GARs necessary to be saved.
|
||||
|
||||
If the va_list escapes (for example, in fprintf() we pass it to
|
||||
vfprintf()), stdarg would set cfun->va_list_gpr_size to 255 so we
|
||||
don't need a special case.
|
||||
|
||||
With this patch, only one GAR ($a2/$r6) is saved in open(). Ideally
|
||||
even this stack store should be omitted too, but doing so is not trivial
|
||||
and AFAIK there are no compilers (for any target) performing the "ideal"
|
||||
optimization here, see https://godbolt.org/z/n1YqWq9c9.
|
||||
|
||||
Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk
|
||||
(GCC 14 or now)?
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.cc
|
||||
(loongarch_setup_incoming_varargs): Don't save more GARs than
|
||||
cfun->va_list_gpr_size / UNITS_PER_WORD.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/va_arg.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/testsuite/gcc.target/loongarch/va_arg.c | 24 +++++++++++++++++++++
|
||||
1 file changed, 24 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/va_arg.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/va_arg.c b/gcc/testsuite/gcc.target/loongarch/va_arg.c
|
||||
new file mode 100644
|
||||
index 000000000..980c96d0e
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/va_arg.c
|
||||
@@ -0,0 +1,24 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2" } */
|
||||
+
|
||||
+/* Technically we shouldn't save any register for this function: it should be
|
||||
+ compiled as if it accepts 3 named arguments. But AFAIK no compilers can
|
||||
+ achieve this "perfect" optimization now, so just ensure we are using the
|
||||
+ knowledge provided by stdarg pass and we won't save GARs impossible to be
|
||||
+ accessed with __builtin_va_arg () when the va_list does not escape. */
|
||||
+
|
||||
+/* { dg-final { scan-assembler-not "st.*r7" } } */
|
||||
+
|
||||
+int
|
||||
+test (int a0, ...)
|
||||
+{
|
||||
+ void *arg;
|
||||
+ int a1, a2;
|
||||
+
|
||||
+ __builtin_va_start (arg, a0);
|
||||
+ a1 = __builtin_va_arg (arg, int);
|
||||
+ a2 = __builtin_va_arg (arg, int);
|
||||
+ __builtin_va_end (arg);
|
||||
+
|
||||
+ return a0 + a1 + a2;
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
339
LoongArch-Improve-cpymemsi-expansion-PR109465.patch
Normal file
339
LoongArch-Improve-cpymemsi-expansion-PR109465.patch
Normal file
@ -0,0 +1,339 @@
|
||||
From 33fff578e7df7aa7e236efc6c9c85c595918d86a Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Wed, 12 Apr 2023 11:45:48 +0000
|
||||
Subject: [PATCH 043/124] LoongArch: Improve cpymemsi expansion [PR109465]
|
||||
|
||||
We'd been generating really bad block move sequences, which kernel
|
||||
developers who tried __builtin_memcpy recently complained about. To improve
|
||||
it:
|
||||
|
||||
1. Take advantage of -mno-strict-align. When it is set, set mode
|
||||
size to UNITS_PER_WORD regardless of the alignment.
|
||||
2. Halve the mode size when (block size) % (mode size) != 0, instead of
|
||||
falling back to ld.bu/st.b at once.
|
||||
3. Limit the length of block move sequence considering the number of
|
||||
instructions, not the size of block. When -mstrict-align is set and
|
||||
the block is not aligned, the old size limit for straight-line
|
||||
implementation (64 bytes) was definitely too large (we don't have 64
|
||||
registers anyway).
|
||||
|
||||
Change since v1: add a comment about the calculation of num_reg.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
PR target/109465
|
||||
* config/loongarch/loongarch-protos.h
|
||||
(loongarch_expand_block_move): Add a parameter as alignment RTX.
|
||||
* config/loongarch/loongarch.h:
|
||||
(LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER): Remove.
|
||||
(LARCH_MAX_MOVE_BYTES_STRAIGHT): Remove.
|
||||
(LARCH_MAX_MOVE_OPS_PER_LOOP_ITER): Define.
|
||||
(LARCH_MAX_MOVE_OPS_STRAIGHT): Define.
|
||||
(MOVE_RATIO): Use LARCH_MAX_MOVE_OPS_PER_LOOP_ITER instead of
|
||||
LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER.
|
||||
* config/loongarch/loongarch.cc (loongarch_expand_block_move):
|
||||
Take the alignment from the parameter, but set it to
|
||||
UNITS_PER_WORD if !TARGET_STRICT_ALIGN. Limit the length of
|
||||
straight-line implementation with LARCH_MAX_MOVE_OPS_STRAIGHT
|
||||
instead of LARCH_MAX_MOVE_BYTES_STRAIGHT.
|
||||
(loongarch_block_move_straight): When there are left-over bytes,
|
||||
halve the mode size instead of falling back to byte mode at once.
|
||||
(loongarch_block_move_loop): Limit the length of loop body with
|
||||
LARCH_MAX_MOVE_OPS_PER_LOOP_ITER instead of
|
||||
LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER.
|
||||
* config/loongarch/loongarch.md (cpymemsi): Pass the alignment
|
||||
to loongarch_expand_block_move.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
PR target/109465
|
||||
* gcc.target/loongarch/pr109465-1.c: New test.
|
||||
* gcc.target/loongarch/pr109465-2.c: New test.
|
||||
* gcc.target/loongarch/pr109465-3.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch-protos.h | 2 +-
|
||||
gcc/config/loongarch/loongarch.cc | 95 +++++++++++--------
|
||||
gcc/config/loongarch/loongarch.h | 10 +-
|
||||
gcc/config/loongarch/loongarch.md | 3 +-
|
||||
.../gcc.target/loongarch/pr109465-1.c | 9 ++
|
||||
.../gcc.target/loongarch/pr109465-2.c | 9 ++
|
||||
.../gcc.target/loongarch/pr109465-3.c | 12 +++
|
||||
7 files changed, 91 insertions(+), 49 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/pr109465-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/pr109465-2.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/pr109465-3.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
|
||||
index 0a9b47722..3ac3b5e19 100644
|
||||
--- a/gcc/config/loongarch/loongarch-protos.h
|
||||
+++ b/gcc/config/loongarch/loongarch-protos.h
|
||||
@@ -95,7 +95,7 @@ extern void loongarch_expand_conditional_trap (rtx);
|
||||
#endif
|
||||
extern void loongarch_set_return_address (rtx, rtx);
|
||||
extern bool loongarch_move_by_pieces_p (unsigned HOST_WIDE_INT, unsigned int);
|
||||
-extern bool loongarch_expand_block_move (rtx, rtx, rtx);
|
||||
+extern bool loongarch_expand_block_move (rtx, rtx, rtx, rtx);
|
||||
extern bool loongarch_do_optimize_block_move_p (void);
|
||||
|
||||
extern bool loongarch_expand_ext_as_unaligned_load (rtx, rtx, HOST_WIDE_INT,
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index 233dddbac..d3c6f22ad 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -4456,41 +4456,46 @@ loongarch_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
|
||||
Assume that the areas do not overlap. */
|
||||
|
||||
static void
|
||||
-loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length)
|
||||
+loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length,
|
||||
+ HOST_WIDE_INT delta)
|
||||
{
|
||||
- HOST_WIDE_INT offset, delta;
|
||||
- unsigned HOST_WIDE_INT bits;
|
||||
+ HOST_WIDE_INT offs, delta_cur;
|
||||
int i;
|
||||
machine_mode mode;
|
||||
rtx *regs;
|
||||
|
||||
- bits = MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest)));
|
||||
-
|
||||
- mode = int_mode_for_size (bits, 0).require ();
|
||||
- delta = bits / BITS_PER_UNIT;
|
||||
+ /* Calculate how many registers we'll need for the block move.
|
||||
+ We'll emit length / delta move operations with delta as the size
|
||||
+ first. Then we may still have length % delta bytes not copied.
|
||||
+ We handle these remaining bytes by move operations with smaller
|
||||
+ (halved) sizes. For example, if length = 21 and delta = 8, we'll
|
||||
+ emit two ld.d/st.d pairs, one ld.w/st.w pair, and one ld.b/st.b
|
||||
+ pair. For each load/store pair we use a dedicated register to keep
|
||||
+ the pipeline as populated as possible. */
|
||||
+ HOST_WIDE_INT num_reg = length / delta;
|
||||
+ for (delta_cur = delta / 2; delta_cur != 0; delta_cur /= 2)
|
||||
+ num_reg += !!(length & delta_cur);
|
||||
|
||||
/* Allocate a buffer for the temporary registers. */
|
||||
- regs = XALLOCAVEC (rtx, length / delta);
|
||||
+ regs = XALLOCAVEC (rtx, num_reg);
|
||||
|
||||
- /* Load as many BITS-sized chunks as possible. Use a normal load if
|
||||
- the source has enough alignment, otherwise use left/right pairs. */
|
||||
- for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
|
||||
+ for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2)
|
||||
{
|
||||
- regs[i] = gen_reg_rtx (mode);
|
||||
- loongarch_emit_move (regs[i], adjust_address (src, mode, offset));
|
||||
- }
|
||||
+ mode = int_mode_for_size (delta_cur * BITS_PER_UNIT, 0).require ();
|
||||
|
||||
- for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
|
||||
- loongarch_emit_move (adjust_address (dest, mode, offset), regs[i]);
|
||||
+ for (; offs + delta_cur <= length; offs += delta_cur, i++)
|
||||
+ {
|
||||
+ regs[i] = gen_reg_rtx (mode);
|
||||
+ loongarch_emit_move (regs[i], adjust_address (src, mode, offs));
|
||||
+ }
|
||||
+ }
|
||||
|
||||
- /* Mop up any left-over bytes. */
|
||||
- if (offset < length)
|
||||
+ for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2)
|
||||
{
|
||||
- src = adjust_address (src, BLKmode, offset);
|
||||
- dest = adjust_address (dest, BLKmode, offset);
|
||||
- move_by_pieces (dest, src, length - offset,
|
||||
- MIN (MEM_ALIGN (src), MEM_ALIGN (dest)),
|
||||
- (enum memop_ret) 0);
|
||||
+ mode = int_mode_for_size (delta_cur * BITS_PER_UNIT, 0).require ();
|
||||
+
|
||||
+ for (; offs + delta_cur <= length; offs += delta_cur, i++)
|
||||
+ loongarch_emit_move (adjust_address (dest, mode, offs), regs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4520,10 +4525,11 @@ loongarch_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
|
||||
|
||||
static void
|
||||
loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
|
||||
- HOST_WIDE_INT bytes_per_iter)
|
||||
+ HOST_WIDE_INT align)
|
||||
{
|
||||
rtx_code_label *label;
|
||||
rtx src_reg, dest_reg, final_src, test;
|
||||
+ HOST_WIDE_INT bytes_per_iter = align * LARCH_MAX_MOVE_OPS_PER_LOOP_ITER;
|
||||
HOST_WIDE_INT leftover;
|
||||
|
||||
leftover = length % bytes_per_iter;
|
||||
@@ -4543,7 +4549,7 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
|
||||
emit_label (label);
|
||||
|
||||
/* Emit the loop body. */
|
||||
- loongarch_block_move_straight (dest, src, bytes_per_iter);
|
||||
+ loongarch_block_move_straight (dest, src, bytes_per_iter, align);
|
||||
|
||||
/* Move on to the next block. */
|
||||
loongarch_emit_move (src_reg,
|
||||
@@ -4560,7 +4566,7 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
|
||||
|
||||
/* Mop up any left-over bytes. */
|
||||
if (leftover)
|
||||
- loongarch_block_move_straight (dest, src, leftover);
|
||||
+ loongarch_block_move_straight (dest, src, leftover, align);
|
||||
else
|
||||
/* Temporary fix for PR79150. */
|
||||
emit_insn (gen_nop ());
|
||||
@@ -4570,25 +4576,32 @@ loongarch_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
|
||||
memory reference SRC to memory reference DEST. */
|
||||
|
||||
bool
|
||||
-loongarch_expand_block_move (rtx dest, rtx src, rtx length)
|
||||
+loongarch_expand_block_move (rtx dest, rtx src, rtx r_length, rtx r_align)
|
||||
{
|
||||
- int max_move_bytes = LARCH_MAX_MOVE_BYTES_STRAIGHT;
|
||||
+ if (!CONST_INT_P (r_length))
|
||||
+ return false;
|
||||
+
|
||||
+ HOST_WIDE_INT length = INTVAL (r_length);
|
||||
+ if (length > loongarch_max_inline_memcpy_size)
|
||||
+ return false;
|
||||
+
|
||||
+ HOST_WIDE_INT align = INTVAL (r_align);
|
||||
+
|
||||
+ if (!TARGET_STRICT_ALIGN || align > UNITS_PER_WORD)
|
||||
+ align = UNITS_PER_WORD;
|
||||
|
||||
- if (CONST_INT_P (length)
|
||||
- && INTVAL (length) <= loongarch_max_inline_memcpy_size)
|
||||
+ if (length <= align * LARCH_MAX_MOVE_OPS_STRAIGHT)
|
||||
{
|
||||
- if (INTVAL (length) <= max_move_bytes)
|
||||
- {
|
||||
- loongarch_block_move_straight (dest, src, INTVAL (length));
|
||||
- return true;
|
||||
- }
|
||||
- else if (optimize)
|
||||
- {
|
||||
- loongarch_block_move_loop (dest, src, INTVAL (length),
|
||||
- LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER);
|
||||
- return true;
|
||||
- }
|
||||
+ loongarch_block_move_straight (dest, src, length, align);
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ if (optimize)
|
||||
+ {
|
||||
+ loongarch_block_move_loop (dest, src, length, align);
|
||||
+ return true;
|
||||
}
|
||||
+
|
||||
return false;
|
||||
}
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
|
||||
index 9d3cd9ca0..af24bfa01 100644
|
||||
--- a/gcc/config/loongarch/loongarch.h
|
||||
+++ b/gcc/config/loongarch/loongarch.h
|
||||
@@ -1062,13 +1062,13 @@ typedef struct {
|
||||
|
||||
/* The maximum number of bytes that can be copied by one iteration of
|
||||
a cpymemsi loop; see loongarch_block_move_loop. */
|
||||
-#define LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER (UNITS_PER_WORD * 4)
|
||||
+#define LARCH_MAX_MOVE_OPS_PER_LOOP_ITER 4
|
||||
|
||||
/* The maximum number of bytes that can be copied by a straight-line
|
||||
implementation of cpymemsi; see loongarch_block_move_straight. We want
|
||||
to make sure that any loop-based implementation will iterate at
|
||||
least twice. */
|
||||
-#define LARCH_MAX_MOVE_BYTES_STRAIGHT (LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER * 2)
|
||||
+#define LARCH_MAX_MOVE_OPS_STRAIGHT (LARCH_MAX_MOVE_OPS_PER_LOOP_ITER * 2)
|
||||
|
||||
/* The base cost of a memcpy call, for MOVE_RATIO and friends. These
|
||||
values were determined experimentally by benchmarking with CSiBE.
|
||||
@@ -1076,7 +1076,7 @@ typedef struct {
|
||||
#define LARCH_CALL_RATIO 8
|
||||
|
||||
/* Any loop-based implementation of cpymemsi will have at least
|
||||
- LARCH_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD memory-to-memory
|
||||
+ LARCH_MAX_MOVE_OPS_PER_LOOP_ITER memory-to-memory
|
||||
moves, so allow individual copies of fewer elements.
|
||||
|
||||
When cpymemsi is not available, use a value approximating
|
||||
@@ -1087,9 +1087,7 @@ typedef struct {
|
||||
value of LARCH_CALL_RATIO to take that into account. */
|
||||
|
||||
#define MOVE_RATIO(speed) \
|
||||
- (HAVE_cpymemsi \
|
||||
- ? LARCH_MAX_MOVE_BYTES_PER_LOOP_ITER / UNITS_PER_WORD \
|
||||
- : CLEAR_RATIO (speed) / 2)
|
||||
+ (HAVE_cpymemsi ? LARCH_MAX_MOVE_OPS_PER_LOOP_ITER : CLEAR_RATIO (speed) / 2)
|
||||
|
||||
/* For CLEAR_RATIO, when optimizing for size, give a better estimate
|
||||
of the length of a memset call, but use the default otherwise. */
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index b2f7c7f78..b23248c33 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -2488,7 +2488,8 @@
|
||||
""
|
||||
{
|
||||
if (TARGET_DO_OPTIMIZE_BLOCK_MOVE_P
|
||||
- && loongarch_expand_block_move (operands[0], operands[1], operands[2]))
|
||||
+ && loongarch_expand_block_move (operands[0], operands[1],
|
||||
+ operands[2], operands[3]))
|
||||
DONE;
|
||||
else
|
||||
FAIL;
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/pr109465-1.c b/gcc/testsuite/gcc.target/loongarch/pr109465-1.c
|
||||
new file mode 100644
|
||||
index 000000000..4cd35d139
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/pr109465-1.c
|
||||
@@ -0,0 +1,9 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -mabi=lp64d -mno-strict-align" } */
|
||||
+/* { dg-final { scan-assembler-times "st\\.d|stptr\\.d" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "st\\.w|stptr\\.w" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "st\\.h" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "st\\.b" 1 } } */
|
||||
+
|
||||
+extern char a[], b[];
|
||||
+void test() { __builtin_memcpy(a, b, 15); }
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/pr109465-2.c b/gcc/testsuite/gcc.target/loongarch/pr109465-2.c
|
||||
new file mode 100644
|
||||
index 000000000..703eb951c
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/pr109465-2.c
|
||||
@@ -0,0 +1,9 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -mabi=lp64d -mstrict-align" } */
|
||||
+/* { dg-final { scan-assembler-times "st\\.d|stptr\\.d" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "st\\.w|stptr\\.w" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "st\\.h" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "st\\.b" 1 } } */
|
||||
+
|
||||
+extern long a[], b[];
|
||||
+void test() { __builtin_memcpy(a, b, 15); }
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/pr109465-3.c b/gcc/testsuite/gcc.target/loongarch/pr109465-3.c
|
||||
new file mode 100644
|
||||
index 000000000..d6a80659b
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/pr109465-3.c
|
||||
@@ -0,0 +1,12 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -mabi=lp64d -mstrict-align" } */
|
||||
+
|
||||
+/* The loop body (executed three times) contains 4 st.b, followed by 3 st.b after the loop */
|
||||
+/* { dg-final { scan-assembler-times "st\\.b" 7 } } */
|
||||
+
|
||||
+/* { dg-final { scan-assembler-not "st\\.h" } } */
|
||||
+/* { dg-final { scan-assembler-not "st\\.w|stptr\\.w" } } */
|
||||
+/* { dg-final { scan-assembler-not "st\\.d|stptr\\.d" } } */
|
||||
+
|
||||
+extern char a[], b[];
|
||||
+void test() { __builtin_memcpy(a, b, 15); }
|
||||
--
|
||||
2.33.0
|
||||
|
||||
291
LoongArch-Libitm-add-LoongArch-support.patch
Normal file
291
LoongArch-Libitm-add-LoongArch-support.patch
Normal file
@ -0,0 +1,291 @@
|
||||
From 7f9f1dd3c87cffeab58150997e22e8fff707646b Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Mon, 26 Sep 2022 09:42:51 +0800
|
||||
Subject: [PATCH 019/124] LoongArch: Libitm add LoongArch support.
|
||||
|
||||
Co-Authored-By: Yang Yujie <yangyujie@loongson.cn>
|
||||
|
||||
libitm/ChangeLog:
|
||||
|
||||
* configure.tgt: Add loongarch support.
|
||||
* config/loongarch/asm.h: New file.
|
||||
* config/loongarch/sjlj.S: New file.
|
||||
* config/loongarch/target.h: New file.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
libitm/config/loongarch/asm.h | 54 +++++++++++++
|
||||
libitm/config/loongarch/sjlj.S | 127 +++++++++++++++++++++++++++++++
|
||||
libitm/config/loongarch/target.h | 50 ++++++++++++
|
||||
libitm/configure.tgt | 2 +
|
||||
4 files changed, 233 insertions(+)
|
||||
create mode 100644 libitm/config/loongarch/asm.h
|
||||
create mode 100644 libitm/config/loongarch/sjlj.S
|
||||
create mode 100644 libitm/config/loongarch/target.h
|
||||
|
||||
diff --git a/libitm/config/loongarch/asm.h b/libitm/config/loongarch/asm.h
|
||||
new file mode 100644
|
||||
index 000000000..a8e3304bb
|
||||
--- /dev/null
|
||||
+++ b/libitm/config/loongarch/asm.h
|
||||
@@ -0,0 +1,54 @@
|
||||
+/* Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
+ Contributed by Loongson Co. Ltd.
|
||||
+
|
||||
+ This file is part of the GNU Transactional Memory Library (libitm).
|
||||
+
|
||||
+ Libitm is free software; you can redistribute it and/or modify it
|
||||
+ under the terms of the GNU General Public License as published by
|
||||
+ the Free Software Foundation; either version 3 of the License, or
|
||||
+ (at your option) any later version.
|
||||
+
|
||||
+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
+ more details.
|
||||
+
|
||||
+ Under Section 7 of GPL version 3, you are granted additional
|
||||
+ permissions described in the GCC Runtime Library Exception, version
|
||||
+ 3.1, as published by the Free Software Foundation.
|
||||
+
|
||||
+ You should have received a copy of the GNU General Public License and
|
||||
+ a copy of the GCC Runtime Library Exception along with this program;
|
||||
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#ifndef _LA_ASM_H
|
||||
+#define _LA_ASM_H
|
||||
+
|
||||
+#if defined(__loongarch_lp64)
|
||||
+# define GPR_L ld.d
|
||||
+# define GPR_S st.d
|
||||
+# define SZ_GPR 8
|
||||
+# define ADDSP(si) addi.d $sp, $sp, si
|
||||
+#elif defined(__loongarch64_ilp32)
|
||||
+# define GPR_L ld.w
|
||||
+# define GPR_S st.w
|
||||
+# define SZ_GPR 4
|
||||
+# define ADDSP(si) addi.w $sp, $sp, si
|
||||
+#else
|
||||
+# error Unsupported GPR size (must be 64-bit or 32-bit).
|
||||
+#endif
|
||||
+
|
||||
+#if defined(__loongarch_double_float)
|
||||
+# define FPR_L fld.d
|
||||
+# define FPR_S fst.d
|
||||
+# define SZ_FPR 8
|
||||
+#elif defined(__loongarch_single_float)
|
||||
+# define FPR_L fld.s
|
||||
+# define FPR_S fst.s
|
||||
+# define SZ_FPR 4
|
||||
+#else
|
||||
+# define SZ_FPR 0
|
||||
+#endif
|
||||
+
|
||||
+#endif /* _LA_ASM_H */
|
||||
diff --git a/libitm/config/loongarch/sjlj.S b/libitm/config/loongarch/sjlj.S
|
||||
new file mode 100644
|
||||
index 000000000..a5f9fadde
|
||||
--- /dev/null
|
||||
+++ b/libitm/config/loongarch/sjlj.S
|
||||
@@ -0,0 +1,127 @@
|
||||
+/* Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
+ Contributed by Loongson Co. Ltd.
|
||||
+
|
||||
+ This file is part of the GNU Transactional Memory Library (libitm).
|
||||
+
|
||||
+ Libitm is free software; you can redistribute it and/or modify it
|
||||
+ under the terms of the GNU General Public License as published by
|
||||
+ the Free Software Foundation; either version 3 of the License, or
|
||||
+ (at your option) any later version.
|
||||
+
|
||||
+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
+
|
||||
+ Under Section 7 of GPL version 3, you are granted additional
|
||||
+ permissions described in the GCC Runtime Library Exception, version
|
||||
+ 3.1, as published by the Free Software Foundation.
|
||||
+
|
||||
+ You should have received a copy of the GNU General Public License and
|
||||
+ a copy of the GCC Runtime Library Exception along with this program;
|
||||
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include "asmcfi.h"
|
||||
+#include "asm.h"
|
||||
+
|
||||
+ .text
|
||||
+ .align 2
|
||||
+ .global _ITM_beginTransaction
|
||||
+ .type _ITM_beginTransaction, @function
|
||||
+
|
||||
+_ITM_beginTransaction:
|
||||
+ cfi_startproc
|
||||
+ move $r5, $sp
|
||||
+ ADDSP(-(12*SZ_GPR+8*SZ_FPR))
|
||||
+ cfi_adjust_cfa_offset(12*SZ_GPR+8*SZ_FPR)
|
||||
+
|
||||
+ /* Frame Pointer */
|
||||
+ GPR_S $fp, $sp, 0*SZ_GPR
|
||||
+ cfi_rel_offset(22, 0)
|
||||
+
|
||||
+ /* Return Address */
|
||||
+ GPR_S $r1, $sp, 1*SZ_GPR
|
||||
+ cfi_rel_offset(1, SZ_GPR)
|
||||
+
|
||||
+ /* Caller's $sp */
|
||||
+ GPR_S $r5, $sp, 2*SZ_GPR
|
||||
+
|
||||
+ /* Callee-saved scratch GPRs (r23-r31) */
|
||||
+ GPR_S $s0, $sp, 3*SZ_GPR
|
||||
+ GPR_S $s1, $sp, 4*SZ_GPR
|
||||
+ GPR_S $s2, $sp, 5*SZ_GPR
|
||||
+ GPR_S $s3, $sp, 6*SZ_GPR
|
||||
+ GPR_S $s4, $sp, 7*SZ_GPR
|
||||
+ GPR_S $s5, $sp, 8*SZ_GPR
|
||||
+ GPR_S $s6, $sp, 9*SZ_GPR
|
||||
+ GPR_S $s7, $sp, 10*SZ_GPR
|
||||
+ GPR_S $s8, $sp, 11*SZ_GPR
|
||||
+
|
||||
+#if !defined(__loongarch_soft_float)
|
||||
+ /* Callee-saved scratch FPRs (f24-f31) */
|
||||
+ FPR_S $f24, $sp, 12*SZ_GPR + 0*SZ_FPR
|
||||
+ FPR_S $f25, $sp, 12*SZ_GPR + 1*SZ_FPR
|
||||
+ FPR_S $f26, $sp, 12*SZ_GPR + 2*SZ_FPR
|
||||
+ FPR_S $f27, $sp, 12*SZ_GPR + 3*SZ_FPR
|
||||
+ FPR_S $f28, $sp, 12*SZ_GPR + 4*SZ_FPR
|
||||
+ FPR_S $f29, $sp, 12*SZ_GPR + 5*SZ_FPR
|
||||
+ FPR_S $f30, $sp, 12*SZ_GPR + 6*SZ_FPR
|
||||
+ FPR_S $f31, $sp, 12*SZ_GPR + 7*SZ_FPR
|
||||
+#endif
|
||||
+ move $fp, $sp
|
||||
+
|
||||
+ /* Invoke GTM_begin_transaction with the struct we've just built. */
|
||||
+ move $r5, $sp
|
||||
+ bl %plt(GTM_begin_transaction)
|
||||
+
|
||||
+ /* Return. (no call-saved scratch reg needs to be restored here) */
|
||||
+ GPR_L $fp, $sp, 0*SZ_GPR
|
||||
+ cfi_restore(22)
|
||||
+ GPR_L $r1, $sp, 1*SZ_GPR
|
||||
+ cfi_restore(1)
|
||||
+
|
||||
+ ADDSP(12*SZ_GPR+8*SZ_FPR)
|
||||
+ cfi_adjust_cfa_offset(-(12*SZ_GPR+8*SZ_FPR))
|
||||
+
|
||||
+ jr $r1
|
||||
+ cfi_endproc
|
||||
+ .size _ITM_beginTransaction, . - _ITM_beginTransaction
|
||||
+
|
||||
+ .align 2
|
||||
+ .global GTM_longjmp
|
||||
+ .hidden GTM_longjmp
|
||||
+ .type GTM_longjmp, @function
|
||||
+
|
||||
+GTM_longjmp:
|
||||
+ cfi_startproc
|
||||
+ GPR_L $s0, $r5, 3*SZ_GPR
|
||||
+ GPR_L $s1, $r5, 4*SZ_GPR
|
||||
+ GPR_L $s2, $r5, 5*SZ_GPR
|
||||
+ GPR_L $s3, $r5, 6*SZ_GPR
|
||||
+ GPR_L $s4, $r5, 7*SZ_GPR
|
||||
+ GPR_L $s5, $r5, 8*SZ_GPR
|
||||
+ GPR_L $s6, $r5, 9*SZ_GPR
|
||||
+ GPR_L $s7, $r5, 10*SZ_GPR
|
||||
+ GPR_L $s8, $r5, 11*SZ_GPR
|
||||
+
|
||||
+ FPR_L $f24, $r5, 12*SZ_GPR + 0*SZ_FPR
|
||||
+ FPR_L $f25, $r5, 12*SZ_GPR + 1*SZ_FPR
|
||||
+ FPR_L $f26, $r5, 12*SZ_GPR + 2*SZ_FPR
|
||||
+ FPR_L $f27, $r5, 12*SZ_GPR + 3*SZ_FPR
|
||||
+ FPR_L $f28, $r5, 12*SZ_GPR + 4*SZ_FPR
|
||||
+ FPR_L $f29, $r5, 12*SZ_GPR + 5*SZ_FPR
|
||||
+ FPR_L $f30, $r5, 12*SZ_GPR + 6*SZ_FPR
|
||||
+ FPR_L $f31, $r5, 12*SZ_GPR + 7*SZ_FPR
|
||||
+
|
||||
+ GPR_L $r7, $r5, 2*SZ_GPR
|
||||
+ GPR_L $fp, $r5, 0*SZ_GPR
|
||||
+ GPR_L $r1, $r5, 1*SZ_GPR
|
||||
+ cfi_def_cfa(5, 0)
|
||||
+ move $sp, $r7
|
||||
+ jr $r1
|
||||
+ cfi_endproc
|
||||
+ .size GTM_longjmp, . - GTM_longjmp
|
||||
+
|
||||
+#ifdef __linux__
|
||||
+.section .note.GNU-stack, "", @progbits
|
||||
+#endif
|
||||
diff --git a/libitm/config/loongarch/target.h b/libitm/config/loongarch/target.h
|
||||
new file mode 100644
|
||||
index 000000000..0c5cf3ada
|
||||
--- /dev/null
|
||||
+++ b/libitm/config/loongarch/target.h
|
||||
@@ -0,0 +1,50 @@
|
||||
+/* Copyright (C) 2022 Free Software Foundation, Inc.
|
||||
+ Contributed by Loongson Co. Ltd.
|
||||
+
|
||||
+ This file is part of the GNU Transactional Memory Library (libitm).
|
||||
+
|
||||
+ Libitm is free software; you can redistribute it and/or modify it
|
||||
+ under the terms of the GNU General Public License as published by
|
||||
+ the Free Software Foundation; either version 3 of the License, or
|
||||
+ (at your option) any later version.
|
||||
+
|
||||
+ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
+ more details.
|
||||
+
|
||||
+ Under Section 7 of GPL version 3, you are granted additional
|
||||
+ permissions described in the GCC Runtime Library Exception, version
|
||||
+ 3.1, as published by the Free Software Foundation.
|
||||
+
|
||||
+ You should have received a copy of the GNU General Public License and
|
||||
+ a copy of the GCC Runtime Library Exception along with this program;
|
||||
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
+ <http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+namespace GTM HIDDEN {
|
||||
+
|
||||
+typedef struct gtm_jmpbuf
|
||||
+ {
|
||||
+ long int fp; /* Frame Pointer: r22 */
|
||||
+ long int pc; /* Return Address: r1 */
|
||||
+ void *cfa; /* CFA: r3 */
|
||||
+ long int gpr[9]; /* Callee-saved scratch GPRs: r23(s0)-r31(s8) */
|
||||
+
|
||||
+ /* Callee-saved scratch FPRs: f24-f31 */
|
||||
+#if defined(__loongarch_double_float)
|
||||
+ double fpr[8];
|
||||
+#elif defined(__loongarch_single_float)
|
||||
+ float fpr[8];
|
||||
+#endif
|
||||
+ } gtm_jmpbuf;
|
||||
+
|
||||
+#define HW_CACHELINE_SIZE 128
|
||||
+
|
||||
+static inline void
|
||||
+cpu_relax (void)
|
||||
+{
|
||||
+ __asm__ volatile ("" : : : "memory");
|
||||
+}
|
||||
+
|
||||
+} // namespace GTM
|
||||
diff --git a/libitm/configure.tgt b/libitm/configure.tgt
|
||||
index 06e90973e..4c0e78cff 100644
|
||||
--- a/libitm/configure.tgt
|
||||
+++ b/libitm/configure.tgt
|
||||
@@ -80,6 +80,8 @@ EOF
|
||||
ARCH=x86
|
||||
;;
|
||||
|
||||
+ loongarch*) ARCH=loongarch ;;
|
||||
+
|
||||
sh*) ARCH=sh ;;
|
||||
|
||||
sparc)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
37
LoongArch-Modify-the-output-message-string-of-the-wa.patch
Normal file
37
LoongArch-Modify-the-output-message-string-of-the-wa.patch
Normal file
@ -0,0 +1,37 @@
|
||||
From 83d6cfbbdc41766af9d7941d00204cc0f26ff40c Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Tue, 26 Jul 2022 21:03:52 +0800
|
||||
Subject: [PATCH 005/124] LoongArch: Modify the output message string of the
|
||||
warning.
|
||||
|
||||
Fix bug for "error: spurious trailing punctuation sequence '.' in format [-Werror=format-diag]".
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch-opts.cc: Modify the output message string
|
||||
of the warning.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch-opts.cc | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
|
||||
index fc477bfd4..3f70943de 100644
|
||||
--- a/gcc/config/loongarch/loongarch-opts.cc
|
||||
+++ b/gcc/config/loongarch/loongarch-opts.cc
|
||||
@@ -378,8 +378,8 @@ fallback:
|
||||
t.cmodel = constrained.cmodel ? opt_cmodel : CMODEL_NORMAL;
|
||||
if (t.cmodel != CMODEL_NORMAL)
|
||||
{
|
||||
- warning (0, "%qs is not supported, now cmodel is set to 'normal'.",
|
||||
- loongarch_cmodel_strings[t.cmodel]);
|
||||
+ warning (0, "%qs is not supported, now cmodel is set to %qs",
|
||||
+ loongarch_cmodel_strings[t.cmodel], "normal");
|
||||
t.cmodel = CMODEL_NORMAL;
|
||||
}
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
445
LoongArch-Optimize-additions-with-immediates.patch
Normal file
445
LoongArch-Optimize-additions-with-immediates.patch
Normal file
@ -0,0 +1,445 @@
|
||||
From a31baa1e437fa4acedfaf03db91c1d6e5ce78013 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Sun, 2 Apr 2023 21:37:49 +0800
|
||||
Subject: [PATCH 041/124] LoongArch: Optimize additions with immediates
|
||||
|
||||
1. Use addu16i.d for TARGET_64BIT and suitable immediates.
|
||||
2. Split one addition with immediate into two addu16i.d or addi.{d/w}
|
||||
instructions if possible. This can avoid using a temp register without
|
||||
increasing the count of instructions.
|
||||
|
||||
Inspired by https://reviews.llvm.org/D143710 and
|
||||
https://reviews.llvm.org/D147222.
|
||||
|
||||
Bootstrapped and regtested on loongarch64-linux-gnu. Ok for GCC 14?
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch-protos.h
|
||||
(loongarch_addu16i_imm12_operand_p): New function prototype.
|
||||
(loongarch_split_plus_constant): Likewise.
|
||||
* config/loongarch/loongarch.cc
|
||||
(loongarch_addu16i_imm12_operand_p): New function.
|
||||
(loongarch_split_plus_constant): Likewise.
|
||||
* config/loongarch/loongarch.h (ADDU16I_OPERAND): New macro.
|
||||
(DUAL_IMM12_OPERAND): Likewise.
|
||||
(DUAL_ADDU16I_OPERAND): Likewise.
|
||||
* config/loongarch/constraints.md (La, Lb, Lc, Ld, Le): New
|
||||
constraint.
|
||||
* config/loongarch/predicates.md (const_dual_imm12_operand): New
|
||||
predicate.
|
||||
(const_addu16i_operand): Likewise.
|
||||
(const_addu16i_imm12_di_operand): Likewise.
|
||||
(const_addu16i_imm12_si_operand): Likewise.
|
||||
(plus_di_operand): Likewise.
|
||||
(plus_si_operand): Likewise.
|
||||
(plus_si_extend_operand): Likewise.
|
||||
* config/loongarch/loongarch.md (add<mode>3): Convert to
|
||||
define_insn_and_split. Use plus_<mode>_operand predicate
|
||||
instead of arith_operand. Add alternatives for La, Lb, Lc, Ld,
|
||||
and Le constraints.
|
||||
(*addsi3_extended): Convert to define_insn_and_split. Use
|
||||
plus_si_extend_operand instead of arith_operand. Add
|
||||
alternatives for La and Le constraints.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/add-const.c: New test.
|
||||
* gcc.target/loongarch/stack-check-cfa-1.c: Adjust for stack
|
||||
frame size change.
|
||||
* gcc.target/loongarch/stack-check-cfa-2.c: Likewise.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/constraints.md | 46 ++++++++++++-
|
||||
gcc/config/loongarch/loongarch-protos.h | 2 +
|
||||
gcc/config/loongarch/loongarch.cc | 44 +++++++++++++
|
||||
gcc/config/loongarch/loongarch.h | 19 ++++++
|
||||
gcc/config/loongarch/loongarch.md | 66 +++++++++++++++----
|
||||
gcc/config/loongarch/predicates.md | 36 ++++++++++
|
||||
.../gcc.target/loongarch/add-const.c | 45 +++++++++++++
|
||||
.../gcc.target/loongarch/stack-check-cfa-1.c | 2 +-
|
||||
.../gcc.target/loongarch/stack-check-cfa-2.c | 2 +-
|
||||
9 files changed, 246 insertions(+), 16 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/add-const.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
|
||||
index 46f7f63ae..25f3cda35 100644
|
||||
--- a/gcc/config/loongarch/constraints.md
|
||||
+++ b/gcc/config/loongarch/constraints.md
|
||||
@@ -60,7 +60,22 @@
|
||||
;; "I" "A signed 12-bit constant (for arithmetic instructions)."
|
||||
;; "J" "Integer zero."
|
||||
;; "K" "An unsigned 12-bit constant (for logic instructions)."
|
||||
-;; "L" <-----unused
|
||||
+;; "L" -
|
||||
+;; "La"
|
||||
+;; "A signed constant in [-4096, -2048) or (2047, 4094]."
|
||||
+;; "Lb"
|
||||
+;; "A signed 32-bit constant and low 16-bit is zero, which can be
|
||||
+;; added onto a register with addu16i.d. It matches nothing if
|
||||
+;; the addu16i.d instruction is not available."
|
||||
+;; "Lc"
|
||||
+;; "A signed 64-bit constant can be expressed as Lb + I, but not a
|
||||
+;; single Lb or I."
|
||||
+;; "Ld"
|
||||
+;; "A signed 64-bit constant can be expressed as Lb + Lb, but not a
|
||||
+;; single Lb."
|
||||
+;; "Le"
|
||||
+;; "A signed 32-bit constant can be expressed as Lb + I, but not a
|
||||
+;; single Lb or I."
|
||||
;; "M" <-----unused
|
||||
;; "N" <-----unused
|
||||
;; "O" <-----unused
|
||||
@@ -170,6 +185,35 @@
|
||||
(and (match_code "const_int")
|
||||
(match_test "IMM12_OPERAND_UNSIGNED (ival)")))
|
||||
|
||||
+(define_constraint "La"
|
||||
+ "A signed constant in [-4096, -2048) or (2047, 4094]."
|
||||
+ (and (match_code "const_int")
|
||||
+ (match_test "DUAL_IMM12_OPERAND (ival)")))
|
||||
+
|
||||
+(define_constraint "Lb"
|
||||
+ "A signed 32-bit constant and low 16-bit is zero, which can be added
|
||||
+ onto a register with addu16i.d."
|
||||
+ (and (match_code "const_int")
|
||||
+ (match_test "ADDU16I_OPERAND (ival)")))
|
||||
+
|
||||
+(define_constraint "Lc"
|
||||
+ "A signed 64-bit constant can be expressed as Lb + I, but not a single Lb
|
||||
+ or I."
|
||||
+ (and (match_code "const_int")
|
||||
+ (match_test "loongarch_addu16i_imm12_operand_p (ival, DImode)")))
|
||||
+
|
||||
+(define_constraint "Ld"
|
||||
+ "A signed 64-bit constant can be expressed as Lb + Lb, but not a single
|
||||
+ Lb."
|
||||
+ (and (match_code "const_int")
|
||||
+ (match_test "DUAL_ADDU16I_OPERAND (ival)")))
|
||||
+
|
||||
+(define_constraint "Le"
|
||||
+ "A signed 32-bit constant can be expressed as Lb + I, but not a single Lb
|
||||
+ or I."
|
||||
+ (and (match_code "const_int")
|
||||
+ (match_test "loongarch_addu16i_imm12_operand_p (ival, SImode)")))
|
||||
+
|
||||
(define_constraint "Yd"
|
||||
"@internal
|
||||
A constant @code{move_operand} that can be safely loaded using
|
||||
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
|
||||
index 77b221724..0a9b47722 100644
|
||||
--- a/gcc/config/loongarch/loongarch-protos.h
|
||||
+++ b/gcc/config/loongarch/loongarch-protos.h
|
||||
@@ -83,6 +83,8 @@ extern rtx loongarch_legitimize_call_address (rtx);
|
||||
extern rtx loongarch_subword (rtx, bool);
|
||||
extern bool loongarch_split_move_p (rtx, rtx);
|
||||
extern void loongarch_split_move (rtx, rtx, rtx);
|
||||
+extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
|
||||
+extern void loongarch_split_plus_constant (rtx *, machine_mode);
|
||||
extern const char *loongarch_output_move (rtx, rtx);
|
||||
extern bool loongarch_cfun_has_cprestore_slot_p (void);
|
||||
#ifdef RTX_CODE
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index 1a4686f03..233dddbac 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -3753,6 +3753,50 @@ loongarch_split_move (rtx dest, rtx src, rtx insn_)
|
||||
}
|
||||
}
|
||||
|
||||
+/* Check if adding an integer constant value for a specific mode can be
|
||||
+ performed with an addu16i.d instruction and an addi.{w/d}
|
||||
+ instruction. */
|
||||
+
|
||||
+bool
|
||||
+loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT value, machine_mode mode)
|
||||
+{
|
||||
+ /* Not necessary, but avoid unnecessary calculation if !TARGET_64BIT. */
|
||||
+ if (!TARGET_64BIT)
|
||||
+ return false;
|
||||
+
|
||||
+ if ((value & 0xffff) == 0)
|
||||
+ return false;
|
||||
+
|
||||
+ if (IMM12_OPERAND (value))
|
||||
+ return false;
|
||||
+
|
||||
+ value = (value & ~HWIT_UC_0xFFF) + ((value & 0x800) << 1);
|
||||
+ return ADDU16I_OPERAND (trunc_int_for_mode (value, mode));
|
||||
+}
|
||||
+
|
||||
+/* Split one integer constant op[0] into two (op[1] and op[2]) for constant
|
||||
+ plus operation in a specific mode. The split constants can be added
|
||||
+ onto a register with a single instruction (addi.{d/w} or addu16i.d). */
|
||||
+
|
||||
+void
|
||||
+loongarch_split_plus_constant (rtx *op, machine_mode mode)
|
||||
+{
|
||||
+ HOST_WIDE_INT v = INTVAL (op[0]), a;
|
||||
+
|
||||
+ if (DUAL_IMM12_OPERAND (v))
|
||||
+ a = (v > 0 ? 2047 : -2048);
|
||||
+ else if (loongarch_addu16i_imm12_operand_p (v, mode))
|
||||
+ a = (v & ~HWIT_UC_0xFFF) + ((v & 0x800) << 1);
|
||||
+ else if (mode == DImode && DUAL_ADDU16I_OPERAND (v))
|
||||
+ a = (v > 0 ? 0x7fff : -0x8000) << 16;
|
||||
+ else
|
||||
+ gcc_unreachable ();
|
||||
+
|
||||
+ op[1] = gen_int_mode (a, mode);
|
||||
+ v = v - (unsigned HOST_WIDE_INT) a;
|
||||
+ op[2] = gen_int_mode (v, mode);
|
||||
+}
|
||||
+
|
||||
/* Return true if a move from SRC to DEST in INSN should be split. */
|
||||
|
||||
static bool
|
||||
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
|
||||
index c6e37b1b4..9d3cd9ca0 100644
|
||||
--- a/gcc/config/loongarch/loongarch.h
|
||||
+++ b/gcc/config/loongarch/loongarch.h
|
||||
@@ -612,6 +612,25 @@ enum reg_class
|
||||
|
||||
#define CONST_LOW_PART(VALUE) ((VALUE) - CONST_HIGH_PART (VALUE))
|
||||
|
||||
+/* True if VALUE can be added onto a register with one addu16i.d
|
||||
+ instruction. */
|
||||
+
|
||||
+#define ADDU16I_OPERAND(VALUE) \
|
||||
+ (TARGET_64BIT && (((VALUE) & 0xffff) == 0 \
|
||||
+ && IMM16_OPERAND ((HOST_WIDE_INT) (VALUE) / 65536)))
|
||||
+
|
||||
+/* True if VALUE can be added onto a register with two addi.{d/w}
|
||||
+ instructions, but not one addi.{d/w} instruction. */
|
||||
+#define DUAL_IMM12_OPERAND(VALUE) \
|
||||
+ (IN_RANGE ((VALUE), -4096, 4094) && !IMM12_OPERAND (VALUE))
|
||||
+
|
||||
+/* True if VALUE can be added onto a register with two addu16i.d
|
||||
+ instructions, but not one addu16i.d instruction. */
|
||||
+#define DUAL_ADDU16I_OPERAND(VALUE) \
|
||||
+ (TARGET_64BIT && (((VALUE) & 0xffff) == 0 \
|
||||
+ && !ADDU16I_OPERAND (VALUE) \
|
||||
+ && IN_RANGE ((VALUE) / 65536, -0x10000, 0xfffe)))
|
||||
+
|
||||
#define IMM12_INT(X) IMM12_OPERAND (INTVAL (X))
|
||||
#define IMM12_INT_UNSIGNED(X) IMM12_OPERAND_UNSIGNED (INTVAL (X))
|
||||
#define LU12I_INT(X) LU12I_OPERAND (INTVAL (X))
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index 833b94753..b2f7c7f78 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -598,24 +598,64 @@
|
||||
[(set_attr "type" "fadd")
|
||||
(set_attr "mode" "<UNITMODE>")])
|
||||
|
||||
-(define_insn "add<mode>3"
|
||||
- [(set (match_operand:GPR 0 "register_operand" "=r,r")
|
||||
- (plus:GPR (match_operand:GPR 1 "register_operand" "r,r")
|
||||
- (match_operand:GPR 2 "arith_operand" "r,I")))]
|
||||
+(define_insn_and_split "add<mode>3"
|
||||
+ [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r")
|
||||
+ (plus:GPR (match_operand:GPR 1 "register_operand" "r,r,r,r,r,r,r")
|
||||
+ (match_operand:GPR 2 "plus_<mode>_operand"
|
||||
+ "r,I,La,Lb,Lc,Ld,Le")))]
|
||||
""
|
||||
- "add%i2.<d>\t%0,%1,%2";
|
||||
+ "@
|
||||
+ add.<d>\t%0,%1,%2
|
||||
+ addi.<d>\t%0,%1,%2
|
||||
+ #
|
||||
+ * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \
|
||||
+ return \"addu16i.d\t%0,%1,%2\";
|
||||
+ #
|
||||
+ #
|
||||
+ #"
|
||||
+ "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
|
||||
+ && !ADDU16I_OPERAND (INTVAL (operands[2]))"
|
||||
+ [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3)))
|
||||
+ (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))]
|
||||
+ {
|
||||
+ loongarch_split_plus_constant (&operands[2], <MODE>mode);
|
||||
+ }
|
||||
[(set_attr "alu_type" "add")
|
||||
- (set_attr "mode" "<MODE>")])
|
||||
-
|
||||
-(define_insn "*addsi3_extended"
|
||||
- [(set (match_operand:DI 0 "register_operand" "=r,r")
|
||||
+ (set_attr "mode" "<MODE>")
|
||||
+ (set_attr "insn_count" "1,1,2,1,2,2,2")
|
||||
+ (set (attr "enabled")
|
||||
+ (cond
|
||||
+ [(match_test "<MODE>mode != DImode && which_alternative == 4")
|
||||
+ (const_string "no")
|
||||
+ (match_test "<MODE>mode != DImode && which_alternative == 5")
|
||||
+ (const_string "no")
|
||||
+ (match_test "<MODE>mode != SImode && which_alternative == 6")
|
||||
+ (const_string "no")]
|
||||
+ (const_string "yes")))])
|
||||
+
|
||||
+(define_insn_and_split "*addsi3_extended"
|
||||
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
|
||||
(sign_extend:DI
|
||||
- (plus:SI (match_operand:SI 1 "register_operand" "r,r")
|
||||
- (match_operand:SI 2 "arith_operand" "r,I"))))]
|
||||
+ (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r")
|
||||
+ (match_operand:SI 2 "plus_si_extend_operand"
|
||||
+ "r,I,La,Le"))))]
|
||||
"TARGET_64BIT"
|
||||
- "add%i2.w\t%0,%1,%2"
|
||||
+ "@
|
||||
+ add.w\t%0,%1,%2
|
||||
+ addi.w\t%0,%1,%2
|
||||
+ #
|
||||
+ #"
|
||||
+ "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2])"
|
||||
+ [(set (subreg:SI (match_dup 0) 0) (plus:SI (match_dup 1) (match_dup 3)))
|
||||
+ (set (match_dup 0)
|
||||
+ (sign_extend:DI (plus:SI (subreg:SI (match_dup 0) 0)
|
||||
+ (match_dup 4))))]
|
||||
+ {
|
||||
+ loongarch_split_plus_constant (&operands[2], SImode);
|
||||
+ }
|
||||
[(set_attr "alu_type" "add")
|
||||
- (set_attr "mode" "SI")])
|
||||
+ (set_attr "mode" "SI")
|
||||
+ (set_attr "insn_count" "1,1,2,2")])
|
||||
|
||||
|
||||
;;
|
||||
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
|
||||
index 3c32b2987..4966d5569 100644
|
||||
--- a/gcc/config/loongarch/predicates.md
|
||||
+++ b/gcc/config/loongarch/predicates.md
|
||||
@@ -39,14 +39,50 @@
|
||||
(and (match_code "const_int")
|
||||
(match_test "IMM12_OPERAND (INTVAL (op))")))
|
||||
|
||||
+(define_predicate "const_dual_imm12_operand"
|
||||
+ (and (match_code "const_int")
|
||||
+ (match_test "DUAL_IMM12_OPERAND (INTVAL (op))")))
|
||||
+
|
||||
(define_predicate "const_imm16_operand"
|
||||
(and (match_code "const_int")
|
||||
(match_test "IMM16_OPERAND (INTVAL (op))")))
|
||||
|
||||
+(define_predicate "const_addu16i_operand"
|
||||
+ (and (match_code "const_int")
|
||||
+ (match_test "ADDU16I_OPERAND (INTVAL (op))")))
|
||||
+
|
||||
+(define_predicate "const_addu16i_imm12_di_operand"
|
||||
+ (and (match_code "const_int")
|
||||
+ (match_test "loongarch_addu16i_imm12_operand_p (INTVAL (op), DImode)")))
|
||||
+
|
||||
+(define_predicate "const_addu16i_imm12_si_operand"
|
||||
+ (and (match_code "const_int")
|
||||
+ (match_test "loongarch_addu16i_imm12_operand_p (INTVAL (op), SImode)")))
|
||||
+
|
||||
+(define_predicate "const_dual_addu16i_operand"
|
||||
+ (and (match_code "const_int")
|
||||
+ (match_test "DUAL_ADDU16I_OPERAND (INTVAL (op))")))
|
||||
+
|
||||
(define_predicate "arith_operand"
|
||||
(ior (match_operand 0 "const_arith_operand")
|
||||
(match_operand 0 "register_operand")))
|
||||
|
||||
+(define_predicate "plus_di_operand"
|
||||
+ (ior (match_operand 0 "arith_operand")
|
||||
+ (match_operand 0 "const_dual_imm12_operand")
|
||||
+ (match_operand 0 "const_addu16i_operand")
|
||||
+ (match_operand 0 "const_addu16i_imm12_di_operand")
|
||||
+ (match_operand 0 "const_dual_addu16i_operand")))
|
||||
+
|
||||
+(define_predicate "plus_si_extend_operand"
|
||||
+ (ior (match_operand 0 "arith_operand")
|
||||
+ (match_operand 0 "const_dual_imm12_operand")
|
||||
+ (match_operand 0 "const_addu16i_imm12_si_operand")))
|
||||
+
|
||||
+(define_predicate "plus_si_operand"
|
||||
+ (ior (match_operand 0 "plus_si_extend_operand")
|
||||
+ (match_operand 0 "const_addu16i_operand")))
|
||||
+
|
||||
(define_predicate "const_immalsl_operand"
|
||||
(and (match_code "const_int")
|
||||
(match_test "IN_RANGE (INTVAL (op), 1, 4)")))
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/add-const.c b/gcc/testsuite/gcc.target/loongarch/add-const.c
|
||||
new file mode 100644
|
||||
index 000000000..7b6a7cb92
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/add-const.c
|
||||
@@ -0,0 +1,45 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O -mabi=lp64d" } */
|
||||
+
|
||||
+/* None of these functions should load the const operand into a temp
|
||||
+ register. */
|
||||
+
|
||||
+/* { dg-final { scan-assembler-not "add\\.[dw]" } } */
|
||||
+
|
||||
+unsigned long f01 (unsigned long x) { return x + 1; }
|
||||
+unsigned long f02 (unsigned long x) { return x - 1; }
|
||||
+unsigned long f03 (unsigned long x) { return x + 2047; }
|
||||
+unsigned long f04 (unsigned long x) { return x + 4094; }
|
||||
+unsigned long f05 (unsigned long x) { return x - 2048; }
|
||||
+unsigned long f06 (unsigned long x) { return x - 4096; }
|
||||
+unsigned long f07 (unsigned long x) { return x + 0x7fff0000; }
|
||||
+unsigned long f08 (unsigned long x) { return x - 0x80000000l; }
|
||||
+unsigned long f09 (unsigned long x) { return x + 0x7fff0000l * 2; }
|
||||
+unsigned long f10 (unsigned long x) { return x - 0x80000000l * 2; }
|
||||
+unsigned long f11 (unsigned long x) { return x + 0x7fff0000 + 0x1; }
|
||||
+unsigned long f12 (unsigned long x) { return x + 0x7fff0000 - 0x1; }
|
||||
+unsigned long f13 (unsigned long x) { return x + 0x7fff0000 + 0x7ff; }
|
||||
+unsigned long f14 (unsigned long x) { return x + 0x7fff0000 - 0x800; }
|
||||
+unsigned long f15 (unsigned long x) { return x - 0x80000000l - 1; }
|
||||
+unsigned long f16 (unsigned long x) { return x - 0x80000000l + 1; }
|
||||
+unsigned long f17 (unsigned long x) { return x - 0x80000000l - 0x800; }
|
||||
+unsigned long f18 (unsigned long x) { return x - 0x80000000l + 0x7ff; }
|
||||
+
|
||||
+unsigned int g01 (unsigned int x) { return x + 1; }
|
||||
+unsigned int g02 (unsigned int x) { return x - 1; }
|
||||
+unsigned int g03 (unsigned int x) { return x + 2047; }
|
||||
+unsigned int g04 (unsigned int x) { return x + 4094; }
|
||||
+unsigned int g05 (unsigned int x) { return x - 2048; }
|
||||
+unsigned int g06 (unsigned int x) { return x - 4096; }
|
||||
+unsigned int g07 (unsigned int x) { return x + 0x7fff0000; }
|
||||
+unsigned int g08 (unsigned int x) { return x - 0x80000000l; }
|
||||
+unsigned int g09 (unsigned int x) { return x + 0x7fff0000l * 2; }
|
||||
+unsigned int g10 (unsigned int x) { return x - 0x80000000l * 2; }
|
||||
+unsigned int g11 (unsigned int x) { return x + 0x7fff0000 + 0x1; }
|
||||
+unsigned int g12 (unsigned int x) { return x + 0x7fff0000 - 0x1; }
|
||||
+unsigned int g13 (unsigned int x) { return x + 0x7fff0000 + 0x7ff; }
|
||||
+unsigned int g14 (unsigned int x) { return x + 0x7fff0000 - 0x800; }
|
||||
+unsigned int g15 (unsigned int x) { return x - 0x80000000l - 1; }
|
||||
+unsigned int g16 (unsigned int x) { return x - 0x80000000l + 1; }
|
||||
+unsigned int g17 (unsigned int x) { return x - 0x80000000l - 0x800; }
|
||||
+unsigned int g18 (unsigned int x) { return x - 0x80000000l + 0x7ff; }
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
|
||||
index 3533fe7b6..cd72154f4 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
|
||||
@@ -6,7 +6,7 @@
|
||||
#define SIZE 128*1024
|
||||
#include "stack-check-prologue.h"
|
||||
|
||||
-/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 131088} 1 } } */
|
||||
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 131072} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1 } } */
|
||||
|
||||
/* Checks that the CFA notes are correct for every sp adjustment. */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
|
||||
index e5e711105..3e5ca05b2 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
|
||||
@@ -6,7 +6,7 @@
|
||||
#define SIZE 1280*1024 + 512
|
||||
#include "stack-check-prologue.h"
|
||||
|
||||
-/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 1311248} 1 } } */
|
||||
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 1311232} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1 } } */
|
||||
|
||||
/* Checks that the CFA notes are correct for every sp adjustment. */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
338
LoongArch-Optimize-immediate-load.patch
Normal file
338
LoongArch-Optimize-immediate-load.patch
Normal file
@ -0,0 +1,338 @@
|
||||
From b533b615ae47b97d51eeb83e1a63f7c72407430f Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Thu, 17 Nov 2022 17:08:36 +0800
|
||||
Subject: [PATCH 032/124] LoongArch: Optimize immediate load.
|
||||
|
||||
The immediate number is split in the Split pass, not in the expand pass.
|
||||
|
||||
Because the loop2_invariant pass hoists loop-invariant instructions out of the
|
||||
loop, some instructions will not meet the hoisting conditions if the immediate
|
||||
is decomposed during the expand pass, so the immediate decomposition is
|
||||
deferred to the split pass.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.cc (enum loongarch_load_imm_method):
|
||||
Remove the member METHOD_INSV that is not currently used.
|
||||
(struct loongarch_integer_op): Define a new member curr_value,
|
||||
that records the value of the number stored in the destination
|
||||
register immediately after the current instruction has run.
|
||||
(loongarch_build_integer): Assign a value to the curr_value member variable.
|
||||
(loongarch_move_integer): Adds information for the immediate load instruction.
|
||||
* config/loongarch/loongarch.md (*movdi_32bit): Redefine as define_insn_and_split.
|
||||
(*movdi_64bit): Likewise.
|
||||
(*movsi_internal): Likewise.
|
||||
(*movhi_internal): Likewise.
|
||||
* config/loongarch/predicates.md: Return true as long as it is CONST_INT, ensure
|
||||
that the immediate number is not optimized by decomposition during expand
|
||||
optimization loop.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/imm-load.c: New test.
|
||||
* gcc.target/loongarch/imm-load1.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.cc | 62 ++++++++++---------
|
||||
gcc/config/loongarch/loongarch.md | 44 +++++++++++--
|
||||
gcc/config/loongarch/predicates.md | 2 +-
|
||||
gcc/testsuite/gcc.target/loongarch/imm-load.c | 10 +++
|
||||
.../gcc.target/loongarch/imm-load1.c | 26 ++++++++
|
||||
5 files changed, 110 insertions(+), 34 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/imm-load.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/imm-load1.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index 622c9435b..f45a49f90 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -139,22 +139,21 @@ struct loongarch_address_info
|
||||
|
||||
METHOD_LU52I:
|
||||
Load 52-63 bit of the immediate number.
|
||||
-
|
||||
- METHOD_INSV:
|
||||
- immediate like 0xfff00000fffffxxx
|
||||
- */
|
||||
+*/
|
||||
enum loongarch_load_imm_method
|
||||
{
|
||||
METHOD_NORMAL,
|
||||
METHOD_LU32I,
|
||||
- METHOD_LU52I,
|
||||
- METHOD_INSV
|
||||
+ METHOD_LU52I
|
||||
};
|
||||
|
||||
struct loongarch_integer_op
|
||||
{
|
||||
enum rtx_code code;
|
||||
HOST_WIDE_INT value;
|
||||
+ /* The value held in the destination register immediately after the
|
||||
+ instruction for this step has run. */
|
||||
+ HOST_WIDE_INT curr_value;
|
||||
enum loongarch_load_imm_method method;
|
||||
};
|
||||
|
||||
@@ -1474,24 +1473,27 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
|
||||
{
|
||||
/* The value of the lower 32 bit be loaded with one instruction.
|
||||
lu12i.w. */
|
||||
- codes[0].code = UNKNOWN;
|
||||
- codes[0].method = METHOD_NORMAL;
|
||||
- codes[0].value = low_part;
|
||||
+ codes[cost].code = UNKNOWN;
|
||||
+ codes[cost].method = METHOD_NORMAL;
|
||||
+ codes[cost].value = low_part;
|
||||
+ codes[cost].curr_value = low_part;
|
||||
cost++;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* lu12i.w + ior. */
|
||||
- codes[0].code = UNKNOWN;
|
||||
- codes[0].method = METHOD_NORMAL;
|
||||
- codes[0].value = low_part & ~(IMM_REACH - 1);
|
||||
+ codes[cost].code = UNKNOWN;
|
||||
+ codes[cost].method = METHOD_NORMAL;
|
||||
+ codes[cost].value = low_part & ~(IMM_REACH - 1);
|
||||
+ codes[cost].curr_value = codes[cost].value;
|
||||
cost++;
|
||||
HOST_WIDE_INT iorv = low_part & (IMM_REACH - 1);
|
||||
if (iorv != 0)
|
||||
{
|
||||
- codes[1].code = IOR;
|
||||
- codes[1].method = METHOD_NORMAL;
|
||||
- codes[1].value = iorv;
|
||||
+ codes[cost].code = IOR;
|
||||
+ codes[cost].method = METHOD_NORMAL;
|
||||
+ codes[cost].value = iorv;
|
||||
+ codes[cost].curr_value = low_part;
|
||||
cost++;
|
||||
}
|
||||
}
|
||||
@@ -1514,11 +1516,14 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
|
||||
{
|
||||
codes[cost].method = METHOD_LU52I;
|
||||
codes[cost].value = value & LU52I_B;
|
||||
+ codes[cost].curr_value = value;
|
||||
return cost + 1;
|
||||
}
|
||||
|
||||
codes[cost].method = METHOD_LU32I;
|
||||
codes[cost].value = (value & LU32I_B) | (sign51 ? LU52I_B : 0);
|
||||
+ codes[cost].curr_value = (value & 0xfffffffffffff)
|
||||
+ | (sign51 ? LU52I_B : 0);
|
||||
cost++;
|
||||
|
||||
/* Determine whether the 52-61 bits are sign-extended from the low order,
|
||||
@@ -1527,6 +1532,7 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
|
||||
{
|
||||
codes[cost].method = METHOD_LU52I;
|
||||
codes[cost].value = value & LU52I_B;
|
||||
+ codes[cost].curr_value = value;
|
||||
cost++;
|
||||
}
|
||||
}
|
||||
@@ -2910,6 +2916,9 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
|
||||
else
|
||||
x = force_reg (mode, x);
|
||||
|
||||
+ set_unique_reg_note (get_last_insn (), REG_EQUAL,
|
||||
+ GEN_INT (codes[i-1].curr_value));
|
||||
+
|
||||
switch (codes[i].method)
|
||||
{
|
||||
case METHOD_NORMAL:
|
||||
@@ -2917,22 +2926,17 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
|
||||
GEN_INT (codes[i].value));
|
||||
break;
|
||||
case METHOD_LU32I:
|
||||
- emit_insn (
|
||||
- gen_rtx_SET (x,
|
||||
- gen_rtx_IOR (DImode,
|
||||
- gen_rtx_ZERO_EXTEND (
|
||||
- DImode, gen_rtx_SUBREG (SImode, x, 0)),
|
||||
- GEN_INT (codes[i].value))));
|
||||
+ gcc_assert (mode == DImode);
|
||||
+ x = gen_rtx_IOR (DImode,
|
||||
+ gen_rtx_ZERO_EXTEND (DImode,
|
||||
+ gen_rtx_SUBREG (SImode, x, 0)),
|
||||
+ GEN_INT (codes[i].value));
|
||||
break;
|
||||
case METHOD_LU52I:
|
||||
- emit_insn (gen_lu52i_d (x, x, GEN_INT (0xfffffffffffff),
|
||||
- GEN_INT (codes[i].value)));
|
||||
- break;
|
||||
- case METHOD_INSV:
|
||||
- emit_insn (
|
||||
- gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, x, GEN_INT (20),
|
||||
- GEN_INT (32)),
|
||||
- gen_rtx_REG (DImode, 0)));
|
||||
+ gcc_assert (mode == DImode);
|
||||
+ x = gen_rtx_IOR (DImode,
|
||||
+ gen_rtx_AND (DImode, x, GEN_INT (0xfffffffffffff)),
|
||||
+ GEN_INT (codes[i].value));
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index 2fda53819..f61db66d5 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -1718,23 +1718,41 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
-(define_insn "*movdi_32bit"
|
||||
+(define_insn_and_split "*movdi_32bit"
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m")
|
||||
(match_operand:DI 1 "move_operand" "r,i,w,r,*J*r,*m,*f,*f"))]
|
||||
"!TARGET_64BIT
|
||||
&& (register_operand (operands[0], DImode)
|
||||
|| reg_or_0_operand (operands[1], DImode))"
|
||||
{ return loongarch_output_move (operands[0], operands[1]); }
|
||||
+ "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO
|
||||
+ (operands[0]))"
|
||||
+ [(const_int 0)]
|
||||
+ "
|
||||
+{
|
||||
+ loongarch_move_integer (operands[0], operands[0], INTVAL (operands[1]));
|
||||
+ DONE;
|
||||
+}
|
||||
+ "
|
||||
[(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
-(define_insn "*movdi_64bit"
|
||||
+(define_insn_and_split "*movdi_64bit"
|
||||
[(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m")
|
||||
(match_operand:DI 1 "move_operand" "r,Yd,w,rJ,*r*J,*m,*f,*f"))]
|
||||
"TARGET_64BIT
|
||||
&& (register_operand (operands[0], DImode)
|
||||
|| reg_or_0_operand (operands[1], DImode))"
|
||||
{ return loongarch_output_move (operands[0], operands[1]); }
|
||||
+ "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO
|
||||
+ (operands[0]))"
|
||||
+ [(const_int 0)]
|
||||
+ "
|
||||
+{
|
||||
+ loongarch_move_integer (operands[0], operands[0], INTVAL (operands[1]));
|
||||
+ DONE;
|
||||
+}
|
||||
+ "
|
||||
[(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore")
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
@@ -1749,12 +1767,21 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
-(define_insn "*movsi_internal"
|
||||
+(define_insn_and_split "*movsi_internal"
|
||||
[(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m,*r,*z")
|
||||
(match_operand:SI 1 "move_operand" "r,Yd,w,rJ,*r*J,*m,*f,*f,*z,*r"))]
|
||||
"(register_operand (operands[0], SImode)
|
||||
|| reg_or_0_operand (operands[1], SImode))"
|
||||
{ return loongarch_output_move (operands[0], operands[1]); }
|
||||
+ "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO
|
||||
+ (operands[0]))"
|
||||
+ [(const_int 0)]
|
||||
+ "
|
||||
+{
|
||||
+ loongarch_move_integer (operands[0], operands[0], INTVAL (operands[1]));
|
||||
+ DONE;
|
||||
+}
|
||||
+ "
|
||||
[(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore,mftg,mgtf")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
@@ -1774,12 +1801,21 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
-(define_insn "*movhi_internal"
|
||||
+(define_insn_and_split "*movhi_internal"
|
||||
[(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,r,m,r,k")
|
||||
(match_operand:HI 1 "move_operand" "r,Yd,I,m,rJ,k,rJ"))]
|
||||
"(register_operand (operands[0], HImode)
|
||||
|| reg_or_0_operand (operands[1], HImode))"
|
||||
{ return loongarch_output_move (operands[0], operands[1]); }
|
||||
+ "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO
|
||||
+ (operands[0]))"
|
||||
+ [(const_int 0)]
|
||||
+ "
|
||||
+{
|
||||
+ loongarch_move_integer (operands[0], operands[0], INTVAL (operands[1]));
|
||||
+ DONE;
|
||||
+}
|
||||
+ "
|
||||
[(set_attr "move_type" "move,const,const,load,store,load,store")
|
||||
(set_attr "mode" "HI")])
|
||||
|
||||
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
|
||||
index 8bd0c1376..58c3dc226 100644
|
||||
--- a/gcc/config/loongarch/predicates.md
|
||||
+++ b/gcc/config/loongarch/predicates.md
|
||||
@@ -226,7 +226,7 @@
|
||||
switch (GET_CODE (op))
|
||||
{
|
||||
case CONST_INT:
|
||||
- return !splittable_const_int_operand (op, mode);
|
||||
+ return true;
|
||||
|
||||
case CONST:
|
||||
case SYMBOL_REF:
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load.c b/gcc/testsuite/gcc.target/loongarch/imm-load.c
|
||||
new file mode 100644
|
||||
index 000000000..c04ca3399
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/imm-load.c
|
||||
@@ -0,0 +1,10 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mabi=lp64d -O2 -fdump-rtl-split1" } */
|
||||
+
|
||||
+long int
|
||||
+test (void)
|
||||
+{
|
||||
+ return 0x1234567890abcdef;
|
||||
+}
|
||||
+/* { dg-final { scan-rtl-dump-times "scanning new insn with uid" 6 "split1" } } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load1.c b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
|
||||
new file mode 100644
|
||||
index 000000000..2ff029712
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
|
||||
@@ -0,0 +1,26 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mabi=lp64d -O2" } */
|
||||
+/* { dg-final { scan-assembler "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
|
||||
+
|
||||
+
|
||||
+extern long long b[10];
|
||||
+static inline long long
|
||||
+repeat_bytes (void)
|
||||
+{
|
||||
+ long long r = 0x0101010101010101;
|
||||
+
|
||||
+ return r;
|
||||
+}
|
||||
+
|
||||
+static inline long long
|
||||
+highbit_mask (long long m)
|
||||
+{
|
||||
+ return m & repeat_bytes ();
|
||||
+}
|
||||
+
|
||||
+void test(long long *a)
|
||||
+{
|
||||
+ for (int i = 0; i < 10; i++)
|
||||
+ b[i] = highbit_mask (a[i]);
|
||||
+
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
114
LoongArch-Optimize-switch-with-sign-extended-index.patch
Normal file
114
LoongArch-Optimize-switch-with-sign-extended-index.patch
Normal file
@ -0,0 +1,114 @@
|
||||
From c6d4efda0fa064dfe1d3cc1b9abf8051a82cd74f Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Sat, 2 Sep 2023 10:59:55 +0800
|
||||
Subject: [PATCH 056/124] LoongArch: Optimize switch with sign-extended index.
|
||||
|
||||
The patch refers to the submission of RISCV
|
||||
7bbce9b50302959286381d9177818642bceaf301.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.cc (loongarch_extend_comparands):
|
||||
In unsigned QImode test, check for sign extended subreg and/or
|
||||
constant operands, and do a sign extension in that case.
|
||||
* config/loongarch/loongarch.md (TARGET_64BIT): Define
|
||||
template cbranchqi4.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/switch-qi.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.cc | 16 +++++++++++++---
|
||||
gcc/config/loongarch/loongarch.md | 10 +++++++---
|
||||
gcc/testsuite/gcc.target/loongarch/switch-qi.c | 16 ++++++++++++++++
|
||||
3 files changed, 36 insertions(+), 6 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/switch-qi.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index 7b48e3216..41d344b82 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -4221,11 +4221,21 @@ loongarch_allocate_fcc (machine_mode mode)
|
||||
static void
|
||||
loongarch_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
|
||||
{
|
||||
- /* Comparisons consider all XLEN bits, so extend sub-XLEN values. */
|
||||
+ /* Comparisons consider all GRLEN bits, so extend sub-GRLEN values. */
|
||||
if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)))
|
||||
{
|
||||
- /* TODO: checkout It is more profitable to zero-extend QImode values. */
|
||||
- if (unsigned_condition (code) == code && GET_MODE (*op0) == QImode)
|
||||
+ /* It is more profitable to zero-extend QImode values. But not if the
|
||||
+ first operand has already been sign-extended, and the second one is
|
||||
+ a constant or has already been sign-extended also. */
|
||||
+ if (unsigned_condition (code) == code
|
||||
+ && (GET_MODE (*op0) == QImode
|
||||
+ && ! (GET_CODE (*op0) == SUBREG
|
||||
+ && SUBREG_PROMOTED_VAR_P (*op0)
|
||||
+ && SUBREG_PROMOTED_SIGNED_P (*op0)
|
||||
+ && (CONST_INT_P (*op1)
|
||||
+ || (GET_CODE (*op1) == SUBREG
|
||||
+ && SUBREG_PROMOTED_VAR_P (*op1)
|
||||
+ && SUBREG_PROMOTED_SIGNED_P (*op1))))))
|
||||
{
|
||||
*op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0);
|
||||
if (CONST_INT_P (*op1))
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index cf7441e0b..a5e9352ca 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -357,7 +357,7 @@
|
||||
;; pointer-sized quantities. Exactly one of the two alternatives will match.
|
||||
(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
|
||||
|
||||
-;; Likewise, but for XLEN-sized quantities.
|
||||
+;; Likewise, but for GRLEN-sized quantities.
|
||||
(define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")])
|
||||
|
||||
;; 64-bit modes for which we provide move patterns.
|
||||
@@ -2733,11 +2733,15 @@
|
||||
[(set_attr "type" "branch")])
|
||||
|
||||
|
||||
+;; Branches operate on GRLEN-sized quantities, but for LoongArch64 we accept
|
||||
+;; QImode values so we can force zero-extension.
|
||||
+(define_mode_iterator BR [(QI "TARGET_64BIT") SI (DI "TARGET_64BIT")])
|
||||
+
|
||||
(define_expand "cbranch<mode>4"
|
||||
[(set (pc)
|
||||
(if_then_else (match_operator 0 "comparison_operator"
|
||||
- [(match_operand:GPR 1 "register_operand")
|
||||
- (match_operand:GPR 2 "nonmemory_operand")])
|
||||
+ [(match_operand:BR 1 "register_operand")
|
||||
+ (match_operand:BR 2 "nonmemory_operand")])
|
||||
(label_ref (match_operand 3 ""))
|
||||
(pc)))]
|
||||
""
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/switch-qi.c b/gcc/testsuite/gcc.target/loongarch/switch-qi.c
|
||||
new file mode 100644
|
||||
index 000000000..dd192fd49
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/switch-qi.c
|
||||
@@ -0,0 +1,16 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-march=loongarch64 -mabi=lp64d" } */
|
||||
+/* { dg-final { scan-assembler-not "bstrpick" } } */
|
||||
+
|
||||
+/* Test for loongarch_extend_comparands patch. */
|
||||
+extern void asdf (int);
|
||||
+void
|
||||
+foo (signed char x) {
|
||||
+ switch (x) {
|
||||
+ case 0: asdf (10); break;
|
||||
+ case 1: asdf (11); break;
|
||||
+ case 2: asdf (12); break;
|
||||
+ case 3: asdf (13); break;
|
||||
+ case 4: asdf (14); break;
|
||||
+ }
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
810
LoongArch-Optimize-the-implementation-of-stack-check.patch
Normal file
810
LoongArch-Optimize-the-implementation-of-stack-check.patch
Normal file
@ -0,0 +1,810 @@
|
||||
From d3615b555d6885dba298f7b339740be11cb65a8f Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Tue, 29 Nov 2022 16:06:12 +0800
|
||||
Subject: [PATCH 033/124] LoongArch: Optimize the implementation of stack
|
||||
check.
|
||||
|
||||
The old stack check was performed before the stack was dropped,
|
||||
which would cause the detection tool to report a memory leak.
|
||||
|
||||
The current stack check scheme is as follows:
|
||||
|
||||
'-fstack-clash-protection':
|
||||
1. When the frame->total_size is smaller than the guard page size,
|
||||
the stack is dropped according to the original scheme, and there
|
||||
is no need to perform stack detection in the prologue.
|
||||
2. When frame->total_size is greater than or equal to guard page size,
|
||||
the first step to drop the stack is to drop the space required by
|
||||
the caller-save registers. This space needs to save the caller-save
|
||||
registers, so an implicit stack check is performed.
|
||||
So we just need to check the rest of the stack space.
|
||||
|
||||
'-fstack-check':
|
||||
There is no one-time stack drop and then page-by-page detection as
|
||||
described in the document. It is also the same as
|
||||
'-fstack-clash-protection', which is detected immediately after page drop.
|
||||
|
||||
It is judged that when frame->total_size is not 0, only the size required
|
||||
to save the s register is dropped for the first stack down.
|
||||
|
||||
The test cases are referenced from aarch64.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/linux.h (STACK_CHECK_MOVING_SP):
|
||||
Define this macro to 1.
|
||||
* config/loongarch/loongarch.cc (STACK_CLASH_PROTECTION_GUARD_SIZE):
|
||||
Size of guard page.
|
||||
(loongarch_first_stack_step): Return the size of the first drop stack
|
||||
according to whether stack checking is performed.
|
||||
(loongarch_emit_probe_stack_range): Adjust the method of stack checking in prologue.
|
||||
(loongarch_output_probe_stack_range): Delete useless code.
|
||||
(loongarch_expand_prologue): Adjust the method of stack checking in prologue.
|
||||
(loongarch_option_override_internal): Enforce that interval is the same
|
||||
size as size so the mid-end does the right thing.
|
||||
* config/loongarch/loongarch.h (STACK_CLASH_MAX_UNROLL_PAGES):
|
||||
New macro to decide whether to loop stack detection.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* lib/target-supports.exp:
|
||||
* gcc.target/loongarch/stack-check-alloca-1.c: New test.
|
||||
* gcc.target/loongarch/stack-check-alloca-2.c: New test.
|
||||
* gcc.target/loongarch/stack-check-alloca-3.c: New test.
|
||||
* gcc.target/loongarch/stack-check-alloca-4.c: New test.
|
||||
* gcc.target/loongarch/stack-check-alloca-5.c: New test.
|
||||
* gcc.target/loongarch/stack-check-alloca-6.c: New test.
|
||||
* gcc.target/loongarch/stack-check-alloca.h: New test.
|
||||
* gcc.target/loongarch/stack-check-cfa-1.c: New test.
|
||||
* gcc.target/loongarch/stack-check-cfa-2.c: New test.
|
||||
* gcc.target/loongarch/stack-check-prologue-1.c: New test.
|
||||
* gcc.target/loongarch/stack-check-prologue-2.c: New test.
|
||||
* gcc.target/loongarch/stack-check-prologue-3.c: New test.
|
||||
* gcc.target/loongarch/stack-check-prologue-4.c: New test.
|
||||
* gcc.target/loongarch/stack-check-prologue-5.c: New test.
|
||||
* gcc.target/loongarch/stack-check-prologue-6.c: New test.
|
||||
* gcc.target/loongarch/stack-check-prologue-7.c: New test.
|
||||
* gcc.target/loongarch/stack-check-prologue.h: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/linux.h | 3 +
|
||||
gcc/config/loongarch/loongarch.cc | 248 +++++++++++-------
|
||||
gcc/config/loongarch/loongarch.h | 4 +
|
||||
.../loongarch/stack-check-alloca-1.c | 15 ++
|
||||
.../loongarch/stack-check-alloca-2.c | 12 +
|
||||
.../loongarch/stack-check-alloca-3.c | 12 +
|
||||
.../loongarch/stack-check-alloca-4.c | 12 +
|
||||
.../loongarch/stack-check-alloca-5.c | 13 +
|
||||
.../loongarch/stack-check-alloca-6.c | 13 +
|
||||
.../gcc.target/loongarch/stack-check-alloca.h | 15 ++
|
||||
.../gcc.target/loongarch/stack-check-cfa-1.c | 12 +
|
||||
.../gcc.target/loongarch/stack-check-cfa-2.c | 12 +
|
||||
.../loongarch/stack-check-prologue-1.c | 11 +
|
||||
.../loongarch/stack-check-prologue-2.c | 11 +
|
||||
.../loongarch/stack-check-prologue-3.c | 11 +
|
||||
.../loongarch/stack-check-prologue-4.c | 11 +
|
||||
.../loongarch/stack-check-prologue-5.c | 12 +
|
||||
.../loongarch/stack-check-prologue-6.c | 11 +
|
||||
.../loongarch/stack-check-prologue-7.c | 12 +
|
||||
.../loongarch/stack-check-prologue.h | 5 +
|
||||
gcc/testsuite/lib/target-supports.exp | 7 +-
|
||||
21 files changed, 361 insertions(+), 101 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-2.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-3.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-4.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-5.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca-6.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-alloca.h
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-2.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-3.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-4.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-5.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-6.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue-7.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/stack-check-prologue.h
|
||||
|
||||
diff --git a/gcc/config/loongarch/linux.h b/gcc/config/loongarch/linux.h
|
||||
index 110d0fab9..00039ac18 100644
|
||||
--- a/gcc/config/loongarch/linux.h
|
||||
+++ b/gcc/config/loongarch/linux.h
|
||||
@@ -48,3 +48,6 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define STACK_CHECK_PROTECT (TARGET_64BIT ? 16 * 1024 : 12 * 1024)
|
||||
|
||||
#define TARGET_ASM_FILE_END file_end_indicate_exec_stack
|
||||
+
|
||||
+/* The stack pointer needs to be moved while checking the stack. */
|
||||
+#define STACK_CHECK_MOVING_SP 1
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index f45a49f90..e59edc4cd 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -257,6 +257,10 @@ const char *const
|
||||
loongarch_fp_conditions[16]= {LARCH_FP_CONDITIONS (STRINGIFY)};
|
||||
#undef STRINGIFY
|
||||
|
||||
+/* Size of guard page. */
|
||||
+#define STACK_CLASH_PROTECTION_GUARD_SIZE \
|
||||
+ (1 << param_stack_clash_protection_guard_size)
|
||||
+
|
||||
/* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at
|
||||
least PARM_BOUNDARY bits of alignment, but will be given anything up
|
||||
to PREFERRED_STACK_BOUNDARY bits if the type requires it. */
|
||||
@@ -1069,11 +1073,20 @@ loongarch_restore_reg (rtx reg, rtx mem)
|
||||
static HOST_WIDE_INT
|
||||
loongarch_first_stack_step (struct loongarch_frame_info *frame)
|
||||
{
|
||||
+ HOST_WIDE_INT min_first_step
|
||||
+ = LARCH_STACK_ALIGN (frame->total_size - frame->fp_sp_offset);
|
||||
+
|
||||
+ /* When stack checking is required, if the sum of frame->total_size
|
||||
+ and stack_check_protect is greater than stack clash protection guard
|
||||
+ size, then return min_first_step. */
|
||||
+ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
|
||||
+ || (flag_stack_clash_protection
|
||||
+ && frame->total_size > STACK_CLASH_PROTECTION_GUARD_SIZE))
|
||||
+ return min_first_step;
|
||||
+
|
||||
if (IMM12_OPERAND (frame->total_size))
|
||||
return frame->total_size;
|
||||
|
||||
- HOST_WIDE_INT min_first_step
|
||||
- = LARCH_STACK_ALIGN (frame->total_size - frame->fp_sp_offset);
|
||||
HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8;
|
||||
HOST_WIDE_INT min_second_step = frame->total_size - max_first_step;
|
||||
gcc_assert (min_first_step <= max_first_step);
|
||||
@@ -1106,103 +1119,109 @@ loongarch_emit_stack_tie (void)
|
||||
static void
|
||||
loongarch_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
|
||||
{
|
||||
- /* See if we have a constant small number of probes to generate. If so,
|
||||
- that's the easy case. */
|
||||
- if ((TARGET_64BIT && (first + size <= 32768))
|
||||
- || (!TARGET_64BIT && (first + size <= 2048)))
|
||||
- {
|
||||
- HOST_WIDE_INT i;
|
||||
+ HOST_WIDE_INT rounded_size;
|
||||
+ HOST_WIDE_INT interval;
|
||||
|
||||
- /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
|
||||
- it exceeds SIZE. If only one probe is needed, this will not
|
||||
- generate any code. Then probe at FIRST + SIZE. */
|
||||
- for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
|
||||
- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
|
||||
- -(first + i)));
|
||||
+ if (flag_stack_clash_protection)
|
||||
+ interval = STACK_CLASH_PROTECTION_GUARD_SIZE;
|
||||
+ else
|
||||
+ interval = PROBE_INTERVAL;
|
||||
|
||||
- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
|
||||
- -(first + size)));
|
||||
- }
|
||||
+ rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode);
|
||||
+ rtx r14 = LARCH_PROLOGUE_TEMP3 (Pmode);
|
||||
|
||||
- /* Otherwise, do the same as above, but in a loop. Note that we must be
|
||||
- extra careful with variables wrapping around because we might be at
|
||||
- the very top (or the very bottom) of the address space and we have
|
||||
- to be able to handle this case properly; in particular, we use an
|
||||
- equality test for the loop condition. */
|
||||
- else
|
||||
- {
|
||||
- HOST_WIDE_INT rounded_size;
|
||||
- rtx r13 = LARCH_PROLOGUE_TEMP (Pmode);
|
||||
- rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode);
|
||||
- rtx r14 = LARCH_PROLOGUE_TEMP3 (Pmode);
|
||||
+ size = size + first;
|
||||
|
||||
- /* Sanity check for the addressing mode we're going to use. */
|
||||
- gcc_assert (first <= 16384);
|
||||
+ /* Sanity check for the addressing mode we're going to use. */
|
||||
+ gcc_assert (first <= 16384);
|
||||
|
||||
+ /* Step 1: round SIZE to the previous multiple of the interval. */
|
||||
|
||||
- /* Step 1: round SIZE to the previous multiple of the interval. */
|
||||
+ rounded_size = ROUND_DOWN (size, interval);
|
||||
|
||||
- rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
|
||||
+ /* Step 2: compute initial and final value of the loop counter. */
|
||||
|
||||
- /* TEST_ADDR = SP + FIRST */
|
||||
- if (first != 0)
|
||||
- {
|
||||
- emit_move_insn (r14, GEN_INT (first));
|
||||
- emit_insn (gen_rtx_SET (r13, gen_rtx_MINUS (Pmode,
|
||||
- stack_pointer_rtx,
|
||||
- r14)));
|
||||
- }
|
||||
- else
|
||||
- emit_move_insn (r13, stack_pointer_rtx);
|
||||
+ emit_move_insn (r14, GEN_INT (interval));
|
||||
+
|
||||
+ /* If rounded_size is zero, it means that the space requested by
|
||||
+ the local variable is less than the interval, and there is no
|
||||
+ need to display and detect the allocated space. */
|
||||
+ if (rounded_size != 0)
|
||||
+ {
|
||||
+ /* Step 3: the loop
|
||||
+
|
||||
+ do
|
||||
+ {
|
||||
+ TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
|
||||
+ probe at TEST_ADDR
|
||||
+ }
|
||||
+ while (TEST_ADDR != LAST_ADDR)
|
||||
|
||||
- /* Step 2: compute initial and final value of the loop counter. */
|
||||
+ probes at FIRST + N * PROBE_INTERVAL for values of N from 1
|
||||
+ until it is equal to ROUNDED_SIZE. */
|
||||
|
||||
- emit_move_insn (r14, GEN_INT (PROBE_INTERVAL));
|
||||
- /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
|
||||
- if (rounded_size == 0)
|
||||
- emit_move_insn (r12, r13);
|
||||
+ if (rounded_size <= STACK_CLASH_MAX_UNROLL_PAGES * interval)
|
||||
+ {
|
||||
+ for (HOST_WIDE_INT i = 0; i < rounded_size; i += interval)
|
||||
+ {
|
||||
+ emit_insn (gen_rtx_SET (stack_pointer_rtx,
|
||||
+ gen_rtx_MINUS (Pmode,
|
||||
+ stack_pointer_rtx,
|
||||
+ r14)));
|
||||
+ emit_move_insn (gen_rtx_MEM (Pmode,
|
||||
+ gen_rtx_PLUS (Pmode,
|
||||
+ stack_pointer_rtx,
|
||||
+ const0_rtx)),
|
||||
+ const0_rtx);
|
||||
+ emit_insn (gen_blockage ());
|
||||
+ }
|
||||
+ dump_stack_clash_frame_info (PROBE_INLINE, size != rounded_size);
|
||||
+ }
|
||||
else
|
||||
{
|
||||
emit_move_insn (r12, GEN_INT (rounded_size));
|
||||
- emit_insn (gen_rtx_SET (r12, gen_rtx_MINUS (Pmode, r13, r12)));
|
||||
- /* Step 3: the loop
|
||||
-
|
||||
- do
|
||||
- {
|
||||
- TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
|
||||
- probe at TEST_ADDR
|
||||
- }
|
||||
- while (TEST_ADDR != LAST_ADDR)
|
||||
-
|
||||
- probes at FIRST + N * PROBE_INTERVAL for values of N from 1
|
||||
- until it is equal to ROUNDED_SIZE. */
|
||||
-
|
||||
- emit_insn (gen_probe_stack_range (Pmode, r13, r13, r12, r14));
|
||||
+ emit_insn (gen_rtx_SET (r12,
|
||||
+ gen_rtx_MINUS (Pmode,
|
||||
+ stack_pointer_rtx,
|
||||
+ r12)));
|
||||
+
|
||||
+ emit_insn (gen_probe_stack_range (Pmode, stack_pointer_rtx,
|
||||
+ stack_pointer_rtx, r12, r14));
|
||||
+ emit_insn (gen_blockage ());
|
||||
+ dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
|
||||
}
|
||||
+ }
|
||||
+ else
|
||||
+ dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
|
||||
+
|
||||
|
||||
- /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
|
||||
- that SIZE is equal to ROUNDED_SIZE. */
|
||||
+ /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
|
||||
+ that SIZE is equal to ROUNDED_SIZE. */
|
||||
|
||||
- if (size != rounded_size)
|
||||
+ if (size != rounded_size)
|
||||
+ {
|
||||
+ if (size - rounded_size >= 2048)
|
||||
{
|
||||
- if (TARGET_64BIT)
|
||||
- emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
|
||||
- else
|
||||
- {
|
||||
- HOST_WIDE_INT i;
|
||||
- for (i = 2048; i < (size - rounded_size); i += 2048)
|
||||
- {
|
||||
- emit_stack_probe (plus_constant (Pmode, r12, -i));
|
||||
- emit_insn (gen_rtx_SET (r12,
|
||||
- plus_constant (Pmode, r12, -2048)));
|
||||
- }
|
||||
- rtx r1 = plus_constant (Pmode, r12,
|
||||
- -(size - rounded_size - i + 2048));
|
||||
- emit_stack_probe (r1);
|
||||
- }
|
||||
+ emit_move_insn (r14, GEN_INT (size - rounded_size));
|
||||
+ emit_insn (gen_rtx_SET (stack_pointer_rtx,
|
||||
+ gen_rtx_MINUS (Pmode,
|
||||
+ stack_pointer_rtx,
|
||||
+ r14)));
|
||||
}
|
||||
+ else
|
||||
+ emit_insn (gen_rtx_SET (stack_pointer_rtx,
|
||||
+ gen_rtx_PLUS (Pmode,
|
||||
+ stack_pointer_rtx,
|
||||
+ GEN_INT (rounded_size - size))));
|
||||
}
|
||||
|
||||
+ if (first)
|
||||
+ {
|
||||
+ emit_move_insn (r12, GEN_INT (first));
|
||||
+ emit_insn (gen_rtx_SET (stack_pointer_rtx,
|
||||
+ gen_rtx_PLUS (Pmode,
|
||||
+ stack_pointer_rtx, r12)));
|
||||
+ }
|
||||
/* Make sure nothing is scheduled before we are done. */
|
||||
emit_insn (gen_blockage ());
|
||||
}
|
||||
@@ -1223,7 +1242,6 @@ loongarch_output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3)
|
||||
|
||||
/* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
|
||||
xops[0] = reg1;
|
||||
- xops[1] = GEN_INT (-PROBE_INTERVAL);
|
||||
xops[2] = reg3;
|
||||
if (TARGET_64BIT)
|
||||
output_asm_insn ("sub.d\t%0,%0,%2", xops);
|
||||
@@ -1249,28 +1267,11 @@ loongarch_expand_prologue (void)
|
||||
{
|
||||
struct loongarch_frame_info *frame = &cfun->machine->frame;
|
||||
HOST_WIDE_INT size = frame->total_size;
|
||||
- HOST_WIDE_INT tmp;
|
||||
rtx insn;
|
||||
|
||||
if (flag_stack_usage_info)
|
||||
current_function_static_stack_size = size;
|
||||
|
||||
- if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
|
||||
- || flag_stack_clash_protection)
|
||||
- {
|
||||
- if (crtl->is_leaf && !cfun->calls_alloca)
|
||||
- {
|
||||
- if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
|
||||
- {
|
||||
- tmp = size - get_stack_check_protect ();
|
||||
- loongarch_emit_probe_stack_range (get_stack_check_protect (),
|
||||
- tmp);
|
||||
- }
|
||||
- }
|
||||
- else if (size > 0)
|
||||
- loongarch_emit_probe_stack_range (get_stack_check_protect (), size);
|
||||
- }
|
||||
-
|
||||
/* Save the registers. */
|
||||
if ((frame->mask | frame->fmask) != 0)
|
||||
{
|
||||
@@ -1283,7 +1284,6 @@ loongarch_expand_prologue (void)
|
||||
loongarch_for_each_saved_reg (size, loongarch_save_reg);
|
||||
}
|
||||
|
||||
-
|
||||
/* Set up the frame pointer, if we're using one. */
|
||||
if (frame_pointer_needed)
|
||||
{
|
||||
@@ -1294,7 +1294,45 @@ loongarch_expand_prologue (void)
|
||||
loongarch_emit_stack_tie ();
|
||||
}
|
||||
|
||||
- /* Allocate the rest of the frame. */
|
||||
+ if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
|
||||
+ || flag_stack_clash_protection)
|
||||
+ {
|
||||
+ HOST_WIDE_INT first = get_stack_check_protect ();
|
||||
+
|
||||
+ if (frame->total_size == 0)
|
||||
+ {
|
||||
+ /* do nothing. */
|
||||
+ dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ if (crtl->is_leaf && !cfun->calls_alloca)
|
||||
+ {
|
||||
+ HOST_WIDE_INT interval;
|
||||
+
|
||||
+ if (flag_stack_clash_protection)
|
||||
+ interval = STACK_CLASH_PROTECTION_GUARD_SIZE;
|
||||
+ else
|
||||
+ interval = PROBE_INTERVAL;
|
||||
+
|
||||
+ if (size > interval && size > first)
|
||||
+ loongarch_emit_probe_stack_range (first, size - first);
|
||||
+ else
|
||||
+ loongarch_emit_probe_stack_range (first, size);
|
||||
+ }
|
||||
+ else
|
||||
+ loongarch_emit_probe_stack_range (first, size);
|
||||
+
|
||||
+ if (size > 0)
|
||||
+ {
|
||||
+ /* Describe the effect of the previous instructions. */
|
||||
+ insn = plus_constant (Pmode, stack_pointer_rtx, -size);
|
||||
+ insn = gen_rtx_SET (stack_pointer_rtx, insn);
|
||||
+ loongarch_set_frame_expr (insn);
|
||||
+ }
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
if (size > 0)
|
||||
{
|
||||
if (IMM12_OPERAND (-size))
|
||||
@@ -1305,7 +1343,8 @@ loongarch_expand_prologue (void)
|
||||
}
|
||||
else
|
||||
{
|
||||
- loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (-size));
|
||||
+ loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode),
|
||||
+ GEN_INT (-size));
|
||||
emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
|
||||
LARCH_PROLOGUE_TEMP (Pmode)));
|
||||
|
||||
@@ -6162,6 +6201,15 @@ loongarch_option_override_internal (struct gcc_options *opts)
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
+ /* Validate the guard size. */
|
||||
+ int guard_size = param_stack_clash_protection_guard_size;
|
||||
+
|
||||
+ /* Enforce that interval is the same size as size so the mid-end does the
|
||||
+ right thing. */
|
||||
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
|
||||
+ param_stack_clash_protection_probe_interval,
|
||||
+ guard_size);
|
||||
+
|
||||
loongarch_init_print_operand_punct ();
|
||||
|
||||
/* Set up array to map GCC register number to debug register number.
|
||||
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
|
||||
index a52a81adf..392597943 100644
|
||||
--- a/gcc/config/loongarch/loongarch.h
|
||||
+++ b/gcc/config/loongarch/loongarch.h
|
||||
@@ -668,6 +668,10 @@ enum reg_class
|
||||
|
||||
#define STACK_BOUNDARY (TARGET_ABI_LP64 ? 128 : 64)
|
||||
|
||||
+/* This value controls how many pages we manually unroll the loop for when
|
||||
+ generating stack clash probes. */
|
||||
+#define STACK_CLASH_MAX_UNROLL_PAGES 4
|
||||
+
|
||||
/* Symbolic macros for the registers used to return integer and floating
|
||||
point values. */
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c
|
||||
new file mode 100644
|
||||
index 000000000..6ee589c4b
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-1.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+/* { dg-require-effective-target alloca } */
|
||||
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
|
||||
+
|
||||
+#define SIZE y
|
||||
+#include "stack-check-alloca.h"
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r\d{1,2},-8} 1 } } */
|
||||
+/* { dg-final { scan-assembler-times {stx\.d\t\$r0,\$r3,\$r12} 1 } } */
|
||||
+
|
||||
+/* Dynamic alloca, expect loop, and 1 probes with top at sp.
|
||||
+ 1st probe is inside the loop for the full guard-size allocations, second
|
||||
+ probe is for the case where residual is zero. */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-2.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-2.c
|
||||
new file mode 100644
|
||||
index 000000000..8deaa5873
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-2.c
|
||||
@@ -0,0 +1,12 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+/* { dg-require-effective-target alloca } */
|
||||
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
|
||||
+
|
||||
+#define SIZE 0
|
||||
+#include "stack-check-alloca.h"
|
||||
+
|
||||
+/* { dg-final { scan-assembler-not {stp*t*r*\.d\t\$r0,\$r3,4088} } } */
|
||||
+
|
||||
+/* Alloca of 0 should emit no probes, boundary condition. */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-3.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-3.c
|
||||
new file mode 100644
|
||||
index 000000000..e326ba9a0
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-3.c
|
||||
@@ -0,0 +1,12 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+/* { dg-require-effective-target alloca } */
|
||||
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
|
||||
+
|
||||
+#define SIZE 100
|
||||
+#include "stack-check-alloca.h"
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {st\.d\t\$r0,\$r3,104} 1 } } */
|
||||
+
|
||||
+/* Alloca is less than guard-size, 1 probe at the top of the new allocation. */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-4.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-4.c
|
||||
new file mode 100644
|
||||
index 000000000..b9f7572de
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-4.c
|
||||
@@ -0,0 +1,12 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+/* { dg-require-effective-target alloca } */
|
||||
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
|
||||
+
|
||||
+#define SIZE 64 * 1024
|
||||
+#include "stack-check-alloca.h"
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r\d{1,2},-8} 1 } } */
|
||||
+
|
||||
+/* Alloca is exactly one guard-size, 1 probe expected at top. */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-5.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-5.c
|
||||
new file mode 100644
|
||||
index 000000000..0ff6e493f
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-5.c
|
||||
@@ -0,0 +1,13 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+/* { dg-require-effective-target alloca } */
|
||||
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
|
||||
+
|
||||
+#define SIZE 65 * 1024
|
||||
+#include "stack-check-alloca.h"
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r\d{1,2},-8} 1 } } */
|
||||
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,1016} 1 } } */
|
||||
+
|
||||
+/* Alloca is more than one guard-page. 2 probes expected. */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-6.c b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-6.c
|
||||
new file mode 100644
|
||||
index 000000000..c5cf74fcb
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca-6.c
|
||||
@@ -0,0 +1,13 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+/* { dg-require-effective-target alloca } */
|
||||
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
|
||||
+
|
||||
+#define SIZE 127 * 64 * 1024
|
||||
+#include "stack-check-alloca.h"
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r\d{1,2},-8} 1 } } */
|
||||
+
|
||||
+/* Large alloca of a constant amount which is a multiple of a guard-size.
|
||||
+ Loop expected with top probe. */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-alloca.h b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca.h
|
||||
new file mode 100644
|
||||
index 000000000..8c75f6c0f
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-alloca.h
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* Avoid inclusion of alloca.h, unavailable on some systems. */
|
||||
+#define alloca __builtin_alloca
|
||||
+
|
||||
+__attribute__((noinline, noipa))
|
||||
+void g (char* ptr, int y)
|
||||
+{
|
||||
+ ptr[y] = '\0';
|
||||
+}
|
||||
+
|
||||
+void f_caller (int y)
|
||||
+{
|
||||
+ char* pStr = alloca(SIZE);
|
||||
+ g (pStr, y);
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
|
||||
new file mode 100644
|
||||
index 000000000..f0c6877fc
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
|
||||
@@ -0,0 +1,12 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -funwind-tables" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
|
||||
+
|
||||
+#define SIZE 128*1024
|
||||
+#include "stack-check-prologue.h"
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 131088} 1 } } */
|
||||
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1 } } */
|
||||
+
|
||||
+/* Checks that the CFA notes are correct for every sp adjustment. */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
|
||||
new file mode 100644
|
||||
index 000000000..c6e07bc56
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
|
||||
@@ -0,0 +1,12 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16 -funwind-tables" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
|
||||
+
|
||||
+#define SIZE 1280*1024 + 512
|
||||
+#include "stack-check-prologue.h"
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 1311248} 1 } } */
|
||||
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1 } } */
|
||||
+
|
||||
+/* Checks that the CFA notes are correct for every sp adjustment. */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-1.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-1.c
|
||||
new file mode 100644
|
||||
index 000000000..351bc1f61
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-1.c
|
||||
@@ -0,0 +1,11 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
|
||||
+
|
||||
+#define SIZE 128
|
||||
+#include "stack-check-prologue.h"
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,0} 0 } } */
|
||||
+
|
||||
+/* SIZE is smaller than guard-size so no probe expected. */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-2.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-2.c
|
||||
new file mode 100644
|
||||
index 000000000..6bba659a3
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-2.c
|
||||
@@ -0,0 +1,11 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
|
||||
+
|
||||
+#define SIZE 63 * 1024
|
||||
+#include "stack-check-prologue.h"
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {stp*t*r*.d\t\$r0,\$r3,0} 0 } } */
|
||||
+
|
||||
+/* SIZE is smaller than guard-size so no probe expected. */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-3.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-3.c
|
||||
new file mode 100644
|
||||
index 000000000..164956c37
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-3.c
|
||||
@@ -0,0 +1,11 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
|
||||
+
|
||||
+#define SIZE 64 * 1024
|
||||
+#include "stack-check-prologue.h"
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,0} 1 } } */
|
||||
+
|
||||
+/* SIZE is equal to guard-size, 1 probe expected, boundary condition. */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-4.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-4.c
|
||||
new file mode 100644
|
||||
index 000000000..f53da6b0d
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-4.c
|
||||
@@ -0,0 +1,11 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
|
||||
+
|
||||
+#define SIZE 65 * 1024
|
||||
+#include "stack-check-prologue.h"
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,0} 1 } } */
|
||||
+
|
||||
+/* SIZE is more than guard-size, 1 probe expected. */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-5.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-5.c
|
||||
new file mode 100644
|
||||
index 000000000..c092317ea
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-5.c
|
||||
@@ -0,0 +1,12 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
|
||||
+
|
||||
+#define SIZE 127 * 1024
|
||||
+#include "stack-check-prologue.h"
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,0} 1 } } */
|
||||
+
|
||||
+/* SIZE is more than 1x guard-size and remainder smaller than guard-size,
|
||||
+ 1 probe expected, unrolled, no loop. */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-6.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-6.c
|
||||
new file mode 100644
|
||||
index 000000000..70a2f53f6
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-6.c
|
||||
@@ -0,0 +1,11 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
|
||||
+
|
||||
+#define SIZE 128 * 1024
|
||||
+#include "stack-check-prologue.h"
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {stp*t*r*\.d\t\$r0,\$r3,0} 2 } } */
|
||||
+
|
||||
+/* SIZE is exactly 2x guard-size and no remainder, unrolled, no loop. */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-7.c b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-7.c
|
||||
new file mode 100644
|
||||
index 000000000..e2df89acc
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue-7.c
|
||||
@@ -0,0 +1,12 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fstack-clash-protection --param stack-clash-protection-guard-size=16" } */
|
||||
+/* { dg-require-effective-target supports_stack_clash_protection } */
|
||||
+/* { dg-skip-if "" { *-*-* } { "-fstack-check" } { "" } } */
|
||||
+
|
||||
+#define SIZE 6 * 64 * 1024
|
||||
+#include "stack-check-prologue.h"
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {stp*t*r*.d\t\$r0,\$r3,0} 1 } } */
|
||||
+
|
||||
+/* SIZE is more than 4x guard-size and no remainder, 1 probe expected in a loop
|
||||
+ and no residual probe. */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-prologue.h b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue.h
|
||||
new file mode 100644
|
||||
index 000000000..b7e06aedb
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-prologue.h
|
||||
@@ -0,0 +1,5 @@
|
||||
+int f_test (int x)
|
||||
+{
|
||||
+ char arr[SIZE];
|
||||
+ return arr[x];
|
||||
+}
|
||||
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
|
||||
index c858bd93b..3a326ea1c 100644
|
||||
--- a/gcc/testsuite/lib/target-supports.exp
|
||||
+++ b/gcc/testsuite/lib/target-supports.exp
|
||||
@@ -11292,7 +11292,8 @@ proc check_effective_target_supports_stack_clash_protection { } {
|
||||
|
||||
if { [istarget x86_64-*-*] || [istarget i?86-*-*]
|
||||
|| [istarget powerpc*-*-*] || [istarget rs6000*-*-*]
|
||||
- || [istarget aarch64*-**] || [istarget s390*-*-*] } {
|
||||
+ || [istarget aarch64*-**] || [istarget s390*-*-*]
|
||||
+ || [istarget loongarch64*-**] } {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
@@ -11343,6 +11344,10 @@ proc check_effective_target_caller_implicit_probes { } {
|
||||
return 1;
|
||||
}
|
||||
|
||||
+ if { [istarget loongarch64*-*-*] } {
|
||||
+ return 1;
|
||||
+ }
|
||||
+
|
||||
return 0
|
||||
}
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
232
LoongArch-Optimized-multiply-instruction-generation.patch
Normal file
232
LoongArch-Optimized-multiply-instruction-generation.patch
Normal file
@ -0,0 +1,232 @@
|
||||
From aa1dc79c9a5ff3df241a94cbfb1c857cfa89c686 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Tue, 5 Sep 2023 11:09:03 +0800
|
||||
Subject: [PATCH 074/124] LoongArch: Optimized multiply instruction generation.
|
||||
|
||||
1. Can generate mulh.w[u] instruction.
|
||||
2. Can generate mulw.d.wu instruction.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.md (mulsidi3_64bit):
|
||||
Field unsigned extension support.
|
||||
(<u>muldi3_highpart): Modify template name.
|
||||
(<u>mulsi3_highpart): Likewise.
|
||||
(<u>mulsidi3_64bit): Field unsigned extension support.
|
||||
(<su>muldi3_highpart): Modify muldi3_highpart to
|
||||
smuldi3_highpart.
|
||||
(<su>mulsi3_highpart): Modify mulsi3_highpart to
|
||||
smulsi3_highpart.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/mulw_d_wu.c: New test.
|
||||
* gcc.target/loongarch/smuldi3_highpart.c: New test.
|
||||
* gcc.target/loongarch/smulsi3_highpart.c: New test.
|
||||
* gcc.target/loongarch/umulsi3_highpart.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.md | 66 ++++++++++++-------
|
||||
.../gcc.target/loongarch/mulw_d_wu.c | 9 +++
|
||||
.../gcc.target/loongarch/smuldi3_highpart.c | 13 ++++
|
||||
.../gcc.target/loongarch/smulsi3_highpart.c | 15 +++++
|
||||
.../gcc.target/loongarch/umulsi3_highpart.c | 14 ++++
|
||||
5 files changed, 94 insertions(+), 23 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index 11c18bf15..264cd325c 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -750,15 +750,6 @@
|
||||
[(set_attr "type" "imul")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
-(define_insn "mulsidi3_64bit"
|
||||
- [(set (match_operand:DI 0 "register_operand" "=r")
|
||||
- (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
|
||||
- (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
|
||||
- "TARGET_64BIT"
|
||||
- "mulw.d.w\t%0,%1,%2"
|
||||
- [(set_attr "type" "imul")
|
||||
- (set_attr "mode" "DI")])
|
||||
-
|
||||
(define_insn "*mulsi3_extended"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(sign_extend:DI
|
||||
@@ -787,14 +778,14 @@
|
||||
emit_insn (gen_muldi3 (low, operands[1], operands[2]));
|
||||
|
||||
rtx high = gen_reg_rtx (DImode);
|
||||
- emit_insn (gen_<u>muldi3_highpart (high, operands[1], operands[2]));
|
||||
+ emit_insn (gen_<su>muldi3_highpart (high, operands[1], operands[2]));
|
||||
|
||||
emit_move_insn (gen_lowpart (DImode, operands[0]), low);
|
||||
emit_move_insn (gen_highpart (DImode, operands[0]), high);
|
||||
DONE;
|
||||
})
|
||||
|
||||
-(define_insn "<u>muldi3_highpart"
|
||||
+(define_insn "<su>muldi3_highpart"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
(truncate:DI
|
||||
(lshiftrt:TI
|
||||
@@ -809,22 +800,34 @@
|
||||
(set_attr "mode" "DI")])
|
||||
|
||||
(define_expand "<u>mulsidi3"
|
||||
- [(set (match_operand:DI 0 "register_operand" "=r")
|
||||
+ [(set (match_operand:DI 0 "register_operand")
|
||||
(mult:DI (any_extend:DI
|
||||
- (match_operand:SI 1 "register_operand" " r"))
|
||||
+ (match_operand:SI 1 "register_operand"))
|
||||
(any_extend:DI
|
||||
- (match_operand:SI 2 "register_operand" " r"))))]
|
||||
- "!TARGET_64BIT"
|
||||
+ (match_operand:SI 2 "register_operand"))))]
|
||||
+ ""
|
||||
{
|
||||
- rtx temp = gen_reg_rtx (SImode);
|
||||
- emit_insn (gen_mulsi3 (temp, operands[1], operands[2]));
|
||||
- emit_insn (gen_<u>mulsi3_highpart (loongarch_subword (operands[0], true),
|
||||
- operands[1], operands[2]));
|
||||
- emit_insn (gen_movsi (loongarch_subword (operands[0], false), temp));
|
||||
- DONE;
|
||||
+ if (!TARGET_64BIT)
|
||||
+ {
|
||||
+ rtx temp = gen_reg_rtx (SImode);
|
||||
+ emit_insn (gen_mulsi3 (temp, operands[1], operands[2]));
|
||||
+ emit_insn (gen_<su>mulsi3_highpart (loongarch_subword (operands[0], true),
|
||||
+ operands[1], operands[2]));
|
||||
+ emit_insn (gen_movsi (loongarch_subword (operands[0], false), temp));
|
||||
+ DONE;
|
||||
+ }
|
||||
})
|
||||
|
||||
-(define_insn "<u>mulsi3_highpart"
|
||||
+(define_insn "<u>mulsidi3_64bit"
|
||||
+ [(set (match_operand:DI 0 "register_operand" "=r")
|
||||
+ (mult:DI (any_extend:DI (match_operand:SI 1 "register_operand" "r"))
|
||||
+ (any_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
|
||||
+ "TARGET_64BIT"
|
||||
+ "mulw.d.w<u>\t%0,%1,%2"
|
||||
+ [(set_attr "type" "imul")
|
||||
+ (set_attr "mode" "DI")])
|
||||
+
|
||||
+(define_insn "<su>mulsi3_highpart"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(truncate:SI
|
||||
(lshiftrt:DI
|
||||
@@ -833,11 +836,28 @@
|
||||
(any_extend:DI
|
||||
(match_operand:SI 2 "register_operand" " r")))
|
||||
(const_int 32))))]
|
||||
- "!TARGET_64BIT"
|
||||
+ ""
|
||||
"mulh.w<u>\t%0,%1,%2"
|
||||
[(set_attr "type" "imul")
|
||||
(set_attr "mode" "SI")])
|
||||
|
||||
+;; Under the LoongArch architecture, the mulh.w[u] instruction performs
|
||||
+;; sign extension by default, so the sign extension instruction can be
|
||||
+;; eliminated.
|
||||
+(define_peephole
|
||||
+ [(set (match_operand:SI 0 "register_operand")
|
||||
+ (truncate:SI
|
||||
+ (lshiftrt:DI
|
||||
+ (mult:DI (any_extend:DI
|
||||
+ (match_operand:SI 1 "register_operand"))
|
||||
+ (any_extend:DI
|
||||
+ (match_operand:SI 2 "register_operand")))
|
||||
+ (const_int 32))))
|
||||
+ (set (match_operand:DI 3 "register_operand")
|
||||
+ (sign_extend:DI (match_dup 0)))]
|
||||
+ "TARGET_64BIT && REGNO (operands[0]) == REGNO (operands[3])"
|
||||
+ "mulh.w<u>\t%0,%1,%2")
|
||||
+
|
||||
;;
|
||||
;; ....................
|
||||
;;
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c b/gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c
|
||||
new file mode 100644
|
||||
index 000000000..16163d667
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/mulw_d_wu.c
|
||||
@@ -0,0 +1,9 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -mabi=lp64d" } */
|
||||
+/* { dg-final { scan-assembler "mulw.d.wu" } } */
|
||||
+
|
||||
+__attribute__((noipa, noinline)) unsigned long
|
||||
+f(unsigned long a, unsigned long b)
|
||||
+{
|
||||
+ return (unsigned long)(unsigned int)a * (unsigned long)(unsigned int)b;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c b/gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c
|
||||
new file mode 100644
|
||||
index 000000000..6f5c686ca
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/smuldi3_highpart.c
|
||||
@@ -0,0 +1,13 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mabi=lp64d -O2 -fdump-rtl-expand-all" } */
|
||||
+
|
||||
+typedef int TI __attribute ((mode(TI)));
|
||||
+typedef int DI __attribute__((mode(DI)));
|
||||
+
|
||||
+DI
|
||||
+test (DI x, DI y)
|
||||
+{
|
||||
+ return ((TI)x * y) >> 64;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-rtl-dump "highparttmp" "expand" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c b/gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c
|
||||
new file mode 100644
|
||||
index 000000000..c4dbf8afc
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/smulsi3_highpart.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-rtl-expand-all" } */
|
||||
+
|
||||
+typedef unsigned int DI __attribute__((mode(DI)));
|
||||
+typedef unsigned int SI __attribute__((mode(SI)));
|
||||
+
|
||||
+SI
|
||||
+f (SI x, SI y)
|
||||
+{
|
||||
+ return ((DI) x * y) >> 32;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-rtl-dump "highparttmp" "expand" } } */
|
||||
+/* { dg-final { scan-assembler "mulh\\.w" } } */
|
||||
+/* { dg-final { scan-assembler-not "slli\\.w" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c b/gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c
|
||||
new file mode 100644
|
||||
index 000000000..e208803e2
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/umulsi3_highpart.c
|
||||
@@ -0,0 +1,14 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2" } */
|
||||
+
|
||||
+typedef unsigned int DI __attribute__((mode(DI)));
|
||||
+typedef unsigned int SI __attribute__((mode(SI)));
|
||||
+
|
||||
+SI
|
||||
+f (SI x, SI y)
|
||||
+{
|
||||
+ return ((DI) x * y) >> 32;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "mulh\\.wu" } } */
|
||||
+/* { dg-final { scan-assembler-not "slli\\.w" } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
44
LoongArch-Prepare-static-PIE-support.patch
Normal file
44
LoongArch-Prepare-static-PIE-support.patch
Normal file
@ -0,0 +1,44 @@
|
||||
From aa2d9e0e1dc4bf0b612618cf0e3fcea514f92f95 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Tue, 13 Sep 2022 23:21:39 +0800
|
||||
Subject: [PATCH 018/124] LoongArch: Prepare static PIE support
|
||||
|
||||
Static PIE allows us to extend the ASLR to cover static executables and
|
||||
it's not too difficult to support it. On GCC side, we just pass a group
|
||||
of options to the linker, like other ports with static PIE support.
|
||||
|
||||
The real implementation of static PIE (rcrt1.o) will be added into Glibc
|
||||
later.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/gnu-user.h (GNU_USER_TARGET_LINK_SPEC): For
|
||||
-static-pie, pass -static -pie --no-dynamic-linker -z text to
|
||||
the linker, and do not pass --dynamic-linker.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/gnu-user.h | 6 ++++--
|
||||
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
|
||||
index 664dc9206..c5b1afe53 100644
|
||||
--- a/gcc/config/loongarch/gnu-user.h
|
||||
+++ b/gcc/config/loongarch/gnu-user.h
|
||||
@@ -40,8 +40,10 @@ along with GCC; see the file COPYING3. If not see
|
||||
#undef GNU_USER_TARGET_LINK_SPEC
|
||||
#define GNU_USER_TARGET_LINK_SPEC \
|
||||
"%{G*} %{shared} -m " GNU_USER_LINK_EMULATION \
|
||||
- "%{!shared: %{static} %{!static: %{rdynamic:-export-dynamic} " \
|
||||
- "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}"
|
||||
+ "%{!shared: %{static} " \
|
||||
+ "%{!static: %{!static-pie: %{rdynamic:-export-dynamic} " \
|
||||
+ "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} " \
|
||||
+ "%{static-pie: -static -pie --no-dynamic-linker -z text}}"
|
||||
|
||||
|
||||
/* Similar to standard Linux, but adding -ffast-math support. */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
100
LoongArch-Provide-fmin-fmax-RTL-pattern.patch
Normal file
100
LoongArch-Provide-fmin-fmax-RTL-pattern.patch
Normal file
@ -0,0 +1,100 @@
|
||||
From b065c84206cdf463a377ca28f719dae7acbed0f7 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Tue, 16 Aug 2022 15:34:36 +0800
|
||||
Subject: [PATCH 009/124] LoongArch: Provide fmin/fmax RTL pattern
|
||||
|
||||
We already had smin/smax RTL pattern using fmin/fmax instruction. But
|
||||
for smin/smax, it's unspecified what will happen if either operand is
|
||||
NaN. So we would generate calls to libc fmin/fmax functions with
|
||||
-fno-finite-math-only (the default for all optimization levels except
|
||||
-Ofast).
|
||||
|
||||
But, LoongArch fmin/fmax instruction is IEEE-754-2008 conformant so we
|
||||
can also use the instruction for fmin/fmax pattern and avoid the library
|
||||
function call.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.md (fmax<mode>3): New RTL pattern.
|
||||
(fmin<mode>3): Likewise.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/fmax-fmin.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.md | 18 +++++++++++
|
||||
.../gcc.target/loongarch/fmax-fmin.c | 30 +++++++++++++++++++
|
||||
2 files changed, 48 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/fmax-fmin.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index 6b6df22a5..8e8868de9 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -1023,6 +1023,24 @@
|
||||
[(set_attr "type" "fmove")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
+(define_insn "fmax<mode>3"
|
||||
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||
+ (smax:ANYF (match_operand:ANYF 1 "register_operand" "f")
|
||||
+ (match_operand:ANYF 2 "register_operand" "f")))]
|
||||
+ ""
|
||||
+ "fmax.<fmt>\t%0,%1,%2"
|
||||
+ [(set_attr "type" "fmove")
|
||||
+ (set_attr "mode" "<MODE>")])
|
||||
+
|
||||
+(define_insn "fmin<mode>3"
|
||||
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||
+ (smin:ANYF (match_operand:ANYF 1 "register_operand" "f")
|
||||
+ (match_operand:ANYF 2 "register_operand" "f")))]
|
||||
+ ""
|
||||
+ "fmin.<fmt>\t%0,%1,%2"
|
||||
+ [(set_attr "type" "fmove")
|
||||
+ (set_attr "mode" "<MODE>")])
|
||||
+
|
||||
(define_insn "smaxa<mode>3"
|
||||
[(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||
(if_then_else:ANYF
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/fmax-fmin.c b/gcc/testsuite/gcc.target/loongarch/fmax-fmin.c
|
||||
new file mode 100644
|
||||
index 000000000..92cf8a150
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/fmax-fmin.c
|
||||
@@ -0,0 +1,30 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mdouble-float -fno-finite-math-only" } */
|
||||
+/* { dg-final { scan-assembler "fmin\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "fmin\\.d" } } */
|
||||
+/* { dg-final { scan-assembler "fmax\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "fmax\\.d" } } */
|
||||
+
|
||||
+double
|
||||
+_fmax(double a, double b)
|
||||
+{
|
||||
+ return __builtin_fmax(a, b);
|
||||
+}
|
||||
+
|
||||
+float
|
||||
+_fmaxf(float a, float b)
|
||||
+{
|
||||
+ return __builtin_fmaxf(a, b);
|
||||
+}
|
||||
+
|
||||
+double
|
||||
+_fmin(double a, double b)
|
||||
+{
|
||||
+ return __builtin_fmin(a, b);
|
||||
+}
|
||||
+
|
||||
+float
|
||||
+_fminf(float a, float b)
|
||||
+{
|
||||
+ return __builtin_fminf(a, b);
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
180
LoongArch-Remove-redundant-sign-extension-instructio.patch
Normal file
180
LoongArch-Remove-redundant-sign-extension-instructio.patch
Normal file
@ -0,0 +1,180 @@
|
||||
From fbe6421c5600a151dbae96d18db2fd31aca2fe7c Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Thu, 24 Aug 2023 16:44:56 +0800
|
||||
Subject: [PATCH 051/124] LoongArch: Remove redundant sign extension
|
||||
instructions caused by SLT instructions.
|
||||
|
||||
Since the SLT instruction does not distinguish between 64-bit operations and 32-bit
|
||||
operations under the 64-bit LoongArch architecture, if the operand of slt is SImode,
|
||||
the operand must be explicitly sign-extended.
|
||||
|
||||
But similar to the test case below, the sign extension is redundant:
|
||||
|
||||
extern int src1, src2, src3;
|
||||
|
||||
int
|
||||
test (void)
|
||||
{
|
||||
int data1 = src1 + src2;
|
||||
int data2 = src1 + src3;
|
||||
return data1 > data2 ? data1 : data2;
|
||||
}
|
||||
Assembly code before optimization:
|
||||
...
|
||||
add.w $r4,$r4,$r14
|
||||
add.w $r13,$r13,$r14
|
||||
slli.w $r12,$r4,0
|
||||
slli.w $r14,$r13,0
|
||||
slt $r12,$r12,$r14
|
||||
masknez $r4,$r4,$r12
|
||||
maskeqz $r12,$r13,$r12
|
||||
or $r4,$r4,$r12
|
||||
slli.w $r4,$r4,0
|
||||
...
|
||||
|
||||
After optimization:
|
||||
...
|
||||
add.w $r12,$r12,$r14
|
||||
add.w $r13,$r13,$r14
|
||||
slt $r4,$r12,$r13
|
||||
masknez $r12,$r12,$r4
|
||||
maskeqz $r4,$r13,$r4
|
||||
or $r4,$r12,$r4
|
||||
...
|
||||
|
||||
Similar to this test example, the two operands of SLT are obtained by the
|
||||
addition operation, and add.w implicitly sign-extends, so the two operands
|
||||
of SLT do not require sign-extend.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.cc (loongarch_expand_conditional_move):
|
||||
Optimize the function implementation.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/slt-sign-extend.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.cc | 53 +++++++++++++++++--
|
||||
.../gcc.target/loongarch/slt-sign-extend.c | 14 +++++
|
||||
2 files changed, 63 insertions(+), 4 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index f14de5cce..caacfa8a3 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -4380,14 +4380,30 @@ loongarch_expand_conditional_move (rtx *operands)
|
||||
enum rtx_code code = GET_CODE (operands[1]);
|
||||
rtx op0 = XEXP (operands[1], 0);
|
||||
rtx op1 = XEXP (operands[1], 1);
|
||||
+ rtx op0_extend = op0;
|
||||
+ rtx op1_extend = op1;
|
||||
+
|
||||
+ /* Record whether operands[2] and operands[3] modes are promoted to word_mode. */
|
||||
+ bool promote_p = false;
|
||||
+ machine_mode mode = GET_MODE (operands[0]);
|
||||
|
||||
if (FLOAT_MODE_P (GET_MODE (op1)))
|
||||
loongarch_emit_float_compare (&code, &op0, &op1);
|
||||
else
|
||||
{
|
||||
+ if ((REGNO (op0) == REGNO (operands[2])
|
||||
+ || (REGNO (op1) == REGNO (operands[3]) && (op1 != const0_rtx)))
|
||||
+ && (GET_MODE_SIZE (GET_MODE (op0)) < word_mode))
|
||||
+ {
|
||||
+ mode = word_mode;
|
||||
+ promote_p = true;
|
||||
+ }
|
||||
+
|
||||
loongarch_extend_comparands (code, &op0, &op1);
|
||||
|
||||
op0 = force_reg (word_mode, op0);
|
||||
+ op0_extend = op0;
|
||||
+ op1_extend = force_reg (word_mode, op1);
|
||||
|
||||
if (code == EQ || code == NE)
|
||||
{
|
||||
@@ -4414,23 +4430,52 @@ loongarch_expand_conditional_move (rtx *operands)
|
||||
&& register_operand (operands[2], VOIDmode)
|
||||
&& register_operand (operands[3], VOIDmode))
|
||||
{
|
||||
- machine_mode mode = GET_MODE (operands[0]);
|
||||
+ rtx op2 = operands[2];
|
||||
+ rtx op3 = operands[3];
|
||||
+
|
||||
+ if (promote_p)
|
||||
+ {
|
||||
+ if (REGNO (XEXP (operands[1], 0)) == REGNO (operands[2]))
|
||||
+ op2 = op0_extend;
|
||||
+ else
|
||||
+ {
|
||||
+ loongarch_extend_comparands (code, &op2, &const0_rtx);
|
||||
+ op2 = force_reg (mode, op2);
|
||||
+ }
|
||||
+
|
||||
+ if (REGNO (XEXP (operands[1], 1)) == REGNO (operands[3]))
|
||||
+ op3 = op1_extend;
|
||||
+ else
|
||||
+ {
|
||||
+ loongarch_extend_comparands (code, &op3, &const0_rtx);
|
||||
+ op3 = force_reg (mode, op3);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
rtx temp = gen_reg_rtx (mode);
|
||||
rtx temp2 = gen_reg_rtx (mode);
|
||||
|
||||
emit_insn (gen_rtx_SET (temp,
|
||||
gen_rtx_IF_THEN_ELSE (mode, cond,
|
||||
- operands[2], const0_rtx)));
|
||||
+ op2, const0_rtx)));
|
||||
|
||||
/* Flip the test for the second operand. */
|
||||
cond = gen_rtx_fmt_ee ((code == EQ) ? NE : EQ, GET_MODE (op0), op0, op1);
|
||||
|
||||
emit_insn (gen_rtx_SET (temp2,
|
||||
gen_rtx_IF_THEN_ELSE (mode, cond,
|
||||
- operands[3], const0_rtx)));
|
||||
+ op3, const0_rtx)));
|
||||
|
||||
/* Merge the two results, at least one is guaranteed to be zero. */
|
||||
- emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
|
||||
+ if (promote_p)
|
||||
+ {
|
||||
+ rtx temp3 = gen_reg_rtx (mode);
|
||||
+ emit_insn (gen_rtx_SET (temp3, gen_rtx_IOR (mode, temp, temp2)));
|
||||
+ temp3 = gen_lowpart (GET_MODE (operands[0]), temp3);
|
||||
+ loongarch_emit_move (operands[0], temp3);
|
||||
+ }
|
||||
+ else
|
||||
+ emit_insn (gen_rtx_SET (operands[0], gen_rtx_IOR (mode, temp, temp2)));
|
||||
}
|
||||
else
|
||||
emit_insn (gen_rtx_SET (operands[0],
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
|
||||
new file mode 100644
|
||||
index 000000000..ea6b28b7c
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/slt-sign-extend.c
|
||||
@@ -0,0 +1,14 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mabi=lp64d -O2" } */
|
||||
+/* { dg-final { scan-assembler-not "slli.w" } } */
|
||||
+
|
||||
+extern int src1, src2, src3;
|
||||
+
|
||||
+int
|
||||
+test (void)
|
||||
+{
|
||||
+ int data1 = src1 + src2;
|
||||
+ int data2 = src1 + src3;
|
||||
+
|
||||
+ return data1 > data2 ? data1 : data2;
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
36
LoongArch-Remove-the-definition-of-the-macro-LOGICAL.patch
Normal file
36
LoongArch-Remove-the-definition-of-the-macro-LOGICAL.patch
Normal file
@ -0,0 +1,36 @@
|
||||
From 297b8c5770ad85bf468526602e28aff8a66dc01a Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Thu, 13 Apr 2023 19:24:38 +0800
|
||||
Subject: [PATCH 040/124] LoongArch: Remove the definition of the macro
|
||||
LOGICAL_OP_NON_SHORT_CIRCUIT under the architecture and use the default
|
||||
definition instead.
|
||||
|
||||
In some cases, setting this macro as the default can reduce the number of conditional
|
||||
branch instructions.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.h (LOGICAL_OP_NON_SHORT_CIRCUIT): Remove the macro
|
||||
definition.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.h | 1 -
|
||||
1 file changed, 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
|
||||
index 392597943..c6e37b1b4 100644
|
||||
--- a/gcc/config/loongarch/loongarch.h
|
||||
+++ b/gcc/config/loongarch/loongarch.h
|
||||
@@ -836,7 +836,6 @@ typedef struct {
|
||||
1 is the default; other values are interpreted relative to that. */
|
||||
|
||||
#define BRANCH_COST(speed_p, predictable_p) loongarch_branch_cost
|
||||
-#define LOGICAL_OP_NON_SHORT_CIRCUIT 0
|
||||
|
||||
/* Return the asm template for a conditional branch instruction.
|
||||
OPCODE is the opcode's mnemonic and OPERANDS is the asm template for
|
||||
--
|
||||
2.33.0
|
||||
|
||||
65
LoongArch-Rename-frint_-fmt-to-rint-mode-2.patch
Normal file
65
LoongArch-Rename-frint_-fmt-to-rint-mode-2.patch
Normal file
@ -0,0 +1,65 @@
|
||||
From 7584716b03b13c06b8bb9956b9f49e0cfc29c6b3 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Sun, 6 Nov 2022 20:41:38 +0800
|
||||
Subject: [PATCH 027/124] LoongArch: Rename frint_<fmt> to rint<mode>2
|
||||
|
||||
Use standard name so __builtin_rint{,f} can be expanded to one
|
||||
instruction.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.md (frint_<fmt>): Rename to ..
|
||||
(rint<mode>2): .. this.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/frint.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.md | 4 ++--
|
||||
gcc/testsuite/gcc.target/loongarch/frint.c | 16 ++++++++++++++++
|
||||
2 files changed, 18 insertions(+), 2 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/frint.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index bda34d0f3..a14ab14ac 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -2012,8 +2012,8 @@
|
||||
[(set_attr "type" "move")]
|
||||
)
|
||||
|
||||
-;; Convert floating-point numbers to integers
|
||||
-(define_insn "frint_<fmt>"
|
||||
+;; Round floating-point numbers to integers
|
||||
+(define_insn "rint<mode>2"
|
||||
[(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||
(unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
|
||||
UNSPEC_FRINT))]
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/frint.c b/gcc/testsuite/gcc.target/loongarch/frint.c
|
||||
new file mode 100644
|
||||
index 000000000..3ee6a8f97
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/frint.c
|
||||
@@ -0,0 +1,16 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mdouble-float" } */
|
||||
+/* { dg-final { scan-assembler "frint\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "frint\\.d" } } */
|
||||
+
|
||||
+double
|
||||
+my_rint (double a)
|
||||
+{
|
||||
+ return __builtin_rint (a);
|
||||
+}
|
||||
+
|
||||
+float
|
||||
+my_rintf (float a)
|
||||
+{
|
||||
+ return __builtin_rintf (a);
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
113
LoongArch-Set-default-alignment-for-functions-and-la.patch
Normal file
113
LoongArch-Set-default-alignment-for-functions-and-la.patch
Normal file
@ -0,0 +1,113 @@
|
||||
From 129d96b9ab5a2445d0fc2c3f7b72baa0453bd93f Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Wed, 14 Jun 2023 08:24:05 +0800
|
||||
Subject: [PATCH 047/124] LoongArch: Set default alignment for functions and
|
||||
labels with -mtune
|
||||
|
||||
The LA464 micro-architecture is sensitive to alignment of code. The
|
||||
Loongson team has benchmarked various combinations of function and label alignment; the
|
||||
results [1] show that 16-byte label alignment together with 32-byte
|
||||
function alignment gives best results in terms of SPEC score.
|
||||
|
||||
Add a mtune-based table-driven mechanism to set the default of
|
||||
-falign-{functions,labels}. As LA464 is the first (and the only for
|
||||
now) uarch supported by GCC, the same setting is also used for
|
||||
the "generic" -mtune=loongarch64. In the future we may set different
|
||||
settings for LA{2,3,6}64 once we add the support for them.
|
||||
|
||||
Bootstrapped and regtested on loongarch64-linux-gnu. Ok for trunk?
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch-tune.h (loongarch_align): New
|
||||
struct.
|
||||
* config/loongarch/loongarch-def.h (loongarch_cpu_align): New
|
||||
array.
|
||||
* config/loongarch/loongarch-def.c (loongarch_cpu_align): Define
|
||||
the array.
|
||||
* config/loongarch/loongarch.cc
|
||||
(loongarch_option_override_internal): Set the value of
|
||||
-falign-functions= if -falign-functions is enabled but no value
|
||||
is given. Likewise for -falign-labels=.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch-def.c | 12 ++++++++++++
|
||||
gcc/config/loongarch/loongarch-def.h | 1 +
|
||||
gcc/config/loongarch/loongarch-tune.h | 8 ++++++++
|
||||
gcc/config/loongarch/loongarch.cc | 6 ++++++
|
||||
4 files changed, 27 insertions(+)
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
|
||||
index 80ab10a52..74d422ce0 100644
|
||||
--- a/gcc/config/loongarch/loongarch-def.c
|
||||
+++ b/gcc/config/loongarch/loongarch-def.c
|
||||
@@ -72,6 +72,18 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
|
||||
},
|
||||
};
|
||||
|
||||
+struct loongarch_align
|
||||
+loongarch_cpu_align[N_TUNE_TYPES] = {
|
||||
+ [CPU_LOONGARCH64] = {
|
||||
+ .function = "32",
|
||||
+ .label = "16",
|
||||
+ },
|
||||
+ [CPU_LA464] = {
|
||||
+ .function = "32",
|
||||
+ .label = "16",
|
||||
+ },
|
||||
+};
|
||||
+
|
||||
/* The following properties cannot be looked up directly using "cpucfg".
|
||||
So it is necessary to provide a default value for "unknown native"
|
||||
tune targets (i.e. -mtune=native while PRID does not correspond to
|
||||
diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
|
||||
index b5985f070..eb87a79a5 100644
|
||||
--- a/gcc/config/loongarch/loongarch-def.h
|
||||
+++ b/gcc/config/loongarch/loongarch-def.h
|
||||
@@ -144,6 +144,7 @@ extern int loongarch_cpu_issue_rate[];
|
||||
extern int loongarch_cpu_multipass_dfa_lookahead[];
|
||||
|
||||
extern struct loongarch_cache loongarch_cpu_cache[];
|
||||
+extern struct loongarch_align loongarch_cpu_align[];
|
||||
extern struct loongarch_rtx_cost_data loongarch_cpu_rtx_cost_data[];
|
||||
|
||||
#ifdef __cplusplus
|
||||
diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
|
||||
index 8e3eb2947..d961963f0 100644
|
||||
--- a/gcc/config/loongarch/loongarch-tune.h
|
||||
+++ b/gcc/config/loongarch/loongarch-tune.h
|
||||
@@ -48,4 +48,12 @@ struct loongarch_cache {
|
||||
int simultaneous_prefetches; /* number of parallel prefetch */
|
||||
};
|
||||
|
||||
+/* Alignment for functions and labels for best performance. For new uarchs
|
||||
+ the value should be measured via benchmarking. See the documentation for
|
||||
+ -falign-functions and -falign-labels in invoke.texi for the format. */
|
||||
+struct loongarch_align {
|
||||
+ const char *function; /* default value for -falign-functions */
|
||||
+ const char *label; /* default value for -falign-labels */
|
||||
+};
|
||||
+
|
||||
#endif /* LOONGARCH_TUNE_H */
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index 4c0f393b6..f14de5cce 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -6246,6 +6246,12 @@ loongarch_option_override_internal (struct gcc_options *opts)
|
||||
&& !opts->x_optimize_size)
|
||||
opts->x_flag_prefetch_loop_arrays = 1;
|
||||
|
||||
+ if (opts->x_flag_align_functions && !opts->x_str_align_functions)
|
||||
+ opts->x_str_align_functions = loongarch_cpu_align[LARCH_ACTUAL_TUNE].function;
|
||||
+
|
||||
+ if (opts->x_flag_align_labels && !opts->x_str_align_labels)
|
||||
+ opts->x_str_align_labels = loongarch_cpu_align[LARCH_ACTUAL_TUNE].label;
|
||||
+
|
||||
if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
|
||||
error ("%qs cannot be used for compiling a shared library",
|
||||
"-mdirect-extern-access");
|
||||
--
|
||||
2.33.0
|
||||
|
||||
37
LoongArch-Slightly-simplify-loongarch_block_move_str.patch
Normal file
37
LoongArch-Slightly-simplify-loongarch_block_move_str.patch
Normal file
@ -0,0 +1,37 @@
|
||||
From f4a0248c80fedff3a6841407ff95b732dfbb93a1 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Fri, 8 Sep 2023 00:29:57 +0800
|
||||
Subject: [PATCH 073/124] LoongArch: Slightly simplify
|
||||
loongarch_block_move_straight
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.cc (loongarch_block_move_straight):
|
||||
Check precondition (delta must be a power of 2) and use
|
||||
popcount_hwi instead of a homebrew loop.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.cc | 5 ++---
|
||||
1 file changed, 2 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index baa5c2354..baa9831aa 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -5221,9 +5221,8 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length,
|
||||
emit two ld.d/st.d pairs, one ld.w/st.w pair, and one ld.b/st.b
|
||||
pair. For each load/store pair we use a dedicated register to keep
|
||||
the pipeline as populated as possible. */
|
||||
- HOST_WIDE_INT num_reg = length / delta;
|
||||
- for (delta_cur = delta / 2; delta_cur != 0; delta_cur /= 2)
|
||||
- num_reg += !!(length & delta_cur);
|
||||
+ gcc_assert (pow2p_hwi (delta));
|
||||
+ HOST_WIDE_INT num_reg = length / delta + popcount_hwi (length % delta);
|
||||
|
||||
/* Allocate a buffer for the temporary registers. */
|
||||
regs = XALLOCAVEC (rtx, num_reg);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
1234
LoongArch-Subdivision-symbol-type-add-SYMBOL_PCREL-s.patch
Normal file
1234
LoongArch-Subdivision-symbol-type-add-SYMBOL_PCREL-s.patch
Normal file
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user