gcc/LoongArch-Optimize-additions-with-immediates.patch

From a31baa1e437fa4acedfaf03db91c1d6e5ce78013 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sun, 2 Apr 2023 21:37:49 +0800
Subject: [PATCH 041/124] LoongArch: Optimize additions with immediates

1. Use addu16i.d for TARGET_64BIT and suitable immediates.
2. Split one addition with immediate into two addu16i.d or addi.{d/w}
   instructions if possible.  This can avoid using a temp register w/o
   increase the count of instructions.

Inspired by https://reviews.llvm.org/D143710 and
https://reviews.llvm.org/D147222.

Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for GCC 14?

gcc/ChangeLog:

	* config/loongarch/loongarch-protos.h
	(loongarch_addu16i_imm12_operand_p): New function prototype.
	(loongarch_split_plus_constant): Likewise.
	* config/loongarch/loongarch.cc
	(loongarch_addu16i_imm12_operand_p): New function.
	(loongarch_split_plus_constant): Likewise.
	* config/loongarch/loongarch.h (ADDU16I_OPERAND): New macro.
	(DUAL_IMM12_OPERAND): Likewise.
	(DUAL_ADDU16I_OPERAND): Likewise.
	* config/loongarch/constraints.md (La, Lb, Lc, Ld, Le): New
	constraint.
	* config/loongarch/predicates.md (const_dual_imm12_operand): New
	predicate.
	(const_addu16i_operand): Likewise.
	(const_addu16i_imm12_di_operand): Likewise.
	(const_addu16i_imm12_si_operand): Likewise.
	(plus_di_operand): Likewise.
	(plus_si_operand): Likewise.
	(plus_si_extend_operand): Likewise.
	* config/loongarch/loongarch.md (add<mode>3): Convert to
	define_insn_and_split.  Use plus_<mode>_operand predicate
	instead of arith_operand.  Add alternatives for La, Lb, Lc, Ld,
	and Le constraints.
	(*addsi3_extended): Convert to define_insn_and_split.  Use
	plus_si_extend_operand instead of arith_operand.  Add
	alternatives for La and Le alternatives.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/add-const.c: New test.
	* gcc.target/loongarch/stack-check-cfa-1.c: Adjust for stack
	frame size change.
	* gcc.target/loongarch/stack-check-cfa-2.c: Likewise.

Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
 gcc/config/loongarch/constraints.md           | 46 ++++++++++++-
 gcc/config/loongarch/loongarch-protos.h       |  2 +
 gcc/config/loongarch/loongarch.cc             | 44 +++++++++++++
 gcc/config/loongarch/loongarch.h              | 19 ++++++
 gcc/config/loongarch/loongarch.md             | 66 +++++++++++++++----
 gcc/config/loongarch/predicates.md            | 36 ++++++++++
 .../gcc.target/loongarch/add-const.c          | 45 +++++++++++++
 .../gcc.target/loongarch/stack-check-cfa-1.c  |  2 +-
 .../gcc.target/loongarch/stack-check-cfa-2.c  |  2 +-
 9 files changed, 246 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/add-const.c

diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
index 46f7f63ae..25f3cda35 100644
--- a/gcc/config/loongarch/constraints.md
+++ b/gcc/config/loongarch/constraints.md
@@ -60,7 +60,22 @@
 ;; "I" "A signed 12-bit constant (for arithmetic instructions)."
 ;; "J" "Integer zero."
 ;; "K" "An unsigned 12-bit constant (for logic instructions)."
-;; "L" <-----unused
+;; "L" -
+;;     "La"
+;;	 "A signed constant in [-4096, 2048) or (2047, 4094]."
+;;     "Lb"
+;;	 "A signed 32-bit constant and low 16-bit is zero, which can be
+;;	  added onto a register with addu16i.d.  It matches nothing if
+;;	  the addu16i.d instruction is not available."
+;;     "Lc"
+;;	 "A signed 64-bit constant can be expressed as Lb + I, but not a
+;;	  single Lb or I."
+;;     "Ld"
+;;	 "A signed 64-bit constant can be expressed as Lb + Lb, but not a
+;;	  single Lb."
+;;     "Le"
+;;	 "A signed 32-bit constant can be expressed as Lb + I, but not a
+;;	  single Lb or I."
 ;; "M" <-----unused
 ;; "N" <-----unused
 ;; "O" <-----unused
@@ -170,6 +185,35 @@
   (and (match_code "const_int")
        (match_test "IMM12_OPERAND_UNSIGNED (ival)")))

+(define_constraint "La"
+  "A signed constant in [-4096, 2048) or (2047, 4094]."
+  (and (match_code "const_int")
+       (match_test "DUAL_IMM12_OPERAND (ival)")))
+
+(define_constraint "Lb"
+  "A signed 32-bit constant and low 16-bit is zero, which can be added
+   onto a register with addu16i.d."
+  (and (match_code "const_int")
+       (match_test "ADDU16I_OPERAND (ival)")))
+
+(define_constraint "Lc"
+  "A signed 64-bit constant can be expressed as Lb + I, but not a single Lb
+   or I."
+  (and (match_code "const_int")
+       (match_test "loongarch_addu16i_imm12_operand_p (ival, DImode)")))
+
+(define_constraint "Ld"
+  "A signed 64-bit constant can be expressed as Lb + Lb, but not a single
+   Lb."
+  (and (match_code "const_int")
+       (match_test "DUAL_ADDU16I_OPERAND (ival)")))
+
+(define_constraint "Le"
+  "A signed 32-bit constant can be expressed as Lb + I, but not a single Lb
+   or I."
+  (and (match_code "const_int")
+       (match_test "loongarch_addu16i_imm12_operand_p (ival, SImode)")))
+
 (define_constraint "Yd"
   "@internal
    A constant @code{move_operand} that can be safely loaded using
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
index 77b221724..0a9b47722 100644
--- a/gcc/config/loongarch/loongarch-protos.h
+++ b/gcc/config/loongarch/loongarch-protos.h
@@ -83,6 +83,8 @@ extern rtx loongarch_legitimize_call_address (rtx);
 extern rtx loongarch_subword (rtx, bool);
 extern bool loongarch_split_move_p (rtx, rtx);
 extern void loongarch_split_move (rtx, rtx, rtx);
+extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
+extern void loongarch_split_plus_constant (rtx *, machine_mode);
 extern const char *loongarch_output_move (rtx, rtx);
 extern bool loongarch_cfun_has_cprestore_slot_p (void);
 #ifdef RTX_CODE
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 1a4686f03..233dddbac 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -3753,6 +3753,50 @@ loongarch_split_move (rtx dest, rtx src, rtx insn_)
     }
 }

+/* Check if adding an integer constant value for a specific mode can be
+   performed with an addu16i.d instruction and an addi.{w/d}
+   instruction.  */
+
+bool
+loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT value, machine_mode mode)
+{
+  /* Not necessary, but avoid unnecessary calculation if !TARGET_64BIT.  */
+  if (!TARGET_64BIT)
+    return false;
+
+  if ((value & 0xffff) == 0)
+    return false;
+
+  if (IMM12_OPERAND (value))
+    return false;
+
+  value = (value & ~HWIT_UC_0xFFF) + ((value & 0x800) << 1);
+  return ADDU16I_OPERAND (trunc_int_for_mode (value, mode));
+}
+
+/* Split one integer constant op[0] into two (op[1] and op[2]) for constant
+   plus operation in a specific mode.  The splitted constants can be added
+   onto a register with a single instruction (addi.{d/w} or addu16i.d).  */
+
+void
+loongarch_split_plus_constant (rtx *op, machine_mode mode)
+{
+  HOST_WIDE_INT v = INTVAL (op[0]), a;
+
+  if (DUAL_IMM12_OPERAND (v))
+    a = (v > 0 ? 2047 : -2048);
+  else if (loongarch_addu16i_imm12_operand_p (v, mode))
+    a = (v & ~HWIT_UC_0xFFF) + ((v & 0x800) << 1);
+  else if (mode == DImode && DUAL_ADDU16I_OPERAND (v))
+    a = (v > 0 ? 0x7fff : -0x8000) << 16;
+  else
+    gcc_unreachable ();
+
+  op[1] = gen_int_mode (a, mode);
+  v = v - (unsigned HOST_WIDE_INT) a;
+  op[2] = gen_int_mode (v, mode);
+}
+
 /* Return true if a move from SRC to DEST in INSN should be split.  */

 static bool
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index c6e37b1b4..9d3cd9ca0 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -612,6 +612,25 @@ enum reg_class

 #define CONST_LOW_PART(VALUE) ((VALUE) - CONST_HIGH_PART (VALUE))

+/* True if VALUE can be added onto a register with one addu16i.d
+   instruction.  */
+
+#define ADDU16I_OPERAND(VALUE)			\
+  (TARGET_64BIT && (((VALUE) & 0xffff) == 0	\
+   && IMM16_OPERAND ((HOST_WIDE_INT) (VALUE) / 65536)))
+
+/* True if VALUE can be added onto a register with two addi.{d/w}
+   instructions, but not one addi.{d/w} instruction.  */
+#define DUAL_IMM12_OPERAND(VALUE) \
+  (IN_RANGE ((VALUE), -4096, 4094) && !IMM12_OPERAND (VALUE))
+
+/* True if VALUE can be added onto a register with two addu16i.d
+   instruction, but not one addu16i.d instruction.  */
+#define DUAL_ADDU16I_OPERAND(VALUE)		\
+  (TARGET_64BIT && (((VALUE) & 0xffff) == 0	\
+   && !ADDU16I_OPERAND (VALUE)			\
+   && IN_RANGE ((VALUE) / 65536, -0x10000, 0xfffe)))
+
 #define IMM12_INT(X) IMM12_OPERAND (INTVAL (X))
 #define IMM12_INT_UNSIGNED(X) IMM12_OPERAND_UNSIGNED (INTVAL (X))
 #define LU12I_INT(X) LU12I_OPERAND (INTVAL (X))
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 833b94753..b2f7c7f78 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -598,24 +598,64 @@
   [(set_attr "type" "fadd")
    (set_attr "mode" "<UNITMODE>")])

-(define_insn "add<mode>3"
-  [(set (match_operand:GPR 0 "register_operand" "=r,r")
-	(plus:GPR (match_operand:GPR 1 "register_operand" "r,r")
-		  (match_operand:GPR 2 "arith_operand" "r,I")))]
+(define_insn_and_split "add<mode>3"
+  [(set (match_operand:GPR 0 "register_operand" "=r,r,r,r,r,r,r")
+	(plus:GPR (match_operand:GPR 1 "register_operand" "r,r,r,r,r,r,r")
+		  (match_operand:GPR 2 "plus_<mode>_operand"
+				       "r,I,La,Lb,Lc,Ld,Le")))]
   ""
-  "add%i2.<d>\t%0,%1,%2";
+  "@
+   add.<d>\t%0,%1,%2
+   addi.<d>\t%0,%1,%2
+   #
+   * operands[2] = GEN_INT (INTVAL (operands[2]) / 65536); \
+     return \"addu16i.d\t%0,%1,%2\";
+   #
+   #
+   #"
+  "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2]) \
+   && !ADDU16I_OPERAND (INTVAL (operands[2]))"
+  [(set (match_dup 0) (plus:GPR (match_dup 1) (match_dup 3)))
+   (set (match_dup 0) (plus:GPR (match_dup 0) (match_dup 4)))]
+  {
+    loongarch_split_plus_constant (&operands[2], <MODE>mode);
+  }
   [(set_attr "alu_type" "add")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "*addsi3_extended"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
+   (set_attr "mode" "<MODE>")
+   (set_attr "insn_count" "1,1,2,1,2,2,2")
+   (set (attr "enabled")
+      (cond
+	[(match_test "<MODE>mode != DImode && which_alternative == 4")
+	 (const_string "no")
+	 (match_test "<MODE>mode != DImode && which_alternative == 5")
+	 (const_string "no")
+	 (match_test "<MODE>mode != SImode && which_alternative == 6")
+	 (const_string "no")]
+	(const_string "yes")))])
+
+(define_insn_and_split "*addsi3_extended"
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
 	(sign_extend:DI
-	     (plus:SI (match_operand:SI 1 "register_operand" "r,r")
-		      (match_operand:SI 2 "arith_operand" "r,I"))))]
+	     (plus:SI (match_operand:SI 1 "register_operand" "r,r,r,r")
+		      (match_operand:SI 2 "plus_si_extend_operand"
+					  "r,I,La,Le"))))]
   "TARGET_64BIT"
-  "add%i2.w\t%0,%1,%2"
+  "@
+   add.w\t%0,%1,%2
+   addi.w\t%0,%1,%2
+   #
+   #"
+  "CONST_INT_P (operands[2]) && !IMM12_INT (operands[2])"
+  [(set (subreg:SI (match_dup 0) 0) (plus:SI (match_dup 1) (match_dup 3)))
+   (set (match_dup 0)
+	(sign_extend:DI (plus:SI (subreg:SI (match_dup 0) 0)
+				 (match_dup 4))))]
+  {
+    loongarch_split_plus_constant (&operands[2], SImode);
+  }
   [(set_attr "alu_type" "add")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "SI")
+   (set_attr "insn_count" "1,1,2,2")])


 ;;
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
index 3c32b2987..4966d5569 100644
--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
@@ -39,14 +39,50 @@
   (and (match_code "const_int")
        (match_test "IMM12_OPERAND (INTVAL (op))")))

+(define_predicate "const_dual_imm12_operand"
+  (and (match_code "const_int")
+       (match_test "DUAL_IMM12_OPERAND (INTVAL (op))")))
+
 (define_predicate "const_imm16_operand"
   (and (match_code "const_int")
        (match_test "IMM16_OPERAND (INTVAL (op))")))

+(define_predicate "const_addu16i_operand"
+  (and (match_code "const_int")
+       (match_test "ADDU16I_OPERAND (INTVAL (op))")))
+
+(define_predicate "const_addu16i_imm12_di_operand"
+  (and (match_code "const_int")
+       (match_test "loongarch_addu16i_imm12_operand_p (INTVAL (op), DImode)")))
+
+(define_predicate "const_addu16i_imm12_si_operand"
+  (and (match_code "const_int")
+       (match_test "loongarch_addu16i_imm12_operand_p (INTVAL (op), SImode)")))
+
+(define_predicate "const_dual_addu16i_operand"
+  (and (match_code "const_int")
+       (match_test "DUAL_ADDU16I_OPERAND (INTVAL (op))")))
+
 (define_predicate "arith_operand"
   (ior (match_operand 0 "const_arith_operand")
        (match_operand 0 "register_operand")))

+(define_predicate "plus_di_operand"
+  (ior (match_operand 0 "arith_operand")
+       (match_operand 0 "const_dual_imm12_operand")
+       (match_operand 0 "const_addu16i_operand")
+       (match_operand 0 "const_addu16i_imm12_di_operand")
+       (match_operand 0 "const_dual_addu16i_operand")))
+
+(define_predicate "plus_si_extend_operand"
+  (ior (match_operand 0 "arith_operand")
+       (match_operand 0 "const_dual_imm12_operand")
+       (match_operand 0 "const_addu16i_imm12_si_operand")))
+
+(define_predicate "plus_si_operand"
+  (ior (match_operand 0 "plus_si_extend_operand")
+       (match_operand 0 "const_addu16i_operand")))
+
 (define_predicate "const_immalsl_operand"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 1, 4)")))
diff --git a/gcc/testsuite/gcc.target/loongarch/add-const.c b/gcc/testsuite/gcc.target/loongarch/add-const.c
new file mode 100644
index 000000000..7b6a7cb92
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/add-const.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mabi=lp64d" } */
+
+/* None of these functions should load the const operand into a temp
+   register.  */
+
+/* { dg-final { scan-assembler-not "add\\.[dw]" } } */
+
+unsigned long f01 (unsigned long x) { return x + 1; }
+unsigned long f02 (unsigned long x) { return x - 1; }
+unsigned long f03 (unsigned long x) { return x + 2047; }
+unsigned long f04 (unsigned long x) { return x + 4094; }
+unsigned long f05 (unsigned long x) { return x - 2048; }
+unsigned long f06 (unsigned long x) { return x - 4096; }
+unsigned long f07 (unsigned long x) { return x + 0x7fff0000; }
+unsigned long f08 (unsigned long x) { return x - 0x80000000l; }
+unsigned long f09 (unsigned long x) { return x + 0x7fff0000l * 2; }
+unsigned long f10 (unsigned long x) { return x - 0x80000000l * 2; }
+unsigned long f11 (unsigned long x) { return x + 0x7fff0000 + 0x1; }
+unsigned long f12 (unsigned long x) { return x + 0x7fff0000 - 0x1; }
+unsigned long f13 (unsigned long x) { return x + 0x7fff0000 + 0x7ff; }
+unsigned long f14 (unsigned long x) { return x + 0x7fff0000 - 0x800; }
+unsigned long f15 (unsigned long x) { return x - 0x80000000l - 1; }
+unsigned long f16 (unsigned long x) { return x - 0x80000000l + 1; }
+unsigned long f17 (unsigned long x) { return x - 0x80000000l - 0x800; }
+unsigned long f18 (unsigned long x) { return x - 0x80000000l + 0x7ff; }
+
+unsigned int g01 (unsigned int x) { return x + 1; }
+unsigned int g02 (unsigned int x) { return x - 1; }
+unsigned int g03 (unsigned int x) { return x + 2047; }
+unsigned int g04 (unsigned int x) { return x + 4094; }
+unsigned int g05 (unsigned int x) { return x - 2048; }
+unsigned int g06 (unsigned int x) { return x - 4096; }
+unsigned int g07 (unsigned int x) { return x + 0x7fff0000; }
+unsigned int g08 (unsigned int x) { return x - 0x80000000l; }
+unsigned int g09 (unsigned int x) { return x + 0x7fff0000l * 2; }
+unsigned int g10 (unsigned int x) { return x - 0x80000000l * 2; }
+unsigned int g11 (unsigned int x) { return x + 0x7fff0000 + 0x1; }
+unsigned int g12 (unsigned int x) { return x + 0x7fff0000 - 0x1; }
+unsigned int g13 (unsigned int x) { return x + 0x7fff0000 + 0x7ff; }
+unsigned int g14 (unsigned int x) { return x + 0x7fff0000 - 0x800; }
+unsigned int g15 (unsigned int x) { return x - 0x80000000l - 1; }
+unsigned int g16 (unsigned int x) { return x - 0x80000000l + 1; }
+unsigned int g17 (unsigned int x) { return x - 0x80000000l - 0x800; }
+unsigned int g18 (unsigned int x) { return x - 0x80000000l + 0x7ff; }
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
index 3533fe7b6..cd72154f4 100644
--- a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-1.c
@@ -6,7 +6,7 @@
 #define SIZE 128*1024
 #include "stack-check-prologue.h"

-/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 131088} 1 } } */
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 131072} 1 } } */
 /* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1 } } */

 /* Checks that the CFA notes are correct for every sp adjustment.  */
diff --git a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
index e5e711105..3e5ca05b2 100644
--- a/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
+++ b/gcc/testsuite/gcc.target/loongarch/stack-check-cfa-2.c
@@ -6,7 +6,7 @@
 #define SIZE 1280*1024 + 512
 #include "stack-check-prologue.h"

-/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 1311248} 1 } } */
+/* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 1311232} 1 } } */
 /* { dg-final { scan-assembler-times {\.cfi_def_cfa_offset 0} 1 } } */

 /* Checks that the CFA notes are correct for every sp adjustment.  */
--
2.33.0