155 lines
5.9 KiB
Diff
155 lines
5.9 KiB
Diff
From 01b932dead0e7bcc05aae2ac742c76b5fcac5ae7 Mon Sep 17 00:00:00 2001
|
|
From: Xi Ruoyao <xry111@xry111.site>
|
|
Date: Tue, 5 Sep 2023 21:02:38 +0800
|
|
Subject: [PATCH 072/124] LoongArch: Use LSX and LASX for block move
|
|
|
|
gcc/ChangeLog:
|
|
|
|
* config/loongarch/loongarch.h (LARCH_MAX_MOVE_PER_INSN):
|
|
Define to the maximum number of bytes that can be loaded or
|
|
stored with one machine instruction.
|
|
* config/loongarch/loongarch.cc (loongarch_mode_for_move_size):
|
|
New static function.
|
|
(loongarch_block_move_straight): Call
|
|
loongarch_mode_for_move_size to get the machine_mode to be moved.
|
|
(loongarch_expand_block_move): Use LARCH_MAX_MOVE_PER_INSN
|
|
instead of UNITS_PER_WORD.
|
|
|
|
gcc/testsuite/ChangeLog:
|
|
|
|
* gcc.target/loongarch/memcpy-vec-1.c: New test.
|
|
* gcc.target/loongarch/memcpy-vec-2.c: New test.
|
|
* gcc.target/loongarch/memcpy-vec-3.c: New test.
|
|
|
|
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
|
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
|
---
|
|
gcc/config/loongarch/loongarch.cc | 22 +++++++++++++++----
|
|
gcc/config/loongarch/loongarch.h | 3 +++
|
|
.../gcc.target/loongarch/memcpy-vec-1.c | 11 ++++++++++
|
|
.../gcc.target/loongarch/memcpy-vec-2.c | 12 ++++++++++
|
|
.../gcc.target/loongarch/memcpy-vec-3.c | 6 +++++
|
|
5 files changed, 50 insertions(+), 4 deletions(-)
|
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/memcpy-vec-1.c
|
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/memcpy-vec-2.c
|
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c
|
|
|
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
|
index 4b0944d56..baa5c2354 100644
|
|
--- a/gcc/config/loongarch/loongarch.cc
|
|
+++ b/gcc/config/loongarch/loongarch.cc
|
|
@@ -5187,6 +5187,20 @@ loongarch_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
|
|
return true;
|
|
}
|
|
|
|
+static machine_mode
|
|
+loongarch_mode_for_move_size (HOST_WIDE_INT size)
|
|
+{
|
|
+ switch (size)
|
|
+ {
|
|
+ case 32:
|
|
+ return V32QImode;
|
|
+ case 16:
|
|
+ return V16QImode;
|
|
+ }
|
|
+
|
|
+ return int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
|
|
+}
|
|
+
|
|
/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
|
|
Assume that the areas do not overlap. */
|
|
|
|
@@ -5216,7 +5230,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length,
|
|
|
|
for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2)
|
|
{
|
|
- mode = int_mode_for_size (delta_cur * BITS_PER_UNIT, 0).require ();
|
|
+ mode = loongarch_mode_for_move_size (delta_cur);
|
|
|
|
for (; offs + delta_cur <= length; offs += delta_cur, i++)
|
|
{
|
|
@@ -5227,7 +5241,7 @@ loongarch_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length,
|
|
|
|
for (delta_cur = delta, i = 0, offs = 0; offs < length; delta_cur /= 2)
|
|
{
|
|
- mode = int_mode_for_size (delta_cur * BITS_PER_UNIT, 0).require ();
|
|
+ mode = loongarch_mode_for_move_size (delta_cur);
|
|
|
|
for (; offs + delta_cur <= length; offs += delta_cur, i++)
|
|
loongarch_emit_move (adjust_address (dest, mode, offs), regs[i]);
|
|
@@ -5322,8 +5336,8 @@ loongarch_expand_block_move (rtx dest, rtx src, rtx r_length, rtx r_align)
|
|
|
|
HOST_WIDE_INT align = INTVAL (r_align);
|
|
|
|
- if (!TARGET_STRICT_ALIGN || align > UNITS_PER_WORD)
|
|
- align = UNITS_PER_WORD;
|
|
+ if (!TARGET_STRICT_ALIGN || align > LARCH_MAX_MOVE_PER_INSN)
|
|
+ align = LARCH_MAX_MOVE_PER_INSN;
|
|
|
|
if (length <= align * LARCH_MAX_MOVE_OPS_STRAIGHT)
|
|
{
|
|
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
|
|
index b2295c589..c7e91a06d 100644
|
|
--- a/gcc/config/loongarch/loongarch.h
|
|
+++ b/gcc/config/loongarch/loongarch.h
|
|
@@ -1181,6 +1181,9 @@ typedef struct {
|
|
least twice. */
|
|
#define LARCH_MAX_MOVE_OPS_STRAIGHT (LARCH_MAX_MOVE_OPS_PER_LOOP_ITER * 2)
|
|
|
|
+#define LARCH_MAX_MOVE_PER_INSN \
|
|
+ (ISA_HAS_LASX ? 32 : (ISA_HAS_LSX ? 16 : UNITS_PER_WORD))
|
|
+
|
|
/* The base cost of a memcpy call, for MOVE_RATIO and friends. These
|
|
values were determined experimentally by benchmarking with CSiBE.
|
|
*/
|
|
diff --git a/gcc/testsuite/gcc.target/loongarch/memcpy-vec-1.c b/gcc/testsuite/gcc.target/loongarch/memcpy-vec-1.c
|
|
new file mode 100644
|
|
index 000000000..8d9fedc9e
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/loongarch/memcpy-vec-1.c
|
|
@@ -0,0 +1,11 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-O2 -mabi=lp64d -march=la464 -mno-strict-align" } */
|
|
+/* { dg-final { scan-assembler-times "xvst" 2 } } */
|
|
+/* { dg-final { scan-assembler-times "\tvst" 1 } } */
|
|
+/* { dg-final { scan-assembler-times "st\\.d|stptr\\.d" 1 } } */
|
|
+/* { dg-final { scan-assembler-times "st\\.w|stptr\\.w" 1 } } */
|
|
+/* { dg-final { scan-assembler-times "st\\.h" 1 } } */
|
|
+/* { dg-final { scan-assembler-times "st\\.b" 1 } } */
|
|
+
|
|
+extern char a[], b[];
|
|
+void test() { __builtin_memcpy(a, b, 95); }
|
|
diff --git a/gcc/testsuite/gcc.target/loongarch/memcpy-vec-2.c b/gcc/testsuite/gcc.target/loongarch/memcpy-vec-2.c
|
|
new file mode 100644
|
|
index 000000000..6b28b884d
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/loongarch/memcpy-vec-2.c
|
|
@@ -0,0 +1,12 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-O2 -mabi=lp64d -march=la464 -mno-strict-align" } */
|
|
+/* { dg-final { scan-assembler-times "xvst" 2 } } */
|
|
+/* { dg-final { scan-assembler-times "\tvst" 1 } } */
|
|
+/* { dg-final { scan-assembler-times "st\\.d|stptr\\.d" 1 } } */
|
|
+/* { dg-final { scan-assembler-times "st\\.w|stptr\\.w" 1 } } */
|
|
+/* { dg-final { scan-assembler-times "st\\.h" 1 } } */
|
|
+/* { dg-final { scan-assembler-times "st\\.b" 1 } } */
|
|
+
|
|
+typedef char __attribute__ ((vector_size (32), aligned (32))) vec;
|
|
+extern vec a[], b[];
|
|
+void test() { __builtin_memcpy(a, b, 95); }
|
|
diff --git a/gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c b/gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c
|
|
new file mode 100644
|
|
index 000000000..233ed2150
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/loongarch/memcpy-vec-3.c
|
|
@@ -0,0 +1,6 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-O2 -march=la464 -mabi=lp64d -mstrict-align" } */
|
|
+/* { dg-final { scan-assembler-not "vst" } } */
|
|
+
|
|
+extern char a[], b[];
|
|
+void test() { __builtin_memcpy(a, b, 16); }
|
|
--
|
|
2.33.0
|
|
|