310 lines
10 KiB
Diff
310 lines
10 KiB
Diff
From e86c9ece7ae922fe80017ba2ffe22f6267531682 Mon Sep 17 00:00:00 2001
|
|
From: Xi Ruoyao <xry111@xry111.site>
|
|
Date: Sun, 23 Apr 2023 20:52:22 +0800
|
|
Subject: [PATCH 045/124] LoongArch: Enable shrink wrapping
|
|
|
|
This commit implements the target macros for shrink wrapping of function
|
|
prologues/epilogues shrink wrapping on LoongArch.
|
|
|
|
Bootstrapped and regtested on loongarch64-linux-gnu. I don't have an
|
|
access to SPEC CPU so I hope the reviewer can perform a benchmark to see
|
|
if there is real benefit.
|
|
|
|
gcc/ChangeLog:
|
|
|
|
* config/loongarch/loongarch.h (struct machine_function): Add
|
|
reg_is_wrapped_separately array for register wrapping
|
|
information.
|
|
* config/loongarch/loongarch.cc
|
|
(loongarch_get_separate_components): New function.
|
|
(loongarch_components_for_bb): Likewise.
|
|
(loongarch_disqualify_components): Likewise.
|
|
(loongarch_process_components): Likewise.
|
|
(loongarch_emit_prologue_components): Likewise.
|
|
(loongarch_emit_epilogue_components): Likewise.
|
|
(loongarch_set_handled_components): Likewise.
|
|
(TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS): Define.
|
|
(TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB): Likewise.
|
|
(TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS): Likewise.
|
|
(TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS): Likewise.
|
|
(TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS): Likewise.
|
|
(TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS): Likewise.
|
|
(loongarch_for_each_saved_reg): Skip registers that are wrapped
|
|
separately.
|
|
|
|
gcc/testsuite/ChangeLog:
|
|
|
|
* gcc.target/loongarch/shrink-wrap.c: New test.
|
|
|
|
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
|
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
|
---
|
|
gcc/config/loongarch/loongarch.cc | 179 +++++++++++++++++-
|
|
gcc/config/loongarch/loongarch.h | 2 +
|
|
.../gcc.target/loongarch/shrink-wrap.c | 19 ++
|
|
3 files changed, 197 insertions(+), 3 deletions(-)
|
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
|
|
|
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
|
index d3c6f22ad..4c0f393b6 100644
|
|
--- a/gcc/config/loongarch/loongarch.cc
|
|
+++ b/gcc/config/loongarch/loongarch.cc
|
|
@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3. If not see
|
|
#include "builtins.h"
|
|
#include "rtl-iter.h"
|
|
#include "opts.h"
|
|
+#include "function-abi.h"
|
|
|
|
/* This file should be included last. */
|
|
#include "target-def.h"
|
|
@@ -1014,19 +1015,23 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
|
|
for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
|
|
if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
|
|
{
|
|
- loongarch_save_restore_reg (word_mode, regno, offset, fn);
|
|
+ if (!cfun->machine->reg_is_wrapped_separately[regno])
|
|
+ loongarch_save_restore_reg (word_mode, regno, offset, fn);
|
|
+
|
|
offset -= UNITS_PER_WORD;
|
|
}
|
|
|
|
/* This loop must iterate over the same space as its companion in
|
|
loongarch_compute_frame_info. */
|
|
offset = cfun->machine->frame.fp_sp_offset - sp_offset;
|
|
+ machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
|
|
+
|
|
for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
|
|
if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
|
|
{
|
|
- machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
|
|
+ if (!cfun->machine->reg_is_wrapped_separately[regno])
|
|
+ loongarch_save_restore_reg (word_mode, regno, offset, fn);
|
|
|
|
- loongarch_save_restore_reg (mode, regno, offset, fn);
|
|
offset -= GET_MODE_SIZE (mode);
|
|
}
|
|
}
|
|
@@ -6630,6 +6635,151 @@ loongarch_asan_shadow_offset (void)
|
|
return TARGET_64BIT ? (HOST_WIDE_INT_1 << 46) : 0;
|
|
}
|
|
|
|
+static sbitmap
|
|
+loongarch_get_separate_components (void)
|
|
+{
|
|
+ HOST_WIDE_INT offset;
|
|
+ sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
|
|
+ bitmap_clear (components);
|
|
+ offset = cfun->machine->frame.gp_sp_offset;
|
|
+
|
|
+ /* The stack should be aligned to 16-bytes boundary, so we can make the use
|
|
+ of ldptr instructions. */
|
|
+ gcc_assert (offset % UNITS_PER_WORD == 0);
|
|
+
|
|
+ for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
|
|
+ if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
|
|
+ {
|
|
+ /* We can wrap general registers saved at [sp, sp + 32768) using the
|
|
+ ldptr/stptr instructions. For large offsets a pseudo register
|
|
+ might be needed which cannot be created during the shrink
|
|
+ wrapping pass.
|
|
+
|
|
+ TODO: This may need a revise when we add LA32 as ldptr.w is not
|
|
+ guaranteed available by the manual. */
|
|
+ if (offset < 32768)
|
|
+ bitmap_set_bit (components, regno);
|
|
+
|
|
+ offset -= UNITS_PER_WORD;
|
|
+ }
|
|
+
|
|
+ offset = cfun->machine->frame.fp_sp_offset;
|
|
+ for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
|
|
+ if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
|
|
+ {
|
|
+ /* We can only wrap FP registers with imm12 offsets. For large
|
|
+ offsets a pseudo register might be needed which cannot be
|
|
+ created during the shrink wrapping pass. */
|
|
+ if (IMM12_OPERAND (offset))
|
|
+ bitmap_set_bit (components, regno);
|
|
+
|
|
+ offset -= UNITS_PER_FPREG;
|
|
+ }
|
|
+
|
|
+ /* Don't mess with the hard frame pointer. */
|
|
+ if (frame_pointer_needed)
|
|
+ bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
|
|
+
|
|
+ bitmap_clear_bit (components, RETURN_ADDR_REGNUM);
|
|
+
|
|
+ return components;
|
|
+}
|
|
+
|
|
+static sbitmap
|
|
+loongarch_components_for_bb (basic_block bb)
|
|
+{
|
|
+ /* Registers are used in a bb if they are in the IN, GEN, or KILL sets. */
|
|
+ auto_bitmap used;
|
|
+ bitmap_copy (used, DF_LIVE_IN (bb));
|
|
+ bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->gen);
|
|
+ bitmap_ior_into (used, &DF_LIVE_BB_INFO (bb)->kill);
|
|
+
|
|
+ sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
|
|
+ bitmap_clear (components);
|
|
+
|
|
+ function_abi_aggregator callee_abis;
|
|
+ rtx_insn *insn;
|
|
+ FOR_BB_INSNS (bb, insn)
|
|
+ if (CALL_P (insn))
|
|
+ callee_abis.note_callee_abi (insn_callee_abi (insn));
|
|
+
|
|
+ HARD_REG_SET extra_caller_saves =
|
|
+ callee_abis.caller_save_regs (*crtl->abi);
|
|
+
|
|
+ for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
|
|
+ if (!fixed_regs[regno]
|
|
+ && !crtl->abi->clobbers_full_reg_p (regno)
|
|
+ && (TEST_HARD_REG_BIT (extra_caller_saves, regno) ||
|
|
+ bitmap_bit_p (used, regno)))
|
|
+ bitmap_set_bit (components, regno);
|
|
+
|
|
+ for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
|
|
+ if (!fixed_regs[regno]
|
|
+ && !crtl->abi->clobbers_full_reg_p (regno)
|
|
+ && (TEST_HARD_REG_BIT (extra_caller_saves, regno) ||
|
|
+ bitmap_bit_p (used, regno)))
|
|
+ bitmap_set_bit (components, regno);
|
|
+
|
|
+ return components;
|
|
+}
|
|
+
|
|
+static void
|
|
+loongarch_disqualify_components (sbitmap, edge, sbitmap, bool)
|
|
+{
|
|
+ /* Do nothing. */
|
|
+}
|
|
+
|
|
+static void
|
|
+loongarch_process_components (sbitmap components, loongarch_save_restore_fn fn)
|
|
+{
|
|
+ HOST_WIDE_INT offset = cfun->machine->frame.gp_sp_offset;
|
|
+
|
|
+ for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
|
|
+ if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
|
|
+ {
|
|
+ if (bitmap_bit_p (components, regno))
|
|
+ loongarch_save_restore_reg (word_mode, regno, offset, fn);
|
|
+
|
|
+ offset -= UNITS_PER_WORD;
|
|
+ }
|
|
+
|
|
+ offset = cfun->machine->frame.fp_sp_offset;
|
|
+ machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
|
|
+
|
|
+ for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
|
|
+ if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
|
|
+ {
|
|
+ if (bitmap_bit_p (components, regno))
|
|
+ loongarch_save_restore_reg (mode, regno, offset, fn);
|
|
+
|
|
+ offset -= UNITS_PER_FPREG;
|
|
+ }
|
|
+}
|
|
+
|
|
+static void
|
|
+loongarch_emit_prologue_components (sbitmap components)
|
|
+{
|
|
+ loongarch_process_components (components, loongarch_save_reg);
|
|
+}
|
|
+
|
|
+static void
|
|
+loongarch_emit_epilogue_components (sbitmap components)
|
|
+{
|
|
+ loongarch_process_components (components, loongarch_restore_reg);
|
|
+}
|
|
+
|
|
+static void
|
|
+loongarch_set_handled_components (sbitmap components)
|
|
+{
|
|
+ for (unsigned int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
|
|
+ if (bitmap_bit_p (components, regno))
|
|
+ cfun->machine->reg_is_wrapped_separately[regno] = true;
|
|
+
|
|
+ for (unsigned int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
|
|
+ if (bitmap_bit_p (components, regno))
|
|
+ cfun->machine->reg_is_wrapped_separately[regno] = true;
|
|
+}
|
|
+
|
|
/* Initialize the GCC target structure. */
|
|
#undef TARGET_ASM_ALIGNED_HI_OP
|
|
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
|
|
@@ -6827,6 +6977,29 @@ loongarch_asan_shadow_offset (void)
|
|
#undef TARGET_ASAN_SHADOW_OFFSET
|
|
#define TARGET_ASAN_SHADOW_OFFSET loongarch_asan_shadow_offset
|
|
|
|
+#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
|
|
+#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
|
|
+ loongarch_get_separate_components
|
|
+
|
|
+#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
|
|
+#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB loongarch_components_for_bb
|
|
+
|
|
+#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
|
|
+#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
|
|
+ loongarch_disqualify_components
|
|
+
|
|
+#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
|
|
+#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
|
|
+ loongarch_emit_prologue_components
|
|
+
|
|
+#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
|
|
+#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
|
|
+ loongarch_emit_epilogue_components
|
|
+
|
|
+#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
|
|
+#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
|
|
+ loongarch_set_handled_components
|
|
+
|
|
struct gcc_target targetm = TARGET_INITIALIZER;
|
|
|
|
#include "gt-loongarch.h"
|
|
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
|
|
index af24bfa01..44ebadfaa 100644
|
|
--- a/gcc/config/loongarch/loongarch.h
|
|
+++ b/gcc/config/loongarch/loongarch.h
|
|
@@ -1147,6 +1147,8 @@ struct GTY (()) machine_function
|
|
/* The current frame information, calculated by loongarch_compute_frame_info.
|
|
*/
|
|
struct loongarch_frame_info frame;
|
|
+
|
|
+ bool reg_is_wrapped_separately[FIRST_PSEUDO_REGISTER];
|
|
};
|
|
#endif
|
|
|
|
diff --git a/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c b/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
|
|
new file mode 100644
|
|
index 000000000..1431536c5
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/loongarch/shrink-wrap.c
|
|
@@ -0,0 +1,19 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-O -fshrink-wrap" } */
|
|
+
|
|
+/* We should not save anything before checking the value of x. */
|
|
+/* { dg-final { scan-assembler-not "st(ptr)?\\\.\[dw\].*b(eq|ne)z" } } */
|
|
+
|
|
+int
|
|
+foo (int x)
|
|
+{
|
|
+ __asm__ ("nop" :);
|
|
+ if (x)
|
|
+ {
|
|
+ __asm__ ("" ::: "s0", "s1");
|
|
+ return x;
|
|
+ }
|
|
+
|
|
+ __asm__ ("" ::: "s2", "s3");
|
|
+ return 0;
|
|
+}
|
|
--
|
|
2.33.0
|
|
|