1246 lines
39 KiB
Diff
1246 lines
39 KiB
Diff
From 9a8e5716543972dec36bae1f9d380d27bfbcdae1 Mon Sep 17 00:00:00 2001
|
||
From: Agrachev Andrey WX1228450 <agrachev.andrey@huawei-partners.com>
|
||
Date: Mon, 21 Aug 2023 12:35:19 +0300
|
||
Subject: [PATCH 09/18] Add split-complex-instructions pass
|
||
|
||
- Add option -fsplit-ldp-stp
|
||
- Add functionality to detect and split LDP instructions that depend on a preceding store.
|
||
- Add -param=param-ldp-dependency-search-range= to configure ldp dependency search range
|
||
- Add RTL tests
|
||
|
||
Co-authored-by: Chernonog Vyacheslav 00812786 <chernonog.vyacheslav@huawei.com>
|
||
Co-authored-by: Zinin Ivan WX1305386 <zinin.ivan@huawei-partners.com>
|
||
Co-authored-by: Gadzhiev Emin WX1195297 <gadzhiev.emin@huawei-partners.com>
|
||
---
|
||
gcc/common.opt | 5 +
|
||
gcc/config/aarch64/aarch64.cc | 42 ++
|
||
gcc/doc/tm.texi | 8 +
|
||
gcc/doc/tm.texi.in | 4 +
|
||
gcc/params.opt | 3 +
|
||
gcc/passes.def | 1 +
|
||
gcc/sched-rgn.cc | 704 +++++++++++++++++-
|
||
gcc/target.def | 10 +
|
||
.../gcc.dg/rtl/aarch64/test-ldp-dont-split.c | 74 ++
|
||
.../rtl/aarch64/test-ldp-split-rearrange.c | 40 +
|
||
.../gcc.dg/rtl/aarch64/test-ldp-split.c | 174 +++++
|
||
gcc/timevar.def | 1 +
|
||
gcc/tree-pass.h | 1 +
|
||
13 files changed, 1066 insertions(+), 1 deletion(-)
|
||
create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
|
||
create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
|
||
create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
|
||
|
||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||
index a42bee250..c0e3f5687 100644
|
||
--- a/gcc/common.opt
|
||
+++ b/gcc/common.opt
|
||
@@ -1797,6 +1797,11 @@ floop-nest-optimize
|
||
Common Var(flag_loop_nest_optimize) Optimization
|
||
Enable the loop nest optimizer.
|
||
|
||
+fsplit-ldp-stp
|
||
+Common Var(flag_split_ldp_stp) Optimization
|
||
+Split load/store pair instructions into separate load/store operations
|
||
+for better performance.
|
||
+
|
||
fstrict-volatile-bitfields
|
||
Common Var(flag_strict_volatile_bitfields) Init(-1) Optimization
|
||
Force bitfield accesses to match their type width.
|
||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||
index 04072ca25..48e2eded0 100644
|
||
--- a/gcc/config/aarch64/aarch64.cc
|
||
+++ b/gcc/config/aarch64/aarch64.cc
|
||
@@ -27507,6 +27507,48 @@ aarch64_run_selftests (void)
|
||
|
||
#endif /* #if CHECKING_P */
|
||
|
||
+/* TODO: use a full list of instruction codes instead of ranges. */
|
||
+
|
||
+bool
|
||
+is_aarch64_ldp_insn (int icode)
|
||
+{
|
||
+ if ((icode >= CODE_FOR_load_pair_sw_sisi
|
||
+ && icode <= CODE_FOR_load_pair_dw_tftf)
|
||
+ || (icode >= CODE_FOR_loadwb_pairsi_si
|
||
+ && icode <= CODE_FOR_loadwb_pairtf_di)
|
||
+ || (icode >= CODE_FOR_load_pairv8qiv8qi
|
||
+ && icode <= CODE_FOR_load_pairdfdf)
|
||
+ || (icode >= CODE_FOR_load_pairv16qiv16qi
|
||
+ && icode <= CODE_FOR_load_pairv8bfv2df)
|
||
+ || (icode >= CODE_FOR_load_pair_lanesv8qi
|
||
+ && icode <= CODE_FOR_load_pair_lanesdf))
|
||
+ return true;
|
||
+ return false;
|
||
+}
|
||
+
|
||
+bool
|
||
+is_aarch64_stp_insn (int icode)
|
||
+{
|
||
+ if ((icode >= CODE_FOR_store_pair_sw_sisi
|
||
+ && icode <= CODE_FOR_store_pair_dw_tftf)
|
||
+ || (icode >= CODE_FOR_storewb_pairsi_si
|
||
+ && icode <= CODE_FOR_storewb_pairtf_di)
|
||
+ || (icode >= CODE_FOR_vec_store_pairv8qiv8qi
|
||
+ && icode <= CODE_FOR_vec_store_pairdfdf)
|
||
+ || (icode >= CODE_FOR_vec_store_pairv16qiv16qi
|
||
+ && icode <= CODE_FOR_vec_store_pairv8bfv2df)
|
||
+ || (icode >= CODE_FOR_store_pair_lanesv8qi
|
||
+ && icode <= CODE_FOR_store_pair_lanesdf))
|
||
+ return true;
|
||
+ return false;
|
||
+}
|
||
+
|
||
+#undef TARGET_IS_LDP_INSN
|
||
+#define TARGET_IS_LDP_INSN is_aarch64_ldp_insn
|
||
+
|
||
+#undef TARGET_IS_STP_INSN
|
||
+#define TARGET_IS_STP_INSN is_aarch64_stp_insn
|
||
+
|
||
#undef TARGET_STACK_PROTECT_GUARD
|
||
#define TARGET_STACK_PROTECT_GUARD aarch64_stack_protect_guard
|
||
|
||
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
|
||
index c5006afc0..0c6415a9c 100644
|
||
--- a/gcc/doc/tm.texi
|
||
+++ b/gcc/doc/tm.texi
|
||
@@ -12113,6 +12113,14 @@ object files that are not referenced from @code{main} and uses export
|
||
lists.
|
||
@end defmac
|
||
|
||
+@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode})
|
||
+Return true if icode is corresponding to any of the LDP instruction types.
|
||
+@end deftypefn
|
||
+
|
||
+@deftypefn {Target Hook} bool TARGET_IS_STP_INSN (int @var{icode})
|
||
+Return true if icode is corresponding to any of the STP instruction types.
|
||
+@end deftypefn
|
||
+
|
||
@deftypefn {Target Hook} bool TARGET_CANNOT_MODIFY_JUMPS_P (void)
|
||
This target hook returns @code{true} past the point in which new jump
|
||
instructions could be created. On machines that require a register for
|
||
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
|
||
index f869ddd5e..6ff60e562 100644
|
||
--- a/gcc/doc/tm.texi.in
|
||
+++ b/gcc/doc/tm.texi.in
|
||
@@ -7977,6 +7977,10 @@ object files that are not referenced from @code{main} and uses export
|
||
lists.
|
||
@end defmac
|
||
|
||
+@hook TARGET_IS_LDP_INSN
|
||
+
|
||
+@hook TARGET_IS_STP_INSN
|
||
+
|
||
@hook TARGET_CANNOT_MODIFY_JUMPS_P
|
||
|
||
@hook TARGET_HAVE_CONDITIONAL_EXECUTION
|
||
diff --git a/gcc/params.opt b/gcc/params.opt
|
||
index 7fcc2398d..6176d4790 100644
|
||
--- a/gcc/params.opt
|
||
+++ b/gcc/params.opt
|
||
@@ -1217,4 +1217,7 @@ Enum(vrp_mode) String(ranger) Value(VRP_MODE_RANGER)
|
||
Common Joined UInteger Var(param_pointer_compression_size) Init(32) IntegerRange(8, 32) Param Optimization
|
||
Target size of compressed pointer, which should be 8, 16 or 32.
|
||
|
||
+-param=param-ldp-dependency-search-range=
|
||
+Common Joined UInteger Var(param_ldp_dependency_search_range) Init(16) IntegerRange(1, 32) Param Optimization
|
||
+Range for dependent ldp search in split-ldp-stp pass.
|
||
; This comment is to ensure we retain the blank line above.
|
||
diff --git a/gcc/passes.def b/gcc/passes.def
|
||
index 941bbadf0..a30e05688 100644
|
||
--- a/gcc/passes.def
|
||
+++ b/gcc/passes.def
|
||
@@ -514,6 +514,7 @@ along with GCC; see the file COPYING3. If not see
|
||
NEXT_PASS (pass_reorder_blocks);
|
||
NEXT_PASS (pass_leaf_regs);
|
||
NEXT_PASS (pass_split_before_sched2);
|
||
+ NEXT_PASS (pass_split_complex_instructions);
|
||
NEXT_PASS (pass_sched2);
|
||
NEXT_PASS (pass_stack_regs);
|
||
PUSH_INSERT_PASSES_WITHIN (pass_stack_regs)
|
||
diff --git a/gcc/sched-rgn.cc b/gcc/sched-rgn.cc
|
||
index a0dfdb788..b4df8bdc5 100644
|
||
--- a/gcc/sched-rgn.cc
|
||
+++ b/gcc/sched-rgn.cc
|
||
@@ -44,6 +44,8 @@ along with GCC; see the file COPYING3. If not see
|
||
are actually scheduled. */
|
||
|
||
#include "config.h"
|
||
+#define INCLUDE_SET
|
||
+#define INCLUDE_VECTOR
|
||
#include "system.h"
|
||
#include "coretypes.h"
|
||
#include "backend.h"
|
||
@@ -65,6 +67,7 @@ along with GCC; see the file COPYING3. If not see
|
||
#include "dbgcnt.h"
|
||
#include "pretty-print.h"
|
||
#include "print-rtl.h"
|
||
+#include "cfgrtl.h"
|
||
|
||
/* Disable warnings about quoting issues in the pp_xxx calls below
|
||
that (intentionally) don't follow GCC diagnostic conventions. */
|
||
@@ -3951,6 +3954,705 @@ make_pass_sched_fusion (gcc::context *ctxt)
|
||
return new pass_sched_fusion (ctxt);
|
||
}
|
||
|
||
+namespace {
|
||
+
|
||
+/* Def-use analysis special functions implementation. */
|
||
+
|
||
+static struct df_link *
|
||
+get_defs (rtx_insn *insn, rtx reg)
|
||
+{
|
||
+ df_ref use;
|
||
+ struct df_link *ref_chain, *ref_link;
|
||
+
|
||
+ FOR_EACH_INSN_USE (use, insn)
|
||
+ {
|
||
+ if (GET_CODE (DF_REF_REG (use)) == SUBREG)
|
||
+ return NULL;
|
||
+ if (REGNO (DF_REF_REG (use)) == REGNO (reg))
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ gcc_assert (use != NULL);
|
||
+
|
||
+ ref_chain = DF_REF_CHAIN (use);
|
||
+
|
||
+ for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
|
||
+ {
|
||
+ /* Problem getting some definition for this instruction. */
|
||
+ if (ref_link->ref == NULL)
|
||
+ return NULL;
|
||
+ if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
|
||
+ return NULL;
|
||
+ /* As global regs are assumed to be defined at each function call
|
||
+ dataflow can report a call_insn as being a definition of REG.
|
||
+ But we can't do anything with that in this pass so proceed only
|
||
+ if the instruction really sets REG in a way that can be deduced
|
||
+ from the RTL structure. */
|
||
+ if (global_regs[REGNO (reg)]
|
||
+ && !set_of (reg, DF_REF_INSN (ref_link->ref)))
|
||
+ return NULL;
|
||
+ }
|
||
+
|
||
+ return ref_chain;
|
||
+}
|
||
+
|
||
+static struct df_link *
|
||
+get_uses (rtx_insn *insn, rtx reg)
|
||
+{
|
||
+ df_ref def;
|
||
+ struct df_link *ref_chain, *ref_link;
|
||
+
|
||
+ FOR_EACH_INSN_DEF (def, insn)
|
||
+ if (REGNO (DF_REF_REG (def)) == REGNO (reg))
|
||
+ break;
|
||
+
|
||
+ gcc_assert (def != NULL && "Broken def-use analisys chain.");
|
||
+
|
||
+ ref_chain = DF_REF_CHAIN (def);
|
||
+
|
||
+ for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
|
||
+ {
|
||
+ /* Problem getting some use for this instruction. */
|
||
+ if (ref_link->ref == NULL)
|
||
+ return NULL;
|
||
+ }
|
||
+
|
||
+ return ref_chain;
|
||
+}
|
||
+
|
||
+const pass_data pass_data_split_complex_instructions = {
|
||
+ RTL_PASS, /* Type. */
|
||
+ "split_complex_instructions", /* Name. */
|
||
+ OPTGROUP_NONE, /* Optinfo_flags. */
|
||
+ TV_SPLIT_CMP_INS, /* Tv_id. */
|
||
+ 0, /* Properties_required. */
|
||
+ 0, /* Properties_provided. */
|
||
+ 0, /* Properties_destroyed. */
|
||
+ 0, /* Todo_flags_start. */
|
||
+ (TODO_df_verify | TODO_df_finish), /* Todo_flags_finish. */
|
||
+};
|
||
+
|
||
+class pass_split_complex_instructions : public rtl_opt_pass
|
||
+{
|
||
+private:
|
||
+ enum complex_instructions_t
|
||
+ {
|
||
+ UNDEFINED,
|
||
+ LDP,
|
||
+ LDP_TI,
|
||
+ STP,
|
||
+ STR
|
||
+ };
|
||
+
|
||
+ void split_complex_insn (rtx_insn *insn);
|
||
+ void split_ldp_ti (rtx_insn *insn);
|
||
+ void split_ldp_with_offset (rtx_insn *ldp_insn);
|
||
+ void split_simple_ldp (rtx_insn *ldp_insn);
|
||
+ void split_ldp_stp (rtx_insn *insn);
|
||
+ complex_instructions_t get_insn_type (rtx_insn *insn);
|
||
+
|
||
+ basic_block bb;
|
||
+ rtx_insn *insn;
|
||
+ std::set<rtx_insn *> dependent_stores_candidates;
|
||
+ std::set<rtx_insn *> ldp_to_split_list;
|
||
+
|
||
+ complex_instructions_t complex_insn_type = UNDEFINED;
|
||
+ bool is_store_insn (rtx_insn *insn);
|
||
+ bool is_ldp_dependent_on_store (rtx_insn *ldp_insn, basic_block bb);
|
||
+ bool bfs_for_reg_dependent_store (rtx_insn *ldp_insn, basic_block search_bb,
|
||
+ rtx_insn *search_insn,
|
||
+ int search_range
|
||
+ = param_ldp_dependency_search_range);
|
||
+ bool is_store_reg_dependent (rtx_insn *ldp_insn, rtx_insn *str_insn);
|
||
+ void init_df ();
|
||
+ void find_dependent_stores_candidates (rtx_insn *ldp_insn);
|
||
+ int get_insn_offset (rtx_insn *insn, complex_instructions_t insn_type,
|
||
+ int *arith_operation_ptr = NULL);
|
||
+
|
||
+public:
|
||
+ pass_split_complex_instructions (gcc::context *ctxt)
|
||
+ : rtl_opt_pass (pass_data_split_complex_instructions, ctxt)
|
||
+ {
|
||
+ }
|
||
+ /* opt_pass methods: */
|
||
+ virtual bool gate (function *);
|
||
+
|
||
+ virtual unsigned int
|
||
+ execute (function *)
|
||
+ {
|
||
+ enum rtx_code ldp_memref_code;
|
||
+ init_df ();
|
||
+ ldp_to_split_list.clear ();
|
||
+ FOR_EACH_BB_FN (bb, cfun)
|
||
+ {
|
||
+ FOR_BB_INSNS (bb, insn)
|
||
+ {
|
||
+ complex_instructions_t insn_type = get_insn_type (insn);
|
||
+ /* TODO: Add splitting of STP instructions. */
|
||
+ if (insn_type != LDP && insn_type != LDP_TI)
|
||
+ continue;
|
||
+ /* TODO: Currently support only ldp_ti and ldp with REG or
|
||
+ PLUS/MINUS offset expression. */
|
||
+ if (insn_type == LDP_TI)
|
||
+ {
|
||
+ ldp_memref_code = GET_CODE (XEXP (XEXP (PATTERN (insn), 1),
|
||
+ 0));
|
||
+ if (ldp_memref_code != REG && ldp_memref_code != PLUS
|
||
+ && ldp_memref_code != MINUS)
|
||
+ continue;
|
||
+ }
|
||
+ if (is_ldp_dependent_on_store (insn, bb))
|
||
+ {
|
||
+ ldp_to_split_list.insert (insn);
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+
|
||
+ for (std::set<rtx_insn *>::iterator i = ldp_to_split_list.begin ();
|
||
+ i != ldp_to_split_list.end (); ++i)
|
||
+ split_complex_insn (*i);
|
||
+
|
||
+ return 0;
|
||
+ }
|
||
+}; // class pass_split_complex_instructions
|
||
+
|
||
+bool
|
||
+pass_split_complex_instructions::is_ldp_dependent_on_store (rtx_insn *ldp_insn,
|
||
+ basic_block bb)
|
||
+{
|
||
+ find_dependent_stores_candidates (ldp_insn);
|
||
+ return bfs_for_reg_dependent_store (ldp_insn, bb, ldp_insn);
|
||
+}
|
||
+
|
||
+bool
|
||
+pass_split_complex_instructions::bfs_for_reg_dependent_store (
|
||
+ rtx_insn *ldp_insn, basic_block search_bb, rtx_insn *search_insn,
|
||
+ int search_range)
|
||
+{
|
||
+ rtx_insn *current_search_insn = search_insn;
|
||
+
|
||
+ for (int i = search_range; i > 0; --i)
|
||
+ {
|
||
+ if (!current_search_insn)
|
||
+ return false;
|
||
+ bool checking_result
|
||
+ = is_store_reg_dependent (ldp_insn, current_search_insn);
|
||
+ if (checking_result)
|
||
+ {
|
||
+ if (dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "LDP to split:\n");
|
||
+ print_rtl_single (dump_file, ldp_insn);
|
||
+ fprintf (dump_file, "Found STR:\n");
|
||
+ print_rtl_single (dump_file, current_search_insn);
|
||
+ }
|
||
+ return true;
|
||
+ }
|
||
+ if (current_search_insn == BB_HEAD (search_bb))
|
||
+ {
|
||
+ /* Search in all parent BBs for the reg_dependent store. */
|
||
+ edge_iterator ei;
|
||
+ edge e;
|
||
+
|
||
+ FOR_EACH_EDGE (e, ei, search_bb->preds)
|
||
+ if (e->src->index != 0
|
||
+ && bfs_for_reg_dependent_store (ldp_insn, e->src,
|
||
+ BB_END (e->src), i - 1))
|
||
+ return true;
|
||
+ return false;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ if (!active_insn_p (current_search_insn))
|
||
+ i++;
|
||
+ current_search_insn = PREV_INSN (current_search_insn);
|
||
+ }
|
||
+ }
|
||
+ return false;
|
||
+}
|
||
+
|
||
+void
|
||
+pass_split_complex_instructions::init_df ()
|
||
+{
|
||
+ df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
|
||
+ df_chain_add_problem (DF_UD_CHAIN + DF_DU_CHAIN);
|
||
+ df_mir_add_problem ();
|
||
+ df_live_add_problem ();
|
||
+ df_live_set_all_dirty ();
|
||
+ df_analyze ();
|
||
+ df_set_flags (DF_DEFER_INSN_RESCAN);
|
||
+}
|
||
+
|
||
+void
|
||
+pass_split_complex_instructions::find_dependent_stores_candidates (
|
||
+ rtx_insn *ldp_insn)
|
||
+{
|
||
+ dependent_stores_candidates.clear ();
|
||
+ df_ref use;
|
||
+
|
||
+ FOR_EACH_INSN_USE (use, ldp_insn)
|
||
+ {
|
||
+ df_link *defs = get_defs (ldp_insn, DF_REF_REG (use));
|
||
+ if (!defs)
|
||
+ return;
|
||
+
|
||
+ for (df_link *def = defs; def; def = def->next)
|
||
+ {
|
||
+ df_link *uses
|
||
+ = get_uses (DF_REF_INSN (def->ref), DF_REF_REG (def->ref));
|
||
+ if (!uses)
|
||
+ continue;
|
||
+
|
||
+ for (df_link *use = uses; use; use = use->next)
|
||
+ {
|
||
+ if (DF_REF_CLASS (use->ref) == DF_REF_REGULAR
|
||
+ && is_store_insn (DF_REF_INSN (use->ref)))
|
||
+ dependent_stores_candidates.insert (DF_REF_INSN (use->ref));
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+}
|
||
+
|
||
+bool
|
||
+pass_split_complex_instructions::is_store_reg_dependent (rtx_insn *ldp_insn,
|
||
+ rtx_insn *str_insn)
|
||
+{
|
||
+ if (!is_store_insn (str_insn)
|
||
+ || dependent_stores_candidates.find (str_insn)
|
||
+ == dependent_stores_candidates.end ())
|
||
+ return false;
|
||
+
|
||
+ int ldp_offset_sign = UNDEFINED;
|
||
+ int ldp_offset
|
||
+ = get_insn_offset (ldp_insn, get_insn_type (ldp_insn), &ldp_offset_sign);
|
||
+ if (ldp_offset_sign == MINUS)
|
||
+ ldp_offset = -ldp_offset;
|
||
+
|
||
+ int str_offset_sign = UNDEFINED;
|
||
+ int str_offset = get_insn_offset (str_insn, STR, &str_offset_sign);
|
||
+ if (str_offset_sign == MINUS)
|
||
+ str_offset = -str_offset;
|
||
+
|
||
+ if (str_offset == ldp_offset || str_offset == ldp_offset + 8)
|
||
+ return true;
|
||
+
|
||
+ return false;
|
||
+}
|
||
+
|
||
+bool
|
||
+pass_split_complex_instructions::is_store_insn (rtx_insn *insn)
|
||
+{
|
||
+ if (!insn)
|
||
+ return false;
|
||
+ rtx sset_b = single_set (insn);
|
||
+ /* TODO: The condition below accepts only store instructions in which
|
||
+ the memory location's operand is either a register (base) or a plus/minus
|
||
+ operation (base + #imm). So it might make sense to add support for other
|
||
+ cases (e.g. multiply and shift). */
|
||
+ if (sset_b && MEM_P (SET_DEST (sset_b))
|
||
+ && GET_MODE (XEXP (sset_b, 0)) != BLKmode
|
||
+ && (GET_CODE (XEXP (XEXP (sset_b, 0), 0)) == REG
|
||
+ || (GET_CODE (XEXP (XEXP (sset_b, 0), 0)) == PLUS
|
||
+ || GET_CODE (XEXP (XEXP (sset_b, 0), 0)) == MINUS)
|
||
+ && (GET_CODE (XEXP (XEXP (XEXP (sset_b, 0), 0), 1)) == CONST_INT)))
|
||
+ return true;
|
||
+
|
||
+ return false;
|
||
+}
|
||
+
|
||
+int
|
||
+pass_split_complex_instructions::get_insn_offset (
|
||
+ rtx_insn *insn, complex_instructions_t insn_type, int *arith_operation_ptr)
|
||
+{
|
||
+ rtx insn_pat = PATTERN (insn);
|
||
+ int returned_offset = 0;
|
||
+
|
||
+ rtx offset_expr = NULL;
|
||
+ rtx offset_value_expr = NULL;
|
||
+
|
||
+ switch (insn_type)
|
||
+ {
|
||
+ case LDP:
|
||
+ {
|
||
+ int number_of_sub_insns = XVECLEN (insn_pat, 0);
|
||
+
|
||
+ /* Calculate the offset of the first load insn itself. */
|
||
+ rtx_insn *first_load_insn = NULL;
|
||
+ if (number_of_sub_insns == 2)
|
||
+ {
|
||
+ first_load_insn
|
||
+ = make_insn_raw (copy_rtx (XVECEXP (insn_pat, 0, 0)));
|
||
+ arith_operation_ptr = NULL;
|
||
+
|
||
+ offset_expr = XEXP (XEXP (PATTERN (first_load_insn), 1), 0);
|
||
+ if (GET_CODE (offset_expr) == PLUS
|
||
+ || GET_CODE (offset_expr) == MINUS)
|
||
+ offset_value_expr
|
||
+ = XEXP (XEXP (XEXP (PATTERN (first_load_insn), 1), 0), 1);
|
||
+ else
|
||
+ offset_expr = NULL;
|
||
+ }
|
||
+ else if (number_of_sub_insns == 3)
|
||
+ {
|
||
+ rtx_insn *offset_sub_insn
|
||
+ = make_insn_raw (copy_rtx (XVECEXP (insn_pat, 0, 0)));
|
||
+
|
||
+ offset_expr = XEXP (PATTERN (offset_sub_insn), 1);
|
||
+ offset_value_expr = XEXP (XEXP (PATTERN (offset_sub_insn), 1), 1);
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ gcc_assert (false
|
||
+ && "Wrong number of elements in the ldp_insn vector");
|
||
+ }
|
||
+ break;
|
||
+ }
|
||
+ case LDP_TI:
|
||
+ {
|
||
+ offset_expr = XEXP (XEXP (insn_pat, 1), 0);
|
||
+ if (GET_CODE (offset_expr) != PLUS && GET_CODE (offset_expr) != MINUS)
|
||
+ return 0;
|
||
+ offset_value_expr = XEXP (XEXP (XEXP (insn_pat, 1), 0), 1);
|
||
+ break;
|
||
+ }
|
||
+ case STR:
|
||
+ {
|
||
+ offset_expr = XEXP (XEXP (insn_pat, 0), 0);
|
||
+ /* If memory location is specified by single base register then the
|
||
+ offset is zero. */
|
||
+ if (GET_CODE (offset_expr) == REG)
|
||
+ return 0;
|
||
+ offset_value_expr = XEXP (XEXP (XEXP (insn_pat, 0), 0), 1);
|
||
+ break;
|
||
+ }
|
||
+ default:
|
||
+ {
|
||
+ if (dumps_are_enabled && dump_file)
|
||
+ {
|
||
+ fprintf (dump_file, "Instruction that was tried to split:\n");
|
||
+ print_rtl_single (dump_file, insn);
|
||
+ }
|
||
+ gcc_assert (false && "Unsupported instruction type");
|
||
+ break;
|
||
+ }
|
||
+ }
|
||
+
|
||
+ if (offset_expr != NULL && offset_value_expr
|
||
+ && GET_CODE (offset_value_expr) == CONST_INT)
|
||
+ returned_offset = XINT (offset_value_expr, 0);
|
||
+
|
||
+ if (arith_operation_ptr != NULL)
|
||
+ {
|
||
+ *arith_operation_ptr = GET_CODE (offset_expr);
|
||
+ gcc_assert ((*arith_operation_ptr == MINUS
|
||
+ || *arith_operation_ptr == PLUS)
|
||
+ && "Unexpected arithmetic operation in the offset expr");
|
||
+ }
|
||
+
|
||
+ return returned_offset;
|
||
+}
|
||
+
|
||
+void
|
||
+pass_split_complex_instructions::split_simple_ldp (rtx_insn *ldp_insn)
|
||
+{
|
||
+ rtx pat = PATTERN (ldp_insn);
|
||
+
|
||
+ rtx_insn *mem_insn_1 = make_insn_raw (copy_rtx (XVECEXP (pat, 0, 0)));
|
||
+ rtx_insn *mem_insn_2 = make_insn_raw (copy_rtx (XVECEXP (pat, 0, 1)));
|
||
+
|
||
+ int dest_regno = REGNO (SET_DEST (PATTERN (mem_insn_1)));
|
||
+ int src_regno;
|
||
+
|
||
+ rtx srs_reg_insn = XEXP (SET_SRC (PATTERN (mem_insn_1)), 0);
|
||
+
|
||
+ if (GET_CODE (srs_reg_insn) == REG)
|
||
+ src_regno = REGNO (srs_reg_insn);
|
||
+ else
|
||
+ src_regno = REGNO (XEXP (srs_reg_insn, 0));
|
||
+
|
||
+ rtx_insn *emited_insn_1, *emited_insn_2;
|
||
+
|
||
+ /* in cases like ldp r1,r2,[r1] we emit ldr r2,[r1] first. */
|
||
+ if (src_regno == dest_regno)
|
||
+ std::swap (mem_insn_1, mem_insn_2);
|
||
+
|
||
+ emited_insn_1 = emit_insn (PATTERN (mem_insn_1));
|
||
+ emited_insn_2 = emit_insn (PATTERN (mem_insn_2));
|
||
+
|
||
+ int sub_insn_1_code = recog (PATTERN (mem_insn_1), mem_insn_1, 0);
|
||
+ int sub_insn_2_code = recog (PATTERN (mem_insn_2), mem_insn_2, 0);
|
||
+
|
||
+ INSN_CODE (emited_insn_1) = sub_insn_1_code;
|
||
+ INSN_CODE (emited_insn_2) = sub_insn_2_code;
|
||
+}
|
||
+
|
||
+void
|
||
+pass_split_complex_instructions::split_ldp_with_offset (rtx_insn *ldp_insn)
|
||
+{
|
||
+ rtx pat = PATTERN (ldp_insn);
|
||
+ bool post_index = true;
|
||
+
|
||
+ rtx_insn offset_insn;
|
||
+ rtx_insn mem_insn_1;
|
||
+ rtx_insn mem_insn_2;
|
||
+
|
||
+ int offset_insn_code;
|
||
+ int mem_insn_1_code = -1;
|
||
+ int mem_insn_2_code = -1;
|
||
+
|
||
+ int offset = 0;
|
||
+ int arith_operation = UNDEFINED;
|
||
+
|
||
+ for (int i = 0; i < 3; i++)
|
||
+ {
|
||
+ rtx sub_insn = XVECEXP (pat, 0, i);
|
||
+ rtx_insn *copy_of_sub_insn = make_insn_raw (copy_rtx (sub_insn));
|
||
+ int sub_insn_code
|
||
+ = recog (PATTERN (copy_of_sub_insn), copy_of_sub_insn, 0);
|
||
+
|
||
+ /* If sub_insn is offset related. */
|
||
+ if (GET_RTX_CLASS (sub_insn_code) == RTX_UNARY)
|
||
+ {
|
||
+ offset_insn = *copy_of_sub_insn;
|
||
+ offset_insn_code = sub_insn_code;
|
||
+ gcc_assert (i == 0
|
||
+ && "Offset related insn must be the first "
|
||
+ "element of a parallel insn vector");
|
||
+
|
||
+ offset = get_insn_offset (ldp_insn, LDP, &arith_operation);
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ if (GET_CODE (XEXP (PATTERN (copy_of_sub_insn), 0)) != REG)
|
||
+ {
|
||
+ rtx &offset_expr
|
||
+ = XEXP (XEXP (XEXP (PATTERN (copy_of_sub_insn), 0), 0), 1);
|
||
+ if (GET_CODE (offset_expr) == CONST_INT)
|
||
+ {
|
||
+ int local_offset = XINT (offset_expr, 0);
|
||
+ offset = (arith_operation == PLUS ? offset : -offset);
|
||
+
|
||
+ offset_expr = GEN_INT (local_offset + offset);
|
||
+
|
||
+ gcc_assert (
|
||
+ (arith_operation == MINUS || arith_operation == PLUS)
|
||
+ && "Unexpected arithmetic operation in offset related "
|
||
+ "sub_insn");
|
||
+
|
||
+ if (i == 1)
|
||
+ post_index = false;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ post_index = true;
|
||
+ }
|
||
+ }
|
||
+ }
|
||
+ if (i == 1)
|
||
+ {
|
||
+ mem_insn_1 = *copy_of_sub_insn;
|
||
+ mem_insn_1_code = sub_insn_code;
|
||
+ }
|
||
+ if (i == 2)
|
||
+ {
|
||
+ mem_insn_2 = *copy_of_sub_insn;
|
||
+ mem_insn_2_code = sub_insn_code;
|
||
+ }
|
||
+ }
|
||
+ gcc_assert (mem_insn_1_code != -1 && mem_insn_2_code != -1
|
||
+ && "Uninitialized memory insns");
|
||
+
|
||
+ int dest_regno = REGNO (SET_DEST (PATTERN (&mem_insn_1)));
|
||
+ int src_regno;
|
||
+
|
||
+ rtx srs_reg_insn = XEXP (SET_SRC (PATTERN (&mem_insn_1)), 0);
|
||
+
|
||
+ if (GET_CODE (srs_reg_insn) == REG)
|
||
+ src_regno = REGNO (srs_reg_insn);
|
||
+ else
|
||
+ src_regno = REGNO (XEXP (srs_reg_insn, 0));
|
||
+
|
||
+ /* Don't split such weird LDP. */
|
||
+ if (src_regno == dest_regno)
|
||
+ return;
|
||
+
|
||
+ rtx_insn *emited_offset_insn;
|
||
+ if (!post_index)
|
||
+ {
|
||
+ emited_offset_insn = emit_insn (PATTERN (&offset_insn));
|
||
+ INSN_CODE (emited_offset_insn) = offset_insn_code;
|
||
+ }
|
||
+
|
||
+ rtx_insn *emited_insn_1 = emit_insn (PATTERN (&mem_insn_1));
|
||
+ rtx_insn *emited_insn_2 = emit_insn (PATTERN (&mem_insn_2));
|
||
+
|
||
+
|
||
+ INSN_CODE (emited_insn_1) = mem_insn_1_code;
|
||
+ INSN_CODE (emited_insn_2) = mem_insn_2_code;
|
||
+
|
||
+ if (post_index)
|
||
+ {
|
||
+ emited_offset_insn = emit_insn (PATTERN (&offset_insn));
|
||
+ INSN_CODE (emited_offset_insn) = offset_insn_code;
|
||
+ }
|
||
+}
|
||
+
|
||
+void
|
||
+pass_split_complex_instructions::split_ldp_stp (rtx_insn *insn)
|
||
+{
|
||
+ rtx_insn *prev_insn = PREV_INSN (insn);
|
||
+ int number_of_sub_insns = XVECLEN (PATTERN (insn), 0);
|
||
+
|
||
+ start_sequence ();
|
||
+
|
||
+ if (number_of_sub_insns == 2)
|
||
+ split_simple_ldp (insn);
|
||
+ else if (number_of_sub_insns == 3)
|
||
+ split_ldp_with_offset (insn);
|
||
+ else
|
||
+ gcc_assert (false && "Broken complex insn vector");
|
||
+
|
||
+ rtx_insn *seq = get_insns ();
|
||
+ unshare_all_rtl_in_chain (seq);
|
||
+ end_sequence ();
|
||
+
|
||
+ emit_insn_after_setloc (seq, prev_insn, INSN_LOCATION (insn));
|
||
+ delete_insn_and_edges (insn);
|
||
+}
|
||
+
|
||
+void
|
||
+pass_split_complex_instructions::split_ldp_ti (rtx_insn *insn)
|
||
+{
|
||
+ rtx_insn *prev_insn = PREV_INSN (insn);
|
||
+ rtx_insn *load_insn_1 = make_insn_raw (copy_rtx (PATTERN (insn)));
|
||
+ rtx_insn *load_insn_2 = make_insn_raw (copy_rtx (PATTERN (insn)));
|
||
+
|
||
+ rtx reg_insn_1 = XEXP (PATTERN (load_insn_1), 0);
|
||
+ rtx mem_insn_1 = XEXP (PATTERN (load_insn_1), 1);
|
||
+ rtx mem_insn_2 = XEXP (PATTERN (load_insn_2), 1);
|
||
+
|
||
+ PUT_MODE (mem_insn_1, DImode);
|
||
+ PUT_MODE (mem_insn_2, DImode);
|
||
+
|
||
+ int reg_no_1 = REGNO (reg_insn_1);
|
||
+
|
||
+ XEXP (PATTERN (load_insn_1), 0) = gen_rtx_REG (DImode, reg_no_1);
|
||
+ XEXP (PATTERN (load_insn_2), 0) = gen_rtx_REG (DImode, reg_no_1 + 1);
|
||
+
|
||
+ rtx load_insn_2_plus_expr = XEXP (XEXP (PATTERN (load_insn_2), 1), 0);
|
||
+ if (GET_CODE (load_insn_2_plus_expr) == REG)
|
||
+ {
|
||
+ XEXP (XEXP (PATTERN (load_insn_2), 1), 0)
|
||
+ = gen_rtx_PLUS (DImode,
|
||
+ gen_rtx_REG (DImode, REGNO (load_insn_2_plus_expr)),
|
||
+ GEN_INT (GET_MODE_SIZE (DImode)));
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ rtx load_insn_2_offset_expr
|
||
+ = XEXP (XEXP (XEXP (PATTERN (load_insn_2), 1), 0), 1);
|
||
+
|
||
+ if (load_insn_2_offset_expr == NULL)
|
||
+ return;
|
||
+
|
||
+ if (GET_CODE (load_insn_2_offset_expr) == CONST_INT)
|
||
+ {
|
||
+ int load_insn_2_offset = XINT (load_insn_2_offset_expr, 0);
|
||
+ XEXP (XEXP (XEXP (PATTERN (load_insn_2), 1), 0), 1)
|
||
+ = GEN_INT (load_insn_2_offset + GET_MODE_SIZE (DImode));
|
||
+ }
|
||
+ }
|
||
+
|
||
+ start_sequence ();
|
||
+
|
||
+ int src_regno;
|
||
+ rtx srs_reg_insn = XEXP (XEXP (PATTERN (load_insn_1), 1), 0);
|
||
+
|
||
+ if (GET_CODE (srs_reg_insn) == REG)
|
||
+ src_regno = REGNO (srs_reg_insn);
|
||
+ else
|
||
+ src_regno = REGNO (XEXP (srs_reg_insn, 0));
|
||
+
|
||
+ /* in cases like ldp r1,r2,[r1] we emit ldr r2,[r1] first. */
|
||
+ if (src_regno == reg_no_1)
|
||
+ std::swap (load_insn_1, load_insn_2);
|
||
+
|
||
+ rtx_insn *emited_load_insn_1 = emit_insn (PATTERN (load_insn_1));
|
||
+ rtx_insn *emited_load_insn_2 = emit_insn (PATTERN (load_insn_2));
|
||
+
|
||
+ INSN_CODE (emited_load_insn_1)
|
||
+ = recog (PATTERN (emited_load_insn_1), emited_load_insn_1, 0);
|
||
+ INSN_CODE (emited_load_insn_2)
|
||
+ = recog (PATTERN (emited_load_insn_2), emited_load_insn_2, 0);
|
||
+
|
||
+ rtx_insn *seq = get_insns ();
|
||
+ unshare_all_rtl_in_chain (seq);
|
||
+ end_sequence ();
|
||
+
|
||
+ emit_insn_after_setloc (seq, prev_insn, INSN_LOCATION (insn));
|
||
+ delete_insn_and_edges (insn);
|
||
+}
|
||
+
|
||
+void
|
||
+pass_split_complex_instructions::split_complex_insn (rtx_insn *insn)
|
||
+{
|
||
+ complex_instructions_t insn_type = get_insn_type (insn);
|
||
+ /* TODO: Add splitting of STP instructions. */
|
||
+ if (insn_type == LDP || insn_type == STP)
|
||
+ split_ldp_stp (insn);
|
||
+ else if (insn_type == LDP_TI)
|
||
+ split_ldp_ti (insn);
|
||
+ else
|
||
+ gcc_assert (false && "Unsupported type of insn to split");
|
||
+}
|
||
+
|
||
+pass_split_complex_instructions::complex_instructions_t
|
||
+pass_split_complex_instructions::get_insn_type (rtx_insn *insn)
|
||
+{
|
||
+ if (!INSN_P (insn))
|
||
+ return UNDEFINED;
|
||
+
|
||
+ rtx pat = PATTERN (insn);
|
||
+ int icode = recog (PATTERN (insn), insn, NULL);
|
||
+
|
||
+ if (GET_CODE (pat) == PARALLEL)
|
||
+ {
|
||
+ if (targetm.is_ldp_insn (icode))
|
||
+ {
|
||
+ return LDP;
|
||
+ }
|
||
+ if (targetm.is_stp_insn (icode))
|
||
+ {
|
||
+ return STP;
|
||
+ }
|
||
+ else
|
||
+ {
|
||
+ return UNDEFINED;
|
||
+ }
|
||
+ }
|
||
+ rtx set_insn = single_set (insn);
|
||
+ if (set_insn && GET_CODE (XEXP (set_insn, 1)) == MEM
|
||
+ && GET_MODE (XEXP (set_insn, 1)) == E_TImode)
|
||
+ return LDP_TI;
|
||
+
|
||
+ return UNDEFINED;
|
||
+}
|
||
+
|
||
+bool
|
||
+pass_split_complex_instructions::gate (function *)
|
||
+{
|
||
+ return targetm.is_ldp_insn && targetm.is_stp_insn && optimize > 0
|
||
+ && flag_split_ldp_stp > 0;
|
||
+}
|
||
+
|
||
+} // anon namespace
|
||
+
|
||
+rtl_opt_pass *
|
||
+make_pass_split_complex_instructions (gcc::context *ctxt)
|
||
+{
|
||
+ return new pass_split_complex_instructions (ctxt);
|
||
+}
|
||
+
|
||
#if __GNUC__ >= 10
|
||
# pragma GCC diagnostic pop
|
||
-#endif
|
||
+#endif
|
||
\ No newline at end of file
|
||
diff --git a/gcc/target.def b/gcc/target.def
|
||
index d85adf36a..a3a50b474 100644
|
||
--- a/gcc/target.def
|
||
+++ b/gcc/target.def
|
||
@@ -2677,6 +2677,16 @@ modes and they have different conditional execution capability, such as ARM.",
|
||
bool, (void),
|
||
default_have_conditional_execution)
|
||
|
||
+DEFHOOK
|
||
+(is_ldp_insn,
|
||
+ "Return true if icode is corresponding to any of the LDP instruction types.",
|
||
+ bool, (int icode), NULL)
|
||
+
|
||
+DEFHOOK
|
||
+(is_stp_insn,
|
||
+ "Return true if icode is corresponding to any of the STP instruction types.",
|
||
+ bool, (int icode), NULL)
|
||
+
|
||
DEFHOOK
|
||
(gen_ccmp_first,
|
||
"This function prepares to emit a comparison insn for the first compare in a\n\
|
||
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
|
||
new file mode 100644
|
||
index 000000000..3918d43f6
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
|
||
@@ -0,0 +1,74 @@
|
||
+/* { dg-do compile { target aarch64-*-* } } */
|
||
+/* { dg-additional-options "-fsplit-ldp-stp" } */
|
||
+/*
|
||
+ * Tests are:
|
||
+ * Patterns where LDP insns should NOT be split
|
||
+ * */
|
||
+
|
||
+int __RTL (startwith ("split_complex_instructions"))
|
||
+simple_ldp_after_store ()
|
||
+{
|
||
+(function "simple_ldp_after_store"
|
||
+ (insn-chain
|
||
+ (block 2
|
||
+ (edge-from entry (flags "FALLTHRU"))
|
||
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
|
||
+ (cinsn 228 (set (reg/i:DI sp)
|
||
+ (reg/i:DI x0)))
|
||
+ (cinsn 101 (set (mem/c:DI
|
||
+ (plus:DI (reg/f:DI sp)
|
||
+ (const_int 32))[1 S4 A32])(reg:DI x0)))
|
||
+ (cinsn 10 (parallel [
|
||
+ (set (reg:DI x29)
|
||
+ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
|
||
+ (set (reg:DI x30)
|
||
+ (mem:DI (plus:DI (reg/f:DI sp)
|
||
+ (const_int 16)) [1 S4 A32]))]))
|
||
+ (cinsn 11 (use (reg/i:DI sp)))
|
||
+ (cinsn 12 (use (reg/i:DI cc)))
|
||
+ (cinsn 13 (use (reg/i:DI x29)))
|
||
+ (cinsn 14 (use (reg/i:DI x30)))
|
||
+ (cinsn 15 (use (reg/i:DI x0)))
|
||
+ (edge-to exit (flags "FALLTHRU"))
|
||
+ ) ;; block 2
|
||
+ ) ;; insn-chain
|
||
+) ;; function "simple_ldp_after_store"
|
||
+}
|
||
+
|
||
+int __RTL (startwith ("split_complex_instructions"))
|
||
+ldp_after_store_in_different_bb ()
|
||
+{
|
||
+(function "ldp_after_store_in_different_bb"
|
||
+ (insn-chain
|
||
+ (block 2
|
||
+ (edge-from entry (flags "FALLTHRU"))
|
||
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
|
||
+ (cinsn 228 (set (reg/i:DI sp)
|
||
+ (reg/i:DI x0)))
|
||
+ (cinsn 101 (set (mem/c:DI
|
||
+ (plus:DI (reg/f:DI sp)
|
||
+ (const_int 32))[1 S4 A32])(reg:DI x0)))
|
||
+ (edge-to 3 (flags "FALLTHRU"))
|
||
+ ) ;; block 2
|
||
+ (block 3
|
||
+ (edge-from 2 (flags "FALLTHRU"))
|
||
+ (cnote 4 [bb 3] NOTE_INSN_BASIC_BLOCK)
|
||
+ (cinsn 10 (parallel [
|
||
+ (set (reg:DI x29)
|
||
+ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
|
||
+ (set (reg:DI x30)
|
||
+ (mem:DI (plus:DI (reg/f:DI sp)
|
||
+ (const_int 16)) [1 S4 A32]))]))
|
||
+ (cinsn 11 (use (reg/i:DI sp)))
|
||
+ (cinsn 12 (use (reg/i:DI cc)))
|
||
+ (cinsn 13 (use (reg/i:DI x29)))
|
||
+ (cinsn 14 (use (reg/i:DI x30)))
|
||
+ (cinsn 15 (use (reg/i:DI x0)))
|
||
+ (edge-to exit (flags "FALLTHRU"))
|
||
+ ) ;; block 3
|
||
+ ) ;; insn-chain
|
||
+) ;; function "ldp_after_store_in_different_bb"
|
||
+}
|
||
+
|
||
+/* Verify that the output code contains exactly 2 ldp. */
|
||
+/* { dg-final { scan-assembler-times {ldp\t} 2 } } */
|
||
\ No newline at end of file
|
||
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
|
||
new file mode 100644
|
||
index 000000000..653c30f83
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
|
||
@@ -0,0 +1,40 @@
|
||
+/* { dg-do compile { target aarch64-*-* } } */
|
||
+/* { dg-additional-options "-fsplit-ldp-stp" } */
|
||
+/*
|
||
+ * Test is:
|
||
+ * Pattern where LDP insns should be split with rearrangement in order
|
||
+ * to deal with data dependency between subinstructions.
|
||
+ * */
|
||
+
|
||
+int __RTL (startwith ("split_complex_instructions"))
|
||
+simple_ldp_after_store ()
|
||
+{
|
||
+(function "ldp_equal_registers"
|
||
+ (insn-chain
|
||
+ (block 2
|
||
+ (edge-from entry (flags "FALLTHRU"))
|
||
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
|
||
+ (cinsn 228 (set (reg/i:DI x1)
|
||
+ (reg/i:DI x0)))
|
||
+ (cinsn 101 (set (mem/c:DI
|
||
+ (plus:DI (reg/f:DI x1)
|
||
+ (const_int 8))[1 S4 A32])(reg:DI x0)))
|
||
+ (cinsn 10 (parallel [
|
||
+ (set (reg:DI x1)
|
||
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int 8)) [1 S4 A32]))
|
||
+ (set (reg:DI x2)
|
||
+ (mem:DI (plus:DI (reg/f:DI x1)
|
||
+ (const_int 16)) [1 S4 A32]))]))
|
||
+ (cinsn 11 (use (reg/i:DI sp)))
|
||
+ (cinsn 12 (use (reg/i:DI cc)))
|
||
+ (cinsn 13 (use (reg/i:DI x0)))
|
||
+ (cinsn 14 (use (reg/i:DI x1)))
|
||
+ (cinsn 15 (use (reg/i:DI x2)))
|
||
+ (edge-to exit (flags "FALLTHRU"))
|
||
+ ) ;; block 2
|
||
+ ) ;; insn-chain
|
||
+) ;; function "ldp_equal_registers"
|
||
+}
|
||
+
|
||
+/* Verify that the output code rearranges the ldrs. */
|
||
+/* { dg-final { scan-assembler-times ".*ldr.*x2.*x1,.*16.*ldr.*x1.*x1.*8" 1 } } */
|
||
\ No newline at end of file
|
||
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
|
||
new file mode 100644
|
||
index 000000000..dc9f26efb
|
||
--- /dev/null
|
||
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
|
||
@@ -0,0 +1,174 @@
|
||
+/* { dg-do compile { target aarch64-*-* } } */
|
||
+/* { dg-additional-options "-O1 -fsplit-ldp-stp" } */
|
||
+/*
|
||
+ * Tests are:
|
||
+ * Patterns where LDP insns should be split
|
||
+ * */
|
||
+
|
||
+int __RTL (startwith ("split_complex_instructions"))
|
||
+simple_ldp_after_store ()
|
||
+{
|
||
+(function "simple_ldp_after_store"
|
||
+ (insn-chain
|
||
+ (block 2
|
||
+ (edge-from entry (flags "FALLTHRU"))
|
||
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
|
||
+ (cinsn 228 (set (reg/i:DI sp)
|
||
+ (reg/i:DI x0)))
|
||
+ (cinsn 238 (set (reg/i:DI x1)
|
||
+ (reg/i:DI x0)))
|
||
+
|
||
+ (cinsn 101 (set (mem/c:DI
|
||
+ (plus:DI (reg/f:DI sp)
|
||
+ (const_int 8))[1 S4 A32])(reg:DI x0)))
|
||
+ (cinsn 10 (parallel [
|
||
+ (set (reg:DI x29)
|
||
+ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
|
||
+ (set (reg:DI x30)
|
||
+ (mem:DI (plus:DI (reg/f:DI sp)
|
||
+ (const_int 16)) [1 S4 A32]))]))
|
||
+
|
||
+ (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x1)
|
||
+ (const_int -16)) [1 S4 A32])
|
||
+ (reg:DI x0)))
|
||
+ (cinsn 11 (parallel [
|
||
+ (set (reg:DI x3)
|
||
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int -16)) [1 S4 A32]))
|
||
+ (set (reg:DI x4)
|
||
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int -8)) [1 S4 A32]))
|
||
+ ]))
|
||
+
|
||
+ (cinsn 103 (set (mem/c:DI (reg/f:DI x1) [1 S4 A32])
|
||
+ (reg:DI x0)))
|
||
+ (cinsn 12 (parallel [
|
||
+ (set (reg:DI x5) (mem:DI (reg/f:DI x1) [1 S4 A32]))
|
||
+ (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1)
|
||
+ (const_int 8)) [1 S4 A32]))
|
||
+ ]))
|
||
+
|
||
+ (cinsn 13 (use (reg/i:DI sp)))
|
||
+ (cinsn 14 (use (reg/i:DI cc)))
|
||
+ (cinsn 15 (use (reg/i:DI x29)))
|
||
+ (cinsn 16 (use (reg/i:DI x30)))
|
||
+ (cinsn 17 (use (reg/i:DI x0)))
|
||
+ (cinsn 18 (use (reg/i:DI x3)))
|
||
+ (cinsn 19 (use (reg/i:DI x4)))
|
||
+ (cinsn 20 (use (reg/i:DI x5)))
|
||
+ (cinsn 21 (use (reg/i:DI x6)))
|
||
+ (edge-to exit (flags "FALLTHRU"))
|
||
+ ) ;; block 2
|
||
+ ) ;; insn-chain
|
||
+) ;; function "simple_ldp_after_store"
|
||
+}
|
||
+
|
||
+int __RTL (startwith ("split_complex_instructions"))
|
||
+ldp_ti_after_store ()
|
||
+{
|
||
+ (function "ldp_ti_after_store"
|
||
+ (insn-chain
|
||
+ (block 2
|
||
+ (edge-from entry (flags "FALLTHRU"))
|
||
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
|
||
+ (cinsn 228 (set (reg/i:DI sp)
|
||
+ (reg/i:DI x0)))
|
||
+ (cinsn 238 (set (reg/i:DI x2)
|
||
+ (reg/i:DI x0)))
|
||
+
|
||
+ (cinsn 101 (set (mem/c:DI
|
||
+ (plus:DI (reg/f:DI sp)
|
||
+ (const_int 136))[1 S4 A32])(reg:DI x0)))
|
||
+ (insn 81 (set (reg:TI x0 [1 S4 A32])
|
||
+ (mem/c:TI (plus:DI (reg/f:DI sp)
|
||
+ (const_int 136 )) [1 S4 A32]))
|
||
+ (expr_list:REG_EQUIV (mem/c:TI (plus:DI (reg/f:DI sfp)
|
||
+ (const_int -24 )) [1 S4 A32])
|
||
+ (nil)))
|
||
+
|
||
+ (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x2)
|
||
+ (const_int -16)) [1 S4 A32])
|
||
+ (reg:DI x0)))
|
||
+ (insn 82 (set (reg:TI x3 [1 S4 A32])
|
||
+ (mem/c:TI (plus:DI (reg/f:DI x2)
|
||
+ (const_int -16)) [1 S4 A32])))
|
||
+
|
||
+ (cinsn 103 (set (mem/c:DI (reg/f:DI x2) [1 S4 A32])
|
||
+ (reg:DI x0)))
|
||
+ (insn 83 (set (reg:TI x5 [1 S4 A32])
|
||
+ (mem/c:TI (reg/f:DI x2) [1 S4 A32])))
|
||
+
|
||
+ (cinsn 11 (use (reg/i:DI sp)))
|
||
+ (cinsn 12 (use (reg/i:DI cc)))
|
||
+ (cinsn 13 (use (reg/i:DI x29)))
|
||
+ (cinsn 14 (use (reg/i:DI x30)))
|
||
+ (cinsn 15 (use (reg/i:DI x0)))
|
||
+ (cinsn 16 (use (reg/i:DI x3)))
|
||
+ (cinsn 17 (use (reg/i:DI x5)))
|
||
+ (cinsn 18 (use (reg/i:DI x1)))
|
||
+ (cinsn 19 (use (reg/i:DI x4)))
|
||
+ (cinsn 20 (use (reg/i:DI x6)))
|
||
+ (edge-to exit (flags "FALLTHRU"))
|
||
+ ) ;; block 2
|
||
+ ) ;; insn-chain
|
||
+) ;; function "ldp_ti_after_store"
|
||
+}
|
||
+
|
||
+int __RTL (startwith ("split_complex_instructions"))
|
||
+ldp_after_store_in_different_bb ()
|
||
+{
|
||
+(function "ldp_after_store_in_different_bb"
|
||
+ (insn-chain
|
||
+ (block 2
|
||
+ (edge-from entry (flags "FALLTHRU"))
|
||
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
|
||
+ (cinsn 228 (set (reg/i:DI sp)
|
||
+ (reg/i:DI x0)))
|
||
+ (cinsn 238 (set (reg/i:DI x1)
|
||
+ (reg/i:DI x0)))
|
||
+
|
||
+ (cinsn 101 (set (mem/c:DI
|
||
+ (plus:DI (reg/f:DI sp)
|
||
+ (const_int 8))[1 S4 A32])(reg:DI x0)))
|
||
+ (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x1)
|
||
+ (const_int -16)) [1 S4 A32])
|
||
+ (reg:DI x0)))
|
||
+ (cinsn 103 (set (mem/c:DI (reg/f:DI x1) [1 S4 A32])
|
||
+ (reg:DI x0)))
|
||
+ (edge-to 3 (flags "FALLTHRU"))
|
||
+ ) ;; block 2
|
||
+ (block 3
|
||
+ (edge-from 2 (flags "FALLTHRU"))
|
||
+ (cnote 4 [bb 3] NOTE_INSN_BASIC_BLOCK)
|
||
+ (cinsn 10 (parallel [
|
||
+ (set (reg:DI x29)
|
||
+ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
|
||
+ (set (reg:DI x30)
|
||
+ (mem:DI (plus:DI (reg/f:DI sp)
|
||
+ (const_int 16)) [1 S4 A32]))]))
|
||
+ (cinsn 11 (parallel [
|
||
+ (set (reg:DI x3)
|
||
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int -16)) [1 S4 A32]))
|
||
+ (set (reg:DI x4)
|
||
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int -8)) [1 S4 A32]))
|
||
+ ]))
|
||
+ (cinsn 12 (parallel [
|
||
+ (set (reg:DI x5) (mem:DI (reg/f:DI x1) [1 S4 A32]))
|
||
+ (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1)
|
||
+ (const_int 8)) [1 S4 A32]))
|
||
+ ]))
|
||
+ (cinsn 13 (use (reg/i:DI sp)))
|
||
+ (cinsn 14 (use (reg/i:DI cc)))
|
||
+ (cinsn 15 (use (reg/i:DI x29)))
|
||
+ (cinsn 16 (use (reg/i:DI x30)))
|
||
+ (cinsn 17 (use (reg/i:DI x0)))
|
||
+ (cinsn 18 (use (reg/i:DI x3)))
|
||
+ (cinsn 19 (use (reg/i:DI x4)))
|
||
+ (cinsn 20 (use (reg/i:DI x5)))
|
||
+ (cinsn 21 (use (reg/i:DI x6)))
|
||
+ (edge-to exit (flags "FALLTHRU"))
|
||
+ ) ;; block 3
|
||
+ ) ;; insn-chain
|
||
+) ;; function "ldp_after_store_in_different_bb"
|
||
+}
|
||
+
|
||
+/* Verify that the output code doesn't contain ldp. */
|
||
+/* { dg-final { scan-assembler-not {ldp\t} } } */
|
||
\ No newline at end of file
|
||
diff --git a/gcc/timevar.def b/gcc/timevar.def
|
||
index 1e7d4e74b..2ccecffb5 100644
|
||
--- a/gcc/timevar.def
|
||
+++ b/gcc/timevar.def
|
||
@@ -280,6 +280,7 @@ DEFTIMEVAR (TV_RELOAD_CSE_REGS , "reload CSE regs")
|
||
DEFTIMEVAR (TV_GCSE_AFTER_RELOAD , "load CSE after reload")
|
||
DEFTIMEVAR (TV_REE , "ree")
|
||
DEFTIMEVAR (TV_THREAD_PROLOGUE_AND_EPILOGUE, "thread pro- & epilogue")
|
||
+DEFTIMEVAR (TV_SPLIT_CMP_INS , "split complex instructions")
|
||
DEFTIMEVAR (TV_IFCVT2 , "if-conversion 2")
|
||
DEFTIMEVAR (TV_SPLIT_PATHS , "split paths")
|
||
DEFTIMEVAR (TV_COMBINE_STACK_ADJUST , "combine stack adjustments")
|
||
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
|
||
index 86f38e2f2..6daac7fc1 100644
|
||
--- a/gcc/tree-pass.h
|
||
+++ b/gcc/tree-pass.h
|
||
@@ -612,6 +612,7 @@ extern rtl_opt_pass *make_pass_split_after_reload (gcc::context *ctxt);
|
||
extern rtl_opt_pass *make_pass_thread_prologue_and_epilogue (gcc::context
|
||
*ctxt);
|
||
extern rtl_opt_pass *make_pass_zero_call_used_regs (gcc::context *ctxt);
|
||
+extern rtl_opt_pass *make_pass_split_complex_instructions (gcc::context *ctxt);
|
||
extern rtl_opt_pass *make_pass_stack_adjustments (gcc::context *ctxt);
|
||
extern rtl_opt_pass *make_pass_sched_fusion (gcc::context *ctxt);
|
||
extern rtl_opt_pass *make_pass_peephole2 (gcc::context *ctxt);
|
||
--
|
||
2.33.0
|
||
|