gcc/0042-Add-split-complex-instructions-pass.patch
2024-04-11 19:37:37 +08:00

1246 lines
39 KiB
Diff
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 9a8e5716543972dec36bae1f9d380d27bfbcdae1 Mon Sep 17 00:00:00 2001
From: Agrachev Andrey WX1228450 <agrachev.andrey@huawei-partners.com>
Date: Mon, 21 Aug 2023 12:35:19 +0300
Subject: [PATCH 09/18] Add split-complex-instructions pass
- Add option -fsplit-ldp-stp
- Add functionality to detect and split LDP instructions that depend on a preceding store.
- Add -param=param-ldp-dependency-search-range= to configure ldp dependency search range
- Add RTL tests
Co-authored-by: Chernonog Vyacheslav 00812786 <chernonog.vyacheslav@huawei.com>
Co-authored-by: Zinin Ivan WX1305386 <zinin.ivan@huawei-partners.com>
Co-authored-by: Gadzhiev Emin WX1195297 <gadzhiev.emin@huawei-partners.com>
---
gcc/common.opt | 5 +
gcc/config/aarch64/aarch64.cc | 42 ++
gcc/doc/tm.texi | 8 +
gcc/doc/tm.texi.in | 4 +
gcc/params.opt | 3 +
gcc/passes.def | 1 +
gcc/sched-rgn.cc | 704 +++++++++++++++++-
gcc/target.def | 10 +
.../gcc.dg/rtl/aarch64/test-ldp-dont-split.c | 74 ++
.../rtl/aarch64/test-ldp-split-rearrange.c | 40 +
.../gcc.dg/rtl/aarch64/test-ldp-split.c | 174 +++++
gcc/timevar.def | 1 +
gcc/tree-pass.h | 1 +
13 files changed, 1066 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
create mode 100644 gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
diff --git a/gcc/common.opt b/gcc/common.opt
index a42bee250..c0e3f5687 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1797,6 +1797,11 @@ floop-nest-optimize
Common Var(flag_loop_nest_optimize) Optimization
Enable the loop nest optimizer.
+fsplit-ldp-stp
+Common Var(flag_split_ldp_stp) Optimization
+Split load/store pair instructions into separate load/store operations
+for better performance.
+
fstrict-volatile-bitfields
Common Var(flag_strict_volatile_bitfields) Init(-1) Optimization
Force bitfield accesses to match their type width.
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 04072ca25..48e2eded0 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -27507,6 +27507,48 @@ aarch64_run_selftests (void)
#endif /* #if CHECKING_P */
+/* TODO: stop relying on ranges of instruction codes; use a full list of the instruction codes instead. */
+
+/* Return true if ICODE falls inside one of the insn-code ranges checked
+   below.  Installed as the TARGET_IS_LDP_INSN hook.  */
+bool
+is_aarch64_ldp_insn (int icode)
+{
+  if (icode >= CODE_FOR_load_pair_sw_sisi
+      && icode <= CODE_FOR_load_pair_dw_tftf)
+    return true;
+
+  if (icode >= CODE_FOR_loadwb_pairsi_si
+      && icode <= CODE_FOR_loadwb_pairtf_di)
+    return true;
+
+  if (icode >= CODE_FOR_load_pairv8qiv8qi
+      && icode <= CODE_FOR_load_pairdfdf)
+    return true;
+
+  if (icode >= CODE_FOR_load_pairv16qiv16qi
+      && icode <= CODE_FOR_load_pairv8bfv2df)
+    return true;
+
+  return (icode >= CODE_FOR_load_pair_lanesv8qi
+          && icode <= CODE_FOR_load_pair_lanesdf);
+}
+
+/* Return true if ICODE falls inside one of the insn-code ranges checked
+   below.  Installed as the TARGET_IS_STP_INSN hook.  */
+bool
+is_aarch64_stp_insn (int icode)
+{
+  if (icode >= CODE_FOR_store_pair_sw_sisi
+      && icode <= CODE_FOR_store_pair_dw_tftf)
+    return true;
+
+  if (icode >= CODE_FOR_storewb_pairsi_si
+      && icode <= CODE_FOR_storewb_pairtf_di)
+    return true;
+
+  if (icode >= CODE_FOR_vec_store_pairv8qiv8qi
+      && icode <= CODE_FOR_vec_store_pairdfdf)
+    return true;
+
+  if (icode >= CODE_FOR_vec_store_pairv16qiv16qi
+      && icode <= CODE_FOR_vec_store_pairv8bfv2df)
+    return true;
+
+  return (icode >= CODE_FOR_store_pair_lanesv8qi
+          && icode <= CODE_FOR_store_pair_lanesdf);
+}
+
+#undef TARGET_IS_LDP_INSN
+#define TARGET_IS_LDP_INSN is_aarch64_ldp_insn
+
+#undef TARGET_IS_STP_INSN
+#define TARGET_IS_STP_INSN is_aarch64_stp_insn
+
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD aarch64_stack_protect_guard
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index c5006afc0..0c6415a9c 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -12113,6 +12113,14 @@ object files that are not referenced from @code{main} and uses export
lists.
@end defmac
+@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode})
+Return true if icode is corresponding to any of the LDP instruction types.
+@end deftypefn
+
+@deftypefn {Target Hook} bool TARGET_IS_STP_INSN (int @var{icode})
+Return true if icode is corresponding to any of the STP instruction types.
+@end deftypefn
+
@deftypefn {Target Hook} bool TARGET_CANNOT_MODIFY_JUMPS_P (void)
This target hook returns @code{true} past the point in which new jump
instructions could be created. On machines that require a register for
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index f869ddd5e..6ff60e562 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7977,6 +7977,10 @@ object files that are not referenced from @code{main} and uses export
lists.
@end defmac
+@hook TARGET_IS_LDP_INSN
+
+@hook TARGET_IS_STP_INSN
+
@hook TARGET_CANNOT_MODIFY_JUMPS_P
@hook TARGET_HAVE_CONDITIONAL_EXECUTION
diff --git a/gcc/params.opt b/gcc/params.opt
index 7fcc2398d..6176d4790 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -1217,4 +1217,7 @@ Enum(vrp_mode) String(ranger) Value(VRP_MODE_RANGER)
Common Joined UInteger Var(param_pointer_compression_size) Init(32) IntegerRange(8, 32) Param Optimization
Target size of compressed pointer, which should be 8, 16 or 32.
+-param=param-ldp-dependency-search-range=
+Common Joined UInteger Var(param_ldp_dependency_search_range) Init(16) IntegerRange(1, 32) Param Optimization
+Number of preceding instructions searched for a store that an ldp depends on in the split-ldp-stp pass.
; This comment is to ensure we retain the blank line above.
diff --git a/gcc/passes.def b/gcc/passes.def
index 941bbadf0..a30e05688 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -514,6 +514,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_reorder_blocks);
NEXT_PASS (pass_leaf_regs);
NEXT_PASS (pass_split_before_sched2);
+ NEXT_PASS (pass_split_complex_instructions);
NEXT_PASS (pass_sched2);
NEXT_PASS (pass_stack_regs);
PUSH_INSERT_PASSES_WITHIN (pass_stack_regs)
diff --git a/gcc/sched-rgn.cc b/gcc/sched-rgn.cc
index a0dfdb788..b4df8bdc5 100644
--- a/gcc/sched-rgn.cc
+++ b/gcc/sched-rgn.cc
@@ -44,6 +44,8 @@ along with GCC; see the file COPYING3. If not see
are actually scheduled. */
#include "config.h"
+#define INCLUDE_SET
+#define INCLUDE_VECTOR
#include "system.h"
#include "coretypes.h"
#include "backend.h"
@@ -65,6 +67,7 @@ along with GCC; see the file COPYING3. If not see
#include "dbgcnt.h"
#include "pretty-print.h"
#include "print-rtl.h"
+#include "cfgrtl.h"
/* Disable warnings about quoting issues in the pp_xxx calls below
that (intentionally) don't follow GCC diagnostic conventions. */
@@ -3951,6 +3954,705 @@ make_pass_sched_fusion (gcc::context *ctxt)
return new pass_sched_fusion (ctxt);
}
+namespace {
+
+/* Helper functions for def-use chain analysis. */
+
+/* Return the chain of definitions reaching the use of REG in INSN.
+   Return NULL when a SUBREG use is met first, when a link of the chain
+   has no reference or no insn info, or when REG is a global register
+   and a linked insn does not visibly set it.  */
+static struct df_link *
+get_defs (rtx_insn *insn, rtx reg)
+{
+  df_ref reg_use;
+
+  /* Look for the use of REG; the assertion below fires if none of the
+     uses of INSN matched.  */
+  FOR_EACH_INSN_USE (reg_use, insn)
+    {
+      rtx used = DF_REF_REG (reg_use);
+      if (GET_CODE (used) == SUBREG)
+        return NULL;
+      if (REGNO (used) == REGNO (reg))
+        break;
+    }
+
+  gcc_assert (reg_use != NULL);
+
+  struct df_link *const chain = DF_REF_CHAIN (reg_use);
+
+  for (struct df_link *link = chain; link != NULL; link = link->next)
+    {
+      df_ref def = link->ref;
+
+      /* Problem getting some definition for this instruction.  */
+      if (def == NULL || DF_REF_INSN_INFO (def) == NULL)
+        return NULL;
+
+      /* Global registers are assumed to be defined at every call, so
+         dataflow may report a call_insn as a definition of REG.  Only
+         accept definitions that can be deduced from the RTL itself.  */
+      if (global_regs[REGNO (reg)] && !set_of (reg, DF_REF_INSN (def)))
+        return NULL;
+    }
+
+  return chain;
+}
+
+/* Return the chain of uses of the definition of REG made by INSN, or
+   NULL if some link of that chain carries no reference.  */
+static struct df_link *
+get_uses (rtx_insn *insn, rtx reg)
+{
+  df_ref reg_def;
+
+  FOR_EACH_INSN_DEF (reg_def, insn)
+    {
+      if (REGNO (DF_REF_REG (reg_def)) == REGNO (reg))
+        break;
+    }
+
+  gcc_assert (reg_def != NULL && "Broken def-use analisys chain.");
+
+  struct df_link *const chain = DF_REF_CHAIN (reg_def);
+
+  /* Problem getting some use for this instruction.  */
+  for (struct df_link *link = chain; link != NULL; link = link->next)
+    if (link->ref == NULL)
+      return NULL;
+
+  return chain;
+}
+
+/* Static description of the split_complex_instructions RTL pass.  */
+const pass_data pass_data_split_complex_instructions = {
+  RTL_PASS,                        /* type */
+  "split_complex_instructions",    /* name */
+  OPTGROUP_NONE,                   /* optinfo_flags */
+  TV_SPLIT_CMP_INS,                /* tv_id */
+  0,                               /* properties_required */
+  0,                               /* properties_provided */
+  0,                               /* properties_destroyed */
+  0,                               /* todo_flags_start */
+  TODO_df_finish | TODO_df_verify, /* todo_flags_finish */
+};
+
+/* RTL pass that collects load-pair instructions (LDP parallels and
+   TImode loads) for which a matching store is found among the preceding
+   instructions, and then splits each of them into separate loads.  */
+class pass_split_complex_instructions : public rtl_opt_pass
+{
+private:
+  /* Kinds of instruction the pass distinguishes.  */
+  enum complex_instructions_t
+  {
+    UNDEFINED,
+    LDP,
+    LDP_TI,
+    STP,
+    STR
+  };
+
+  void split_complex_insn (rtx_insn *insn);
+  void split_ldp_ti (rtx_insn *insn);
+  void split_ldp_with_offset (rtx_insn *ldp_insn);
+  void split_simple_ldp (rtx_insn *ldp_insn);
+  void split_ldp_stp (rtx_insn *insn);
+  complex_instructions_t get_insn_type (rtx_insn *insn);
+
+  /* Cursors used by the walk in execute ().  */
+  basic_block bb;
+  rtx_insn *insn;
+
+  /* Stores that share a register definition with the LDP currently
+     being examined; only membership is ever queried.  */
+  std::set<rtx_insn *> dependent_stores_candidates;
+
+  /* Instructions selected for splitting, in the order they were found.
+     A vector is used instead of a pointer-keyed set so that the order
+     of splitting does not depend on the addresses of the insns.  */
+  std::vector<rtx_insn *> ldp_to_split_list;
+
+  complex_instructions_t complex_insn_type = UNDEFINED;
+  bool is_store_insn (rtx_insn *insn);
+  bool is_ldp_dependent_on_store (rtx_insn *ldp_insn, basic_block bb);
+  bool bfs_for_reg_dependent_store (rtx_insn *ldp_insn, basic_block search_bb,
+                                    rtx_insn *search_insn,
+                                    int search_range
+                                    = param_ldp_dependency_search_range);
+  bool is_store_reg_dependent (rtx_insn *ldp_insn, rtx_insn *str_insn);
+  void init_df ();
+  void find_dependent_stores_candidates (rtx_insn *ldp_insn);
+  int get_insn_offset (rtx_insn *insn, complex_instructions_t insn_type,
+                       int *arith_operation_ptr = NULL);
+
+public:
+  pass_split_complex_instructions (gcc::context *ctxt)
+    : rtl_opt_pass (pass_data_split_complex_instructions, ctxt)
+  {
+  }
+
+  /* opt_pass methods: */
+  virtual bool gate (function *);
+
+  /* Collect every LDP that depends on a preceding store, then split the
+     collected instructions.  Always returns 0.  */
+  virtual unsigned int
+  execute (function *)
+  {
+    init_df ();
+    ldp_to_split_list.clear ();
+
+    /* Splitting is deferred until the walk is over, so the insn chain
+       is not modified while it is being traversed.  */
+    FOR_EACH_BB_FN (bb, cfun)
+      {
+        FOR_BB_INSNS (bb, insn)
+          {
+            complex_instructions_t insn_type = get_insn_type (insn);
+            /* TODO: Add splitting of STP instructions.  */
+            if (insn_type != LDP && insn_type != LDP_TI)
+              continue;
+            /* TODO: Currently support only ldp_ti and ldp with REG or
+               PLUS/MINUS offset expression.  */
+            if (insn_type == LDP_TI)
+              {
+                enum rtx_code ldp_memref_code
+                  = GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0));
+                if (ldp_memref_code != REG && ldp_memref_code != PLUS
+                    && ldp_memref_code != MINUS)
+                  continue;
+              }
+            /* Each insn is visited once, so no duplicates can arise.  */
+            if (is_ldp_dependent_on_store (insn, bb))
+              ldp_to_split_list.push_back (insn);
+          }
+      }
+
+    for (rtx_insn *ldp : ldp_to_split_list)
+      split_complex_insn (ldp);
+
+    return 0;
+  }
+}; // class pass_split_complex_instructions
+
+/* Return true if a store matching LDP_INSN is found by searching
+   backwards from LDP_INSN, starting in BB.  The candidate store set is
+   rebuilt for LDP_INSN before the search.  */
+bool
+pass_split_complex_instructions::is_ldp_dependent_on_store (rtx_insn *ldp_insn,
+                                                            basic_block bb)
+{
+  find_dependent_stores_candidates (ldp_insn);
+
+  const bool store_found
+    = bfs_for_reg_dependent_store (ldp_insn, bb, ldp_insn);
+  return store_found;
+}
+
+/* Walk backwards from SEARCH_INSN inside SEARCH_BB looking for a store
+   that LDP_INSN depends on, and return true once one is found.
+   SEARCH_RANGE bounds the walk: every active insn stepped over consumes
+   one unit, inactive insns consume none, and crossing into a
+   predecessor block consumes one unit.  When the head of SEARCH_BB is
+   reached the search recurses into each predecessor in turn.  */
+bool
+pass_split_complex_instructions::bfs_for_reg_dependent_store (
+  rtx_insn *ldp_insn, basic_block search_bb, rtx_insn *search_insn,
+  int search_range)
+{
+  int budget = search_range;
+  rtx_insn *cursor = search_insn;
+
+  while (budget > 0)
+    {
+      if (cursor == NULL)
+        return false;
+
+      if (is_store_reg_dependent (ldp_insn, cursor))
+        {
+          if (dump_file)
+            {
+              fprintf (dump_file, "LDP to split:\n");
+              print_rtl_single (dump_file, ldp_insn);
+              fprintf (dump_file, "Found STR:\n");
+              print_rtl_single (dump_file, cursor);
+            }
+          return true;
+        }
+
+      if (cursor == BB_HEAD (search_bb))
+        {
+          /* Search in all parent BBs for the reg_dependent store.  */
+          edge_iterator ei;
+          edge pred;
+
+          FOR_EACH_EDGE (pred, ei, search_bb->preds)
+            {
+              if (pred->src->index == 0)
+                continue;
+              if (bfs_for_reg_dependent_store (ldp_insn, pred->src,
+                                               BB_END (pred->src),
+                                               budget - 1))
+                return true;
+            }
+          return false;
+        }
+
+      /* Only active insns use up the search range.  */
+      if (active_insn_p (cursor))
+        --budget;
+      cursor = PREV_INSN (cursor);
+    }
+
+  return false;
+}
+
+/* Set up the dataflow problems the pass relies on (use-def and def-use
+   chains, MIR and LIVE), run the analysis, and then defer rescanning of
+   insns.  */
+void
+pass_split_complex_instructions::init_df ()
+{
+  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
+
+  /* Both chain directions are needed: get_defs walks use->def links and
+     get_uses walks def->use links.  */
+  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
+  df_mir_add_problem ();
+  df_live_add_problem ();
+  df_live_set_all_dirty ();
+
+  df_analyze ();
+
+  df_set_flags (DF_DEFER_INSN_RESCAN);
+}
+
+/* Rebuild dependent_stores_candidates for LDP_INSN: for every register
+   used by LDP_INSN, follow each reaching definition to all of its uses
+   and record the using insns that are stores.  The collection stops at
+   the first register for which no definition chain is available.  */
+void
+pass_split_complex_instructions::find_dependent_stores_candidates (
+  rtx_insn *ldp_insn)
+{
+  dependent_stores_candidates.clear ();
+
+  df_ref ldp_use;
+  FOR_EACH_INSN_USE (ldp_use, ldp_insn)
+    {
+      df_link *def_chain = get_defs (ldp_insn, DF_REF_REG (ldp_use));
+      if (def_chain == NULL)
+        return;
+
+      for (df_link *def_link = def_chain; def_link;
+           def_link = def_link->next)
+        {
+          df_ref def = def_link->ref;
+
+          /* A missing use chain simply leaves nothing to iterate.  */
+          for (df_link *use_link
+                 = get_uses (DF_REF_INSN (def), DF_REF_REG (def));
+               use_link; use_link = use_link->next)
+            {
+              if (DF_REF_CLASS (use_link->ref) != DF_REF_REGULAR)
+                continue;
+
+              rtx_insn *user = DF_REF_INSN (use_link->ref);
+              if (is_store_insn (user))
+                dependent_stores_candidates.insert (user);
+            }
+        }
+    }
+}
+
+/* Return true if STR_INSN is one of the candidate stores collected for
+   the current LDP and its signed offset equals the signed offset of
+   LDP_INSN or that offset plus 8.
+   NOTE(review): the fixed 8 presumably stands for the size of one
+   loaded element -- confirm for load pairs of other element sizes.  */
+bool
+pass_split_complex_instructions::is_store_reg_dependent (rtx_insn *ldp_insn,
+                                                         rtx_insn *str_insn)
+{
+  if (!is_store_insn (str_insn))
+    return false;
+  if (dependent_stores_candidates.count (str_insn) == 0)
+    return false;
+
+  /* get_insn_offset reports the magnitude; apply the sign here.  */
+  int ldp_sign = UNDEFINED;
+  int ldp_off
+    = get_insn_offset (ldp_insn, get_insn_type (ldp_insn), &ldp_sign);
+  if (ldp_sign == MINUS)
+    ldp_off = -ldp_off;
+
+  int str_sign = UNDEFINED;
+  int str_off = get_insn_offset (str_insn, STR, &str_sign);
+  if (str_sign == MINUS)
+    str_off = -str_off;
+
+  return str_off == ldp_off || str_off == ldp_off + 8;
+}
+
+/* Return true if INSN is a single set whose destination is a non-BLKmode
+   memory reference addressed either by a plain register or by a
+   PLUS/MINUS expression with a constant integer second operand.  */
+bool
+pass_split_complex_instructions::is_store_insn (rtx_insn *insn)
+{
+  if (insn == NULL)
+    return false;
+
+  rtx set = single_set (insn);
+  if (set == NULL_RTX)
+    return false;
+
+  rtx dest = XEXP (set, 0);
+  if (!MEM_P (dest) || GET_MODE (dest) == BLKmode)
+    return false;
+
+  /* TODO: Only addresses of the form (base) or (base +/- #imm) are
+     accepted here.  It might make sense to add support for other cases
+     (e.g. multiply and shift).  */
+  rtx addr = XEXP (dest, 0);
+  if (GET_CODE (addr) == REG)
+    return true;
+  if (GET_CODE (addr) != PLUS && GET_CODE (addr) != MINUS)
+    return false;
+
+  return GET_CODE (XEXP (addr, 1)) == CONST_INT;
+}
+
+/* Return the constant offset found in INSN, interpreted according to
+   INSN_TYPE, or 0 when there is none.
+
+   LDP with two elements: the constant operand of the PLUS/MINUS address
+   of the first load; ARITH_OPERATION_PTR is ignored in this case.
+   LDP with three elements: the constant operand of the source
+   expression of the first element.
+   LDP_TI: the constant operand of the PLUS/MINUS address of the load;
+   returns 0 early, leaving *ARITH_OPERATION_PTR untouched, for any
+   other address form.
+   STR: the constant operand of the store address; returns 0 early,
+   leaving *ARITH_OPERATION_PTR untouched, for a plain register address.
+
+   Otherwise, when ARITH_OPERATION_PTR is non-null, the rtx code of the
+   offset expression (PLUS or MINUS) is stored through it; the returned
+   value itself is not negated for MINUS.  */
+int
+pass_split_complex_instructions::get_insn_offset (
+  rtx_insn *insn, complex_instructions_t insn_type, int *arith_operation_ptr)
+{
+  rtx insn_pat = PATTERN (insn);
+  rtx offset_expr = NULL_RTX;
+  rtx offset_value_expr = NULL_RTX;
+
+  switch (insn_type)
+    {
+    case LDP:
+      {
+        /* Read the sub-patterns in place; there is no need to wrap
+           copies of them into freshly allocated insns.  */
+        rtx first_elt = XVECEXP (insn_pat, 0, 0);
+
+        switch (XVECLEN (insn_pat, 0))
+          {
+          case 2:
+            arith_operation_ptr = NULL;
+            offset_expr = XEXP (XEXP (first_elt, 1), 0);
+            if (GET_CODE (offset_expr) == PLUS
+                || GET_CODE (offset_expr) == MINUS)
+              offset_value_expr = XEXP (offset_expr, 1);
+            else
+              offset_expr = NULL_RTX;
+            break;
+
+          case 3:
+            offset_expr = XEXP (first_elt, 1);
+            offset_value_expr = XEXP (offset_expr, 1);
+            break;
+
+          default:
+            /* Wrong number of elements in the ldp_insn vector.  */
+            gcc_unreachable ();
+          }
+        break;
+      }
+
+    case LDP_TI:
+      offset_expr = XEXP (XEXP (insn_pat, 1), 0);
+      if (GET_CODE (offset_expr) != PLUS && GET_CODE (offset_expr) != MINUS)
+        return 0;
+      offset_value_expr = XEXP (offset_expr, 1);
+      break;
+
+    case STR:
+      {
+        /* Stores are identified with single_set, which may pick the set
+           out of a larger pattern, so do not index PATTERN directly.  */
+        rtx store_set = single_set (insn);
+        gcc_assert (store_set != NULL_RTX);
+
+        offset_expr = XEXP (XEXP (store_set, 0), 0);
+        /* If memory location is specified by single base register then
+           the offset is zero.  */
+        if (GET_CODE (offset_expr) == REG)
+          return 0;
+        offset_value_expr = XEXP (offset_expr, 1);
+        break;
+      }
+
+    default:
+      if (dumps_are_enabled && dump_file)
+        {
+          fprintf (dump_file, "Instruction that was tried to split:\n");
+          print_rtl_single (dump_file, insn);
+        }
+      /* Unsupported instruction type.  */
+      gcc_unreachable ();
+    }
+
+  /* CONST_INT stores a HOST_WIDE_INT, so it must be read with INTVAL.  */
+  int returned_offset = 0;
+  if (offset_expr != NULL_RTX && offset_value_expr != NULL_RTX
+      && GET_CODE (offset_value_expr) == CONST_INT)
+    returned_offset = (int) INTVAL (offset_value_expr);
+
+  if (arith_operation_ptr != NULL)
+    {
+      gcc_assert (offset_expr != NULL_RTX);
+      *arith_operation_ptr = GET_CODE (offset_expr);
+      gcc_assert ((*arith_operation_ptr == MINUS
+                   || *arith_operation_ptr == PLUS)
+                  && "Unexpected arithmetic operation in the offset expr");
+    }
+
+  return returned_offset;
+}
+
+/* Emit, into the current sequence, the two loads that make up the
+   two-element parallel LDP_INSN as separate insns.  The original insn
+   is left untouched; the caller replaces it with the sequence.  */
+void
+pass_split_complex_instructions::split_simple_ldp (rtx_insn *ldp_insn)
+{
+  rtx pat = PATTERN (ldp_insn);
+
+  /* Work on copies of the two sub-patterns; emit_insn creates the insns
+     itself, so no intermediate raw insns are needed.  */
+  rtx load_1 = copy_rtx (XVECEXP (pat, 0, 0));
+  rtx load_2 = copy_rtx (XVECEXP (pat, 0, 1));
+
+  rtx addr = XEXP (SET_SRC (load_1), 0);
+  const unsigned int dest_regno = REGNO (SET_DEST (load_1));
+  const unsigned int base_regno
+    = GET_CODE (addr) == REG ? REGNO (addr) : REGNO (XEXP (addr, 0));
+
+  /* In cases like ldp r1,r2,[r1] we emit ldr r2,[r1] first.  */
+  if (base_regno == dest_regno)
+    std::swap (load_1, load_2);
+
+  rtx_insn *first = emit_insn (load_1);
+  rtx_insn *second = emit_insn (load_2);
+
+  INSN_CODE (first) = recog (PATTERN (first), first, 0);
+  INSN_CODE (second) = recog (PATTERN (second), second, 0);
+}
+
+/* Emit, into the current sequence, separate insns for the three-element
+   parallel LDP_INSN: element 0 updates the base register and elements
+   1 and 2 are the memory accesses.  The base update is emitted after
+   the memory accesses (post-index) unless a memory access with a
+   constant address offset in element 1 marks the insn as pre-index.
+   Nothing is emitted when the first access overwrites its own base
+   register.  The original insn is left untouched.  */
+void
+pass_split_complex_instructions::split_ldp_with_offset (rtx_insn *ldp_insn)
+{
+  rtx pat = PATTERN (ldp_insn);
+  bool post_index = true;
+
+  /* Private copies of the three sub-patterns.  */
+  rtx offset_pat = NULL_RTX;
+  rtx mem_pat_1 = NULL_RTX;
+  rtx mem_pat_2 = NULL_RTX;
+
+  int offset = 0;
+  int arith_operation = UNDEFINED;
+
+  for (int i = 0; i < 3; i++)
+    {
+      rtx sub_pat = copy_rtx (XVECEXP (pat, 0, i));
+      rtx sub_dest = XEXP (sub_pat, 0);
+      rtx sub_src = XEXP (sub_pat, 1);
+
+      /* The offset element sets a register to a PLUS/MINUS expression.
+         Decide this from the RTL structure; an insn code returned by
+         recog is not an rtx code and cannot be classified as one.  */
+      if (GET_CODE (sub_dest) == REG
+          && (GET_CODE (sub_src) == PLUS || GET_CODE (sub_src) == MINUS))
+        {
+          gcc_assert (i == 0
+                      && "Offset related insn must be the first "
+                         "element of a parallel insn vector");
+          offset_pat = sub_pat;
+          offset = get_insn_offset (ldp_insn, LDP, &arith_operation);
+        }
+      else if (MEM_P (sub_dest))
+        {
+          /* NOTE(review): a memory destination means a store pair,
+             which the pass does not select for splitting yet; this
+             branch keeps the address adjustment of the original code
+             and should be re-checked when STP splitting is added.  */
+          rtx addr = XEXP (sub_dest, 0);
+          if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
+              && GET_CODE (XEXP (addr, 1)) == CONST_INT)
+            {
+              gcc_assert (
+                (arith_operation == MINUS || arith_operation == PLUS)
+                && "Unexpected arithmetic operation in offset related "
+                   "sub_insn");
+
+              /* Apply the sign without modifying OFFSET, so that both
+                 memory elements see the same value.  */
+              HOST_WIDE_INT signed_offset
+                = (arith_operation == PLUS ? offset : -offset);
+              XEXP (addr, 1)
+                = GEN_INT (INTVAL (XEXP (addr, 1)) + signed_offset);
+
+              if (i == 1)
+                post_index = false;
+            }
+          else
+            post_index = true;
+        }
+
+      if (i == 1)
+        mem_pat_1 = sub_pat;
+      else if (i == 2)
+        mem_pat_2 = sub_pat;
+    }
+
+  gcc_assert (offset_pat != NULL_RTX
+              && "Missing offset element in the parallel insn vector");
+  gcc_assert (mem_pat_1 != NULL_RTX && mem_pat_2 != NULL_RTX
+              && "Uninitialized memory insns");
+
+  /* Don't split such weird LDP: the first load overwrites its base.  */
+  if (GET_CODE (SET_DEST (mem_pat_1)) == REG && MEM_P (SET_SRC (mem_pat_1)))
+    {
+      rtx addr = XEXP (SET_SRC (mem_pat_1), 0);
+      const unsigned int dest_regno = REGNO (SET_DEST (mem_pat_1));
+      const unsigned int base_regno
+        = GET_CODE (addr) == REG ? REGNO (addr) : REGNO (XEXP (addr, 0));
+      if (base_regno == dest_regno)
+        return;
+    }
+
+  if (!post_index)
+    {
+      rtx_insn *emitted_offset = emit_insn (offset_pat);
+      INSN_CODE (emitted_offset)
+        = recog (PATTERN (emitted_offset), emitted_offset, 0);
+    }
+
+  rtx_insn *emitted_1 = emit_insn (mem_pat_1);
+  rtx_insn *emitted_2 = emit_insn (mem_pat_2);
+  INSN_CODE (emitted_1) = recog (PATTERN (emitted_1), emitted_1, 0);
+  INSN_CODE (emitted_2) = recog (PATTERN (emitted_2), emitted_2, 0);
+
+  if (post_index)
+    {
+      rtx_insn *emitted_offset = emit_insn (offset_pat);
+      INSN_CODE (emitted_offset)
+        = recog (PATTERN (emitted_offset), emitted_offset, 0);
+    }
+}
+
+/* Replace the parallel pair instruction INSN with the sequence produced
+   by split_simple_ldp (two elements) or split_ldp_with_offset (three
+   elements).  INSN is kept as it is when the helper emitted nothing.  */
+void
+pass_split_complex_instructions::split_ldp_stp (rtx_insn *insn)
+{
+  rtx_insn *prev_insn = PREV_INSN (insn);
+  int number_of_sub_insns = XVECLEN (PATTERN (insn), 0);
+
+  start_sequence ();
+
+  if (number_of_sub_insns == 2)
+    split_simple_ldp (insn);
+  else if (number_of_sub_insns == 3)
+    split_ldp_with_offset (insn);
+  else
+    gcc_assert (false && "Broken complex insn vector");
+
+  rtx_insn *seq = get_insns ();
+  unshare_all_rtl_in_chain (seq);
+  end_sequence ();
+
+  /* split_ldp_with_offset may decline to split and emit nothing.
+     Deleting INSN in that case would drop the load altogether.  */
+  if (seq == NULL)
+    return;
+
+  emit_insn_after_setloc (seq, prev_insn, INSN_LOCATION (insn));
+  delete_insn_and_edges (insn);
+}
+
+/* Replace the TImode load INSN with two DImode loads: one of the
+   original address into register N and one of the address advanced by
+   the size of DImode into register N + 1.  INSN is left untouched when
+   its destination is not a register or when its address is neither a
+   plain register nor a PLUS/MINUS with a constant integer offset.
+   NOTE(review): using register N + 1 assumes the TImode value occupies
+   two consecutive DImode hard registers -- confirm that loads into
+   other register classes cannot reach this function.  */
+void
+pass_split_complex_instructions::split_ldp_ti (rtx_insn *insn)
+{
+  rtx_insn *prev_insn = PREV_INSN (insn);
+
+  /* Two private copies of the (set (reg) (mem)) pattern.  */
+  rtx load_1 = copy_rtx (PATTERN (insn));
+  rtx load_2 = copy_rtx (PATTERN (insn));
+
+  rtx dest = XEXP (load_1, 0);
+  if (GET_CODE (dest) != REG)
+    return;
+
+  rtx mem_1 = XEXP (load_1, 1);
+  rtx mem_2 = XEXP (load_2, 1);
+  rtx addr_1 = XEXP (mem_1, 0);
+  rtx addr_2 = XEXP (mem_2, 0);
+  const HOST_WIDE_INT half_size = GET_MODE_SIZE (DImode);
+
+  /* Form the address of the second half first, so that an unsupported
+     address form is rejected before anything is emitted.  */
+  if (GET_CODE (addr_2) == REG)
+    XEXP (mem_2, 0)
+      = gen_rtx_PLUS (DImode, gen_rtx_REG (DImode, REGNO (addr_2)),
+                      GEN_INT (half_size));
+  else if ((GET_CODE (addr_2) == PLUS || GET_CODE (addr_2) == MINUS)
+           && GET_CODE (XEXP (addr_2, 1)) == CONST_INT)
+    {
+      /* For base - C the next address is base - (C - size).  */
+      HOST_WIDE_INT second_offset = INTVAL (XEXP (addr_2, 1));
+      if (GET_CODE (addr_2) == MINUS)
+        second_offset -= half_size;
+      else
+        second_offset += half_size;
+      XEXP (addr_2, 1) = GEN_INT (second_offset);
+    }
+  else
+    /* Splitting without adjusting the address would load the same
+       location twice.  */
+    return;
+
+  PUT_MODE (mem_1, DImode);
+  PUT_MODE (mem_2, DImode);
+
+  const unsigned int dest_regno = REGNO (dest);
+  XEXP (load_1, 0) = gen_rtx_REG (DImode, dest_regno);
+  XEXP (load_2, 0) = gen_rtx_REG (DImode, dest_regno + 1);
+
+  const unsigned int base_regno
+    = GET_CODE (addr_1) == REG ? REGNO (addr_1) : REGNO (XEXP (addr_1, 0));
+
+  /* In cases like ldp r1,r2,[r1] we emit ldr r2,[r1] first.  */
+  if (base_regno == dest_regno)
+    std::swap (load_1, load_2);
+
+  start_sequence ();
+
+  rtx_insn *first = emit_insn (load_1);
+  rtx_insn *second = emit_insn (load_2);
+  INSN_CODE (first) = recog (PATTERN (first), first, 0);
+  INSN_CODE (second) = recog (PATTERN (second), second, 0);
+
+  rtx_insn *seq = get_insns ();
+  unshare_all_rtl_in_chain (seq);
+  end_sequence ();
+
+  emit_insn_after_setloc (seq, prev_insn, INSN_LOCATION (insn));
+  delete_insn_and_edges (insn);
+}
+
+/* Split INSN with the routine that matches its type: split_ldp_stp for
+   LDP and STP, split_ldp_ti for LDP_TI.  Any other type trips an
+   assertion.  */
+void
+pass_split_complex_instructions::split_complex_insn (rtx_insn *insn)
+{
+  /* TODO: Add splitting of STP instructions.  */
+  switch (get_insn_type (insn))
+    {
+    case LDP:
+    case STP:
+      split_ldp_stp (insn);
+      break;
+
+    case LDP_TI:
+      split_ldp_ti (insn);
+      break;
+
+    default:
+      gcc_assert (false && "Unsupported type of insn to split");
+      break;
+    }
+}
+
+/* Classify INSN.  A PARALLEL pattern is LDP or STP when the target hooks
+   accept its insn code and UNDEFINED otherwise.  A single set whose
+   source is a TImode memory reference is LDP_TI.  Everything else,
+   including non-insns, is UNDEFINED.  */
+pass_split_complex_instructions::complex_instructions_t
+pass_split_complex_instructions::get_insn_type (rtx_insn *insn)
+{
+  if (!INSN_P (insn))
+    return UNDEFINED;
+
+  rtx pat = PATTERN (insn);
+
+  if (GET_CODE (pat) == PARALLEL)
+    {
+      /* The insn code is only needed for PARALLEL patterns, so the
+         pattern is recognized only here.  */
+      int icode = recog (pat, insn, NULL);
+
+      if (targetm.is_ldp_insn (icode))
+        return LDP;
+      if (targetm.is_stp_insn (icode))
+        return STP;
+      return UNDEFINED;
+    }
+
+  rtx set_insn = single_set (insn);
+  if (set_insn && GET_CODE (XEXP (set_insn, 1)) == MEM
+      && GET_MODE (XEXP (set_insn, 1)) == E_TImode)
+    return LDP_TI;
+
+  return UNDEFINED;
+}
+
+/* Run the pass only when the target provides both the is_ldp_insn and
+   is_stp_insn hooks, optimization is enabled and -fsplit-ldp-stp is
+   in effect.  */
+bool
+pass_split_complex_instructions::gate (function *)
+{
+  if (!targetm.is_ldp_insn || !targetm.is_stp_insn)
+    return false;
+
+  return optimize > 0 && flag_split_ldp_stp > 0;
+}
+
+} // anon namespace
+
+/* Create a new instance of the split_complex_instructions pass that
+   belongs to CTXT.  */
+rtl_opt_pass *
+make_pass_split_complex_instructions (gcc::context *ctxt)
+{
+  rtl_opt_pass *pass = new pass_split_complex_instructions (ctxt);
+  return pass;
+}
+
#if __GNUC__ >= 10
# pragma GCC diagnostic pop
-#endif
+#endif
\ No newline at end of file
diff --git a/gcc/target.def b/gcc/target.def
index d85adf36a..a3a50b474 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2677,6 +2677,16 @@ modes and they have different conditional execution capability, such as ARM.",
bool, (void),
default_have_conditional_execution)
+DEFHOOK
+(is_ldp_insn,
+ "Return true if icode is corresponding to any of the LDP instruction types.",
+ bool, (int icode), NULL)
+
+DEFHOOK
+(is_stp_insn,
+ "Return true if icode is corresponding to any of the STP instruction types.",
+ bool, (int icode), NULL)
+
DEFHOOK
(gen_ccmp_first,
"This function prepares to emit a comparison insn for the first compare in a\n\
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
new file mode 100644
index 000000000..3918d43f6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-dont-split.c
@@ -0,0 +1,74 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-additional-options "-fsplit-ldp-stp" } */
+/*
+ * Tests are:
+ * Patterns where LDP insns should NOT be split
+ * */
+
+int __RTL (startwith ("split_complex_instructions"))
+simple_ldp_after_store ()
+{
+(function "simple_ldp_after_store"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 228 (set (reg/i:DI sp)
+ (reg/i:DI x0)))
+ (cinsn 101 (set (mem/c:DI
+ (plus:DI (reg/f:DI sp)
+ (const_int 32))[1 S4 A32])(reg:DI x0)))
+ (cinsn 10 (parallel [
+ (set (reg:DI x29)
+ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
+ (set (reg:DI x30)
+ (mem:DI (plus:DI (reg/f:DI sp)
+ (const_int 16)) [1 S4 A32]))]))
+ (cinsn 11 (use (reg/i:DI sp)))
+ (cinsn 12 (use (reg/i:DI cc)))
+ (cinsn 13 (use (reg/i:DI x29)))
+ (cinsn 14 (use (reg/i:DI x30)))
+ (cinsn 15 (use (reg/i:DI x0)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+) ;; function "simple_ldp_after_store"
+}
+
+int __RTL (startwith ("split_complex_instructions"))
+ldp_after_store_in_different_bb ()
+{
+(function "ldp_after_store_in_different_bb"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 228 (set (reg/i:DI sp)
+ (reg/i:DI x0)))
+ (cinsn 101 (set (mem/c:DI
+ (plus:DI (reg/f:DI sp)
+ (const_int 32))[1 S4 A32])(reg:DI x0)))
+ (edge-to 3 (flags "FALLTHRU"))
+ ) ;; block 2
+ (block 3
+ (edge-from 2 (flags "FALLTHRU"))
+ (cnote 4 [bb 3] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 10 (parallel [
+ (set (reg:DI x29)
+ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
+ (set (reg:DI x30)
+ (mem:DI (plus:DI (reg/f:DI sp)
+ (const_int 16)) [1 S4 A32]))]))
+ (cinsn 11 (use (reg/i:DI sp)))
+ (cinsn 12 (use (reg/i:DI cc)))
+ (cinsn 13 (use (reg/i:DI x29)))
+ (cinsn 14 (use (reg/i:DI x30)))
+ (cinsn 15 (use (reg/i:DI x0)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 3
+ ) ;; insn-chain
+) ;; function "ldp_after_store_in_different_bb"
+}
+
+/* Verify that the output code contains exactly 2 ldp. */
+/* { dg-final { scan-assembler-times {ldp\t} 2 } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
new file mode 100644
index 000000000..653c30f83
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split-rearrange.c
@@ -0,0 +1,40 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-additional-options "-fsplit-ldp-stp" } */
+/*
+ * Test is:
+ * Pattern where LDP insns should be split with rearrangement in order
+ * to deal with the data dependency between the sub-instructions.
+ * */
+
+int __RTL (startwith ("split_complex_instructions"))
+simple_ldp_after_store ()
+{
+(function "ldp_equal_registers"
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 228 (set (reg/i:DI x1)
+ (reg/i:DI x0)))
+ (cinsn 101 (set (mem/c:DI
+ (plus:DI (reg/f:DI x1)
+ (const_int 8))[1 S4 A32])(reg:DI x0)))
+ (cinsn 10 (parallel [
+ (set (reg:DI x1)
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int 8)) [1 S4 A32]))
+ (set (reg:DI x2)
+ (mem:DI (plus:DI (reg/f:DI x1)
+ (const_int 16)) [1 S4 A32]))]))
+ (cinsn 11 (use (reg/i:DI sp)))
+ (cinsn 12 (use (reg/i:DI cc)))
+ (cinsn 13 (use (reg/i:DI x0)))
+ (cinsn 14 (use (reg/i:DI x1)))
+ (cinsn 15 (use (reg/i:DI x2)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+) ;; function "ldp_equal_registers"
+}
+
+/* Verify that the output code contains the ldrs in rearranged order. */
+/* { dg-final { scan-assembler-times ".*ldr.*x2.*x1,.*16.*ldr.*x1.*x1.*8" 1 } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
new file mode 100644
index 000000000..dc9f26efb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/rtl/aarch64/test-ldp-split.c
@@ -0,0 +1,174 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-additional-options "-O1 -fsplit-ldp-stp" } */
+/*
+ * Tests are:
+ * Patterns where LDP insns should be split
+ * */
+
+int __RTL (startwith ("split_complex_instructions"))
+simple_ldp_after_store ()
+{
+(function "simple_ldp_after_store" ;; each load pair starts at the address written by the store just before it
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 228 (set (reg/i:DI sp)
+ (reg/i:DI x0))) ;; sp = x0
+ (cinsn 238 (set (reg/i:DI x1)
+ (reg/i:DI x0))) ;; x1 = x0
+
+ (cinsn 101 (set (mem/c:DI
+ (plus:DI (reg/f:DI sp)
+ (const_int 8))[1 S4 A32])(reg:DI x0))) ;; store x0 to [sp + 8]
+ (cinsn 10 (parallel [
+ (set (reg:DI x29)
+ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
+ (set (reg:DI x30)
+ (mem:DI (plus:DI (reg/f:DI sp)
+ (const_int 16)) [1 S4 A32]))])) ;; load pair x29, x30 from [sp + 8] and [sp + 16]
+
+ (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x1)
+ (const_int -16)) [1 S4 A32])
+ (reg:DI x0))) ;; store x0 to [x1 - 16]
+ (cinsn 11 (parallel [
+ (set (reg:DI x3)
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int -16)) [1 S4 A32]))
+ (set (reg:DI x4)
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int -8)) [1 S4 A32]))
+ ])) ;; load pair x3, x4 from [x1 - 16] and [x1 - 8]
+
+ (cinsn 103 (set (mem/c:DI (reg/f:DI x1) [1 S4 A32])
+ (reg:DI x0))) ;; store x0 to [x1]
+ (cinsn 12 (parallel [
+ (set (reg:DI x5) (mem:DI (reg/f:DI x1) [1 S4 A32]))
+ (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1)
+ (const_int 8)) [1 S4 A32]))
+ ])) ;; load pair x5, x6 from [x1] and [x1 + 8]
+
+ (cinsn 13 (use (reg/i:DI sp)))
+ (cinsn 14 (use (reg/i:DI cc)))
+ (cinsn 15 (use (reg/i:DI x29)))
+ (cinsn 16 (use (reg/i:DI x30)))
+ (cinsn 17 (use (reg/i:DI x0)))
+ (cinsn 18 (use (reg/i:DI x3)))
+ (cinsn 19 (use (reg/i:DI x4)))
+ (cinsn 20 (use (reg/i:DI x5)))
+ (cinsn 21 (use (reg/i:DI x6)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+) ;; function "simple_ldp_after_store"
+}
+
+int __RTL (startwith ("split_complex_instructions"))
+ldp_ti_after_store ()
+{
+ (function "ldp_ti_after_store" ;; each TImode load starts at the address written by the store just before it
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 228 (set (reg/i:DI sp)
+ (reg/i:DI x0))) ;; sp = x0
+ (cinsn 238 (set (reg/i:DI x2)
+ (reg/i:DI x0))) ;; x2 = x0
+
+ (cinsn 101 (set (mem/c:DI
+ (plus:DI (reg/f:DI sp)
+ (const_int 136))[1 S4 A32])(reg:DI x0))) ;; store x0 to [sp + 136]
+ (insn 81 (set (reg:TI x0 [1 S4 A32])
+ (mem/c:TI (plus:DI (reg/f:DI sp)
+ (const_int 136 )) [1 S4 A32]))
+ (expr_list:REG_EQUIV (mem/c:TI (plus:DI (reg/f:DI sfp)
+ (const_int -24 )) [1 S4 A32])
+ (nil))) ;; TImode load into x0 from [sp + 136]
+
+ (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x2)
+ (const_int -16)) [1 S4 A32])
+ (reg:DI x0))) ;; store x0 to [x2 - 16]
+ (insn 82 (set (reg:TI x3 [1 S4 A32])
+ (mem/c:TI (plus:DI (reg/f:DI x2)
+ (const_int -16)) [1 S4 A32]))) ;; TImode load into x3 from [x2 - 16]
+
+ (cinsn 103 (set (mem/c:DI (reg/f:DI x2) [1 S4 A32])
+ (reg:DI x0))) ;; store x0 to [x2]
+ (insn 83 (set (reg:TI x5 [1 S4 A32])
+ (mem/c:TI (reg/f:DI x2) [1 S4 A32]))) ;; TImode load into x5 from [x2]
+
+ (cinsn 11 (use (reg/i:DI sp)))
+ (cinsn 12 (use (reg/i:DI cc)))
+ (cinsn 13 (use (reg/i:DI x29)))
+ (cinsn 14 (use (reg/i:DI x30)))
+ (cinsn 15 (use (reg/i:DI x0)))
+ (cinsn 16 (use (reg/i:DI x3)))
+ (cinsn 17 (use (reg/i:DI x5)))
+ (cinsn 18 (use (reg/i:DI x1)))
+ (cinsn 19 (use (reg/i:DI x4)))
+ (cinsn 20 (use (reg/i:DI x6)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+) ;; function "ldp_ti_after_store"
+}
+
+int __RTL (startwith ("split_complex_instructions"))
+ldp_after_store_in_different_bb ()
+{
+(function "ldp_after_store_in_different_bb" ;; the stores are in block 2, the load pairs reading them in block 3
+ (insn-chain
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 228 (set (reg/i:DI sp)
+ (reg/i:DI x0))) ;; sp = x0
+ (cinsn 238 (set (reg/i:DI x1)
+ (reg/i:DI x0))) ;; x1 = x0
+
+ (cinsn 101 (set (mem/c:DI
+ (plus:DI (reg/f:DI sp)
+ (const_int 8))[1 S4 A32])(reg:DI x0))) ;; store x0 to [sp + 8]
+ (cinsn 102 (set (mem/c:DI (plus:DI (reg/f:DI x1)
+ (const_int -16)) [1 S4 A32])
+ (reg:DI x0))) ;; store x0 to [x1 - 16]
+ (cinsn 103 (set (mem/c:DI (reg/f:DI x1) [1 S4 A32])
+ (reg:DI x0))) ;; store x0 to [x1]
+ (edge-to 3 (flags "FALLTHRU"))
+ ) ;; block 2
+ (block 3
+ (edge-from 2 (flags "FALLTHRU"))
+ (cnote 4 [bb 3] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 10 (parallel [
+ (set (reg:DI x29)
+ (mem:DI (plus:DI (reg/f:DI sp) (const_int 8)) [1 S4 A32]))
+ (set (reg:DI x30)
+ (mem:DI (plus:DI (reg/f:DI sp)
+ (const_int 16)) [1 S4 A32]))])) ;; load pair x29, x30 from [sp + 8] and [sp + 16]
+ (cinsn 11 (parallel [
+ (set (reg:DI x3)
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int -16)) [1 S4 A32]))
+ (set (reg:DI x4)
+ (mem:DI (plus:DI (reg/f:DI x1) (const_int -8)) [1 S4 A32]))
+ ])) ;; load pair x3, x4 from [x1 - 16] and [x1 - 8]
+ (cinsn 12 (parallel [
+ (set (reg:DI x5) (mem:DI (reg/f:DI x1) [1 S4 A32]))
+ (set (reg:DI x6) (mem:DI (plus:DI (reg/f:DI x1)
+ (const_int 8)) [1 S4 A32]))
+ ])) ;; load pair x5, x6 from [x1] and [x1 + 8]
+ (cinsn 13 (use (reg/i:DI sp)))
+ (cinsn 14 (use (reg/i:DI cc)))
+ (cinsn 15 (use (reg/i:DI x29)))
+ (cinsn 16 (use (reg/i:DI x30)))
+ (cinsn 17 (use (reg/i:DI x0)))
+ (cinsn 18 (use (reg/i:DI x3)))
+ (cinsn 19 (use (reg/i:DI x4)))
+ (cinsn 20 (use (reg/i:DI x5)))
+ (cinsn 21 (use (reg/i:DI x6)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 3
+ ) ;; insn-chain
+) ;; function "ldp_after_store_in_different_bb"
+}
+
+/* Verify that the output code doesn't contain ldp. */
+/* { dg-final { scan-assembler-not {ldp\t} } } */
\ No newline at end of file
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 1e7d4e74b..2ccecffb5 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -280,6 +280,7 @@ DEFTIMEVAR (TV_RELOAD_CSE_REGS , "reload CSE regs")
DEFTIMEVAR (TV_GCSE_AFTER_RELOAD , "load CSE after reload")
DEFTIMEVAR (TV_REE , "ree")
DEFTIMEVAR (TV_THREAD_PROLOGUE_AND_EPILOGUE, "thread pro- & epilogue")
+DEFTIMEVAR (TV_SPLIT_CMP_INS , "split complex instructions")
DEFTIMEVAR (TV_IFCVT2 , "if-conversion 2")
DEFTIMEVAR (TV_SPLIT_PATHS , "split paths")
DEFTIMEVAR (TV_COMBINE_STACK_ADJUST , "combine stack adjustments")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 86f38e2f2..6daac7fc1 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -612,6 +612,7 @@ extern rtl_opt_pass *make_pass_split_after_reload (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_thread_prologue_and_epilogue (gcc::context
*ctxt);
extern rtl_opt_pass *make_pass_zero_call_used_regs (gcc::context *ctxt);
+extern rtl_opt_pass *make_pass_split_complex_instructions (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_stack_adjustments (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_sched_fusion (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_peephole2 (gcc::context *ctxt);
--
2.33.0