Sync patch from openeuler/gcc - 20221201 (cherry picked from commit 5487e8942c694fd317f2cbf1662e9eaf33f2f612)
1008 lines
32 KiB
Diff
1008 lines
32 KiB
Diff
From d334ec1579fb0668da5e23ced3b782d7f6f35d77 Mon Sep 17 00:00:00 2001
|
|
From: benniaobufeijiushiji <linda7@huawei.com>
|
|
Date: Mon, 17 Oct 2022 17:21:57 +0800
|
|
Subject: [PATCH 30/35] [Loop-distribution] Add isomorphic stmts analysis
|
|
|
|
Use option -ftree-slp-transpose-vectorize
|
|
|
|
Check if loop is vectorizable before analysis. For unvectorizable
|
|
loops, try to find isomorphic stmts from grouped load as new seed stmts
|
|
for distribution.
|
|
---
|
|
gcc/tree-loop-distribution.c | 858 +++++++++++++++++++++++++++++++++++
|
|
gcc/tree-vect-loop.c | 37 +-
|
|
gcc/tree-vectorizer.h | 3 +-
|
|
3 files changed, 894 insertions(+), 4 deletions(-)
|
|
|
|
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
|
|
index 888af4894..c08af6562 100644
|
|
--- a/gcc/tree-loop-distribution.c
|
|
+++ b/gcc/tree-loop-distribution.c
|
|
@@ -90,6 +90,8 @@ along with GCC; see the file COPYING3. If not see
|
|
data reuse. */
|
|
|
|
#include "config.h"
|
|
+#define INCLUDE_MAP
|
|
+#define INCLUDE_ALGORITHM
|
|
#include "system.h"
|
|
#include "coretypes.h"
|
|
#include "backend.h"
|
|
@@ -115,6 +117,7 @@ along with GCC; see the file COPYING3. If not see
|
|
#include "tree-vectorizer.h"
|
|
#include "tree-eh.h"
|
|
#include "gimple-fold.h"
|
|
+#include "optabs-tree.h"
|
|
|
|
|
|
#define MAX_DATAREFS_NUM \
|
|
@@ -183,6 +186,52 @@ struct rdg_vertex
|
|
#define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
|
|
#define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
|
|
|
|
+/* Results of isomorphic group analysis. */
|
|
+#define UNINITIALIZED (0)
|
|
+#define ISOMORPHIC (1)
|
|
+#define HETEROGENEOUS (1 << 1)
|
|
+#define UNCERTAIN (1 << 2)
|
|
+
|
|
+/* Information of a stmt while analyzing isomorphic use in group. */
|
|
+
|
|
+typedef struct _group_info
|
|
+{
|
|
+ gimple *stmt;
|
|
+
|
|
+ /* True if stmt can be a cut point. */
|
|
+ bool cut_point;
|
|
+
|
|
+ /* For use_stmt with two rhses, one of which is the lhs of stmt.
|
|
+ If the other is unknown to be isomorphic, mark it uncertain. */
|
|
+ bool uncertain;
|
|
+
|
|
+ /* Searching of isomorphic stmt reaches heterogeneous groups or reaches
|
|
+ MEM stmts. */
|
|
+ bool done;
|
|
+
|
|
+ _group_info ()
|
|
+ {
|
|
+ stmt = NULL;
|
|
+ cut_point = false;
|
|
+ uncertain = false;
|
|
+ done = false;
|
|
+ }
|
|
+} *group_info;
|
|
+
|
|
+/* PAIR of cut points and corresponding profit. */
|
|
+typedef std::pair<vec<gimple *> *, int> stmts_profit;
|
|
+
|
|
+/* MAP of vector factor VF and corresponding stmts_profit PAIR. */
|
|
+typedef std::map<unsigned, stmts_profit> vf_stmts_profit_map;
|
|
+
|
|
+/* PAIR of group_num and iteration_num. We consider rhses from the same
|
|
+ group and iteration are isomorphic. */
|
|
+typedef std::pair<unsigned, unsigned> group_iteration;
|
|
+
|
|
+/* An isomorphic stmt is determined by lhs of use_stmt, group_num and
|
|
+ the iteration_num when we insert this stmt to this map. */
|
|
+typedef std::map<tree, group_iteration> isomer_stmt_lhs;
|
|
+
|
|
/* Data dependence type. */
|
|
|
|
enum rdg_dep_type
|
|
@@ -640,6 +689,18 @@ class loop_distribution
|
|
void finalize_partitions (class loop *loop, vec<struct partition *>
|
|
*partitions, vec<ddr_p> *alias_ddrs);
|
|
|
|
+ /* Analyze loop form and if it's vectorizable to decide if we need to
|
|
+ insert temp arrays to distribute it. */
|
|
+ bool may_insert_temp_arrays (loop_p loop, struct graph *&rdg,
|
|
+ control_dependences *cd);
|
|
+
|
|
+ /* Reset gimple_uid of GIMPLE_DEBUG and GIMPLE_LABEL to -1. */
|
|
+ void reset_gimple_uid (loop_p loop);
|
|
+
|
|
+ bool check_loop_vectorizable (loop_p loop);
|
|
+
|
|
+ inline void rebuild_rdg (loop_p loop, struct graph *&rdg,
|
|
+ control_dependences *cd);
|
|
/* Distributes the code from LOOP in such a way that producer statements
|
|
are placed before consumer statements. Tries to separate only the
|
|
statements from STMTS into separate loops. Returns the number of
|
|
@@ -2900,6 +2961,803 @@ loop_distribution::finalize_partitions (class loop *loop,
|
|
fuse_memset_builtins (partitions);
|
|
}
|
|
|
|
+/* Gimple uids of GIMPLE_DEBUG and GIMPLE_LABEL were changed during function
|
|
+ vect_analyze_loop, reset them to -1. */
|
|
+
|
|
+void
|
|
+loop_distribution::reset_gimple_uid (loop_p loop)
|
|
+{
|
|
+ basic_block *bbs = get_loop_body_in_custom_order (loop, this,
|
|
+ bb_top_order_cmp_r);
|
|
+ for (int i = 0; i < int (loop->num_nodes); i++)
|
|
+ {
|
|
+ basic_block bb = bbs[i];
|
|
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
|
|
+ gsi_next (&gsi))
|
|
+ {
|
|
+ gimple *stmt = gsi_stmt (gsi);
|
|
+ if (is_gimple_debug (stmt) || gimple_code (stmt) == GIMPLE_LABEL)
|
|
+ gimple_set_uid (stmt, -1);
|
|
+ }
|
|
+ }
|
|
+ free (bbs);
|
|
+}
|
|
+
|
|
+bool
|
|
+loop_distribution::check_loop_vectorizable (loop_p loop)
|
|
+{
|
|
+ vec_info_shared shared;
|
|
+ vect_analyze_loop (loop, &shared, true);
|
|
+ loop_vec_info vinfo = loop_vec_info_for_loop (loop);
|
|
+ reset_gimple_uid (loop);
|
|
+ if (vinfo == NULL)
|
|
+ {
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
+ fprintf (dump_file,
|
|
+ "Loop %d no temp array insertion: bad data access pattern,"
|
|
+ " unable to generate loop_vinfo.\n", loop->num);
|
|
+ return false;
|
|
+ }
|
|
+ if (vinfo->vectorizable)
|
|
+ {
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
+ fprintf (dump_file, "Loop %d no temp array insertion: original loop"
|
|
+ " can be vectorized without distribution.\n",
|
|
+ loop->num);
|
|
+ delete vinfo;
|
|
+ loop->aux = NULL;
|
|
+ return false;
|
|
+ }
|
|
+ if (vinfo->grouped_loads.length () == 0)
|
|
+ {
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
+ fprintf (dump_file, "Loop %d no temp array insertion: original loop"
|
|
+ " has no grouped loads.\n" , loop->num);
|
|
+ delete vinfo;
|
|
+ loop->aux = NULL;
|
|
+ return false;
|
|
+ }
|
|
+ return true;
|
|
+}
|
|
+
|
|
+inline void
|
|
+loop_distribution::rebuild_rdg (loop_p loop, struct graph *&rdg,
|
|
+ control_dependences *cd)
|
|
+{
|
|
+ free_rdg (rdg);
|
|
+ rdg = build_rdg (loop, cd);
|
|
+ gcc_checking_assert (rdg != NULL);
|
|
+}
|
|
+
|
|
+bool
|
|
+loop_distribution::may_insert_temp_arrays (loop_p loop, struct graph *&rdg,
|
|
+ control_dependences *cd)
|
|
+{
|
|
+ if (!(flag_tree_slp_transpose_vectorize && flag_tree_loop_vectorize))
|
|
+ return false;
|
|
+
|
|
+ /* Only loops with two basic blocks HEADER and LATCH are supported. HEADER
|
|
+ is the main body of a LOOP and LATCH is the basic block that controls the
|
|
+ LOOP execution. Size of temp array is determined by loop execution time,
|
|
+ so it must be a const. */
|
|
+ tree loop_extent = number_of_latch_executions (loop);
|
|
+ if (loop->inner != NULL || loop->num_nodes > 2
|
|
+ || rdg->n_vertices > param_slp_max_insns_in_bb
|
|
+ || TREE_CODE (loop_extent) != INTEGER_CST)
|
|
+ {
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
+ fprintf (dump_file, "Loop %d: no temp array insertion: bad loop"
|
|
+ " form.\n", loop->num);
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ if (loop->dont_vectorize)
|
|
+ {
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
|
+ fprintf (dump_file, "Loop %d: no temp array insertion: this loop"
|
|
+ " should never be vectorized.\n",
|
|
+ loop->num);
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ /* Do not distribute a LOOP that is able to be vectorized without
|
|
+ distribution. */
|
|
+ if (!check_loop_vectorizable (loop))
|
|
+ {
|
|
+ rebuild_rdg (loop, rdg, cd);
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ rebuild_rdg (loop, rdg, cd);
|
|
+ return true;
|
|
+}
|
|
+
|
|
+/* Return max grouped loads' length if all groups' lengths satisfy len = 2 ^ n.
|
|
+ Otherwise, return 0. */
|
|
+
|
|
+static unsigned
|
|
+get_max_vf (loop_vec_info vinfo)
|
|
+{
|
|
+ unsigned size = 0;
|
|
+ unsigned max = 0;
|
|
+ stmt_vec_info stmt_info;
|
|
+ unsigned i = 0;
|
|
+ FOR_EACH_VEC_ELT (vinfo->grouped_loads, i, stmt_info)
|
|
+ {
|
|
+ size = stmt_info->size;
|
|
+ if (!pow2p_hwi (size))
|
|
+ return 0;
|
|
+ max = size > max ? size : max;
|
|
+ }
|
|
+ return max;
|
|
+}
|
|
+
|
|
+/* Convert grouped_loads from linked list to vector with length vf. Init
|
|
+ group_info of each stmt in the same group and put them into a vector. And
|
|
+ these vectors constitute WORKLISTS. We will re-analyze a group if it is
|
|
+ uncertain, so we regard WORKLISTS as a circular queue. */
|
|
+
|
|
+static unsigned
|
|
+build_queue (loop_vec_info vinfo, unsigned vf,
|
|
+ vec<vec<group_info> *> &worklists)
|
|
+{
|
|
+ stmt_vec_info stmt_info;
|
|
+ unsigned i = 0;
|
|
+ group_info ginfo = NULL;
|
|
+ vec<group_info> *worklist = NULL;
|
|
+ FOR_EACH_VEC_ELT (vinfo->grouped_loads, i, stmt_info)
|
|
+ {
|
|
+ unsigned group_size = stmt_info->size;
|
|
+ stmt_vec_info c_stmt_info = stmt_info;
|
|
+ while (group_size >= vf)
|
|
+ {
|
|
+ vec_alloc (worklist, vf);
|
|
+ for (unsigned j = 0; j < vf; ++j)
|
|
+ {
|
|
+ ginfo = new _group_info ();
|
|
+ ginfo->stmt = c_stmt_info->stmt;
|
|
+ worklist->safe_push (ginfo);
|
|
+ c_stmt_info = c_stmt_info->next_element;
|
|
+ }
|
|
+ worklists.safe_push (worklist);
|
|
+ group_size -= vf;
|
|
+ }
|
|
+ }
|
|
+ return worklists.length ();
|
|
+}
|
|
+
|
|
+static bool
|
|
+check_same_oprand_type (tree op1, tree op2)
|
|
+{
|
|
+ tree type1 = TREE_TYPE (op1);
|
|
+ tree type2 = TREE_TYPE (op2);
|
|
+ if (TREE_CODE (type1) != INTEGER_TYPE && TREE_CODE (type1) != REAL_TYPE)
|
|
+ return false;
|
|
+
|
|
+ return (TREE_CODE (type1) == TREE_CODE (type2)
|
|
+ && TYPE_UNSIGNED (type1) == TYPE_UNSIGNED (type2)
|
|
+ && TYPE_PRECISION (type1) == TYPE_PRECISION (type2));
|
|
+}
|
|
+
|
|
+static bool
|
|
+bit_field_p (gimple *stmt)
|
|
+{
|
|
+ unsigned i = 0;
|
|
+ auto_vec<data_reference_p, 2> datarefs_vec;
|
|
+ data_reference_p dr;
|
|
+ if (!find_data_references_in_stmt (NULL, stmt, &datarefs_vec))
|
|
+ return true;
|
|
+
|
|
+ FOR_EACH_VEC_ELT (datarefs_vec, i, dr)
|
|
+ {
|
|
+ if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
|
|
+ && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
|
|
+ return true;
|
|
+ }
|
|
+ return false;
|
|
+}
|
|
+
|
|
+static inline bool
|
|
+shift_operation (enum tree_code op)
|
|
+{
|
|
+ return op == LSHIFT_EXPR || op == RSHIFT_EXPR || op == LROTATE_EXPR
|
|
+ || op == RROTATE_EXPR;
|
|
+}
|
|
+
|
|
+/* Return relationship between USE_STMT and the first use_stmt of the group.
|
|
+ RHS1 is the lhs of stmt recorded in group_info. If another rhs of use_stmt
|
|
+ is not a constant, return UNCERTAIN and re-check it later. */
|
|
+
|
|
+static unsigned
|
|
+check_isomorphic (gimple *use_stmt, gimple *first,
|
|
+ tree rhs1, vec<tree> &hetero_lhs)
|
|
+{
|
|
+ /* Check same operation. */
|
|
+ enum tree_code rhs_code_first = gimple_assign_rhs_code (first);
|
|
+ enum tree_code rhs_code_current = gimple_assign_rhs_code (use_stmt);
|
|
+ if (rhs_code_first != rhs_code_current)
|
|
+ return HETEROGENEOUS;
|
|
+
|
|
+ /* For shift operations, oprands should be equal. */
|
|
+ if (shift_operation (rhs_code_current))
|
|
+ {
|
|
+ tree shift_op_first = gimple_assign_rhs2 (first);
|
|
+ tree shift_op_current = gimple_assign_rhs2 (use_stmt);
|
|
+ if (!operand_equal_p (shift_op_first, shift_op_current, 0)
|
|
+ || !TREE_CONSTANT (shift_op_first))
|
|
+ return HETEROGENEOUS;
|
|
+
|
|
+ return ISOMORPHIC;
|
|
+ }
|
|
+ /* Type conversion expr or assignment. */
|
|
+ if (gimple_num_ops (first) == 2)
|
|
+ return (rhs_code_first == NOP_EXPR || rhs_code_first == CONVERT_EXPR
|
|
+ || rhs_code_first == SSA_NAME) ? ISOMORPHIC : HETEROGENEOUS;
|
|
+
|
|
+ /* We find USE_STMT from lhs of a stmt, denote it as rhs1 of USE_STMT and
|
|
+ the other one as rhs2. Check if define-stmt of current rhs2 is isomorphic
|
|
+ with define-stmt of rhs2 in the first USE_STMT at this group. */
|
|
+ tree rhs2_first = gimple_assign_rhs1 (use_stmt) == rhs1
|
|
+ ? gimple_assign_rhs2 (first) : gimple_assign_rhs1 (first);
|
|
+ tree rhs2_curr = gimple_assign_rhs1 (use_stmt) == rhs1
|
|
+ ? gimple_assign_rhs2 (use_stmt) : gimple_assign_rhs1 (use_stmt);
|
|
+
|
|
+ if (check_same_oprand_type (rhs2_first, rhs2_curr))
|
|
+ {
|
|
+ if (TREE_CONSTANT (rhs2_curr))
|
|
+ return ISOMORPHIC;
|
|
+ else if (hetero_lhs.contains (rhs2_curr))
|
|
+ return HETEROGENEOUS;
|
|
+
|
|
+ /* Provisionally set the stmt as uncertain and analyze the whole group
|
|
+ in function CHECK_UNCERTAIN later if all use_stmts are uncertain. */
|
|
+ return UNCERTAIN;
|
|
+ }
|
|
+ return HETEROGENEOUS;
|
|
+}
|
|
+
|
|
+static bool
|
|
+unsupported_operations (gimple *stmt)
|
|
+{
|
|
+ enum tree_code code = gimple_assign_rhs_code (stmt);
|
|
+ return code == COND_EXPR;
|
|
+}
|
|
+
|
|
+/* Check if the single use_stmt of STMT is isomorphic with the first one's
|
|
+ use_stmt in current group. */
|
|
+
|
|
+static unsigned
|
|
+check_use_stmt (group_info elmt, gimple *&first,
|
|
+ vec<gimple *> &tmp_stmts, vec<tree> &hetero_lhs)
|
|
+{
|
|
+ if (gimple_code (elmt->stmt) != GIMPLE_ASSIGN)
|
|
+ return HETEROGENEOUS;
|
|
+ use_operand_p dummy;
|
|
+ tree lhs = gimple_assign_lhs (elmt->stmt);
|
|
+ gimple *use_stmt = NULL;
|
|
+ single_imm_use (lhs, &dummy, &use_stmt);
|
|
+ /* STMTs with three rhs are not supported, e.g., GIMPLE_COND. */
|
|
+ if (use_stmt == NULL || gimple_code (use_stmt) != GIMPLE_ASSIGN
|
|
+ || unsupported_operations (use_stmt) || bit_field_p (use_stmt))
|
|
+ return HETEROGENEOUS;
|
|
+ tmp_stmts.safe_push (use_stmt);
|
|
+ if (first == NULL)
|
|
+ {
|
|
+ first = use_stmt;
|
|
+ return UNINITIALIZED;
|
|
+ }
|
|
+ /* Check if current use_stmt and the first member's use_stmt in the group
|
|
+ are of the same type. */
|
|
+ tree first_lhs = gimple_assign_lhs (first);
|
|
+ tree curr_lhs = gimple_assign_lhs (use_stmt);
|
|
+ if (!check_same_oprand_type (first_lhs, curr_lhs))
|
|
+ return HETEROGENEOUS;
|
|
+ return check_isomorphic (use_stmt, first, lhs, hetero_lhs);
|
|
+}
|
|
+
|
|
+/* Replace stmt field in group with stmts in TMP_STMTS, and insert their
|
|
+ lhs_info to ISOMER_LHS. */
|
|
+
|
|
+static void
|
|
+update_isomer_lhs (vec<group_info> *group, unsigned group_num,
|
|
+ unsigned iteration, isomer_stmt_lhs &isomer_lhs,
|
|
+ vec<gimple *> tmp_stmts, int &profit,
|
|
+ vec<unsigned> &merged_groups)
|
|
+{
|
|
+ group_info elmt = NULL;
|
|
+ /* Do not insert temp array if isomorphic stmts from grouped load have
|
|
+ only casting operations. Once isomorphic calculation has 3 operands,
|
|
+ such as plus operation, this group can be regarded as cut point. */
|
|
+ bool operated = (gimple_num_ops (tmp_stmts[0]) == 3);
|
|
+ /* Do not insert temp arrays if search of isomorphic stmts reaches
|
|
+ MEM stmts. */
|
|
+ bool has_vdef = gimple_vdef (tmp_stmts[0]) != NULL;
|
|
+ bool merge = false;
|
|
+ for (unsigned i = 0; i < group->length (); i++)
|
|
+ {
|
|
+ elmt = (*group)[i];
|
|
+ elmt->stmt = has_vdef ? NULL : tmp_stmts[i];
|
|
+ elmt->cut_point = has_vdef ? false : (elmt->cut_point || operated);
|
|
+ elmt->uncertain = false;
|
|
+ elmt->done = has_vdef;
|
|
+ tree lhs = gimple_assign_lhs (tmp_stmts[i]);
|
|
+ if (isomer_lhs.find (lhs) != isomer_lhs.end ())
|
|
+ {
|
|
+ merge = true;
|
|
+ continue;
|
|
+ }
|
|
+ isomer_lhs[lhs] = std::make_pair (group_num, iteration);
|
|
+ }
|
|
+ if (merge)
|
|
+ {
|
|
+ merged_groups.safe_push (group_num);
|
|
+ profit = 0;
|
|
+ return;
|
|
+ }
|
|
+ enum vect_cost_for_stmt kind = scalar_stmt;
|
|
+ int scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0);
|
|
+ profit = (tmp_stmts.length () - 1) * scalar_cost;
|
|
+}
|
|
+
|
|
+/* Try to find rhs2 in ISOMER_LHS, if all rhs2 were found and their group_num
|
|
+ and iteration are same, GROUP is isomorphic. */
|
|
+
|
|
+static unsigned
|
|
+check_isomorphic_rhs (vec<group_info> *group, vec<gimple *> &tmp_stmts,
|
|
+ isomer_stmt_lhs &isomer_lhs)
|
|
+{
|
|
+ group_info elmt = NULL;
|
|
+ gimple *stmt = NULL;
|
|
+ unsigned j = 0;
|
|
+ unsigned group_num = -1u;
|
|
+ unsigned iteration = -1u;
|
|
+ tree rhs1 = NULL;
|
|
+ tree rhs2 = NULL;
|
|
+ unsigned status = UNINITIALIZED;
|
|
+ FOR_EACH_VEC_ELT (*group, j, elmt)
|
|
+ {
|
|
+ rhs1 = gimple_assign_lhs (elmt->stmt);
|
|
+ stmt = tmp_stmts[j];
|
|
+ rhs2 = (rhs1 == gimple_assign_rhs1 (stmt))
|
|
+ ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
|
|
+ isomer_stmt_lhs::iterator iter = isomer_lhs.find (rhs2);
|
|
+ if (iter != isomer_lhs.end ())
|
|
+ {
|
|
+ if (group_num == -1u)
|
|
+ {
|
|
+ group_num = iter->second.first;
|
|
+ iteration = iter->second.second;
|
|
+ status |= ISOMORPHIC;
|
|
+ continue;
|
|
+ }
|
|
+ if (iter->second.first == group_num
|
|
+ && iter->second.second == iteration)
|
|
+ {
|
|
+ status |= ISOMORPHIC;
|
|
+ continue;
|
|
+ }
|
|
+ return HETEROGENEOUS;
|
|
+ }
|
|
+ else
|
|
+ status |= UNCERTAIN;
|
|
+ }
|
|
+ return status;
|
|
+}
|
|
+
|
|
+/* Update group_info for uncertain groups. */
|
|
+
|
|
+static void
|
|
+update_uncertain_stmts (vec<group_info> *group, unsigned group_num,
|
|
+ unsigned iteration, vec<gimple *> &tmp_stmts)
|
|
+{
|
|
+ unsigned j = 0;
|
|
+ group_info elmt = NULL;
|
|
+ FOR_EACH_VEC_ELT (*group, j, elmt)
|
|
+ {
|
|
+ elmt->uncertain = true;
|
|
+ elmt->done = false;
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Push stmts in TMP_STMTS into HETERO_LHS. */
|
|
+
|
|
+static void
|
|
+set_hetero (vec<group_info> *group, vec<tree> &hetero_lhs,
|
|
+ vec<gimple *> &tmp_stmts)
|
|
+{
|
|
+ group_info elmt = NULL;
|
|
+ unsigned i = 0;
|
|
+ for (i = 0; i < group->length (); i++)
|
|
+ {
|
|
+ elmt = (*group)[i];
|
|
+ elmt->uncertain = false;
|
|
+ elmt->done = true;
|
|
+ }
|
|
+ gimple *stmt = NULL;
|
|
+ FOR_EACH_VEC_ELT (tmp_stmts, i, stmt)
|
|
+ if (stmt != NULL)
|
|
+ hetero_lhs.safe_push (gimple_assign_lhs (stmt));
|
|
+}
|
|
+
|
|
+/* Given an uncertain group, TMP_STMTS are use_stmts of stmts in GROUP.
|
|
+ Rhs1 is the lhs of stmt in GROUP, rhs2 is the other rhs of USE_STMT.
|
|
+
|
|
+ Try to find rhs2 in ISOMER_LHS, if all found rhs2 have same group_num
|
|
+ and iteration, this uncertain group is isomorphic.
|
|
+
|
|
+ If no rhs matched, this GROUP remains uncertain and update group_info.
|
|
+
|
|
+ Otherwise, this GROUP is heterogeneous and return true to end analysis
|
|
+ for this group. */
|
|
+
|
|
+static bool
|
|
+check_uncertain (vec<group_info> *group, unsigned group_num,
|
|
+ unsigned iteration, int &profit,
|
|
+ vec<gimple *> &tmp_stmts, isomer_stmt_lhs &isomer_lhs,
|
|
+ vec<tree> &hetero_lhs, vec<unsigned> &merged_groups)
|
|
+{
|
|
+ unsigned status = check_isomorphic_rhs (group, tmp_stmts, isomer_lhs);
|
|
+ bool done = false;
|
|
+ switch (status)
|
|
+ {
|
|
+ case UNCERTAIN:
|
|
+ update_uncertain_stmts (group, group_num, iteration, tmp_stmts);
|
|
+ break;
|
|
+ case ISOMORPHIC:
|
|
+ update_isomer_lhs (group, group_num, iteration, isomer_lhs,
|
|
+ tmp_stmts, profit, merged_groups);
|
|
+ break;
|
|
+ default:
|
|
+ set_hetero (group, hetero_lhs, tmp_stmts);
|
|
+ done = true;
|
|
+ }
|
|
+ return done;
|
|
+}
|
|
+
|
|
+/* Return false if analysis of this group is not finished, e.g., isomorphic or
|
|
+ uncertain. Calculate the profit if vectorized. */
|
|
+
|
|
+static bool
|
|
+check_group (vec<group_info> *group, unsigned group_num, unsigned iteration,
|
|
+ int &profit, vec<unsigned> &merged_groups,
|
|
+ isomer_stmt_lhs &isomer_lhs, vec<tree> &hetero_lhs)
|
|
+{
|
|
+ unsigned j = 0;
|
|
+ group_info elmt = NULL;
|
|
+ gimple *first = NULL;
|
|
+ unsigned res = 0;
|
|
+ /* Record single use stmts in TMP_STMTS and decide whether replace stmts in
|
|
+ ginfo in succeeding processes. */
|
|
+ auto_vec<gimple *> tmp_stmts;
|
|
+ FOR_EACH_VEC_ELT (*group, j, elmt)
|
|
+ {
|
|
+ if (merged_groups.contains (group_num))
|
|
+ return true;
|
|
+ res |= check_use_stmt (elmt, first, tmp_stmts, hetero_lhs);
|
|
+ }
|
|
+
|
|
+ /* Update each group member according to RES. */
|
|
+ switch (res)
|
|
+ {
|
|
+ case ISOMORPHIC:
|
|
+ update_isomer_lhs (group, group_num, iteration, isomer_lhs,
|
|
+ tmp_stmts, profit, merged_groups);
|
|
+ return false;
|
|
+ case UNCERTAIN:
|
|
+ return check_uncertain (group, group_num, iteration, profit,
|
|
+ tmp_stmts, isomer_lhs, hetero_lhs,
|
|
+ merged_groups);
|
|
+ default:
|
|
+ set_hetero (group, hetero_lhs, tmp_stmts);
|
|
+ return true;
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Return true if all analyses are done except uncertain groups. */
|
|
+
|
|
+static bool
|
|
+end_of_search (vec<vec<group_info> *> &circular_queue,
|
|
+ vec<unsigned> &merged_groups)
|
|
+{
|
|
+ unsigned i = 0;
|
|
+ vec<group_info> *group = NULL;
|
|
+ group_info elmt = NULL;
|
|
+ FOR_EACH_VEC_ELT (circular_queue, i, group)
|
|
+ {
|
|
+ if (merged_groups.contains (i))
|
|
+ continue;
|
|
+ elmt = (*group)[0];
|
|
+ /* If there is any isomorphic use_stmts, continue analysis of isomorphic
|
|
+ use_stmts. */
|
|
+ if (!elmt->done && !elmt->uncertain)
|
|
+ return false;
|
|
+ }
|
|
+ return true;
|
|
+}
|
|
+
|
|
+/* Push valid stmts to STMTS as cutpoints. */
|
|
+
|
|
+static bool
|
|
+check_any_cutpoints (vec<vec<group_info> *> &circular_queue,
|
|
+ vec<gimple *> *&stmts, vec<unsigned> &merged_groups)
|
|
+{
|
|
+ unsigned front = 0;
|
|
+ vec<group_info> *group = NULL;
|
|
+ group_info elmt = NULL;
|
|
+ unsigned max = circular_queue.length () * circular_queue[0]->length ();
|
|
+ vec_alloc (stmts, max);
|
|
+ while (front < circular_queue.length ())
|
|
+ {
|
|
+ unsigned i = 0;
|
|
+ if (merged_groups.contains (front))
|
|
+ {
|
|
+ front++;
|
|
+ continue;
|
|
+ }
|
|
+ group = circular_queue[front++];
|
|
+ FOR_EACH_VEC_ELT (*group, i, elmt)
|
|
+ if (elmt->stmt != NULL && elmt->done && elmt->cut_point)
|
|
+ stmts->safe_push (elmt->stmt);
|
|
+ }
|
|
+ return stmts->length () != 0;
|
|
+}
|
|
+
|
|
+/* Grouped loads are isomorphic. Make pair for group number and iteration,
|
|
+ map load stmt to this pair. We set iteration 0 here. */
|
|
+
|
|
+static void
|
|
+init_isomer_lhs (vec<vec<group_info> *> &groups, isomer_stmt_lhs &isomer_lhs)
|
|
+{
|
|
+ vec<group_info> *group = NULL;
|
|
+ group_info elmt = NULL;
|
|
+ unsigned i = 0;
|
|
+ FOR_EACH_VEC_ELT (groups, i, group)
|
|
+ {
|
|
+ unsigned j = 0;
|
|
+ FOR_EACH_VEC_ELT (*group, j, elmt)
|
|
+ isomer_lhs[gimple_assign_lhs (elmt->stmt)] = std::make_pair (i, 0);
|
|
+ }
|
|
+}
|
|
+
|
|
+/* It's not a strict analysis of load/store profit. Assume scalar and vector
|
|
+ load/store are of the same cost. The result PROFIT equals profit from
|
|
+ vectorizing of scalar loads/stores minus cost of a vectorized load/store. */
|
|
+
|
|
+static int
|
|
+load_store_profit (unsigned scalar_mem_ops, unsigned vf, unsigned new_mem_ops)
|
|
+{
|
|
+ int profit = 0;
|
|
+ enum vect_cost_for_stmt kind = scalar_load;
|
|
+ int scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0);
|
|
+ profit += (scalar_mem_ops - (scalar_mem_ops / vf)) * scalar_cost;
|
|
+ profit -= new_mem_ops / vf * scalar_cost;
|
|
+ kind = scalar_store;
|
|
+ scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0);
|
|
+ profit -= new_mem_ops / vf * scalar_cost;
|
|
+ return profit;
|
|
+}
|
|
+
|
|
+/* Breadth first search the graph consisting of define-use chain starting from
|
|
+ the circular queue initialized by function BUILD_QUEUE. Find single use of
|
|
+ each stmt in group and check if they are isomorphic. Isomorphic is defined
|
|
+ as same rhs type, same operator, and isomorphic calculation of each rhs
|
|
+ starting from load. If another rhs is uncertain to be isomorphic, put it
|
|
+ at the end of circular queue and re-analyze it during the next iteration.
|
|
+ If a group shares the same use_stmt with another group, skip one of them in
|
|
+ successor processes as merged. Iterate the circular queue until all
|
|
+ remaining groups are heterogeneous or searching reaches MEM stmts. If all other groups
|
|
+ have finished the analysis, and the remaining groups are uncertain,
|
|
+ return false to avoid endless loop. */
|
|
+
|
|
+bool
|
|
+bfs_find_isomer_stmts (vec<vec<group_info> *> &circular_queue,
|
|
+ stmts_profit &profit_pair, unsigned vf,
|
|
+ bool &reach_vdef)
|
|
+{
|
|
+ isomer_stmt_lhs isomer_lhs;
|
|
+ auto_vec<tree> hetero_lhs;
|
|
+ auto_vec<unsigned> merged_groups;
|
|
+ vec<group_info> *group = NULL;
|
|
+ /* True if analysis finishes. */
|
|
+ bool done = false;
|
|
+ int profit_sum = 0;
|
|
+ vec<gimple *> *stmts = NULL;
|
|
+ init_isomer_lhs (circular_queue, isomer_lhs);
|
|
+ for (unsigned i = 1; !done; ++i)
|
|
+ {
|
|
+ unsigned front = 0;
|
|
+ /* Re-initialize DONE to TRUE while a new iteration begins. */
|
|
+ done = true;
|
|
+ while (front < circular_queue.length ())
|
|
+ {
|
|
+ int profit = 0;
|
|
+ group = circular_queue[front];
|
|
+ done &= check_group (group, front, i, profit, merged_groups,
|
|
+ isomer_lhs, hetero_lhs);
|
|
+ profit_sum += profit;
|
|
+ if (profit != 0 && (*group)[0]->stmt == NULL)
|
|
+ {
|
|
+ reach_vdef = true;
|
|
+ return false;
|
|
+ }
|
|
+ ++front;
|
|
+ }
|
|
+ /* Uncertain result, return. */
|
|
+ if (!done && end_of_search (circular_queue, merged_groups))
|
|
+ return false;
|
|
+ }
|
|
+ if (check_any_cutpoints (circular_queue, stmts, merged_groups))
|
|
+ {
|
|
+ profit_pair.first = stmts;
|
|
+ unsigned loads = circular_queue.length () * circular_queue[0]->length ();
|
|
+ profit_pair.second = profit_sum + load_store_profit (loads, vf,
|
|
+ stmts->length ());
|
|
+ if (profit_pair.second > 0)
|
|
+ return true;
|
|
+ }
|
|
+ return false;
|
|
+}
|
|
+
|
|
+/* Free memory allocated by ginfo. */
|
|
+
|
|
+static void
|
|
+free_ginfos (vec<vec<group_info> *> &worklists)
|
|
+{
|
|
+ vec<group_info> *worklist;
|
|
+ unsigned i = 0;
|
|
+ while (i < worklists.length ())
|
|
+ {
|
|
+ worklist = worklists[i++];
|
|
+ group_info ginfo;
|
|
+ unsigned j = 0;
|
|
+ FOR_EACH_VEC_ELT (*worklist, j, ginfo)
|
|
+ delete ginfo;
|
|
+ }
|
|
+}
|
|
+
|
|
+static void
|
|
+release_tmp_stmts (vf_stmts_profit_map &candi_stmts)
|
|
+{
|
|
+ vf_stmts_profit_map::iterator iter;
|
|
+ for (iter = candi_stmts.begin (); iter != candi_stmts.end (); ++iter)
|
|
+ iter->second.first->release ();
|
|
+}
|
|
+
|
|
+/* Choose the group of stmt with maximum profit. */
|
|
+
|
|
+static bool
|
|
+decide_stmts_by_profit (vf_stmts_profit_map &candi_stmts, vec<gimple *> &stmts)
|
|
+{
|
|
+ vf_stmts_profit_map::iterator iter;
|
|
+ int profit = 0;
|
|
+ int max = 0;
|
|
+ vec<gimple *> *tmp = NULL;
|
|
+ for (iter = candi_stmts.begin (); iter != candi_stmts.end (); ++iter)
|
|
+ {
|
|
+ profit = iter->second.second;
|
|
+ if (profit > max)
|
|
+ {
|
|
+ tmp = iter->second.first;
|
|
+ max = profit;
|
|
+ }
|
|
+ }
|
|
+ if (max == 0)
|
|
+ {
|
|
+ release_tmp_stmts (candi_stmts);
|
|
+ return false;
|
|
+ }
|
|
+ unsigned i = 0;
|
|
+ gimple *stmt = NULL;
|
|
+ FOR_EACH_VEC_ELT (*tmp, i, stmt)
|
|
+ stmts.safe_push (stmt);
|
|
+ release_tmp_stmts (candi_stmts);
|
|
+ return stmts.length () != 0;
|
|
+}
|
|
+
|
|
+/* Find isomorphic stmts from grouped loads with vector factor VF.
|
|
+
|
|
+ Given source code as follows and ignore casting.
|
|
+
|
|
+ a0 = (a[0] + b[0]) + ((a[4] - b[4]) << 16);
|
|
+ a1 = (a[1] + b[1]) + ((a[5] - b[5]) << 16);
|
|
+ a2 = (a[2] + b[2]) + ((a[6] - b[6]) << 16);
|
|
+ a3 = (a[3] + b[3]) + ((a[7] - b[7]) << 16);
|
|
+
|
|
+ We get grouped loads in VINFO as
|
|
+
|
|
+ GROUP_1 GROUP_2
|
|
+ _1 = *a _11 = *b
|
|
+ _2 = *(a + 1) _12 = *(b + 1)
|
|
+ _3 = *(a + 2) _13 = *(b + 2)
|
|
+ _4 = *(a + 3) _14 = *(b + 3)
|
|
+ _5 = *(a + 4) _15 = *(b + 4)
|
|
+ _6 = *(a + 5) _16 = *(b + 5)
|
|
+ _7 = *(a + 6) _17 = *(b + 6)
|
|
+ _8 = *(a + 7) _18 = *(b + 7)
|
|
+
|
|
+ First we try VF = 8, we get two worklists
|
|
+
|
|
+ WORKLIST_1 WORKLIST_2
|
|
+ _1 = *a _11 = *b
|
|
+ _2 = *(a + 1) _12 = *(b + 1)
|
|
+ _3 = *(a + 2) _13 = *(b + 2)
|
|
+ _4 = *(a + 3) _14 = *(b + 3)
|
|
+ _5 = *(a + 4) _15 = *(b + 4)
|
|
+ _6 = *(a + 5) _16 = *(b + 5)
|
|
+ _7 = *(a + 6) _17 = *(b + 6)
|
|
+ _8 = *(a + 7) _18 = *(b + 7)
|
|
+
|
|
+ We find _111 = _1 + _11 and _115 = _5 - _15 are not isomorphic,
|
|
+ so we try VF = VF / 2.
|
|
+
|
|
+ GROUP_1 GROUP_2
|
|
+ _1 = *a _5 = *(a + 4)
|
|
+ _2 = *(a + 1) _6 = *(a + 5)
|
|
+ _3 = *(a + 2) _7 = *(a + 6)
|
|
+ _4 = *(a + 3) _8 = *(a + 7)
|
|
+
|
|
+ GROUP_3 GROUP_4
|
|
+ _11 = *b _15 = *(b + 4)
|
|
+ _12 = *(b + 1) _16 = *(b + 5)
|
|
+ _13 = *(b + 2) _17 = *(b + 6)
|
|
+ _14 = *(b + 3) _18 = *(b + 7)
|
|
+
|
|
+ We first analyze group_1, and find all operations are isomorphic, then
|
|
+ replace stmts in group_1 with their use_stmts. Group_2 as well.
|
|
+
|
|
+ GROUP_1 GROUP_2
|
|
+ _111 = _1 + _11 _115 = _5 - _15
|
|
+ _112 = _2 + _12 _116 = _6 - _16
|
|
+ _113 = _3 + _13 _117 = _7 - _17
|
|
+ _114 = _4 + _14 _118 = _8 - _18
|
|
+
|
|
+ When analyzing group_3 and group_4, we find their use_stmts are the same
|
|
+ as group_1 and group_2. So group_3 is regarded as being merged to group_1
|
|
+ and group_4 being merged to group_2. In future procedures, we will skip
|
|
+ group_3 and group_4.
|
|
+
|
|
+ We repeat such processing until operations are not isomorphic or searching
|
|
+ reaches MEM stmts. In our given case, searching end up at a0, a1, a2 and
|
|
+ a3. */
|
|
+
|
|
+static bool
|
|
+find_isomorphic_stmts (loop_vec_info vinfo, vec<gimple *> &stmts)
|
|
+{
|
|
+ unsigned vf = get_max_vf (vinfo);
|
|
+ if (vf == 0)
|
|
+ return false;
|
|
+ auto_vec<vec<group_info> *> circular_queue;
|
|
+ /* Map of vector factor and corresponding vectorizing profit. */
|
|
+ stmts_profit profit_map;
|
|
+ /* Map of cut_points and vector factor. */
|
|
+ vf_stmts_profit_map candi_stmts;
|
|
+ bool reach_vdef = false;
|
|
+ while (vf > 2)
|
|
+ {
|
|
+ if (build_queue (vinfo, vf, circular_queue) == 0)
|
|
+ return false;
|
|
+ if (!bfs_find_isomer_stmts (circular_queue, profit_map, vf, reach_vdef))
|
|
+ {
|
|
+ if (reach_vdef)
|
|
+ {
|
|
+ release_tmp_stmts (candi_stmts);
|
|
+ free_ginfos (circular_queue);
|
|
+ circular_queue.release ();
|
|
+ return false;
|
|
+ }
|
|
+ vf /= 2;
|
|
+ free_ginfos (circular_queue);
|
|
+ circular_queue.release ();
|
|
+ continue;
|
|
+ }
|
|
+ candi_stmts[vf] = profit_map;
|
|
+ free_ginfos (circular_queue);
|
|
+ vf /= 2;
|
|
+ circular_queue.release ();
|
|
+ }
|
|
+ return decide_stmts_by_profit (candi_stmts, stmts);
|
|
+}
|
|
+
|
|
/* Distributes the code from LOOP in such a way that producer statements
|
|
are placed before consumer statements. Tries to separate only the
|
|
statements from STMTS into separate loops. Returns the number of
|
|
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
|
|
index 7990e31de..1e332d3c5 100644
|
|
--- a/gcc/tree-vect-loop.c
|
|
+++ b/gcc/tree-vect-loop.c
|
|
@@ -2516,9 +2516,11 @@ vect_reanalyze_as_main_loop (loop_vec_info loop_vinfo, unsigned int *n_stmts)
|
|
|
|
Apply a set of analyses on LOOP, and create a loop_vec_info struct
|
|
for it. The different analyses will record information in the
|
|
- loop_vec_info struct. */
|
|
+ loop_vec_info struct. When RESULT_ONLY_P is true, quit analysis
|
|
+ if loop is vectorizable; otherwise, do not delete vinfo. */
|
|
opt_loop_vec_info
|
|
-vect_analyze_loop (class loop *loop, vec_info_shared *shared)
|
|
+vect_analyze_loop (class loop *loop, vec_info_shared *shared,
|
|
+ bool result_only_p)
|
|
{
|
|
auto_vector_modes vector_modes;
|
|
|
|
@@ -2545,6 +2547,8 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
|
|
unsigned n_stmts = 0;
|
|
machine_mode autodetected_vector_mode = VOIDmode;
|
|
opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
|
|
+ /* Loop_vinfo for loop-distribution pass. */
|
|
+ opt_loop_vec_info fail_loop_vinfo = opt_loop_vec_info::success (NULL);
|
|
machine_mode next_vector_mode = VOIDmode;
|
|
poly_uint64 lowest_th = 0;
|
|
unsigned vectorized_loops = 0;
|
|
@@ -2633,6 +2637,13 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
|
|
if (res)
|
|
{
|
|
LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
|
|
+ /* In loop-distribution pass, we only need to get loop_vinfo, do not
|
|
+ conduct further operations. */
|
|
+ if (result_only_p)
|
|
+ {
|
|
+ loop->aux = (loop_vec_info) loop_vinfo;
|
|
+ return loop_vinfo;
|
|
+ }
|
|
vectorized_loops++;
|
|
|
|
/* Once we hit the desired simdlen for the first time,
|
|
@@ -2724,7 +2735,19 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
|
|
}
|
|
else
|
|
{
|
|
- delete loop_vinfo;
|
|
+ /* If current analysis shows LOOP is unable to vectorize, loop_vinfo
|
|
+ will be deleted. If LOOP is under ldist analysis, backup it before
|
|
+ it is deleted and return it if all modes are analyzed and still
|
|
+ fail to vectorize. */
|
|
+ if (result_only_p && (mode_i == vector_modes.length ()
|
|
+ || autodetected_vector_mode == VOIDmode))
|
|
+ {
|
|
+ fail_loop_vinfo = loop_vinfo;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ delete loop_vinfo;
|
|
+ }
|
|
if (fatal)
|
|
{
|
|
gcc_checking_assert (first_loop_vinfo == NULL);
|
|
@@ -2773,6 +2796,14 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
|
|
return first_loop_vinfo;
|
|
}
|
|
|
|
+ /* Return loop_vinfo for ldist if loop is unvectorizable. */
|
|
+ if (result_only_p && (mode_i == vector_modes.length ()
|
|
+ || autodetected_vector_mode == VOIDmode))
|
|
+ {
|
|
+ loop->aux = (loop_vec_info) fail_loop_vinfo;
|
|
+ return fail_loop_vinfo;
|
|
+ }
|
|
+
|
|
return opt_loop_vec_info::propagate_failure (res);
|
|
}
|
|
|
|
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
|
|
index 1c4a6c421..dc8175f00 100644
|
|
--- a/gcc/tree-vectorizer.h
|
|
+++ b/gcc/tree-vectorizer.h
|
|
@@ -1896,7 +1896,8 @@ extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree,
|
|
enum tree_code);
|
|
extern bool needs_fold_left_reduction_p (tree, tree_code);
|
|
/* Drive for loop analysis stage. */
|
|
-extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *);
|
|
+extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *,
|
|
+ bool result_only_p = false);
|
|
extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
|
|
extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *,
|
|
tree *, bool);
|
|
--
|
|
2.27.0.windows.1
|
|
|