2681 lines
86 KiB
Diff
2681 lines
86 KiB
Diff
|
|
diff -N -urp a/gcc/Makefile.in b/gcc/Makefile.in
|
||
|
|
--- a/gcc/Makefile.in 2018-11-15 15:59:30.435048460 +0800
|
||
|
|
+++ b/gcc/Makefile.in 2018-11-15 16:04:16.735055997 +0800
|
||
|
|
@@ -1293,6 +1293,7 @@ OBJS = \
|
||
|
|
gimple-fold.o \
|
||
|
|
gimple-laddress.o \
|
||
|
|
gimple-loop-jam.o \
|
||
|
|
+ gimple-loop-interchange.o \
|
||
|
|
gimple-low.o \
|
||
|
|
gimple-pretty-print.o \
|
||
|
|
gimple-ssa-backprop.o \
|
||
|
|
diff -N -urp a/gcc/cfgloop.h b/gcc/cfgloop.h
|
||
|
|
--- a/gcc/cfgloop.h 2018-11-15 15:59:30.439048461 +0800
|
||
|
|
+++ b/gcc/cfgloop.h 2018-11-15 16:03:17.431054436 +0800
|
||
|
|
@@ -225,6 +225,16 @@ struct GTY ((chain_next ("%h.next"))) lo
|
||
|
|
builtins. */
|
||
|
|
tree simduid;
|
||
|
|
|
||
|
|
+ /* In loop optimization, it's common to generate loops from the original
|
||
|
|
+ loop. This field records the index of the original loop which can be
|
||
|
|
+ used to track the original loop from newly generated loops. This can
|
||
|
|
+ be done by calling function get_loop (cfun, orig_loop_num). Note the
|
||
|
|
+ original loop could be destroyed for various reasons thus no longer
|
||
|
|
+ exists, as a result, function call to get_loop returns NULL pointer.
|
||
|
|
+ In this case, this field should not be used and needs to be cleared
|
||
|
|
+ whenever possible. */
|
||
|
|
+ int orig_loop_num;
|
||
|
|
+
|
||
|
|
/* Upper bound on number of iterations of a loop. */
|
||
|
|
struct nb_iter_bound *bounds;
|
||
|
|
|
||
|
|
diff -N -urp a/gcc/common.opt b/gcc/common.opt
|
||
|
|
--- a/gcc/common.opt 2018-11-15 15:59:30.447048461 +0800
|
||
|
|
+++ b/gcc/common.opt 2018-11-15 16:03:17.431054436 +0800
|
||
|
|
@@ -1488,8 +1488,8 @@ Common Alias(floop-nest-optimize)
|
||
|
|
Enable loop nest transforms. Same as -floop-nest-optimize.
|
||
|
|
|
||
|
|
floop-interchange
|
||
|
|
-Common Alias(floop-nest-optimize)
|
||
|
|
-Enable loop nest transforms. Same as -floop-nest-optimize.
|
||
|
|
+Common Report Var(flag_loop_interchange) Optimization
|
||
|
|
+Enable loop interchange on trees.
|
||
|
|
|
||
|
|
floop-block
|
||
|
|
Common Alias(floop-nest-optimize)
|
||
|
|
diff -N -urp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||
|
|
--- a/gcc/doc/invoke.texi 2018-11-15 15:59:30.451048461 +0800
|
||
|
|
+++ b/gcc/doc/invoke.texi 2018-11-15 16:05:06.803057315 +0800
|
||
|
|
@@ -8224,11 +8224,9 @@ Perform loop optimizations on trees. Th
|
||
|
|
at @option{-O} and higher.
|
||
|
|
|
||
|
|
@item -ftree-loop-linear
|
||
|
|
-@itemx -floop-interchange
|
||
|
|
@itemx -floop-strip-mine
|
||
|
|
@itemx -floop-block
|
||
|
|
@opindex ftree-loop-linear
|
||
|
|
-@opindex floop-interchange
|
||
|
|
@opindex floop-strip-mine
|
||
|
|
@opindex floop-block
|
||
|
|
Perform loop nest optimizations. Same as
|
||
|
|
@@ -8328,6 +8326,25 @@ Apply unroll and jam transformations on
|
||
|
|
nest this unrolls the outer loop by some factor and fuses the resulting
|
||
|
|
multiple inner loops. This flag is enabled by default at @option{-O3}.
|
||
|
|
|
||
|
|
+@item -floop-interchange
|
||
|
|
+@opindex floop-interchange
|
||
|
|
+Perform loop interchange outside of graphite. This flag can improve cache
|
||
|
|
+performance on loop nest and allow further loop optimizations, like
|
||
|
|
+vectorization, to take place. For example, the loop
|
||
|
|
+@smallexample
|
||
|
|
+for (int i = 0; i < N; i++)
|
||
|
|
+ for (int j = 0; j < N; j++)
|
||
|
|
+ for (int k = 0; k < N; k++)
|
||
|
|
+ c[i][j] = c[i][j] + a[i][k]*b[k][j];
|
||
|
|
+@end smallexample
|
||
|
|
+is transformed to
|
||
|
|
+@smallexample
|
||
|
|
+for (int i = 0; i < N; i++)
|
||
|
|
+ for (int k = 0; k < N; k++)
|
||
|
|
+ for (int j = 0; j < N; j++)
|
||
|
|
+ c[i][j] = c[i][j] + a[i][k]*b[k][j];
|
||
|
|
+@end smallexample
|
||
|
|
+
|
||
|
|
@item -ftree-loop-im
|
||
|
|
@opindex ftree-loop-im
|
||
|
|
Perform loop invariant motion on trees. This pass moves only invariants that
|
||
|
|
@@ -10203,6 +10220,12 @@ The size of L1 cache, in kilobytes.
|
||
|
|
@item l2-cache-size
|
||
|
|
The size of L2 cache, in kilobytes.
|
||
|
|
|
||
|
|
+@item loop-interchange-max-num-stmts
|
||
|
|
+The maximum number of stmts in a loop to be interchanged.
|
||
|
|
+
|
||
|
|
+@item loop-interchange-stride-ratio
|
||
|
|
+The minimum ratio between stride of two loops for interchange to be profitable.
|
||
|
|
+
|
||
|
|
@item min-insn-to-prefetch-ratio
|
||
|
|
The minimum ratio between the number of instructions and the
|
||
|
|
number of prefetches to enable prefetching in a loop.
|
||
|
|
diff -N -urp a/gcc/gimple-loop-interchange.cc b/gcc/gimple-loop-interchange.cc
|
||
|
|
--- a/gcc/gimple-loop-interchange.cc 1970-01-01 08:00:00.000000000 +0800
|
||
|
|
+++ b/gcc/gimple-loop-interchange.cc 2018-11-15 16:03:17.443054436 +0800
|
||
|
|
@@ -0,0 +1,2039 @@
|
||
|
|
+/* Loop interchange.
|
||
|
|
+ Copyright (C) 2017 Free Software Foundation, Inc.
|
||
|
|
+ Contributed by ARM Ltd.
|
||
|
|
+
|
||
|
|
+This file is part of GCC.
|
||
|
|
+
|
||
|
|
+GCC is free software; you can redistribute it and/or modify it
|
||
|
|
+under the terms of the GNU General Public License as published by the
|
||
|
|
+Free Software Foundation; either version 3, or (at your option) any
|
||
|
|
+later version.
|
||
|
|
+
|
||
|
|
+GCC is distributed in the hope that it will be useful, but WITHOUT
|
||
|
|
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
|
|
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||
|
|
+for more details.
|
||
|
|
+
|
||
|
|
+You should have received a copy of the GNU General Public License
|
||
|
|
+along with GCC; see the file COPYING3. If not see
|
||
|
|
+<http://www.gnu.org/licenses/>. */
|
||
|
|
+
|
||
|
|
+#include "config.h"
|
||
|
|
+#include "system.h"
|
||
|
|
+#include "coretypes.h"
|
||
|
|
+#include "backend.h"
|
||
|
|
+#include "is-a.h"
|
||
|
|
+#include "tree.h"
|
||
|
|
+#include "gimple.h"
|
||
|
|
+#include "tree-pass.h"
|
||
|
|
+#include "ssa.h"
|
||
|
|
+#include "gimple-pretty-print.h"
|
||
|
|
+#include "fold-const.h"
|
||
|
|
+#include "gimplify.h"
|
||
|
|
+#include "gimple-iterator.h"
|
||
|
|
+#include "gimplify-me.h"
|
||
|
|
+#include "cfgloop.h"
|
||
|
|
+#include "params.h"
|
||
|
|
+#include "tree-ssa.h"
|
||
|
|
+#include "tree-scalar-evolution.h"
|
||
|
|
+#include "tree-ssa-loop-manip.h"
|
||
|
|
+#include "tree-ssa-loop-niter.h"
|
||
|
|
+#include "tree-ssa-loop-ivopts.h"
|
||
|
|
+#include "tree-ssa-dce.h"
|
||
|
|
+#include "tree-data-ref.h"
|
||
|
|
+#include "tree-vectorizer.h"
|
||
|
|
+
|
||
|
|
+/* This pass performs loop interchange: for example, the loop nest
|
||
|
|
+
|
||
|
|
+ for (int j = 0; j < N; j++)
|
||
|
|
+ for (int k = 0; k < N; k++)
|
||
|
|
+ for (int i = 0; i < N; i++)
|
||
|
|
+ c[i][j] = c[i][j] + a[i][k]*b[k][j];
|
||
|
|
+
|
||
|
|
+ is transformed to
|
||
|
|
+
|
||
|
|
+ for (int i = 0; i < N; i++)
|
||
|
|
+ for (int j = 0; j < N; j++)
|
||
|
|
+ for (int k = 0; k < N; k++)
|
||
|
|
+ c[i][j] = c[i][j] + a[i][k]*b[k][j];
|
||
|
|
+
|
||
|
|
+ This pass implements loop interchange in the following steps:
|
||
|
|
+
|
||
|
|
+ 1) Find perfect loop nest for each innermost loop and compute data
|
||
|
|
+ dependence relations for it. For above example, loop nest is
|
||
|
|
+ <loop_j, loop_k, loop_i>.
|
||
|
|
+ 2) From innermost to outermost loop, this pass tries to interchange
|
||
|
|
+ each loop pair. For above case, it firstly tries to interchange
|
||
|
|
+ <loop_k, loop_i> and loop nest becomes <loop_j, loop_i, loop_k>.
|
||
|
|
+ Then it tries to interchange <loop_j, loop_i> and loop nest becomes
|
||
|
|
+ <loop_i, loop_j, loop_k>. The overall effect is to move innermost
|
||
|
|
+ loop to the outermost position. For loop pair <loop_i, loop_j>
|
||
|
|
+ to be interchanged, we:
|
||
|
|
+ 3) Check if data dependence relations are valid for loop interchange.
|
||
|
|
+ 4) Check if both loops can be interchanged in terms of transformation.
|
||
|
|
+ 5) Check if interchanging the two loops is profitable.
|
||
|
|
+ 6) Interchange the two loops by mapping induction variables.
|
||
|
|
+
|
||
|
|
+ This pass also handles reductions in loop nest. So far we only support
|
||
|
|
+ simple reduction of inner loop and double reduction of the loop nest. */
|
||
|
|
+
|
||
|
|
+/* Maximum number of stmts in each loop that should be interchanged. */
|
||
|
|
+#define MAX_NUM_STMT (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS))
|
||
|
|
+/* Maximum number of data references in loop nest. */
|
||
|
|
+#define MAX_DATAREFS (PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
|
||
|
|
+
|
||
|
|
+/* Comparison ratio of access stride between inner/outer loops to be
|
||
|
|
+ interchanged. This is the minimum stride ratio for loop interchange
|
||
|
|
+ to be profitable. */
|
||
|
|
+#define OUTER_STRIDE_RATIO (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO))
|
||
|
|
+/* The same as above, but we require higher ratio for interchanging the
|
||
|
|
+ innermost two loops. */
|
||
|
|
+#define INNER_STRIDE_RATIO ((OUTER_STRIDE_RATIO) + 1)
|
||
|
|
+
|
||
|
|
+/* Vector of strides that DR accesses in each level loop of a loop nest. */
|
||
|
|
+#define DR_ACCESS_STRIDE(dr) ((vec<tree> *) dr->aux)
|
||
|
|
+
|
||
|
|
+/* Structure recording loop induction variable. */
|
||
|
|
+typedef struct induction
|
||
|
|
+{
|
||
|
|
+ /* IV itself. */
|
||
|
|
+ tree var;
|
||
|
|
+ /* IV's initializing value, which is the init arg of the IV PHI node. */
|
||
|
|
+ tree init_val;
|
||
|
|
+ /* IV's initializing expr, which is (the expanded result of) init_val. */
|
||
|
|
+ tree init_expr;
|
||
|
|
+ /* IV's step. */
|
||
|
|
+ tree step;
|
||
|
|
+} *induction_p;
|
||
|
|
+
|
||
|
|
+/* Enum type for loop reduction variable. */
|
||
|
|
+enum reduction_type
|
||
|
|
+{
|
||
|
|
+ UNKNOWN_RTYPE = 0,
|
||
|
|
+ SIMPLE_RTYPE,
|
||
|
|
+ DOUBLE_RTYPE
|
||
|
|
+};
|
||
|
|
+
|
||
|
|
+/* Structure recording loop reduction variable. */
|
||
|
|
+typedef struct reduction
|
||
|
|
+{
|
||
|
|
+ /* Reduction itself. */
|
||
|
|
+ tree var;
|
||
|
|
+ /* PHI node defining reduction variable. */
|
||
|
|
+ gphi *phi;
|
||
|
|
+ /* Init and next variables of the reduction. */
|
||
|
|
+ tree init;
|
||
|
|
+ tree next;
|
||
|
|
+ /* Lcssa PHI node if reduction is used outside of its definition loop. */
|
||
|
|
+ gphi *lcssa_phi;
|
||
|
|
+ /* Stmts defining init and next. */
|
||
|
|
+ gimple *producer;
|
||
|
|
+ gimple *consumer;
|
||
|
|
+ /* If init is loaded from memory, this is the loading memory reference. */
|
||
|
|
+ tree init_ref;
|
||
|
|
+ /* If reduction is finally stored to memory, this is the stored memory
|
||
|
|
+ reference. */
|
||
|
|
+ tree fini_ref;
|
||
|
|
+ enum reduction_type type;
|
||
|
|
+} *reduction_p;
|
||
|
|
+
|
||
|
|
+
|
||
|
|
+/* Dump reduction RE. */
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+dump_reduction (reduction_p re)
|
||
|
|
+{
|
||
|
|
+ if (re->type == SIMPLE_RTYPE)
|
||
|
|
+ fprintf (dump_file, " Simple reduction: ");
|
||
|
|
+ else if (re->type == DOUBLE_RTYPE)
|
||
|
|
+ fprintf (dump_file, " Double reduction: ");
|
||
|
|
+ else
|
||
|
|
+ fprintf (dump_file, " Unknown reduction: ");
|
||
|
|
+
|
||
|
|
+ print_gimple_stmt (dump_file, re->phi, 0);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Dump LOOP's induction IV. */
|
||
|
|
+static void
|
||
|
|
+dump_induction (struct loop *loop, induction_p iv)
|
||
|
|
+{
|
||
|
|
+ fprintf (dump_file, " Induction: ");
|
||
|
|
+ print_generic_expr (dump_file, iv->var, TDF_SLIM);
|
||
|
|
+ fprintf (dump_file, " = {");
|
||
|
|
+ print_generic_expr (dump_file, iv->init_expr, TDF_SLIM);
|
||
|
|
+ fprintf (dump_file, ", ");
|
||
|
|
+ print_generic_expr (dump_file, iv->step, TDF_SLIM);
|
||
|
|
+ fprintf (dump_file, "}_%d\n", loop->num);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Loop candidate for interchange. */
|
||
|
|
+
|
||
|
|
+struct loop_cand
|
||
|
|
+{
|
||
|
|
+ loop_cand (struct loop *, struct loop *);
|
||
|
|
+ ~loop_cand ();
|
||
|
|
+
|
||
|
|
+ reduction_p find_reduction_by_stmt (gimple *);
|
||
|
|
+ void classify_simple_reduction (reduction_p);
|
||
|
|
+ bool analyze_iloop_reduction_var (tree);
|
||
|
|
+ bool analyze_oloop_reduction_var (loop_cand *, tree);
|
||
|
|
+ bool analyze_induction_var (tree, tree);
|
||
|
|
+ bool analyze_carried_vars (loop_cand *);
|
||
|
|
+ bool analyze_lcssa_phis (void);
|
||
|
|
+ bool can_interchange_p (loop_cand *);
|
||
|
|
+ bool supported_operations (basic_block, loop_cand *, int *);
|
||
|
|
+ void undo_simple_reduction (reduction_p, bitmap);
|
||
|
|
+
|
||
|
|
+ /* The loop itself. */
|
||
|
|
+ struct loop *m_loop;
|
||
|
|
+ /* The outer loop for interchange. It equals to loop if this loop cand
|
||
|
|
+ itself represents the outer loop. */
|
||
|
|
+ struct loop *m_outer;
|
||
|
|
+ /* Vector of induction variables in loop. */
|
||
|
|
+ vec<induction_p> m_inductions;
|
||
|
|
+ /* Vector of reduction variables in loop. */
|
||
|
|
+ vec<reduction_p> m_reductions;
|
||
|
|
+ /* Lcssa PHI nodes of this loop. */
|
||
|
|
+ vec<gphi *> m_lcssa_nodes;
|
||
|
|
+ /* Single exit edge of this loop. */
|
||
|
|
+ edge m_exit;
|
||
|
|
+ /* Basic blocks of this loop. */
|
||
|
|
+ basic_block *m_bbs;
|
||
|
|
+};
|
||
|
|
+
|
||
|
|
+/* Constructor. */
|
||
|
|
+
|
||
|
|
+loop_cand::loop_cand (struct loop *loop, struct loop *outer)
|
||
|
|
+ : m_loop (loop), m_outer (outer),
|
||
|
|
+ m_exit (single_exit (loop)), m_bbs (get_loop_body (loop))
|
||
|
|
+{
|
||
|
|
+ m_inductions.create (3);
|
||
|
|
+ m_reductions.create (3);
|
||
|
|
+ m_lcssa_nodes.create (3);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Destructor. */
|
||
|
|
+
|
||
|
|
+loop_cand::~loop_cand ()
|
||
|
|
+{
|
||
|
|
+ induction_p iv;
|
||
|
|
+ for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i)
|
||
|
|
+ free (iv);
|
||
|
|
+
|
||
|
|
+ reduction_p re;
|
||
|
|
+ for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
|
||
|
|
+ free (re);
|
||
|
|
+
|
||
|
|
+ m_inductions.release ();
|
||
|
|
+ m_reductions.release ();
|
||
|
|
+ m_lcssa_nodes.release ();
|
||
|
|
+ free (m_bbs);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Return single use stmt of VAR in LOOP, otherwise return NULL. */
|
||
|
|
+
|
||
|
|
+static gimple *
|
||
|
|
+single_use_in_loop (tree var, struct loop *loop)
|
||
|
|
+{
|
||
|
|
+ gimple *stmt, *res = NULL;
|
||
|
|
+ use_operand_p use_p;
|
||
|
|
+ imm_use_iterator iterator;
|
||
|
|
+
|
||
|
|
+ FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
|
||
|
|
+ {
|
||
|
|
+ stmt = USE_STMT (use_p);
|
||
|
|
+ if (is_gimple_debug (stmt))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ if (res)
|
||
|
|
+ return NULL;
|
||
|
|
+
|
||
|
|
+ res = stmt;
|
||
|
|
+ }
|
||
|
|
+ return res;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Return true if E is unsupported in loop interchange, i.e, E is a complex
|
||
|
|
+ edge or part of irreducible loop. */
|
||
|
|
+
|
||
|
|
+static inline bool
|
||
|
|
+unsupported_edge (edge e)
|
||
|
|
+{
|
||
|
|
+ return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Return the reduction if STMT is one of its lcssa PHI, producer or consumer
|
||
|
|
+ stmt. */
|
||
|
|
+
|
||
|
|
+reduction_p
|
||
|
|
+loop_cand::find_reduction_by_stmt (gimple *stmt)
|
||
|
|
+{
|
||
|
|
+ gphi *phi = dyn_cast <gphi *> (stmt);
|
||
|
|
+ reduction_p re;
|
||
|
|
+
|
||
|
|
+ for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
|
||
|
|
+ if ((phi != NULL && phi == re->lcssa_phi)
|
||
|
|
+ || (stmt == re->producer || stmt == re->consumer))
|
||
|
|
+ return re;
|
||
|
|
+
|
||
|
|
+ return NULL;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Return true if all stmts in BB can be supported by loop interchange,
|
||
|
|
+ otherwise return false. ILOOP is not NULL if this loop_cand is the
|
||
|
|
+ outer loop in loop nest. Add the number of supported statements to
|
||
|
|
+ NUM_STMTS. */
|
||
|
|
+
|
||
|
|
+bool
|
||
|
|
+loop_cand::supported_operations (basic_block bb, loop_cand *iloop,
|
||
|
|
+ int *num_stmts)
|
||
|
|
+{
|
||
|
|
+ int bb_num_stmts = 0;
|
||
|
|
+ gphi_iterator psi;
|
||
|
|
+ gimple_stmt_iterator gsi;
|
||
|
|
+
|
||
|
|
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
||
|
|
+ {
|
||
|
|
+ gimple *stmt = gsi_stmt (gsi);
|
||
|
|
+ if (is_gimple_debug (stmt))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ if (gimple_has_side_effects (stmt))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ bb_num_stmts++;
|
||
|
|
+ if (gcall *call = dyn_cast <gcall *> (stmt))
|
||
|
|
+ {
|
||
|
|
+ /* In basic block of outer loop, the call should be cheap since
|
||
|
|
+ it will be moved to inner loop. */
|
||
|
|
+ if (iloop != NULL
|
||
|
|
+ && !gimple_inexpensive_call_p (call))
|
||
|
|
+ return false;
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ if (!iloop || !gimple_vuse (stmt))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ /* Support stmt accessing memory in outer loop only if it is for inner
|
||
|
|
+ loop's reduction. */
|
||
|
|
+ if (iloop->find_reduction_by_stmt (stmt))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ tree lhs;
|
||
|
|
+ /* Support loop invariant memory reference if it's only used once by
|
||
|
|
+ inner loop. */
|
||
|
|
+ /* ??? How's this checking for invariantness? */
|
||
|
|
+ if (gimple_assign_single_p (stmt)
|
||
|
|
+ && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
|
||
|
|
+ && TREE_CODE (lhs) == SSA_NAME
|
||
|
|
+ && single_use_in_loop (lhs, iloop->m_loop))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ *num_stmts += bb_num_stmts;
|
||
|
|
+
|
||
|
|
+ /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
|
||
|
|
+ loop's header, or PHI nodes in dest bb of inner loop's exit edge. */
|
||
|
|
+ if (!iloop || bb == m_loop->header
|
||
|
|
+ || bb == iloop->m_exit->dest)
|
||
|
|
+ return true;
|
||
|
|
+
|
||
|
|
+ /* Don't allow any other PHI nodes. */
|
||
|
|
+ for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
|
||
|
|
+ if (!virtual_operand_p (PHI_RESULT (psi.phi ())))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ return true;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Return true if current loop_cand be interchanged. ILOOP is not NULL if
|
||
|
|
+ current loop_cand is outer loop in loop nest. */
|
||
|
|
+
|
||
|
|
+bool
|
||
|
|
+loop_cand::can_interchange_p (loop_cand *iloop)
|
||
|
|
+{
|
||
|
|
+ /* For now we only support at most one reduction. */
|
||
|
|
+ unsigned allowed_reduction_num = 1;
|
||
|
|
+
|
||
|
|
+ /* Only support reduction if the loop nest to be interchanged is the
|
||
|
|
+ innermost two loops. */
|
||
|
|
+ if ((iloop == NULL && m_loop->inner != NULL)
|
||
|
|
+ || (iloop != NULL && iloop->m_loop->inner != NULL))
|
||
|
|
+ allowed_reduction_num = 0;
|
||
|
|
+
|
||
|
|
+ if (m_reductions.length () > allowed_reduction_num
|
||
|
|
+ || (m_reductions.length () == 1
|
||
|
|
+ && m_reductions[0]->type == UNKNOWN_RTYPE))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Only support lcssa PHI node which is for reduction. */
|
||
|
|
+ if (m_lcssa_nodes.length () > allowed_reduction_num)
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ int num_stmts = 0;
|
||
|
|
+ /* Check basic blocks other than loop header/exit. */
|
||
|
|
+ for (unsigned i = 0; i < m_loop->num_nodes; i++)
|
||
|
|
+ {
|
||
|
|
+ basic_block bb = m_bbs[i];
|
||
|
|
+
|
||
|
|
+ /* Skip basic blocks of inner loops. */
|
||
|
|
+ if (bb->loop_father != m_loop)
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ /* Check if basic block has any unsupported operation. */
|
||
|
|
+ if (!supported_operations (bb, iloop, &num_stmts))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Check if loop has too many stmts. */
|
||
|
|
+ if (num_stmts > MAX_NUM_STMT)
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ return true;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Programmers and optimizers (like loop store motion) may optimize code:
|
||
|
|
+
|
||
|
|
+ for (int i = 0; i < N; i++)
|
||
|
|
+ for (int j = 0; j < N; j++)
|
||
|
|
+ a[i] += b[j][i] * c[j][i];
|
||
|
|
+
|
||
|
|
+ into reduction:
|
||
|
|
+
|
||
|
|
+ for (int i = 0; i < N; i++)
|
||
|
|
+ {
|
||
|
|
+ // producer. Note sum can be initialized to a constant.
|
||
|
|
+ int sum = a[i];
|
||
|
|
+ for (int j = 0; j < N; j++)
|
||
|
|
+ {
|
||
|
|
+ sum += b[j][i] * c[j][i];
|
||
|
|
+ }
|
||
|
|
+ // consumer.
|
||
|
|
+ a[i] = sum;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ The result code can't be interchanged without undoing the optimization.
|
||
|
|
+ This function classifies this kind of reduction and records information so
|
||
|
|
+ that we can undo the store motion during interchange. */
|
||
|
|
+
|
||
|
|
+void
|
||
|
|
+loop_cand::classify_simple_reduction (reduction_p re)
|
||
|
|
+{
|
||
|
|
+ gimple *producer, *consumer;
|
||
|
|
+
|
||
|
|
+ /* Check init variable of reduction and how it is initialized. */
|
||
|
|
+ if (TREE_CODE (re->init) == SSA_NAME)
|
||
|
|
+ {
|
||
|
|
+ producer = SSA_NAME_DEF_STMT (re->init);
|
||
|
|
+ re->producer = producer;
|
||
|
|
+ basic_block bb = gimple_bb (producer);
|
||
|
|
+ if (!bb || bb->loop_father != m_outer)
|
||
|
|
+ return;
|
||
|
|
+
|
||
|
|
+ if (!gimple_assign_load_p (producer))
|
||
|
|
+ return;
|
||
|
|
+
|
||
|
|
+ re->init_ref = gimple_assign_rhs1 (producer);
|
||
|
|
+ }
|
||
|
|
+ else if (!CONSTANT_CLASS_P (re->init))
|
||
|
|
+ return;
|
||
|
|
+
|
||
|
|
+ /* Check how reduction variable is used. */
|
||
|
|
+ consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer);
|
||
|
|
+ if (!consumer
|
||
|
|
+ || !gimple_store_p (consumer))
|
||
|
|
+ return;
|
||
|
|
+
|
||
|
|
+ re->fini_ref = gimple_get_lhs (consumer);
|
||
|
|
+ re->consumer = consumer;
|
||
|
|
+
|
||
|
|
+ /* Simple reduction with constant initializer. */
|
||
|
|
+ if (!re->init_ref)
|
||
|
|
+ {
|
||
|
|
+ gcc_assert (CONSTANT_CLASS_P (re->init));
|
||
|
|
+ re->init_ref = unshare_expr (re->fini_ref);
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ /* Require memory references in producer and consumer are the same so
|
||
|
|
+ that we can undo reduction during interchange. */
|
||
|
|
+ if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0))
|
||
|
|
+ return;
|
||
|
|
+
|
||
|
|
+ re->type = SIMPLE_RTYPE;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Analyze reduction variable VAR for inner loop of the loop nest to be
|
||
|
|
+ interchanged. Return true if analysis succeeds. */
|
||
|
|
+
|
||
|
|
+bool
|
||
|
|
+loop_cand::analyze_iloop_reduction_var (tree var)
|
||
|
|
+{
|
||
|
|
+ gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
|
||
|
|
+ gphi *lcssa_phi = NULL, *use_phi;
|
||
|
|
+ tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
|
||
|
|
+ tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
|
||
|
|
+ reduction_p re;
|
||
|
|
+ gimple *stmt, *next_def, *single_use = NULL;
|
||
|
|
+ use_operand_p use_p;
|
||
|
|
+ imm_use_iterator iterator;
|
||
|
|
+
|
||
|
|
+ if (TREE_CODE (next) != SSA_NAME)
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ next_def = SSA_NAME_DEF_STMT (next);
|
||
|
|
+ basic_block bb = gimple_bb (next_def);
|
||
|
|
+ if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* In restricted reduction, the var is (and must be) used in defining
|
||
|
|
+ the updated var. The process can be depicted as below:
|
||
|
|
+
|
||
|
|
+ var ;; = PHI<init, next>
|
||
|
|
+ |
|
||
|
|
+ |
|
||
|
|
+ v
|
||
|
|
+ +---------------------+
|
||
|
|
+ | reduction operators | <-- other operands
|
||
|
|
+ +---------------------+
|
||
|
|
+ |
|
||
|
|
+ |
|
||
|
|
+ v
|
||
|
|
+ next
|
||
|
|
+
|
||
|
|
+ In terms of loop interchange, we don't change how NEXT is computed based
|
||
|
|
+ on VAR and OTHER OPERANDS. In case of double reduction in loop nest
|
||
|
|
+ to be interchanged, we don't change it at all. In the case of simple
|
||
|
|
+ reduction in inner loop, we only make change how VAR/NEXT is loaded or
|
||
|
|
+ stored. With these conditions, we can relax restrictions on reduction
|
||
|
|
+ in a way that reduction operation is seen as black box. In general,
|
||
|
|
+ we can ignore reassociation of reduction operator; we can handle fake
|
||
|
|
+ reductions in which VAR is not even used to compute NEXT. */
|
||
|
|
+ if (! single_imm_use (var, &use_p, &single_use)
|
||
|
|
+ || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Check the reduction operation. We require a left-associative operation.
|
||
|
|
+ For FP math we also need to be allowed to associate operations. */
|
||
|
|
+ if (gassign *ass = dyn_cast <gassign *> (single_use))
|
||
|
|
+ {
|
||
|
|
+ enum tree_code code = gimple_assign_rhs_code (ass);
|
||
|
|
+ if (! (associative_tree_code (code)
|
||
|
|
+ || (code == MINUS_EXPR
|
||
|
|
+ && use_p->use == gimple_assign_rhs1_ptr (ass)))
|
||
|
|
+ || (FLOAT_TYPE_P (TREE_TYPE (var))
|
||
|
|
+ && ! flag_associative_math))
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ else
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Handle and verify a series of stmts feeding the reduction op. */
|
||
|
|
+ if (single_use != next_def
|
||
|
|
+ && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next,
|
||
|
|
+ gimple_assign_rhs_code (single_use)))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Only support cases in which INIT is used in inner loop. */
|
||
|
|
+ if (TREE_CODE (init) == SSA_NAME)
|
||
|
|
+ FOR_EACH_IMM_USE_FAST (use_p, iterator, init)
|
||
|
|
+ {
|
||
|
|
+ stmt = USE_STMT (use_p);
|
||
|
|
+ if (is_gimple_debug (stmt))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
|
||
|
|
+ {
|
||
|
|
+ stmt = USE_STMT (use_p);
|
||
|
|
+ if (is_gimple_debug (stmt))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ /* Or else it's used in PHI itself. */
|
||
|
|
+ use_phi = dyn_cast <gphi *> (stmt);
|
||
|
|
+ if (use_phi == phi)
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ if (use_phi != NULL
|
||
|
|
+ && lcssa_phi == NULL
|
||
|
|
+ && gimple_bb (stmt) == m_exit->dest
|
||
|
|
+ && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
|
||
|
|
+ lcssa_phi = use_phi;
|
||
|
|
+ else
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ if (!lcssa_phi)
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ re = XCNEW (struct reduction);
|
||
|
|
+ re->var = var;
|
||
|
|
+ re->init = init;
|
||
|
|
+ re->next = next;
|
||
|
|
+ re->phi = phi;
|
||
|
|
+ re->lcssa_phi = lcssa_phi;
|
||
|
|
+
|
||
|
|
+ classify_simple_reduction (re);
|
||
|
|
+
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ dump_reduction (re);
|
||
|
|
+
|
||
|
|
+ m_reductions.safe_push (re);
|
||
|
|
+ return true;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Analyze reduction variable VAR for outer loop of the loop nest to be
|
||
|
|
+ interchanged. ILOOP is not NULL and points to inner loop. For the
|
||
|
|
+ moment, we only support double reduction for outer loop, like:
|
||
|
|
+
|
||
|
|
+ for (int i = 0; i < n; i++)
|
||
|
|
+ {
|
||
|
|
+ int sum = 0;
|
||
|
|
+
|
||
|
|
+ for (int j = 0; j < n; j++) // outer loop
|
||
|
|
+ for (int k = 0; k < n; k++) // inner loop
|
||
|
|
+ sum += a[i][k]*b[k][j];
|
||
|
|
+
|
||
|
|
+ s[i] = sum;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ Note the innermost two loops are the loop nest to be interchanged.
|
||
|
|
+ Return true if analysis succeeds. */
|
||
|
|
+
|
||
|
|
+bool
|
||
|
|
+loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var)
|
||
|
|
+{
|
||
|
|
+ gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
|
||
|
|
+ gphi *lcssa_phi = NULL, *use_phi;
|
||
|
|
+ tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
|
||
|
|
+ tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
|
||
|
|
+ reduction_p re;
|
||
|
|
+ gimple *stmt, *next_def;
|
||
|
|
+ use_operand_p use_p;
|
||
|
|
+ imm_use_iterator iterator;
|
||
|
|
+
|
||
|
|
+ if (TREE_CODE (next) != SSA_NAME)
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ next_def = SSA_NAME_DEF_STMT (next);
|
||
|
|
+ basic_block bb = gimple_bb (next_def);
|
||
|
|
+ if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Find inner loop's simple reduction that uses var as initializer. */
|
||
|
|
+ reduction_p inner_re = NULL;
|
||
|
|
+ for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i)
|
||
|
|
+ if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0))
|
||
|
|
+ break;
|
||
|
|
+
|
||
|
|
+ if (inner_re == NULL
|
||
|
|
+ || inner_re->type != UNKNOWN_RTYPE
|
||
|
|
+ || inner_re->producer != phi)
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* In case of double reduction, outer loop's reduction should be updated
|
||
|
|
+ by inner loop's simple reduction. */
|
||
|
|
+ if (next_def != inner_re->lcssa_phi)
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Outer loop's reduction should only be used to initialize inner loop's
|
||
|
|
+ simple reduction. */
|
||
|
|
+ if (! single_imm_use (var, &use_p, &stmt)
|
||
|
|
+ || stmt != inner_re->phi)
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Check this reduction is correctly used outside of loop via lcssa phi. */
|
||
|
|
+ FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
|
||
|
|
+ {
|
||
|
|
+ stmt = USE_STMT (use_p);
|
||
|
|
+ if (is_gimple_debug (stmt))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ /* Or else it's used in PHI itself. */
|
||
|
|
+ use_phi = dyn_cast <gphi *> (stmt);
|
||
|
|
+ if (use_phi == phi)
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ if (lcssa_phi == NULL
|
||
|
|
+ && use_phi != NULL
|
||
|
|
+ && gimple_bb (stmt) == m_exit->dest
|
||
|
|
+ && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
|
||
|
|
+ lcssa_phi = use_phi;
|
||
|
|
+ else
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ if (!lcssa_phi)
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ re = XCNEW (struct reduction);
|
||
|
|
+ re->var = var;
|
||
|
|
+ re->init = init;
|
||
|
|
+ re->next = next;
|
||
|
|
+ re->phi = phi;
|
||
|
|
+ re->lcssa_phi = lcssa_phi;
|
||
|
|
+ re->type = DOUBLE_RTYPE;
|
||
|
|
+ inner_re->type = DOUBLE_RTYPE;
|
||
|
|
+
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ dump_reduction (re);
|
||
|
|
+
|
||
|
|
+ m_reductions.safe_push (re);
|
||
|
|
+ return true;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Return true if VAR is induction variable of current loop whose scev is
|
||
|
|
+ specified by CHREC. */
|
||
|
|
+
|
||
|
|
+bool
|
||
|
|
+loop_cand::analyze_induction_var (tree var, tree chrec)
|
||
|
|
+{
|
||
|
|
+ gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
|
||
|
|
+ tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
|
||
|
|
+
|
||
|
|
+ /* Var is loop invariant, though it's unlikely to happen. */
|
||
|
|
+ if (tree_does_not_contain_chrecs (chrec))
|
||
|
|
+ {
|
||
|
|
+ struct induction *iv = XCNEW (struct induction);
|
||
|
|
+ iv->var = var;
|
||
|
|
+ iv->init_val = init;
|
||
|
|
+ iv->init_expr = chrec;
|
||
|
|
+ iv->step = build_int_cst (TREE_TYPE (chrec), 0);
|
||
|
|
+ m_inductions.safe_push (iv);
|
||
|
|
+ return true;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ if (TREE_CODE (chrec) != POLYNOMIAL_CHREC
|
||
|
|
+ || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num
|
||
|
|
+ || tree_contains_chrecs (CHREC_LEFT (chrec), NULL)
|
||
|
|
+ || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ struct induction *iv = XCNEW (struct induction);
|
||
|
|
+ iv->var = var;
|
||
|
|
+ iv->init_val = init;
|
||
|
|
+ iv->init_expr = CHREC_LEFT (chrec);
|
||
|
|
+ iv->step = CHREC_RIGHT (chrec);
|
||
|
|
+
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ dump_induction (m_loop, iv);
|
||
|
|
+
|
||
|
|
+ m_inductions.safe_push (iv);
|
||
|
|
+ return true;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Return true if all loop carried variables defined in loop header can
|
||
|
|
+ be successfully analyzed. */
|
||
|
|
+
|
||
|
|
+bool
|
||
|
|
+loop_cand::analyze_carried_vars (loop_cand *iloop)
|
||
|
|
+{
|
||
|
|
+ edge e = loop_preheader_edge (m_outer);
|
||
|
|
+ gphi_iterator gsi;
|
||
|
|
+
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num);
|
||
|
|
+
|
||
|
|
+ for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
|
||
|
|
+ {
|
||
|
|
+ gphi *phi = gsi.phi ();
|
||
|
|
+
|
||
|
|
+ tree var = PHI_RESULT (phi);
|
||
|
|
+ if (virtual_operand_p (var))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ tree chrec = analyze_scalar_evolution (m_loop, var);
|
||
|
|
+ chrec = instantiate_scev (e, m_loop, chrec);
|
||
|
|
+
|
||
|
|
+ /* Analyze var as reduction variable. */
|
||
|
|
+ if (chrec_contains_undetermined (chrec)
|
||
|
|
+ || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num))
|
||
|
|
+ {
|
||
|
|
+ if (iloop && !analyze_oloop_reduction_var (iloop, var))
|
||
|
|
+ return false;
|
||
|
|
+ if (!iloop && !analyze_iloop_reduction_var (var))
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ /* Analyze var as induction variable. */
|
||
|
|
+ else if (!analyze_induction_var (var, chrec))
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ return true;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Return TRUE if loop closed PHI nodes can be analyzed successfully. */
|
||
|
|
+
|
||
|
|
+bool
|
||
|
|
+loop_cand::analyze_lcssa_phis (void)
|
||
|
|
+{
|
||
|
|
+ gphi_iterator gsi;
|
||
|
|
+ for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
|
||
|
|
+ {
|
||
|
|
+ gphi *phi = gsi.phi ();
|
||
|
|
+
|
||
|
|
+ if (virtual_operand_p (PHI_RESULT (phi)))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ /* TODO: We only support lcssa phi for reduction for now. */
|
||
|
|
+ if (!find_reduction_by_stmt (phi))
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ return true;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* CONSUMER is a stmt in BB storing reduction result into memory object.
|
||
|
|
+ When the reduction is intialized from constant value, we need to add
|
||
|
|
+ a stmt loading from the memory object to target basic block in inner
|
||
|
|
+ loop during undoing the reduction. Problem is that memory reference
|
||
|
|
+ may use ssa variables not dominating the target basic block. This
|
||
|
|
+ function finds all stmts on which CONSUMER depends in basic block BB,
|
||
|
|
+ records and returns them via STMTS. */
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
|
||
|
|
+{
|
||
|
|
+ auto_vec<gimple *, 4> worklist;
|
||
|
|
+ use_operand_p use_p;
|
||
|
|
+ ssa_op_iter iter;
|
||
|
|
+ gimple *stmt, *def_stmt;
|
||
|
|
+ gimple_stmt_iterator gsi;
|
||
|
|
+
|
||
|
|
+ /* First clear flag for stmts in bb. */
|
||
|
|
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
||
|
|
+ gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false);
|
||
|
|
+
|
||
|
|
+ /* DFS search all depended stmts in bb and mark flag for these stmts. */
|
||
|
|
+ worklist.safe_push (consumer);
|
||
|
|
+ while (!worklist.is_empty ())
|
||
|
|
+ {
|
||
|
|
+ stmt = worklist.pop ();
|
||
|
|
+ FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
|
||
|
|
+ {
|
||
|
|
+ def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
|
||
|
|
+
|
||
|
|
+ if (is_a <gphi *> (def_stmt)
|
||
|
|
+ || gimple_bb (def_stmt) != bb
|
||
|
|
+ || gimple_plf (def_stmt, GF_PLF_1))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ worklist.safe_push (def_stmt);
|
||
|
|
+ }
|
||
|
|
+ gimple_set_plf (stmt, GF_PLF_1, true);
|
||
|
|
+ }
|
||
|
|
+ for (gsi = gsi_start_bb_nondebug (bb);
|
||
|
|
+ !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;)
|
||
|
|
+ {
|
||
|
|
+ /* Move dep stmts to sequence STMTS. */
|
||
|
|
+ if (gimple_plf (stmt, GF_PLF_1))
|
||
|
|
+ {
|
||
|
|
+ gsi_remove (&gsi, false);
|
||
|
|
+ gimple_seq_add_stmt_without_update (stmts, stmt);
|
||
|
|
+ }
|
||
|
|
+ else
|
||
|
|
+ gsi_next_nondebug (&gsi);
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* User can write, optimizers can generate simple reduction RE for inner
|
||
|
|
+ loop. In order to make interchange valid, we have to undo reduction by
|
||
|
|
+ moving producer and consumer stmts into the inner loop. For example,
|
||
|
|
+ below code:
|
||
|
|
+
|
||
|
|
+ init = MEM_REF[idx]; //producer
|
||
|
|
+ loop:
|
||
|
|
+ var = phi<init, next>
|
||
|
|
+ next = var op ...
|
||
|
|
+ reduc_sum = phi<next>
|
||
|
|
+ MEM_REF[idx] = reduc_sum //consumer
|
||
|
|
+
|
||
|
|
+ is transformed into:
|
||
|
|
+
|
||
|
|
+ loop:
|
||
|
|
+ new_var = MEM_REF[idx]; //producer after moving
|
||
|
|
+ next = new_var op ...
|
||
|
|
+ MEM_REF[idx] = next; //consumer after moving
|
||
|
|
+
|
||
|
|
+ Note if the reduction variable is initialized to constant, like:
|
||
|
|
+
|
||
|
|
+ var = phi<0.0, next>
|
||
|
|
+
|
||
|
|
+ we compute new_var as below:
|
||
|
|
+
|
||
|
|
+ loop:
|
||
|
|
+ tmp = MEM_REF[idx];
|
||
|
|
+ new_var = !first_iteration ? tmp : 0.0;
|
||
|
|
+
|
||
|
|
+ so that the initial const is used in the first iteration of loop. Also
|
||
|
|
+ record ssa variables for dead code elimination in DCE_SEEDS. */
|
||
|
|
+
|
||
|
|
+void
|
||
|
|
+loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds)
|
||
|
|
+{
|
||
|
|
+ gimple *stmt;
|
||
|
|
+ gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header);
|
||
|
|
+ gimple_seq stmts = NULL;
|
||
|
|
+ tree new_var;
|
||
|
|
+
|
||
|
|
+ /* Prepare the initialization stmts and insert it to inner loop. */
|
||
|
|
+ if (re->producer != NULL)
|
||
|
|
+ {
|
||
|
|
+ gimple_set_vuse (re->producer, NULL_TREE);
|
||
|
|
+ from = gsi_for_stmt (re->producer);
|
||
|
|
+ gsi_remove (&from, false);
|
||
|
|
+ gimple_seq_add_stmt_without_update (&stmts, re->producer);
|
||
|
|
+ new_var = re->init;
|
||
|
|
+ }
|
||
|
|
+ else
|
||
|
|
+ {
|
||
|
|
+ /* Find all stmts on which expression "MEM_REF[idx]" depends. */
|
||
|
|
+ find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer);
|
||
|
|
+ /* Because we generate new stmt loading from the MEM_REF to TMP. */
|
||
|
|
+ tree cond, tmp = copy_ssa_name (re->var);
|
||
|
|
+ stmt = gimple_build_assign (tmp, re->init_ref);
|
||
|
|
+ gimple_seq_add_stmt_without_update (&stmts, stmt);
|
||
|
|
+
|
||
|
|
+ /* Init new_var to MEM_REF or CONST depending on if it is the first
|
||
|
|
+ iteration. */
|
||
|
|
+ induction_p iv = m_inductions[0];
|
||
|
|
+ cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init_val);
|
||
|
|
+ new_var = copy_ssa_name (re->var);
|
||
|
|
+ stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
|
||
|
|
+ gimple_seq_add_stmt_without_update (&stmts, stmt);
|
||
|
|
+ }
|
||
|
|
+ gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);
|
||
|
|
+
|
||
|
|
+ /* Replace all uses of reduction var with new variable. */
|
||
|
|
+ use_operand_p use_p;
|
||
|
|
+ imm_use_iterator iterator;
|
||
|
|
+ FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var)
|
||
|
|
+ {
|
||
|
|
+ FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
|
||
|
|
+ SET_USE (use_p, new_var);
|
||
|
|
+
|
||
|
|
+ update_stmt (stmt);
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ /* Move consumer stmt into inner loop, just after reduction next's def. */
|
||
|
|
+ unlink_stmt_vdef (re->consumer);
|
||
|
|
+ release_ssa_name (gimple_vdef (re->consumer));
|
||
|
|
+ gimple_set_vdef (re->consumer, NULL_TREE);
|
||
|
|
+ gimple_set_vuse (re->consumer, NULL_TREE);
|
||
|
|
+ gimple_assign_set_rhs1 (re->consumer, re->next);
|
||
|
|
+ from = gsi_for_stmt (re->consumer);
|
||
|
|
+ to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
|
||
|
|
+ gsi_move_after (&from, &to);
|
||
|
|
+
|
||
|
|
+ /* Mark the reduction variables for DCE. */
|
||
|
|
+ bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var));
|
||
|
|
+ bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi)));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Free DATAREFS and its auxiliary memory. */
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+free_data_refs_with_aux (vec<data_reference_p> datarefs)
|
||
|
|
+{
|
||
|
|
+ data_reference_p dr;
|
||
|
|
+ for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
|
||
|
|
+ if (dr->aux != NULL)
|
||
|
|
+ {
|
||
|
|
+ DR_ACCESS_STRIDE (dr)->release ();
|
||
|
|
+ free (dr->aux);
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ free_data_refs (datarefs);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Class for loop interchange transformation. */
|
||
|
|
+
|
||
|
|
+class tree_loop_interchange
|
||
|
|
+{
|
||
|
|
+public:
|
||
|
|
+ tree_loop_interchange (vec<struct loop *> loop_nest)
|
||
|
|
+ : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE),
|
||
|
|
+ m_dce_seeds (BITMAP_ALLOC (NULL)) { }
|
||
|
|
+ ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); }
|
||
|
|
+ bool interchange (vec<data_reference_p>, vec<ddr_p>);
|
||
|
|
+
|
||
|
|
+private:
|
||
|
|
+ void update_data_info (unsigned, unsigned, vec<data_reference_p>, vec<ddr_p>);
|
||
|
|
+ bool valid_data_dependences (unsigned, unsigned, vec<ddr_p>);
|
||
|
|
+ void interchange_loops (loop_cand &, loop_cand &);
|
||
|
|
+ void map_inductions_to_loop (loop_cand &, loop_cand &);
|
||
|
|
+ void move_code_to_inner_loop (struct loop *, struct loop *, basic_block *);
|
||
|
|
+
|
||
|
|
+ /* The whole loop nest in which interchange is ongoing. */
|
||
|
|
+ vec<struct loop *> m_loop_nest;
|
||
|
|
+ /* We create new IV which is only used in loop's exit condition check.
|
||
|
|
+ In case of 3-level loop nest interchange, when we interchange the
|
||
|
|
+ innermost two loops, new IV created in the middle level loop does
|
||
|
|
+ not need to be preserved in interchanging the outermost two loops
|
||
|
|
+ later. We record the IV so that it can be skipped. */
|
||
|
|
+ tree m_niters_iv_var;
|
||
|
|
+ /* Bitmap of seed variables for dead code elimination after interchange. */
|
||
|
|
+ bitmap m_dce_seeds;
|
||
|
|
+};
|
||
|
|
+
|
||
|
|
+/* Update data refs' access stride and dependence information after loop
|
||
|
|
+ interchange. I_IDX/O_IDX gives indices of interchanged loops in loop
|
||
|
|
+ nest. DATAREFS are data references. DDRS are data dependences. */
|
||
|
|
+
|
||
|
|
+void
|
||
|
|
+tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx,
|
||
|
|
+ vec<data_reference_p> datarefs,
|
||
|
|
+ vec<ddr_p> ddrs)
|
||
|
|
+{
|
||
|
|
+ struct data_reference *dr;
|
||
|
|
+ struct data_dependence_relation *ddr;
|
||
|
|
+
|
||
|
|
+ /* Update strides of data references. */
|
||
|
|
+ for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
|
||
|
|
+ {
|
||
|
|
+ vec<tree> *stride = DR_ACCESS_STRIDE (dr);
|
||
|
|
+ gcc_assert (stride->length () > i_idx);
|
||
|
|
+ std::swap ((*stride)[i_idx], (*stride)[o_idx]);
|
||
|
|
+ }
|
||
|
|
+ /* Update data dependences. */
|
||
|
|
+ for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
|
||
|
|
+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
|
||
|
|
+ {
|
||
|
|
+ for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
|
||
|
|
+ {
|
||
|
|
+ lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
|
||
|
|
+ std::swap (dist_vect[i_idx], dist_vect[o_idx]);
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Check data dependence relations, return TRUE if it's valid to interchange
|
||
|
|
+ two loops specified by I_IDX/O_IDX. Theoretically, interchanging the two
|
||
|
|
+ loops is valid only if dist vector, after interchanging, doesn't have '>'
|
||
|
|
+ as the leftmost non-'=' direction. Practically, this function have been
|
||
|
|
+ conservative here by not checking some valid cases. */
|
||
|
|
+
|
||
|
|
+bool
|
||
|
|
+tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx,
|
||
|
|
+ vec<ddr_p> ddrs)
|
||
|
|
+{
|
||
|
|
+ struct data_dependence_relation *ddr;
|
||
|
|
+
|
||
|
|
+ for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
|
||
|
|
+ {
|
||
|
|
+ /* Skip no-dependence case. */
|
||
|
|
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
|
||
|
|
+ {
|
||
|
|
+ lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
|
||
|
|
+ unsigned level = dependence_level (dist_vect, m_loop_nest.length ());
|
||
|
|
+
|
||
|
|
+ /* If there is no carried dependence. */
|
||
|
|
+ if (level == 0)
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ level --;
|
||
|
|
+
|
||
|
|
+ /* If dependence is not carried by any loop in between the two
|
||
|
|
+ loops [oloop, iloop] to interchange. */
|
||
|
|
+ if (level < o_idx || level > i_idx)
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ /* Be conservative, skip case if either direction at i_idx/o_idx
|
||
|
|
+ levels is not '=' or '<'. */
|
||
|
|
+ if (dist_vect[i_idx] < 0 || dist_vect[o_idx] < 0)
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ return true;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Interchange two loops specified by ILOOP and OLOOP. */
|
||
|
|
+
|
||
|
|
+void
|
||
|
|
+tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop)
|
||
|
|
+{
|
||
|
|
+ reduction_p re;
|
||
|
|
+ gimple_stmt_iterator gsi;
|
||
|
|
+ tree i_niters, o_niters, var_after;
|
||
|
|
+
|
||
|
|
+ /* Undo inner loop's simple reduction. */
|
||
|
|
+ for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i)
|
||
|
|
+ if (re->type != DOUBLE_RTYPE)
|
||
|
|
+ {
|
||
|
|
+ if (re->producer)
|
||
|
|
+ reset_debug_uses (re->producer);
|
||
|
|
+
|
||
|
|
+ iloop.undo_simple_reduction (re, m_dce_seeds);
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ /* Only need to reset debug uses for double reduction. */
|
||
|
|
+ for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i)
|
||
|
|
+ {
|
||
|
|
+ gcc_assert (re->type == DOUBLE_RTYPE);
|
||
|
|
+ reset_debug_uses (SSA_NAME_DEF_STMT (re->var));
|
||
|
|
+ reset_debug_uses (SSA_NAME_DEF_STMT (re->next));
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ /* Prepare niters for both loops. */
|
||
|
|
+ struct loop *loop_nest = m_loop_nest[0];
|
||
|
|
+ edge instantiate_below = loop_preheader_edge (loop_nest);
|
||
|
|
+ gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src);
|
||
|
|
+ i_niters = number_of_latch_executions (iloop.m_loop);
|
||
|
|
+ i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters);
|
||
|
|
+ i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop),
|
||
|
|
+ i_niters);
|
||
|
|
+ i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true,
|
||
|
|
+ NULL_TREE, false, GSI_CONTINUE_LINKING);
|
||
|
|
+ o_niters = number_of_latch_executions (oloop.m_loop);
|
||
|
|
+ if (oloop.m_loop != loop_nest)
|
||
|
|
+ {
|
||
|
|
+ o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters);
|
||
|
|
+ o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop),
|
||
|
|
+ o_niters);
|
||
|
|
+ }
|
||
|
|
+ o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true,
|
||
|
|
+ NULL_TREE, false, GSI_CONTINUE_LINKING);
|
||
|
|
+
|
||
|
|
+ /* Move src's code to tgt loop. This is necessary when src is the outer
|
||
|
|
+ loop and tgt is the inner loop. */
|
||
|
|
+ move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs);
|
||
|
|
+
|
||
|
|
+ /* Map outer loop's IV to inner loop, and vice versa. */
|
||
|
|
+ map_inductions_to_loop (oloop, iloop);
|
||
|
|
+ map_inductions_to_loop (iloop, oloop);
|
||
|
|
+
|
||
|
|
+ /* Create canonical IV for both loops. Note canonical IV for outer/inner
|
||
|
|
+ loop is actually from inner/outer loop. Also we record the new IV
|
||
|
|
+ created for the outer loop so that it can be skipped in later loop
|
||
|
|
+ interchange. */
|
||
|
|
+ create_canonical_iv (oloop.m_loop, oloop.m_exit,
|
||
|
|
+ i_niters, &m_niters_iv_var, &var_after);
|
||
|
|
+ bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
|
||
|
|
+ create_canonical_iv (iloop.m_loop, iloop.m_exit,
|
||
|
|
+ o_niters, NULL, &var_after);
|
||
|
|
+ bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
|
||
|
|
+
|
||
|
|
+ /* Scrap niters estimation of interchanged loops. */
|
||
|
|
+ iloop.m_loop->any_upper_bound = false;
|
||
|
|
+ iloop.m_loop->any_likely_upper_bound = false;
|
||
|
|
+ free_numbers_of_iterations_estimates_loop (iloop.m_loop);
|
||
|
|
+ oloop.m_loop->any_upper_bound = false;
|
||
|
|
+ oloop.m_loop->any_likely_upper_bound = false;
|
||
|
|
+ free_numbers_of_iterations_estimates_loop (oloop.m_loop);
|
||
|
|
+
|
||
|
|
+ /* ??? The association between the loop data structure and the
|
||
|
|
+ CFG changed, so what was loop N at the source level is now
|
||
|
|
+ loop M. We should think of retaining the association or breaking
|
||
|
|
+ it fully by creating a new loop instead of re-using the "wrong" one. */
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Map induction variables of SRC loop to TGT loop. The function firstly
|
||
|
|
+ creates the same IV of SRC loop in TGT loop, then deletes the original
|
||
|
|
+ IV and re-initialize it using the newly created IV. For example, loop
|
||
|
|
+ nest:
|
||
|
|
+
|
||
|
|
+ for (i = 0; i < N; i++)
|
||
|
|
+ for (j = 0; j < M; j++)
|
||
|
|
+ {
|
||
|
|
+ //use of i;
|
||
|
|
+ //use of j;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ will be transformed into:
|
||
|
|
+
|
||
|
|
+ for (jj = 0; jj < M; jj++)
|
||
|
|
+ for (ii = 0; ii < N; ii++)
|
||
|
|
+ {
|
||
|
|
+ //use of ii;
|
||
|
|
+ //use of jj;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ after loop interchange. */
|
||
|
|
+
|
||
|
|
+void
|
||
|
|
+tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt)
|
||
|
|
+{
|
||
|
|
+ induction_p iv;
|
||
|
|
+ edge e = tgt.m_exit;
|
||
|
|
+ gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi;
|
||
|
|
+
|
||
|
|
+ /* Map source loop's IV to target loop. */
|
||
|
|
+ for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i)
|
||
|
|
+ {
|
||
|
|
+ gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var);
|
||
|
|
+ gcc_assert (is_a <gphi *> (stmt));
|
||
|
|
+
|
||
|
|
+ use_operand_p use_p;
|
||
|
|
+ /* Only map original IV to target loop. */
|
||
|
|
+ if (m_niters_iv_var != iv->var)
|
||
|
|
+ {
|
||
|
|
+ /* Map the IV by creating the same one in target loop. */
|
||
|
|
+ tree var_before, var_after;
|
||
|
|
+ tree base = unshare_expr (iv->init_expr);
|
||
|
|
+ tree step = unshare_expr (iv->step);
|
||
|
|
+ create_iv (base, step, SSA_NAME_VAR (iv->var),
|
||
|
|
+ tgt.m_loop, &incr_pos, false, &var_before, &var_after);
|
||
|
|
+ bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before));
|
||
|
|
+ bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
|
||
|
|
+
|
||
|
|
+ /* Replace uses of the original IV var with newly created IV var. */
|
||
|
|
+ imm_use_iterator imm_iter;
|
||
|
|
+ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var)
|
||
|
|
+ {
|
||
|
|
+ FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
|
||
|
|
+ SET_USE (use_p, var_before);
|
||
|
|
+
|
||
|
|
+ update_stmt (use_stmt);
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ /* Mark all uses for DCE. */
|
||
|
|
+ ssa_op_iter op_iter;
|
||
|
|
+ FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE)
|
||
|
|
+ {
|
||
|
|
+ tree use = USE_FROM_PTR (use_p);
|
||
|
|
+ if (TREE_CODE (use) == SSA_NAME
|
||
|
|
+ && ! SSA_NAME_IS_DEFAULT_DEF (use))
|
||
|
|
+ bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use));
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ /* Delete definition of the original IV in the source loop. */
|
||
|
|
+ gsi = gsi_for_stmt (stmt);
|
||
|
|
+ remove_phi_node (&gsi, true);
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Move stmts of outer loop to inner loop. */
|
||
|
|
+
|
||
|
|
+void
|
||
|
|
+tree_loop_interchange::move_code_to_inner_loop (struct loop *outer,
|
||
|
|
+ struct loop *inner,
|
||
|
|
+ basic_block *outer_bbs)
|
||
|
|
+{
|
||
|
|
+ basic_block oloop_exit_bb = single_exit (outer)->src;
|
||
|
|
+ gimple_stmt_iterator gsi, to;
|
||
|
|
+
|
||
|
|
+ for (unsigned i = 0; i < outer->num_nodes; i++)
|
||
|
|
+ {
|
||
|
|
+ basic_block bb = outer_bbs[i];
|
||
|
|
+
|
||
|
|
+ /* Skip basic blocks of inner loop. */
|
||
|
|
+ if (flow_bb_inside_loop_p (inner, bb))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ /* Move code from header/latch to header/latch. */
|
||
|
|
+ if (bb == outer->header)
|
||
|
|
+ to = gsi_after_labels (inner->header);
|
||
|
|
+ else if (bb == outer->latch)
|
||
|
|
+ to = gsi_after_labels (inner->latch);
|
||
|
|
+ else
|
||
|
|
+ /* Otherwise, simply move to exit->src. */
|
||
|
|
+ to = gsi_last_bb (single_exit (inner)->src);
|
||
|
|
+
|
||
|
|
+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
|
||
|
|
+ {
|
||
|
|
+ gimple *stmt = gsi_stmt (gsi);
|
||
|
|
+
|
||
|
|
+ if (oloop_exit_bb == bb
|
||
|
|
+ && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb)))
|
||
|
|
+ {
|
||
|
|
+ gsi_next (&gsi);
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ if (gimple_vuse (stmt))
|
||
|
|
+ gimple_set_vuse (stmt, NULL_TREE);
|
||
|
|
+ if (gimple_vdef (stmt))
|
||
|
|
+ {
|
||
|
|
+ unlink_stmt_vdef (stmt);
|
||
|
|
+ release_ssa_name (gimple_vdef (stmt));
|
||
|
|
+ gimple_set_vdef (stmt, NULL_TREE);
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ reset_debug_uses (stmt);
|
||
|
|
+ gsi_move_before (&gsi, &to);
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Given data reference DR in LOOP_NEST, the function computes DR's access
|
||
|
|
+ stride at each level of loop from innermost LOOP to outer. On success,
|
||
|
|
+ it saves access stride at each level loop in a vector which is pointed
|
||
|
|
+ by DR->aux. For example:
|
||
|
|
+
|
||
|
|
+ int arr[100][100][100];
|
||
|
|
+ for (i = 0; i < 100; i++) ;(DR->aux)strides[0] = 40000
|
||
|
|
+ for (j = 100; j > 0; j--) ;(DR->aux)strides[1] = 400
|
||
|
|
+ for (k = 0; k < 100; k++) ;(DR->aux)strides[2] = 4
|
||
|
|
+ arr[i][j - 1][k] = 0; */
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+compute_access_stride (struct loop *loop_nest, struct loop *loop,
|
||
|
|
+ data_reference_p dr)
|
||
|
|
+{
|
||
|
|
+ vec<tree> *strides = new vec<tree> ();
|
||
|
|
+ basic_block bb = gimple_bb (DR_STMT (dr));
|
||
|
|
+
|
||
|
|
+ while (!flow_bb_inside_loop_p (loop, bb))
|
||
|
|
+ {
|
||
|
|
+ strides->safe_push (build_int_cst (sizetype, 0));
|
||
|
|
+ loop = loop_outer (loop);
|
||
|
|
+ }
|
||
|
|
+ gcc_assert (loop == bb->loop_father);
|
||
|
|
+
|
||
|
|
+ tree ref = DR_REF (dr);
|
||
|
|
+ tree scev_base = build_fold_addr_expr (ref);
|
||
|
|
+ tree scev = analyze_scalar_evolution (loop, scev_base);
|
||
|
|
+ scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev);
|
||
|
|
+ if (! chrec_contains_undetermined (scev))
|
||
|
|
+ {
|
||
|
|
+ tree sl = scev;
|
||
|
|
+ struct loop *expected = loop;
|
||
|
|
+ while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
|
||
|
|
+ {
|
||
|
|
+ struct loop *sl_loop = get_chrec_loop (sl);
|
||
|
|
+ while (sl_loop != expected)
|
||
|
|
+ {
|
||
|
|
+ strides->safe_push (size_int (0));
|
||
|
|
+ expected = loop_outer (expected);
|
||
|
|
+ }
|
||
|
|
+ strides->safe_push (CHREC_RIGHT (sl));
|
||
|
|
+ sl = CHREC_LEFT (sl);
|
||
|
|
+ expected = loop_outer (expected);
|
||
|
|
+ }
|
||
|
|
+ if (! tree_contains_chrecs (sl, NULL))
|
||
|
|
+ while (expected != loop_outer (loop_nest))
|
||
|
|
+ {
|
||
|
|
+ strides->safe_push (size_int (0));
|
||
|
|
+ expected = loop_outer (expected);
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ dr->aux = strides;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Given loop nest LOOP_NEST with innermost LOOP, the function computes
|
||
|
|
+ access strides with respect to each level loop for all data refs in
|
||
|
|
+ DATAREFS from inner loop to outer loop. On success, it returns the
|
||
|
|
+ outermost loop that access strides can be computed successfully for
|
||
|
|
+ all data references. If access strides cannot be computed at least
|
||
|
|
+ for two levels of loop for any data reference, it returns NULL. */
|
||
|
|
+
|
||
|
|
+static struct loop *
|
||
|
|
+compute_access_strides (struct loop *loop_nest, struct loop *loop,
|
||
|
|
+ vec<data_reference_p> datarefs)
|
||
|
|
+{
|
||
|
|
+ unsigned i, j, num_loops = (unsigned) -1;
|
||
|
|
+ data_reference_p dr;
|
||
|
|
+ vec<tree> *stride;
|
||
|
|
+
|
||
|
|
+ for (i = 0; datarefs.iterate (i, &dr); ++i)
|
||
|
|
+ {
|
||
|
|
+ compute_access_stride (loop_nest, loop, dr);
|
||
|
|
+ stride = DR_ACCESS_STRIDE (dr);
|
||
|
|
+ if (stride->length () < num_loops)
|
||
|
|
+ {
|
||
|
|
+ num_loops = stride->length ();
|
||
|
|
+ if (num_loops < 2)
|
||
|
|
+ return NULL;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ for (i = 0; datarefs.iterate (i, &dr); ++i)
|
||
|
|
+ {
|
||
|
|
+ stride = DR_ACCESS_STRIDE (dr);
|
||
|
|
+ if (stride->length () > num_loops)
|
||
|
|
+ stride->truncate (num_loops);
|
||
|
|
+
|
||
|
|
+ for (j = 0; j < (num_loops >> 1); ++j)
|
||
|
|
+ std::swap ((*stride)[j], (*stride)[num_loops - j - 1]);
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops);
|
||
|
|
+ gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop));
|
||
|
|
+ return loop;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Prune access strides for data references in DATAREFS by removing strides
|
||
|
|
+ of loops that isn't in current LOOP_NEST. */
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+prune_access_strides_not_in_loop (struct loop *loop_nest,
|
||
|
|
+ struct loop *innermost,
|
||
|
|
+ vec<data_reference_p> datarefs)
|
||
|
|
+{
|
||
|
|
+ data_reference_p dr;
|
||
|
|
+ unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1;
|
||
|
|
+ gcc_assert (num_loops > 1);
|
||
|
|
+
|
||
|
|
+ /* Block remove strides of loops that is not in current loop nest. */
|
||
|
|
+ for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
|
||
|
|
+ {
|
||
|
|
+ vec<tree> *stride = DR_ACCESS_STRIDE (dr);
|
||
|
|
+ if (stride->length () > num_loops)
|
||
|
|
+ stride->block_remove (0, stride->length () - num_loops);
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Dump access strides for all DATAREFS. */
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+dump_access_strides (vec<data_reference_p> datarefs)
|
||
|
|
+{
|
||
|
|
+ data_reference_p dr;
|
||
|
|
+ fprintf (dump_file, "Access Strides for DRs:\n");
|
||
|
|
+ for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, " ");
|
||
|
|
+ print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM);
|
||
|
|
+ fprintf (dump_file, ":\t\t<");
|
||
|
|
+
|
||
|
|
+ vec<tree> *stride = DR_ACCESS_STRIDE (dr);
|
||
|
|
+ unsigned num_loops = stride->length ();
|
||
|
|
+ for (unsigned j = 0; j < num_loops; ++j)
|
||
|
|
+ {
|
||
|
|
+ print_generic_expr (dump_file, (*stride)[j], TDF_SLIM);
|
||
|
|
+ fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n");
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Return true if it's profitable to interchange two loops whose index
|
||
|
|
+ in whole loop nest vector are I_IDX/O_IDX respectively. The function
|
||
|
|
+ computes and compares three types information from all DATAREFS:
|
||
|
|
+ 1) Access stride for loop I_IDX and O_IDX.
|
||
|
|
+ 2) Number of invariant memory references with respect to I_IDX before
|
||
|
|
+ and after loop interchange.
|
||
|
|
+ 3) Flags indicating if all memory references access sequential memory
|
||
|
|
+ in ILOOP, before and after loop interchange.
|
||
|
|
+ If INNMOST_LOOP_P is true, the two loops for interchanging are the two
|
||
|
|
+ innermost loops in loop nest. This function also dumps information if
|
||
|
|
+ DUMP_INFO_P is true. */
|
||
|
|
+
|
||
|
|
+static bool
|
||
|
|
+should_interchange_loops (unsigned i_idx, unsigned o_idx,
|
||
|
|
+ vec<data_reference_p> datarefs,
|
||
|
|
+ bool innermost_loops_p, bool dump_info_p = true)
|
||
|
|
+{
|
||
|
|
+ unsigned HOST_WIDE_INT ratio;
|
||
|
|
+ unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0;
|
||
|
|
+ struct data_reference *dr;
|
||
|
|
+ bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
|
||
|
|
+ widest_int iloop_strides = 0, oloop_strides = 0;
|
||
|
|
+ unsigned num_unresolved_drs = 0;
|
||
|
|
+ unsigned num_resolved_ok_drs = 0;
|
||
|
|
+ unsigned num_resolved_not_ok_drs = 0;
|
||
|
|
+
|
||
|
|
+ if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
|
||
|
|
+
|
||
|
|
+ for (i = 0; datarefs.iterate (i, &dr); ++i)
|
||
|
|
+ {
|
||
|
|
+ vec<tree> *stride = DR_ACCESS_STRIDE (dr);
|
||
|
|
+ tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
|
||
|
|
+
|
||
|
|
+ bool subloop_stride_p = false;
|
||
|
|
+ /* Data ref can't be invariant or sequential access at current loop if
|
||
|
|
+ its address changes with respect to any subloops. */
|
||
|
|
+ for (j = i_idx + 1; j < stride->length (); ++j)
|
||
|
|
+ if (!integer_zerop ((*stride)[j]))
|
||
|
|
+ {
|
||
|
|
+ subloop_stride_p = true;
|
||
|
|
+ break;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ if (integer_zerop (iloop_stride))
|
||
|
|
+ {
|
||
|
|
+ if (!subloop_stride_p)
|
||
|
|
+ num_old_inv_drs++;
|
||
|
|
+ }
|
||
|
|
+ if (integer_zerop (oloop_stride))
|
||
|
|
+ {
|
||
|
|
+ if (!subloop_stride_p)
|
||
|
|
+ num_new_inv_drs++;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ if (TREE_CODE (iloop_stride) == INTEGER_CST
|
||
|
|
+ && TREE_CODE (oloop_stride) == INTEGER_CST)
|
||
|
|
+ {
|
||
|
|
+ iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
|
||
|
|
+ oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
|
||
|
|
+ }
|
||
|
|
+ else if (multiple_of_p (TREE_TYPE (iloop_stride),
|
||
|
|
+ iloop_stride, oloop_stride))
|
||
|
|
+ num_resolved_ok_drs++;
|
||
|
|
+ else if (multiple_of_p (TREE_TYPE (iloop_stride),
|
||
|
|
+ oloop_stride, iloop_stride))
|
||
|
|
+ num_resolved_not_ok_drs++;
|
||
|
|
+ else
|
||
|
|
+ num_unresolved_drs++;
|
||
|
|
+
|
||
|
|
+ /* Data ref can't be sequential access if its address changes in sub
|
||
|
|
+ loop. */
|
||
|
|
+ if (subloop_stride_p)
|
||
|
|
+ {
|
||
|
|
+ all_seq_dr_before_p = false;
|
||
|
|
+ all_seq_dr_after_p = false;
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
+ /* Track if all data references are sequential accesses before/after loop
|
||
|
|
+ interchange. Note invariant is considered sequential here. */
|
||
|
|
+ tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
|
||
|
|
+ if (all_seq_dr_before_p
|
||
|
|
+ && ! (integer_zerop (iloop_stride)
|
||
|
|
+ || operand_equal_p (access_size, iloop_stride, 0)))
|
||
|
|
+ all_seq_dr_before_p = false;
|
||
|
|
+ if (all_seq_dr_after_p
|
||
|
|
+ && ! (integer_zerop (oloop_stride)
|
||
|
|
+ || operand_equal_p (access_size, oloop_stride, 0)))
|
||
|
|
+ all_seq_dr_after_p = false;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, "\toverall:\t\t");
|
||
|
|
+ print_decu (iloop_strides, dump_file);
|
||
|
|
+ fprintf (dump_file, "\t");
|
||
|
|
+ print_decu (oloop_strides, dump_file);
|
||
|
|
+ fprintf (dump_file, "\n");
|
||
|
|
+
|
||
|
|
+ fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n",
|
||
|
|
+ num_old_inv_drs, num_new_inv_drs);
|
||
|
|
+ fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
|
||
|
|
+ all_seq_dr_before_p ? "true" : "false",
|
||
|
|
+ all_seq_dr_after_p ? "true" : "false");
|
||
|
|
+ fprintf (dump_file, "OK to interchage with variable strides: %d\n",
|
||
|
|
+ num_resolved_ok_drs);
|
||
|
|
+ fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
|
||
|
|
+ num_resolved_not_ok_drs);
|
||
|
|
+ fprintf (dump_file, "Variable strides we cannot decide: %d\n",
|
||
|
|
+ num_unresolved_drs);
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* We use different stride comparison ratio for interchanging innermost
|
||
|
|
+ two loops or not. The idea is to be conservative in interchange for
|
||
|
|
+ the innermost loops. */
|
||
|
|
+ ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO;
|
||
|
|
+ /* Do interchange if it gives better data locality behavior. */
|
||
|
|
+ if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio)))
|
||
|
|
+ return true;
|
||
|
|
+ if (wi::gtu_p (iloop_strides, oloop_strides))
|
||
|
|
+ {
|
||
|
|
+ /* Or it creates more invariant memory references. */
|
||
|
|
+ if ((!all_seq_dr_before_p || all_seq_dr_after_p)
|
||
|
|
+ && num_new_inv_drs > num_old_inv_drs)
|
||
|
|
+ return true;
|
||
|
|
+ /* Or it makes all memory references sequential. */
|
||
|
|
+ if (num_new_inv_drs >= num_old_inv_drs
|
||
|
|
+ && !all_seq_dr_before_p && all_seq_dr_after_p)
|
||
|
|
+ return true;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ return false;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Try to interchange inner loop of a loop nest to outer level. */
|
||
|
|
+
|
||
|
|
+bool
|
||
|
|
+tree_loop_interchange::interchange (vec<data_reference_p> datarefs,
|
||
|
|
+ vec<ddr_p> ddrs)
|
||
|
|
+{
|
||
|
|
+ bool changed_p = false;
|
||
|
|
+ /* In each iteration we try to interchange I-th loop with (I+1)-th loop.
|
||
|
|
+ The overall effect is to push inner loop to outermost level in whole
|
||
|
|
+ loop nest. */
|
||
|
|
+ for (unsigned i = m_loop_nest.length (); i > 1; --i)
|
||
|
|
+ {
|
||
|
|
+ unsigned i_idx = i - 1, o_idx = i - 2;
|
||
|
|
+
|
||
|
|
+ /* Check validity for loop interchange. */
|
||
|
|
+ if (!valid_data_dependences (i_idx, o_idx, ddrs))
|
||
|
|
+ break;
|
||
|
|
+
|
||
|
|
+ loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]);
|
||
|
|
+ loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]);
|
||
|
|
+
|
||
|
|
+ /* Check if we can do transformation for loop interchange. */
|
||
|
|
+ if (!iloop.analyze_carried_vars (NULL)
|
||
|
|
+ || !iloop.analyze_lcssa_phis ()
|
||
|
|
+ || !oloop.analyze_carried_vars (&iloop)
|
||
|
|
+ || !oloop.analyze_lcssa_phis ()
|
||
|
|
+ || !iloop.can_interchange_p (NULL)
|
||
|
|
+ || !oloop.can_interchange_p (&iloop))
|
||
|
|
+ break;
|
||
|
|
+
|
||
|
|
+ /* Check profitability for loop interchange. */
|
||
|
|
+ if (should_interchange_loops (i_idx, o_idx, datarefs,
|
||
|
|
+ iloop.m_loop->inner == NULL))
|
||
|
|
+ {
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ fprintf (dump_file,
|
||
|
|
+ "Loop_pair<outer:%d, inner:%d> is interchanged\n\n",
|
||
|
|
+ oloop.m_loop->num, iloop.m_loop->num);
|
||
|
|
+
|
||
|
|
+ changed_p = true;
|
||
|
|
+ interchange_loops (iloop, oloop);
|
||
|
|
+ /* No need to update if there is no further loop interchange. */
|
||
|
|
+ if (o_idx > 0)
|
||
|
|
+ update_data_info (i_idx, o_idx, datarefs, ddrs);
|
||
|
|
+ }
|
||
|
|
+ else
|
||
|
|
+ {
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ fprintf (dump_file,
|
||
|
|
+ "Loop_pair<outer:%d, inner:%d> is not interchanged\n\n",
|
||
|
|
+ oloop.m_loop->num, iloop.m_loop->num);
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ simple_dce_from_worklist (m_dce_seeds);
|
||
|
|
+ return changed_p;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+
|
||
|
|
+/* Loop interchange pass. */
|
||
|
|
+
|
||
|
|
+namespace {
|
||
|
|
+
|
||
|
|
+const pass_data pass_data_linterchange =
|
||
|
|
+{
|
||
|
|
+ GIMPLE_PASS, /* type */
|
||
|
|
+ "linterchange", /* name */
|
||
|
|
+ OPTGROUP_LOOP, /* optinfo_flags */
|
||
|
|
+ TV_LINTERCHANGE, /* tv_id */
|
||
|
|
+ PROP_cfg, /* properties_required */
|
||
|
|
+ 0, /* properties_provided */
|
||
|
|
+ 0, /* properties_destroyed */
|
||
|
|
+ 0, /* todo_flags_start */
|
||
|
|
+ 0, /* todo_flags_finish */
|
||
|
|
+};
|
||
|
|
+
|
||
|
|
+class pass_linterchange : public gimple_opt_pass
|
||
|
|
+{
|
||
|
|
+public:
|
||
|
|
+ pass_linterchange (gcc::context *ctxt)
|
||
|
|
+ : gimple_opt_pass (pass_data_linterchange, ctxt)
|
||
|
|
+ {}
|
||
|
|
+
|
||
|
|
+ /* opt_pass methods: */
|
||
|
|
+ opt_pass * clone () { return new pass_linterchange (m_ctxt); }
|
||
|
|
+ virtual bool gate (function *) { return flag_loop_interchange; }
|
||
|
|
+ virtual unsigned int execute (function *);
|
||
|
|
+
|
||
|
|
+}; // class pass_linterchange
|
||
|
|
+
|
||
|
|
+
|
||
|
|
+/* Return true if LOOP has proper form for interchange. We check three
|
||
|
|
+ conditions in the function:
|
||
|
|
+ 1) In general, a loop can be interchanged only if it doesn't have
|
||
|
|
+ basic blocks other than header, exit and latch besides possible
|
||
|
|
+ inner loop nest. This basically restricts loop interchange to
|
||
|
|
+ below form loop nests:
|
||
|
|
+
|
||
|
|
+ header<---+
|
||
|
|
+ | |
|
||
|
|
+ v |
|
||
|
|
+ INNER_LOOP |
|
||
|
|
+ | |
|
||
|
|
+ v |
|
||
|
|
+ exit--->latch
|
||
|
|
+
|
||
|
|
+ 2) Data references in basic blocks that execute a different number of
|
||
|
|
+ times than the loop header/exit are not allowed.
|
||
|
|
+ 3) Record the innermost outer loop that doesn't form rectangle loop
|
||
|
|
+ nest with LOOP. */
|
||
|
|
+
|
||
|
|
+static bool
|
||
|
|
+proper_loop_form_for_interchange (struct loop *loop, struct loop **min_outer)
|
||
|
|
+{
|
||
|
|
+ edge e0, e1, exit;
|
||
|
|
+
|
||
|
|
+ /* Don't interchange if loop has unsupported information for the moment. */
|
||
|
|
+ if (loop->safelen > 0
|
||
|
|
+ || loop->constraints != 0
|
||
|
|
+ || loop->can_be_parallel
|
||
|
|
+ || loop->dont_vectorize
|
||
|
|
+ || loop->force_vectorize
|
||
|
|
+ || loop->in_oacc_kernels_region
|
||
|
|
+ || loop->orig_loop_num != 0
|
||
|
|
+ || loop->simduid != NULL_TREE)
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Don't interchange if outer loop has basic block other than header, exit
|
||
|
|
+ and latch. */
|
||
|
|
+ if (loop->inner != NULL
|
||
|
|
+ && loop->num_nodes != loop->inner->num_nodes + 3)
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ if ((exit = single_dom_exit (loop)) == NULL)
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Check control flow on loop header/exit blocks. */
|
||
|
|
+ if (loop->header == exit->src
|
||
|
|
+ && (EDGE_COUNT (loop->header->preds) != 2
|
||
|
|
+ || EDGE_COUNT (loop->header->succs) != 2))
|
||
|
|
+ return false;
|
||
|
|
+ else if (loop->header != exit->src
|
||
|
|
+ && (EDGE_COUNT (loop->header->preds) != 2
|
||
|
|
+ || !single_succ_p (loop->header)
|
||
|
|
+ || unsupported_edge (single_succ_edge (loop->header))
|
||
|
|
+ || EDGE_COUNT (exit->src->succs) != 2
|
||
|
|
+ || !single_pred_p (exit->src)
|
||
|
|
+ || unsupported_edge (single_pred_edge (exit->src))))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ e0 = EDGE_PRED (loop->header, 0);
|
||
|
|
+ e1 = EDGE_PRED (loop->header, 1);
|
||
|
|
+ if (unsupported_edge (e0) || unsupported_edge (e1)
|
||
|
|
+ || (e0->src != loop->latch && e1->src != loop->latch)
|
||
|
|
+ || (e0->src->loop_father == loop && e1->src->loop_father == loop))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ e0 = EDGE_SUCC (exit->src, 0);
|
||
|
|
+ e1 = EDGE_SUCC (exit->src, 1);
|
||
|
|
+ if (unsupported_edge (e0) || unsupported_edge (e1)
|
||
|
|
+ || (e0->dest != loop->latch && e1->dest != loop->latch)
|
||
|
|
+ || (e0->dest->loop_father == loop && e1->dest->loop_father == loop))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Don't interchange if any reference is in basic block that doesn't
|
||
|
|
+ dominate exit block. */
|
||
|
|
+ basic_block *bbs = get_loop_body (loop);
|
||
|
|
+ for (unsigned i = 0; i < loop->num_nodes; i++)
|
||
|
|
+ {
|
||
|
|
+ basic_block bb = bbs[i];
|
||
|
|
+
|
||
|
|
+ if (bb->loop_father != loop
|
||
|
|
+ || bb == loop->header || bb == exit->src
|
||
|
|
+ || dominated_by_p (CDI_DOMINATORS, exit->src, bb))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ for (gimple_stmt_iterator gsi = gsi_start_bb_nondebug (bb);
|
||
|
|
+ !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
|
||
|
|
+ if (gimple_vuse (gsi_stmt (gsi)))
|
||
|
|
+ {
|
||
|
|
+ free (bbs);
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ free (bbs);
|
||
|
|
+
|
||
|
|
+ tree niters = number_of_latch_executions (loop);
|
||
|
|
+ niters = analyze_scalar_evolution (loop_outer (loop), niters);
|
||
|
|
+ if (!niters || chrec_contains_undetermined (niters))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Record the innermost outer loop that doesn't form rectangle loop nest. */
|
||
|
|
+ for (loop_p loop2 = loop_outer (loop);
|
||
|
|
+ loop2 && flow_loop_nested_p (*min_outer, loop2);
|
||
|
|
+ loop2 = loop_outer (loop2))
|
||
|
|
+ {
|
||
|
|
+ niters = instantiate_scev (loop_preheader_edge (loop2),
|
||
|
|
+ loop_outer (loop), niters);
|
||
|
|
+ if (!evolution_function_is_invariant_p (niters, loop2->num))
|
||
|
|
+ {
|
||
|
|
+ *min_outer = loop2;
|
||
|
|
+ break;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ return true;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST]
|
||
|
|
+ should be interchanged by looking into all DATAREFS. */
|
||
|
|
+
|
||
|
|
+static bool
|
||
|
|
+should_interchange_loop_nest (struct loop *loop_nest, struct loop *innermost,
|
||
|
|
+ vec<data_reference_p> datarefs)
|
||
|
|
+{
|
||
|
|
+ unsigned idx = loop_depth (innermost) - loop_depth (loop_nest);
|
||
|
|
+ gcc_assert (idx > 0);
|
||
|
|
+
|
||
|
|
+ /* Check if any two adjacent loops should be interchanged. */
|
||
|
|
+ for (struct loop *loop = innermost;
|
||
|
|
+ loop != loop_nest; loop = loop_outer (loop), idx--)
|
||
|
|
+ if (should_interchange_loops (idx, idx - 1, datarefs,
|
||
|
|
+ loop == innermost, false))
|
||
|
|
+ return true;
|
||
|
|
+
|
||
|
|
+ return false;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Given loop nest LOOP_NEST and data references DATAREFS, compute data
|
||
|
|
+ dependences for loop interchange and store it in DDRS. Note we compute
|
||
|
|
+ dependences directly rather than call generic interface so that we can
|
||
|
|
+ return on unknown dependence instantly. */
|
||
|
|
+
|
||
|
|
+static bool
|
||
|
|
+tree_loop_interchange_compute_ddrs (vec<loop_p> loop_nest,
|
||
|
|
+ vec<data_reference_p> datarefs,
|
||
|
|
+ vec<ddr_p> *ddrs)
|
||
|
|
+{
|
||
|
|
+ struct data_reference *a, *b;
|
||
|
|
+ struct loop *innermost = loop_nest.last ();
|
||
|
|
+
|
||
|
|
+ for (unsigned i = 0; datarefs.iterate (i, &a); ++i)
|
||
|
|
+ {
|
||
|
|
+ bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost;
|
||
|
|
+ for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j)
|
||
|
|
+ if (DR_IS_WRITE (a) || DR_IS_WRITE (b))
|
||
|
|
+ {
|
||
|
|
+ bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost;
|
||
|
|
+ /* Don't support multiple write references in outer loop. */
|
||
|
|
+ if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest);
|
||
|
|
+ ddrs->safe_push (ddr);
|
||
|
|
+ compute_affine_dependence (ddr, loop_nest[0]);
|
||
|
|
+
|
||
|
|
+ /* Give up if ddr is unknown dependence or classic direct vector
|
||
|
|
+ is not available. */
|
||
|
|
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
|
||
|
|
+ || (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
|
||
|
|
+ && DDR_NUM_DIR_VECTS (ddr) == 0))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* If either data references is in outer loop of nest, we require
|
||
|
|
+ no dependence here because the data reference need to be moved
|
||
|
|
+ into inner loop during interchange. */
|
||
|
|
+ if (a_outer_p && b_outer_p
|
||
|
|
+ && operand_equal_p (DR_REF (a), DR_REF (b), 0))
|
||
|
|
+ continue;
|
||
|
|
+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known
|
||
|
|
+ && (a_outer_p || b_outer_p))
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ return true;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Prune DATAREFS by removing any data reference not inside of LOOP. */
|
||
|
|
+
|
||
|
|
+static inline void
|
||
|
|
+prune_datarefs_not_in_loop (struct loop *loop, vec<data_reference_p> datarefs)
|
||
|
|
+{
|
||
|
|
+ unsigned i, j;
|
||
|
|
+ struct data_reference *dr;
|
||
|
|
+
|
||
|
|
+ for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i)
|
||
|
|
+ {
|
||
|
|
+ if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr))))
|
||
|
|
+ datarefs[j++] = dr;
|
||
|
|
+ else
|
||
|
|
+ {
|
||
|
|
+ if (dr->aux)
|
||
|
|
+ {
|
||
|
|
+ DR_ACCESS_STRIDE (dr)->release ();
|
||
|
|
+ free (dr->aux);
|
||
|
|
+ }
|
||
|
|
+ free_data_ref (dr);
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ datarefs.truncate (j);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Find and store data references in DATAREFS for LOOP nest. If there's
|
||
|
|
+ difficult data reference in a basic block, we shrink the loop nest to
|
||
|
|
+ inner loop of that basic block's father loop. On success, return the
|
||
|
|
+ outer loop of the result loop nest. */
|
||
|
|
+
|
||
|
|
+static struct loop *
|
||
|
|
+prepare_data_references (struct loop *loop, vec<data_reference_p> *datarefs)
|
||
|
|
+{
|
||
|
|
+ struct loop *loop_nest = loop;
|
||
|
|
+ vec<data_reference_p> *bb_refs;
|
||
|
|
+ basic_block bb, *bbs = get_loop_body_in_dom_order (loop);
|
||
|
|
+
|
||
|
|
+ for (unsigned i = 0; i < loop->num_nodes; i++)
|
||
|
|
+ bbs[i]->aux = NULL;
|
||
|
|
+
|
||
|
|
+ /* Find data references for all basic blocks. Shrink loop nest on difficult
|
||
|
|
+ data reference. */
|
||
|
|
+ for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i)
|
||
|
|
+ {
|
||
|
|
+ bb = bbs[i];
|
||
|
|
+ if (!flow_bb_inside_loop_p (loop_nest, bb))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ bb_refs = new vec<data_reference_p> ();
|
||
|
|
+ if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know)
|
||
|
|
+ {
|
||
|
|
+ loop_nest = bb->loop_father->inner;
|
||
|
|
+ if (loop_nest && !loop_nest->inner)
|
||
|
|
+ loop_nest = NULL;
|
||
|
|
+
|
||
|
|
+ free_data_refs (*bb_refs);
|
||
|
|
+ delete bb_refs;
|
||
|
|
+ }
|
||
|
|
+ else if (bb_refs->is_empty ())
|
||
|
|
+ delete bb_refs;
|
||
|
|
+ else
|
||
|
|
+ bb->aux = bb_refs;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ /* Collect all data references in loop nest. */
|
||
|
|
+ for (unsigned i = 0; i < loop->num_nodes; i++)
|
||
|
|
+ {
|
||
|
|
+ bb = bbs[i];
|
||
|
|
+ if (!bb->aux)
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ bb_refs = (vec<data_reference_p> *) bb->aux;
|
||
|
|
+ if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb))
|
||
|
|
+ datarefs->safe_splice (*bb_refs);
|
||
|
|
+ else
|
||
|
|
+ free_data_refs (*bb_refs);
|
||
|
|
+
|
||
|
|
+ delete bb_refs;
|
||
|
|
+ bb->aux = NULL;
|
||
|
|
+ }
|
||
|
|
+ free (bbs);
|
||
|
|
+
|
||
|
|
+ return loop_nest;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Given innermost LOOP, return true if perfect loop nest can be found and
|
||
|
|
+ data dependences can be computed. If succeed, record the perfect loop
|
||
|
|
+ nest in LOOP_NEST; record all data references in DATAREFS and record all
|
||
|
|
+ data dependence relations in DDRS.
|
||
|
|
+
|
||
|
|
+ We do support a restricted form of imperfect loop nest, i.e, loop nest
|
||
|
|
+ with load/store in outer loop initializing/finalizing simple reduction
|
||
|
|
+ of the innermost loop. For such outer loop reference, we require that
|
||
|
|
+ it has no dependence with others since it will be moved to inner loop
|
||
|
|
+ in interchange. */
|
||
|
|
+
|
||
|
|
+static bool
|
||
|
|
+prepare_perfect_loop_nest (struct loop *loop, vec<loop_p> *loop_nest,
|
||
|
|
+ vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
|
||
|
|
+{
|
||
|
|
+ struct loop *start_loop = NULL, *innermost = loop;
|
||
|
|
+ struct loop *outermost = loops_for_fn (cfun)->tree_root;
|
||
|
|
+
|
||
|
|
+ /* Find loop nest from the innermost loop. The outermost is the innermost
|
||
|
|
+ outer loop that doesn't form a rectangle loop nest with it. */
|
||
|
|
+ while (loop->num != 0 && loop->inner == start_loop
|
||
|
|
+ && flow_loop_nested_p (outermost, loop))
|
||
|
|
+ {
|
||
|
|
+ if (!proper_loop_form_for_interchange (loop, &outermost))
|
||
|
|
+ break;
|
||
|
|
+
|
||
|
|
+ start_loop = loop;
|
||
|
|
+ /* If this loop has sibling loop, the father loop won't be in perfect
|
||
|
|
+ loop nest. */
|
||
|
|
+ if (loop->next != NULL)
|
||
|
|
+ break;
|
||
|
|
+
|
||
|
|
+ loop = loop_outer (loop);
|
||
|
|
+ }
|
||
|
|
+ if (!start_loop || !start_loop->inner)
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Prepare the data reference vector for the loop nest, pruning outer
|
||
|
|
+ loops we cannot handle. */
|
||
|
|
+ start_loop = prepare_data_references (start_loop, datarefs);
|
||
|
|
+ if (!start_loop
|
||
|
|
+ /* Check if there is no data reference. */
|
||
|
|
+ || datarefs->is_empty ()
|
||
|
|
+ /* Check if there are too many of data references. */
|
||
|
|
+ || (int) datarefs->length () > MAX_DATAREFS)
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Compute access strides for all data references, pruning outer
|
||
|
|
+ loops we cannot analyze refs in. */
|
||
|
|
+ start_loop = compute_access_strides (start_loop, innermost, *datarefs);
|
||
|
|
+ if (!start_loop)
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Check if any interchange is profitable in the loop nest. */
|
||
|
|
+ if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Check if data dependences can be computed for loop nest starting from
|
||
|
|
+ start_loop. */
|
||
|
|
+ loop = start_loop;
|
||
|
|
+ do {
|
||
|
|
+ loop_nest->truncate (0);
|
||
|
|
+
|
||
|
|
+ if (loop != start_loop)
|
||
|
|
+ prune_datarefs_not_in_loop (start_loop, *datarefs);
|
||
|
|
+
|
||
|
|
+ if (find_loop_nest (start_loop, loop_nest)
|
||
|
|
+ && tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs))
|
||
|
|
+ {
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ fprintf (dump_file,
|
||
|
|
+ "\nConsider loop interchange for loop_nest<%d - %d>\n",
|
||
|
|
+ start_loop->num, innermost->num);
|
||
|
|
+
|
||
|
|
+ if (loop != start_loop)
|
||
|
|
+ prune_access_strides_not_in_loop (start_loop, innermost, *datarefs);
|
||
|
|
+
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ dump_access_strides (*datarefs);
|
||
|
|
+
|
||
|
|
+ return true;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ free_dependence_relations (*ddrs);
|
||
|
|
+ *ddrs = vNULL;
|
||
|
|
+ /* Try to compute data dependences with the outermost loop stripped. */
|
||
|
|
+ loop = start_loop;
|
||
|
|
+ start_loop = start_loop->inner;
|
||
|
|
+ } while (start_loop && start_loop->inner);
|
||
|
|
+
|
||
|
|
+ return false;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Main entry for loop interchange pass. */
|
||
|
|
+
|
||
|
|
+unsigned int
|
||
|
|
+pass_linterchange::execute (function *fun)
|
||
|
|
+{
|
||
|
|
+ if (number_of_loops (fun) <= 2)
|
||
|
|
+ return 0;
|
||
|
|
+
|
||
|
|
+ bool changed_p = false;
|
||
|
|
+ struct loop *loop;
|
||
|
|
+ FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
|
||
|
|
+ {
|
||
|
|
+ vec<loop_p> loop_nest = vNULL;
|
||
|
|
+ vec<data_reference_p> datarefs = vNULL;
|
||
|
|
+ vec<ddr_p> ddrs = vNULL;
|
||
|
|
+ if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
|
||
|
|
+ {
|
||
|
|
+ tree_loop_interchange loop_interchange (loop_nest);
|
||
|
|
+ changed_p |= loop_interchange.interchange (datarefs, ddrs);
|
||
|
|
+ }
|
||
|
|
+ free_dependence_relations (ddrs);
|
||
|
|
+ free_data_refs_with_aux (datarefs);
|
||
|
|
+ loop_nest.release ();
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ if (changed_p)
|
||
|
|
+ scev_reset_htab ();
|
||
|
|
+
|
||
|
|
+ return changed_p ? (TODO_update_ssa_only_virtuals) : 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+} // anon namespace
|
||
|
|
+
|
||
|
|
+gimple_opt_pass *
|
||
|
|
+make_pass_linterchange (gcc::context *ctxt)
|
||
|
|
+{
|
||
|
|
+ return new pass_linterchange (ctxt);
|
||
|
|
+}
|
||
|
|
diff -N -urp a/gcc/gimple-pretty-print.h b/gcc/gimple-pretty-print.h
|
||
|
|
--- a/gcc/gimple-pretty-print.h 2018-11-15 15:54:01.223039794 +0800
|
||
|
|
+++ b/gcc/gimple-pretty-print.h 2018-11-15 16:03:17.447054436 +0800
|
||
|
|
@@ -27,10 +27,10 @@ along with GCC; see the file COPYING3.
|
||
|
|
extern void debug_gimple_stmt (gimple *);
|
||
|
|
extern void debug_gimple_seq (gimple_seq);
|
||
|
|
extern void print_gimple_seq (FILE *, gimple_seq, int, int);
|
||
|
|
-extern void print_gimple_stmt (FILE *, gimple *, int, int);
|
||
|
|
+extern void print_gimple_stmt (FILE *, gimple *, int, int = 0);
|
||
|
|
extern void debug (gimple &ref);
|
||
|
|
extern void debug (gimple *ptr);
|
||
|
|
-extern void print_gimple_expr (FILE *, gimple *, int, int);
|
||
|
|
+extern void print_gimple_expr (FILE *, gimple *, int, int = 0);
|
||
|
|
extern void pp_gimple_stmt_1 (pretty_printer *, gimple *, int, int);
|
||
|
|
extern void gimple_dump_bb (FILE *, basic_block, int, int);
|
||
|
|
extern void gimple_dump_bb_for_graph (pretty_printer *, basic_block);
|
||
|
|
diff -N -urp a/gcc/opts.c b/gcc/opts.c
|
||
|
|
--- a/gcc/opts.c 2018-11-15 15:59:30.459048461 +0800
|
||
|
|
+++ b/gcc/opts.c 2018-11-15 16:03:17.447054436 +0800
|
||
|
|
@@ -538,6 +538,7 @@ static const struct default_options defa
|
||
|
|
{ OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
|
||
|
|
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
|
||
|
|
{ OPT_LEVELS_3_PLUS, OPT_ftree_slp_vectorize, NULL, 1 },
|
||
|
|
+ { OPT_LEVELS_3_PLUS, OPT_floop_interchange, NULL, 1 },
|
||
|
|
{ OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_DYNAMIC },
|
||
|
|
{ OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 },
|
||
|
|
{ OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 },
|
||
|
|
diff -N -urp a/gcc/params.def b/gcc/params.def
|
||
|
|
--- a/gcc/params.def 2018-11-15 15:59:30.459048461 +0800
|
||
|
|
+++ b/gcc/params.def 2018-11-15 16:03:17.451054437 +0800
|
||
|
|
@@ -780,6 +780,20 @@ DEFPARAM (PARAM_L2_CACHE_SIZE,
|
||
|
|
"The size of L2 cache.",
|
||
|
|
512, 0, 0)
|
||
|
|
|
||
|
|
+/* Maximum number of statements in loop nest for loop interchange. */
|
||
|
|
+
|
||
|
|
+DEFPARAM (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS,
|
||
|
|
+ "loop-interchange-max-num-stmts",
|
||
|
|
+ "The maximum number of stmts in loop nest for loop interchange.",
|
||
|
|
+ 64, 0, 0)
|
||
|
|
+
|
||
|
|
+/* Minimum stride ratio for loop interchange to be profitable. */
|
||
|
|
+
|
||
|
|
+DEFPARAM (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO,
|
||
|
|
+ "loop-interchange-stride-ratio",
|
||
|
|
+ "The minimum stride ratio for loop interchange to be profitable.",
|
||
|
|
+ 2, 0, 0)
|
||
|
|
+
|
||
|
|
/* Whether we should use canonical types rather than deep "structural"
|
||
|
|
type checking. Setting this value to 1 (the default) improves
|
||
|
|
compilation performance in the C++ and Objective-C++ front end;
|
||
|
|
diff -N -urp a/gcc/passes.def b/gcc/passes.def
|
||
|
|
--- a/gcc/passes.def 2018-11-15 15:59:30.463048461 +0800
|
||
|
|
+++ b/gcc/passes.def 2018-11-15 16:03:17.451054437 +0800
|
||
|
|
@@ -278,6 +278,7 @@ along with GCC; see the file COPYING3.
|
||
|
|
NEXT_PASS (pass_cd_dce);
|
||
|
|
NEXT_PASS (pass_record_bounds);
|
||
|
|
NEXT_PASS (pass_loop_distribution);
|
||
|
|
+ NEXT_PASS (pass_linterchange);
|
||
|
|
NEXT_PASS (pass_copy_prop);
|
||
|
|
NEXT_PASS (pass_graphite);
|
||
|
|
PUSH_INSERT_PASSES_WITHIN (pass_graphite)
|
||
|
|
diff -N -urp a/gcc/timevar.def b/gcc/timevar.def
|
||
|
|
--- a/gcc/timevar.def 2018-11-15 15:59:30.463048461 +0800
|
||
|
|
+++ b/gcc/timevar.def 2018-11-15 16:03:17.455054437 +0800
|
||
|
|
@@ -182,6 +182,7 @@ DEFTIMEVAR (TV_TREE_LOOP , "tree lo
|
||
|
|
DEFTIMEVAR (TV_TREE_NOLOOP , "loopless fn")
|
||
|
|
DEFTIMEVAR (TV_TREE_LOOP_BOUNDS , "tree loop bounds")
|
||
|
|
DEFTIMEVAR (TV_LIM , "tree loop invariant motion")
|
||
|
|
+DEFTIMEVAR (TV_LINTERCHANGE , "tree loop interchange")
|
||
|
|
DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical iv")
|
||
|
|
DEFTIMEVAR (TV_SCEV_CONST , "scev constant prop")
|
||
|
|
DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching")
|
||
|
|
diff -N -urp a/gcc/tree-pass.h b/gcc/tree-pass.h
|
||
|
|
--- a/gcc/tree-pass.h 2018-11-15 15:59:30.467048461 +0800
|
||
|
|
+++ b/gcc/tree-pass.h 2018-11-15 16:03:17.455054437 +0800
|
||
|
|
@@ -367,6 +367,7 @@ extern gimple_opt_pass *make_pass_tree_l
|
||
|
|
extern gimple_opt_pass *make_pass_tree_no_loop (gcc::context *ctxt);
|
||
|
|
extern gimple_opt_pass *make_pass_tree_loop_init (gcc::context *ctxt);
|
||
|
|
extern gimple_opt_pass *make_pass_lim (gcc::context *ctxt);
|
||
|
|
+extern gimple_opt_pass *make_pass_linterchange (gcc::context *ctxt);
|
||
|
|
extern gimple_opt_pass *make_pass_tree_unswitch (gcc::context *ctxt);
|
||
|
|
extern gimple_opt_pass *make_pass_loop_split (gcc::context *ctxt);
|
||
|
|
extern gimple_opt_pass *make_pass_loop_jam (gcc::context *ctxt);
|
||
|
|
diff -N -urp a/gcc/tree-pretty-print.h b/gcc/tree-pretty-print.h
|
||
|
|
--- a/gcc/tree-pretty-print.h 2018-11-15 15:54:01.439039800 +0800
|
||
|
|
+++ b/gcc/tree-pretty-print.h 2018-11-15 16:03:17.455054437 +0800
|
||
|
|
@@ -37,7 +37,7 @@ extern void debug_tree_chain (tree);
|
||
|
|
extern void print_generic_decl (FILE *, tree, int);
|
||
|
|
extern void print_generic_stmt (FILE *, tree, int);
|
||
|
|
extern void print_generic_stmt_indented (FILE *, tree, int, int);
|
||
|
|
-extern void print_generic_expr (FILE *, tree, int);
|
||
|
|
+extern void print_generic_expr (FILE *, tree, int = 0);
|
||
|
|
extern void dump_omp_clauses (pretty_printer *, tree, int, int);
|
||
|
|
extern int dump_generic_node (pretty_printer *, tree, int, int, bool);
|
||
|
|
extern void print_declaration (pretty_printer *, tree, int, int);
|
||
|
|
diff -N -urp a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
|
||
|
|
--- a/gcc/tree-scalar-evolution.c 2018-11-15 15:54:01.443039800 +0800
|
||
|
|
+++ b/gcc/tree-scalar-evolution.c 2018-11-15 16:03:17.459054437 +0800
|
||
|
|
@@ -3000,6 +3000,50 @@ instantiate_scev (basic_block instantiat
|
||
|
|
return res;
|
||
|
|
}
|
||
|
|
|
||
|
|
+tree
|
||
|
|
+instantiate_scev (edge instantiate_below, struct loop *evolution_loop,
|
||
|
|
+ tree chrec)
|
||
|
|
+{
|
||
|
|
+ tree res;
|
||
|
|
+
|
||
|
|
+ if (dump_file && (dump_flags & TDF_SCEV))
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, "(instantiate_scev \n");
|
||
|
|
+ fprintf (dump_file, " (instantiate_below = %d -> %d)\n",
|
||
|
|
+ instantiate_below->src->index, instantiate_below->dest->index);
|
||
|
|
+ if (evolution_loop)
|
||
|
|
+ fprintf (dump_file, " (evolution_loop = %d)\n", evolution_loop->num);
|
||
|
|
+ fprintf (dump_file, " (chrec = ");
|
||
|
|
+ print_generic_expr (dump_file, chrec);
|
||
|
|
+ fprintf (dump_file, ")\n");
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ bool destr = false;
|
||
|
|
+ if (!global_cache)
|
||
|
|
+ {
|
||
|
|
+ global_cache = new instantiate_cache_type;
|
||
|
|
+ destr = true;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ res = instantiate_scev_r (instantiate_below->src, evolution_loop,
|
||
|
|
+ NULL, chrec, NULL, 0);
|
||
|
|
+
|
||
|
|
+ if (destr)
|
||
|
|
+ {
|
||
|
|
+ delete global_cache;
|
||
|
|
+ global_cache = NULL;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ if (dump_file && (dump_flags & TDF_SCEV))
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, " (res = ");
|
||
|
|
+ print_generic_expr (dump_file, res);
|
||
|
|
+ fprintf (dump_file, "))\n");
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ return res;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
/* Similar to instantiate_parameters, but does not introduce the
|
||
|
|
evolutions in outer loops for LOOP invariants in CHREC, and does not
|
||
|
|
care about causing overflows, as long as they do not affect value
|
||
|
|
diff -N -urp a/gcc/tree-scalar-evolution.h b/gcc/tree-scalar-evolution.h
|
||
|
|
--- a/gcc/tree-scalar-evolution.h 2018-11-15 15:54:01.443039800 +0800
|
||
|
|
+++ b/gcc/tree-scalar-evolution.h 2018-11-15 16:03:17.459054437 +0800
|
||
|
|
@@ -31,6 +31,7 @@ extern void scev_reset_htab (void);
|
||
|
|
extern void scev_finalize (void);
|
||
|
|
extern tree analyze_scalar_evolution (struct loop *, tree);
|
||
|
|
extern tree instantiate_scev (basic_block, struct loop *, tree);
|
||
|
|
+extern tree instantiate_scev (edge, struct loop *, tree);
|
||
|
|
extern tree resolve_mixers (struct loop *, tree, bool *);
|
||
|
|
extern void gather_stats_on_scev_database (void);
|
||
|
|
extern void final_value_replacement_loop (struct loop *);
|
||
|
|
diff -N -urp a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c
|
||
|
|
--- a/gcc/tree-ssa-dce.c 2018-11-15 15:54:01.443039800 +0800
|
||
|
|
+++ b/gcc/tree-ssa-dce.c 2018-11-15 16:03:17.463054437 +0800
|
||
|
|
@@ -1729,3 +1729,55 @@ make_pass_cd_dce (gcc::context *ctxt)
|
||
|
|
{
|
||
|
|
return new pass_cd_dce (ctxt);
|
||
|
|
}
|
||
|
|
+
|
||
|
|
+
|
||
|
|
+/* A cheap DCE interface. WORKLIST is a list of possibly dead stmts and
|
||
|
|
+ is consumed by this function. The function has linear complexity in
|
||
|
|
+ the number of dead stmts with a constant factor like the average SSA
|
||
|
|
+ use operands number. */
|
||
|
|
+
|
||
|
|
+void
|
||
|
|
+simple_dce_from_worklist (bitmap worklist)
|
||
|
|
+{
|
||
|
|
+ while (! bitmap_empty_p (worklist))
|
||
|
|
+ {
|
||
|
|
+ /* Pop item. */
|
||
|
|
+ unsigned i = bitmap_first_set_bit (worklist);
|
||
|
|
+ bitmap_clear_bit (worklist, i);
|
||
|
|
+
|
||
|
|
+ tree def = ssa_name (i);
|
||
|
|
+ /* Removed by somebody else or still in use. */
|
||
|
|
+ if (! def || ! has_zero_uses (def))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ gimple *t = SSA_NAME_DEF_STMT (def);
|
||
|
|
+ if (gimple_has_side_effects (t))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ /* Add uses to the worklist. */
|
||
|
|
+ ssa_op_iter iter;
|
||
|
|
+ use_operand_p use_p;
|
||
|
|
+ FOR_EACH_PHI_OR_STMT_USE (use_p, t, iter, SSA_OP_USE)
|
||
|
|
+ {
|
||
|
|
+ tree use = USE_FROM_PTR (use_p);
|
||
|
|
+ if (TREE_CODE (use) == SSA_NAME
|
||
|
|
+ && ! SSA_NAME_IS_DEFAULT_DEF (use))
|
||
|
|
+ bitmap_set_bit (worklist, SSA_NAME_VERSION (use));
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ /* Remove stmt. */
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, "Removing dead stmt:");
|
||
|
|
+ print_gimple_stmt (dump_file, t, 0);
|
||
|
|
+ }
|
||
|
|
+ gimple_stmt_iterator gsi = gsi_for_stmt (t);
|
||
|
|
+ if (gimple_code (t) == GIMPLE_PHI)
|
||
|
|
+ remove_phi_node (&gsi, true);
|
||
|
|
+ else
|
||
|
|
+ {
|
||
|
|
+ gsi_remove (&gsi, true);
|
||
|
|
+ release_defs (t);
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
diff -N -urp a/gcc/tree-ssa-dce.h b/gcc/tree-ssa-dce.h
|
||
|
|
--- a/gcc/tree-ssa-dce.h 1970-01-01 08:00:00.000000000 +0800
|
||
|
|
+++ b/gcc/tree-ssa-dce.h 2018-11-15 16:03:17.463054437 +0800
|
||
|
|
@@ -0,0 +1,22 @@
|
||
|
|
+/* Copyright (C) 2017 Free Software Foundation, Inc.
|
||
|
|
+
|
||
|
|
+This file is part of GCC.
|
||
|
|
+
|
||
|
|
+GCC is free software; you can redistribute it and/or modify it
|
||
|
|
+under the terms of the GNU General Public License as published by the
|
||
|
|
+Free Software Foundation; either version 3, or (at your option) any
|
||
|
|
+later version.
|
||
|
|
+
|
||
|
|
+GCC is distributed in the hope that it will be useful, but WITHOUT
|
||
|
|
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
|
|
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||
|
|
+for more details.
|
||
|
|
+
|
||
|
|
+You should have received a copy of the GNU General Public License
|
||
|
|
+along with GCC; see the file COPYING3. If not see
|
||
|
|
+<http://www.gnu.org/licenses/>. */
|
||
|
|
+
|
||
|
|
+#ifndef TREE_SSA_DCE_H
|
||
|
|
+#define TREE_SSA_DCE_H
|
||
|
|
+extern void simple_dce_from_worklist (bitmap);
|
||
|
|
+#endif
|
||
|
|
diff -N -urp a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c
|
||
|
|
--- a/gcc/tree-ssa-loop-ivcanon.c 2018-11-15 15:54:01.447039800 +0800
|
||
|
|
+++ b/gcc/tree-ssa-loop-ivcanon.c 2018-11-15 16:03:17.467054437 +0800
|
||
|
|
@@ -76,10 +76,13 @@ enum unroll_level
|
||
|
|
};
|
||
|
|
|
||
|
|
/* Adds a canonical induction variable to LOOP iterating NITER times. EXIT
|
||
|
|
- is the exit edge whose condition is replaced. */
|
||
|
|
-
|
||
|
|
-static void
|
||
|
|
-create_canonical_iv (struct loop *loop, edge exit, tree niter)
|
||
|
|
+ is the exit edge whose condition is replaced. The ssa versions of the new
|
||
|
|
+ IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
|
||
|
|
+ if they are not NULL. */
|
||
|
|
+
|
||
|
|
+void
|
||
|
|
+create_canonical_iv (struct loop *loop, edge exit, tree niter,
|
||
|
|
+ tree *var_before = NULL, tree *var_after = NULL)
|
||
|
|
{
|
||
|
|
edge in;
|
||
|
|
tree type, var;
|
||
|
|
@@ -112,7 +115,9 @@ create_canonical_iv (struct loop *loop,
|
||
|
|
create_iv (niter,
|
||
|
|
build_int_cst (type, -1),
|
||
|
|
NULL_TREE, loop,
|
||
|
|
- &incr_at, false, NULL, &var);
|
||
|
|
+ &incr_at, false, var_before, &var);
|
||
|
|
+ if (var_after)
|
||
|
|
+ *var_after = var;
|
||
|
|
|
||
|
|
cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
|
||
|
|
gimple_cond_set_code (cond, cmp);
|
||
|
|
diff -N -urp a/gcc/tree-ssa-loop-ivopts.h b/gcc/tree-ssa-loop-ivopts.h
|
||
|
|
--- a/gcc/tree-ssa-loop-ivopts.h 2018-11-15 15:54:01.447039800 +0800
|
||
|
|
+++ b/gcc/tree-ssa-loop-ivopts.h 2018-11-15 16:03:17.467054437 +0800
|
||
|
|
@@ -33,4 +33,6 @@ bool multiplier_allowed_in_address_p (HO
|
||
|
|
addr_space_t);
|
||
|
|
void tree_ssa_iv_optimize (void);
|
||
|
|
|
||
|
|
+void create_canonical_iv (struct loop *, edge, tree,
|
||
|
|
+ tree * = NULL, tree * = NULL);
|
||
|
|
#endif /* GCC_TREE_SSA_LOOP_IVOPTS_H */
|
||
|
|
diff -N -urp a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c
|
||
|
|
--- a/gcc/tree-ssa-pre.c 2018-11-15 15:54:01.447039800 +0800
|
||
|
|
+++ b/gcc/tree-ssa-pre.c 2018-11-15 16:03:17.471054437 +0800
|
||
|
|
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.
|
||
|
|
#include "gimplify.h"
|
||
|
|
#include "gimple-iterator.h"
|
||
|
|
#include "tree-cfg.h"
|
||
|
|
+#include "tree-ssa-dce.h"
|
||
|
|
#include "tree-ssa-loop.h"
|
||
|
|
#include "tree-into-ssa.h"
|
||
|
|
#include "tree-dfa.h"
|
||
|
|
@@ -4908,99 +4909,6 @@ mark_operand_necessary (tree op)
|
||
|
|
return stmt;
|
||
|
|
}
|
||
|
|
|
||
|
|
-/* Because we don't follow exactly the standard PRE algorithm, and decide not
|
||
|
|
- to insert PHI nodes sometimes, and because value numbering of casts isn't
|
||
|
|
- perfect, we sometimes end up inserting dead code. This simple DCE-like
|
||
|
|
- pass removes any insertions we made that weren't actually used. */
|
||
|
|
-
|
||
|
|
-static void
|
||
|
|
-remove_dead_inserted_code (void)
|
||
|
|
-{
|
||
|
|
- bitmap worklist;
|
||
|
|
- unsigned i;
|
||
|
|
- bitmap_iterator bi;
|
||
|
|
- gimple *t;
|
||
|
|
-
|
||
|
|
- worklist = BITMAP_ALLOC (NULL);
|
||
|
|
- EXECUTE_IF_SET_IN_BITMAP (inserted_exprs, 0, i, bi)
|
||
|
|
- {
|
||
|
|
- t = SSA_NAME_DEF_STMT (ssa_name (i));
|
||
|
|
- if (gimple_plf (t, NECESSARY))
|
||
|
|
- bitmap_set_bit (worklist, i);
|
||
|
|
- }
|
||
|
|
- while (!bitmap_empty_p (worklist))
|
||
|
|
- {
|
||
|
|
- i = bitmap_first_set_bit (worklist);
|
||
|
|
- bitmap_clear_bit (worklist, i);
|
||
|
|
- t = SSA_NAME_DEF_STMT (ssa_name (i));
|
||
|
|
-
|
||
|
|
- /* PHI nodes are somewhat special in that each PHI alternative has
|
||
|
|
- data and control dependencies. All the statements feeding the
|
||
|
|
- PHI node's arguments are always necessary. */
|
||
|
|
- if (gimple_code (t) == GIMPLE_PHI)
|
||
|
|
- {
|
||
|
|
- unsigned k;
|
||
|
|
-
|
||
|
|
- for (k = 0; k < gimple_phi_num_args (t); k++)
|
||
|
|
- {
|
||
|
|
- tree arg = PHI_ARG_DEF (t, k);
|
||
|
|
- if (TREE_CODE (arg) == SSA_NAME)
|
||
|
|
- {
|
||
|
|
- gimple *n = mark_operand_necessary (arg);
|
||
|
|
- if (n)
|
||
|
|
- bitmap_set_bit (worklist, SSA_NAME_VERSION (arg));
|
||
|
|
- }
|
||
|
|
- }
|
||
|
|
- }
|
||
|
|
- else
|
||
|
|
- {
|
||
|
|
- /* Propagate through the operands. Examine all the USE, VUSE and
|
||
|
|
- VDEF operands in this statement. Mark all the statements
|
||
|
|
- which feed this statement's uses as necessary. */
|
||
|
|
- ssa_op_iter iter;
|
||
|
|
- tree use;
|
||
|
|
-
|
||
|
|
- /* The operands of VDEF expressions are also needed as they
|
||
|
|
- represent potential definitions that may reach this
|
||
|
|
- statement (VDEF operands allow us to follow def-def
|
||
|
|
- links). */
|
||
|
|
-
|
||
|
|
- FOR_EACH_SSA_TREE_OPERAND (use, t, iter, SSA_OP_ALL_USES)
|
||
|
|
- {
|
||
|
|
- gimple *n = mark_operand_necessary (use);
|
||
|
|
- if (n)
|
||
|
|
- bitmap_set_bit (worklist, SSA_NAME_VERSION (use));
|
||
|
|
- }
|
||
|
|
- }
|
||
|
|
- }
|
||
|
|
-
|
||
|
|
- EXECUTE_IF_SET_IN_BITMAP (inserted_exprs, 0, i, bi)
|
||
|
|
- {
|
||
|
|
- t = SSA_NAME_DEF_STMT (ssa_name (i));
|
||
|
|
- if (!gimple_plf (t, NECESSARY))
|
||
|
|
- {
|
||
|
|
- gimple_stmt_iterator gsi;
|
||
|
|
-
|
||
|
|
- if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
- {
|
||
|
|
- fprintf (dump_file, "Removing unnecessary insertion:");
|
||
|
|
- print_gimple_stmt (dump_file, t, 0, 0);
|
||
|
|
- }
|
||
|
|
-
|
||
|
|
- gsi = gsi_for_stmt (t);
|
||
|
|
- if (gimple_code (t) == GIMPLE_PHI)
|
||
|
|
- remove_phi_node (&gsi, true);
|
||
|
|
- else
|
||
|
|
- {
|
||
|
|
- gsi_remove (&gsi, true);
|
||
|
|
- release_defs (t);
|
||
|
|
- }
|
||
|
|
- }
|
||
|
|
- }
|
||
|
|
- BITMAP_FREE (worklist);
|
||
|
|
-}
|
||
|
|
-
|
||
|
|
-
|
||
|
|
/* Initialize data structures used by PRE. */
|
||
|
|
|
||
|
|
static void
|
||
|
|
@@ -5142,8 +5050,7 @@ pass_pre::execute (function *fun)
|
||
|
|
statistics_counter_event (fun, "Eliminated", pre_stats.eliminations);
|
||
|
|
|
||
|
|
clear_expression_ids ();
|
||
|
|
- remove_dead_inserted_code ();
|
||
|
|
-
|
||
|
|
+
|
||
|
|
scev_finalize ();
|
||
|
|
fini_pre ();
|
||
|
|
todo |= fini_eliminate ();
|
||
|
|
diff -N -urp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
|
||
|
|
--- a/gcc/tree-vect-loop.c 2018-11-15 15:54:01.447039800 +0800
|
||
|
|
+++ b/gcc/tree-vect-loop.c 2018-11-15 16:03:17.471054437 +0800
|
||
|
|
@@ -2632,6 +2632,112 @@ vect_is_slp_reduction (loop_vec_info loo
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
+/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and
|
||
|
|
+ reduction operation CODE has a handled computation expression. */
|
||
|
|
+
|
||
|
|
+bool
|
||
|
|
+check_reduction_path (location_t loc, loop_p loop, gphi *phi, tree loop_arg,
|
||
|
|
+ enum tree_code code)
|
||
|
|
+{
|
||
|
|
+ auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
|
||
|
|
+ auto_bitmap visited;
|
||
|
|
+ tree lookfor = PHI_RESULT (phi);
|
||
|
|
+ ssa_op_iter curri;
|
||
|
|
+ use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE);
|
||
|
|
+ while (USE_FROM_PTR (curr) != loop_arg)
|
||
|
|
+ curr = op_iter_next_use (&curri);
|
||
|
|
+ curri.i = curri.numops;
|
||
|
|
+ do
|
||
|
|
+ {
|
||
|
|
+ path.safe_push (std::make_pair (curri, curr));
|
||
|
|
+ tree use = USE_FROM_PTR (curr);
|
||
|
|
+ if (use == lookfor)
|
||
|
|
+ break;
|
||
|
|
+ gimple *def = SSA_NAME_DEF_STMT (use);
|
||
|
|
+ if (gimple_nop_p (def)
|
||
|
|
+ || ! flow_bb_inside_loop_p (loop, gimple_bb (def)))
|
||
|
|
+ {
|
||
|
|
+pop:
|
||
|
|
+ do
|
||
|
|
+ {
|
||
|
|
+ std::pair<ssa_op_iter, use_operand_p> x = path.pop ();
|
||
|
|
+ curri = x.first;
|
||
|
|
+ curr = x.second;
|
||
|
|
+ do
|
||
|
|
+ curr = op_iter_next_use (&curri);
|
||
|
|
+ /* Skip already visited or non-SSA operands (from iterating
|
||
|
|
+ over PHI args). */
|
||
|
|
+ while (curr != NULL_USE_OPERAND_P
|
||
|
|
+ && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
|
||
|
|
+ || ! bitmap_set_bit (visited,
|
||
|
|
+ SSA_NAME_VERSION
|
||
|
|
+ (USE_FROM_PTR (curr)))));
|
||
|
|
+ }
|
||
|
|
+ while (curr == NULL_USE_OPERAND_P && ! path.is_empty ());
|
||
|
|
+ if (curr == NULL_USE_OPERAND_P)
|
||
|
|
+ break;
|
||
|
|
+ }
|
||
|
|
+ else
|
||
|
|
+ {
|
||
|
|
+ if (gimple_code (def) == GIMPLE_PHI)
|
||
|
|
+ curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE);
|
||
|
|
+ else
|
||
|
|
+ curr = op_iter_init_use (&curri, def, SSA_OP_USE);
|
||
|
|
+ while (curr != NULL_USE_OPERAND_P
|
||
|
|
+ && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
|
||
|
|
+ || ! bitmap_set_bit (visited,
|
||
|
|
+ SSA_NAME_VERSION
|
||
|
|
+ (USE_FROM_PTR (curr)))))
|
||
|
|
+ curr = op_iter_next_use (&curri);
|
||
|
|
+ if (curr == NULL_USE_OPERAND_P)
|
||
|
|
+ goto pop;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ while (1);
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ {
|
||
|
|
+ dump_printf_loc (MSG_NOTE, loc, "reduction path: ");
|
||
|
|
+ unsigned i;
|
||
|
|
+ std::pair<ssa_op_iter, use_operand_p> *x;
|
||
|
|
+ FOR_EACH_VEC_ELT (path, i, x)
|
||
|
|
+ {
|
||
|
|
+ dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second));
|
||
|
|
+ dump_printf (MSG_NOTE, " ");
|
||
|
|
+ }
|
||
|
|
+ dump_printf (MSG_NOTE, "\n");
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ /* Check whether the reduction path detected is valid. */
|
||
|
|
+ bool fail = path.length () == 0;
|
||
|
|
+ bool neg = false;
|
||
|
|
+ for (unsigned i = 1; i < path.length (); ++i)
|
||
|
|
+ {
|
||
|
|
+ gimple *use_stmt = USE_STMT (path[i].second);
|
||
|
|
+ tree op = USE_FROM_PTR (path[i].second);
|
||
|
|
+ if (! has_single_use (op)
|
||
|
|
+ || ! is_gimple_assign (use_stmt))
|
||
|
|
+ {
|
||
|
|
+ fail = true;
|
||
|
|
+ break;
|
||
|
|
+ }
|
||
|
|
+ if (gimple_assign_rhs_code (use_stmt) != code)
|
||
|
|
+ {
|
||
|
|
+ if (code == PLUS_EXPR
|
||
|
|
+ && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
|
||
|
|
+ {
|
||
|
|
+ /* Track whether we negate the reduction value each iteration. */
|
||
|
|
+ if (gimple_assign_rhs2 (use_stmt) == op)
|
||
|
|
+ neg = ! neg;
|
||
|
|
+ }
|
||
|
|
+ else
|
||
|
|
+ {
|
||
|
|
+ fail = true;
|
||
|
|
+ break;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ return ! fail && ! neg;
|
||
|
|
+}
|
||
|
|
|
||
|
|
/* Function vect_is_simple_reduction_1
|
||
|
|
|
||
|
|
diff -N -urp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
|
||
|
|
--- a/gcc/tree-vectorizer.h 2018-11-15 15:54:01.451039800 +0800
|
||
|
|
+++ b/gcc/tree-vectorizer.h 2018-11-15 16:03:17.475054437 +0800
|
||
|
|
@@ -1166,6 +1166,9 @@ extern tree vect_create_addr_base_for_ve
|
||
|
|
extern void destroy_loop_vec_info (loop_vec_info, bool);
|
||
|
|
extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, bool,
|
||
|
|
bool *, bool);
|
||
|
|
+/* Used in gimple-loop-interchange.c. */
|
||
|
|
+extern bool check_reduction_path (location_t, loop_p, gphi *, tree,
|
||
|
|
+ enum tree_code);
|
||
|
|
/* Drive for loop analysis stage. */
|
||
|
|
extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info);
|
||
|
|
extern tree vect_build_loop_niters (loop_vec_info);
|