1001 lines
30 KiB
Diff
1001 lines
30 KiB
Diff
|
|
From 6b944bed1158d3454b1db27aeab4ec1f2b8e5866 Mon Sep 17 00:00:00 2001
|
||
|
|
From: huangxiaoquan <huangxiaoquan1@huawei.com>
|
||
|
|
Date: Thu, 27 Jan 2022 18:24:53 +0800
|
||
|
|
Subject: [PATCH 27/28] [Autoprefetch] Support auto feedback prefetch
|
||
|
|
|
||
|
|
1. Add option -fprefetch-loop-arrays=[value].
|
||
|
|
|
||
|
|
2. A prefetch distance analysis algorithm based on branch weight
|
||
|
|
is proposed to improve the accuracy of prefetch distance.
|
||
|
|
|
||
|
|
3. Propose automatic feedback prefetching:
|
||
|
|
use the cache-miss profile information to guide the insertion of
|
||
|
|
prefetching instructions.
|
||
|
|
---
|
||
|
|
gcc/auto-profile.c | 5 +-
|
||
|
|
gcc/common.opt | 5 +
|
||
|
|
gcc/opts.c | 7 +
|
||
|
|
gcc/params.opt | 16 +
|
||
|
|
gcc/tree-ssa-loop-prefetch.c | 735 ++++++++++++++++++++++++++++++++++-
|
||
|
|
5 files changed, 748 insertions(+), 20 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
|
||
|
|
index e6164b91b..f221978fc 100644
|
||
|
|
--- a/gcc/auto-profile.c
|
||
|
|
+++ b/gcc/auto-profile.c
|
||
|
|
@@ -21,6 +21,8 @@ along with GCC; see the file COPYING3. If not see
|
||
|
|
#include "config.h"
|
||
|
|
#define INCLUDE_MAP
|
||
|
|
#define INCLUDE_SET
|
||
|
|
+#define INCLUDE_ALGORITHM
|
||
|
|
+#define INCLUDE_VECTOR
|
||
|
|
#include "system.h"
|
||
|
|
#include "coretypes.h"
|
||
|
|
#include "backend.h"
|
||
|
|
@@ -49,9 +51,6 @@ along with GCC; see the file COPYING3. If not see
|
||
|
|
#include "auto-profile.h"
|
||
|
|
#include "tree-pretty-print.h"
|
||
|
|
#include "gimple-pretty-print.h"
|
||
|
|
-#include <map>
|
||
|
|
-#include <vector>
|
||
|
|
-#include <algorithm>
|
||
|
|
|
||
|
|
/* The following routines implements AutoFDO optimization.
|
||
|
|
|
||
|
|
diff --git a/gcc/common.opt b/gcc/common.opt
|
||
|
|
index 37cbbd8c0..9488bd90f 100644
|
||
|
|
--- a/gcc/common.opt
|
||
|
|
+++ b/gcc/common.opt
|
||
|
|
@@ -2201,6 +2201,11 @@ fprefetch-loop-arrays
|
||
|
|
Common Report Var(flag_prefetch_loop_arrays) Init(-1) Optimization
|
||
|
|
Generate prefetch instructions, if available, for arrays in loops.
|
||
|
|
|
||
|
|
+fprefetch-loop-arrays=
|
||
|
|
+Common Joined RejectNegative UInteger Var(prefetch_level) Init(0) IntegerRange(0, 3)
|
||
|
|
+Generate prefetch instructions, if available, for arrays in loops. The prefetch
|
||
|
|
+level (0-3) selects the analysis mode used for array prefetching.
|
||
|
|
+
|
||
|
|
fprofile
|
||
|
|
Common Report Var(profile_flag)
|
||
|
|
Enable basic program profiling code.
|
||
|
|
diff --git a/gcc/opts.c b/gcc/opts.c
|
||
|
|
index 7a39f618b..f49f5ee58 100644
|
||
|
|
--- a/gcc/opts.c
|
||
|
|
+++ b/gcc/opts.c
|
||
|
|
@@ -1747,6 +1747,8 @@ set_cache_misses_profile_params (struct gcc_options *opts,
|
||
|
|
struct gcc_options *opts_set)
|
||
|
|
{
|
||
|
|
SET_OPTION_IF_UNSET (opts, opts_set, flag_prefetch_loop_arrays, 1);
|
||
|
|
+ SET_OPTION_IF_UNSET (opts, opts_set, prefetch_level, 2);
|
||
|
|
+ SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, 100);
|
||
|
|
}
|
||
|
|
|
||
|
|
/* -f{,no-}sanitize{,-recover}= suboptions. */
|
||
|
|
@@ -2645,6 +2647,11 @@ common_handle_option (struct gcc_options *opts,
|
||
|
|
SET_OPTION_IF_UNSET (opts, opts_set, flag_ipa_bit_cp, value);
|
||
|
|
break;
|
||
|
|
|
||
|
|
+ case OPT_fprefetch_loop_arrays_:
|
||
|
|
+ opts->x_prefetch_level = value;
|
||
|
|
+ opts->x_flag_prefetch_loop_arrays = true;
|
||
|
|
+ break;
|
||
|
|
+
|
||
|
|
case OPT_fpatchable_function_entry_:
|
||
|
|
{
|
||
|
|
char *patch_area_arg = xstrdup (arg);
|
||
|
|
diff --git a/gcc/params.opt b/gcc/params.opt
|
||
|
|
index 2db69cc87..9d1faa7ab 100644
|
||
|
|
--- a/gcc/params.opt
|
||
|
|
+++ b/gcc/params.opt
|
||
|
|
@@ -968,4 +968,20 @@ Bound on number of runtime checks inserted by the vectorizer's loop versioning f
|
||
|
|
Common Joined UInteger Var(param_vect_max_version_for_alignment_checks) Init(6) Param Optimization
|
||
|
|
Bound on number of runtime checks inserted by the vectorizer's loop versioning for alignment check.
|
||
|
|
|
||
|
|
+-param=param-prefetch-func-topn=
|
||
|
|
+Common Joined UInteger Var(param_prefetch_func_topn) Init(3) Param Optimization
|
||
|
|
+Number of functions with the highest cache miss counts to be analyzed in prefetching.
|
||
|
|
+
|
||
|
|
+-param=param-prefetch-ref-topn=
|
||
|
|
+Common Joined UInteger Var(param_prefetch_ref_topn) Init(5) Param Optimization
|
||
|
|
+Number of memory references with the highest cache miss counts to be analyzed in prefetching.
|
||
|
|
+
|
||
|
|
+-param=param-high-loop-execution-rate=
|
||
|
|
+Common Joined UInteger Var(param_high_loop_execution_rate) Init(95) IntegerRange(0, 100) Param Optimization
|
||
|
|
+Minimum execution rate (in %) for a loop to be analyzed in prefetching.
|
||
|
|
+
|
||
|
|
+-param=param-prefetch-func-counts-threshold=
|
||
|
|
+Common Joined UInteger Var(param_prefetch_func_counts_threshold) Init(100) Param Optimization
|
||
|
|
+Cache miss count threshold for functions to be analyzed in prefetching.
|
||
|
|
+
|
||
|
|
; This comment is to ensure we retain the blank line above.
|
||
|
|
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
|
||
|
|
index d19ece641..3a5aef0fc 100644
|
||
|
|
--- a/gcc/tree-ssa-loop-prefetch.c
|
||
|
|
+++ b/gcc/tree-ssa-loop-prefetch.c
|
||
|
|
@@ -18,6 +18,9 @@ along with GCC; see the file COPYING3. If not see
|
||
|
|
<http://www.gnu.org/licenses/>. */
|
||
|
|
|
||
|
|
#include "config.h"
|
||
|
|
+#define INCLUDE_ALGORITHM
|
||
|
|
+#define INCLUDE_MAP
|
||
|
|
+#define INCLUDE_VECTOR
|
||
|
|
#include "system.h"
|
||
|
|
#include "coretypes.h"
|
||
|
|
#include "backend.h"
|
||
|
|
@@ -48,6 +51,11 @@ along with GCC; see the file COPYING3. If not see
|
||
|
|
#include "tree-data-ref.h"
|
||
|
|
#include "diagnostic-core.h"
|
||
|
|
#include "dbgcnt.h"
|
||
|
|
+#include "gimple-pretty-print.h"
|
||
|
|
+#include "tree-cfg.h"
|
||
|
|
+#include "auto-profile.h"
|
||
|
|
+#include "cgraph.h"
|
||
|
|
+#include "print-tree.h"
|
||
|
|
|
||
|
|
/* This pass inserts prefetch instructions to optimize cache usage during
|
||
|
|
accesses to arrays in loops. It processes loops sequentially and:
|
||
|
|
@@ -253,6 +261,22 @@ struct mem_ref_group
|
||
|
|
#define PREFETCH_MAX_MEM_REFS_PER_LOOP 200
|
||
|
|
#endif
|
||
|
|
|
||
|
|
+#ifndef PREFETCH_FUNC_TOPN
|
||
|
|
+#define PREFETCH_FUNC_TOPN param_prefetch_func_topn
|
||
|
|
+#endif
|
||
|
|
+
|
||
|
|
+#ifndef PREFETCH_FUNC_COUNTS_THRESHOLD
|
||
|
|
+#define PREFETCH_FUNC_COUNTS_THRESHOLD param_prefetch_func_counts_threshold
|
||
|
|
+#endif
|
||
|
|
+
|
||
|
|
+#ifndef PREFETCH_REF_TOPN
|
||
|
|
+#define PREFETCH_REF_TOPN param_prefetch_ref_topn
|
||
|
|
+#endif
|
||
|
|
+
|
||
|
|
+#ifndef LOOP_EXECUTION_RATE
|
||
|
|
+#define LOOP_EXECUTION_RATE param_high_loop_execution_rate
|
||
|
|
+#endif
|
||
|
|
+
|
||
|
|
/* The memory reference. */
|
||
|
|
|
||
|
|
struct mem_ref
|
||
|
|
@@ -279,6 +303,131 @@ struct mem_ref
|
||
|
|
nontemporal one. */
|
||
|
|
};
|
||
|
|
|
||
|
|
+/* Probability information of basic blocks and branches. */
|
||
|
|
+struct bb_bp
|
||
|
|
+{
|
||
|
|
+ basic_block bb;
|
||
|
|
+ basic_block true_edge_bb;
|
||
|
|
+ basic_block false_edge_bb;
|
||
|
|
+ float true_edge_prob;
|
||
|
|
+ float false_edge_prob;
|
||
|
|
+ float bb_prob;
|
||
|
|
+};
|
||
|
|
+
|
||
|
|
+typedef struct bb_bp bb_bp;
|
||
|
|
+
|
||
|
|
+enum PREFETCH_MODE
|
||
|
|
+{
|
||
|
|
+ ORIGINAL_MODE=0, /* Original prefetch method. */
|
||
|
|
+ REFINE_BB_AHEAD,
|
||
|
|
+ /* Prefetch distance algorithm for removing
|
||
|
|
+ irrelevant bb. */
|
||
|
|
+ BRANCH_WEIGHTED_AHEAD,
|
||
|
|
+ /* Branch weighted prefetch
|
||
|
|
+ distance algorithm. */
|
||
|
|
+ INDIRECT_MODE /* Indirect array prefetch mode. */
|
||
|
|
+};
|
||
|
|
+
|
||
|
|
+typedef std::map <unsigned int, unsigned int> uid_rank_map;
|
||
|
|
+typedef std::map <location_t, unsigned int> loc_rank_map;
|
||
|
|
+typedef std::vector <std::pair<location_t, gcov_type> > loc_gcov_type_vec;
|
||
|
|
+typedef std::map <location_t, std::vector<gimple *> > loc_gimple_vec_map;
|
||
|
|
+
|
||
|
|
+static loc_rank_map ref_rank;
|
||
|
|
+
|
||
|
|
+/* Callback function for event_count comparison. */
|
||
|
|
+
|
||
|
|
+static bool
|
||
|
|
+event_count_cmp (std::pair<unsigned int, gcov_type> &a,
|
||
|
|
+ std::pair<unsigned int, gcov_type> &b)
|
||
|
|
+{
|
||
|
|
+ return a.second > b.second;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Prepare mappings from location to counts and from location
|
||
|
|
+ to stmt list. */
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+prepare_loc_count_info (function *fun, loc_gcov_type_vec &ref_sorted,
|
||
|
|
+ loc_gimple_vec_map &loc_stmt, event_type event)
|
||
|
|
+{
|
||
|
|
+ basic_block bb = NULL;
|
||
|
|
+ gimple_stmt_iterator bsi;
|
||
|
|
+ gimple *stmt;
|
||
|
|
+ tree lhs = NULL_TREE;
|
||
|
|
+ tree rhs = NULL_TREE;
|
||
|
|
+
|
||
|
|
+ FOR_EACH_BB_FN (bb, fun)
|
||
|
|
+ {
|
||
|
|
+ for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
|
||
|
|
+ {
|
||
|
|
+ stmt = gsi_stmt (bsi);
|
||
|
|
+ if (gimple_code (stmt) != GIMPLE_ASSIGN)
|
||
|
|
+ {
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
+ if (!gimple_vuse (stmt))
|
||
|
|
+ {
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
+ lhs = gimple_assign_lhs (stmt);
|
||
|
|
+ rhs = gimple_assign_rhs1 (stmt);
|
||
|
|
+ if (REFERENCE_CLASS_P (rhs) || REFERENCE_CLASS_P (lhs))
|
||
|
|
+ {
|
||
|
|
+ gcov_type loc_count =
|
||
|
|
+ event_get_loc_count (gimple_location (stmt), event);
|
||
|
|
+ if (loc_count > 0)
|
||
|
|
+ {
|
||
|
|
+ /* There may be multiple gimple correspond to the same
|
||
|
|
+ location. */
|
||
|
|
+ if (loc_stmt.count (gimple_location (stmt)) == 0)
|
||
|
|
+ {
|
||
|
|
+ ref_sorted.push_back (std::make_pair (gimple_location (stmt),
|
||
|
|
+ loc_count));
|
||
|
|
+ }
|
||
|
|
+ loc_stmt[gimple_location (stmt)].push_back (stmt);
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Sort references by event_count and dump loc count information after
|
||
|
|
+ sorting. */
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+sort_ref_by_event_count (function *fun, event_type event)
|
||
|
|
+{
|
||
|
|
+ loc_gcov_type_vec ref_sorted;
|
||
|
|
+ loc_gimple_vec_map loc_stmt;
|
||
|
|
+
|
||
|
|
+ prepare_loc_count_info (fun, ref_sorted, loc_stmt, event);
|
||
|
|
+ sort (ref_sorted.begin (), ref_sorted.end (), event_count_cmp);
|
||
|
|
+
|
||
|
|
+ for (unsigned i = 0; i < ref_sorted.size (); ++i)
|
||
|
|
+ {
|
||
|
|
+ ref_rank[ref_sorted[i].first] = i + 1;
|
||
|
|
+ /* Print the stmt and count of the topn ref. */
|
||
|
|
+ if (i < PREFETCH_REF_TOPN && dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, "stmt: \n");
|
||
|
|
+ for (unsigned j = 0; j < loc_stmt[ref_sorted[i].first].size ();
|
||
|
|
+ ++j)
|
||
|
|
+ {
|
||
|
|
+ print_gimple_stmt (dump_file,
|
||
|
|
+ loc_stmt[ref_sorted[i].first][j], 0);
|
||
|
|
+ }
|
||
|
|
+ gcov_type loc_count =
|
||
|
|
+ event_get_loc_count (ref_sorted[i].first, event);
|
||
|
|
+ fprintf (dump_file, "stmt loc %u counts is %lu: "
|
||
|
|
+ "rank %d in top %d, (candidate analysis)\n\n",
|
||
|
|
+ ref_sorted[i].first, loc_count,
|
||
|
|
+ ref_rank[ref_sorted[i].first], PREFETCH_REF_TOPN);
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ return;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
/* Dumps information about memory reference */
|
||
|
|
static void
|
||
|
|
dump_mem_details (FILE *file, tree base, tree step,
|
||
|
|
@@ -479,6 +628,30 @@ idx_analyze_ref (tree base, tree *index, void *data)
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
+/* Dumps information about ar_data structure. */
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+dump_ar_data_details (FILE *file, tree ref, struct ar_data &ar_data)
|
||
|
|
+{
|
||
|
|
+ print_generic_expr (file, ref, TDF_SLIM);
|
||
|
|
+ fprintf (file, "\n");
|
||
|
|
+ if (*(ar_data.step))
|
||
|
|
+ {
|
||
|
|
+ fprintf (file, " step ");
|
||
|
|
+ if (cst_and_fits_in_hwi (*(ar_data.step)))
|
||
|
|
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC,
|
||
|
|
+ int_cst_value (*(ar_data.step)));
|
||
|
|
+ else
|
||
|
|
+ print_generic_expr (file, *(ar_data.step), TDF_SLIM);
|
||
|
|
+ }
|
||
|
|
+ fprintf (file, "\n");
|
||
|
|
+ if (*(ar_data.delta))
|
||
|
|
+ {
|
||
|
|
+ fprintf (file, " delta " HOST_WIDE_INT_PRINT_DEC "\n",
|
||
|
|
+ *(ar_data.delta));
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
/* Tries to express REF_P in shape &BASE + STEP * iter + DELTA, where DELTA and
|
||
|
|
STEP are integer constants and iter is number of iterations of LOOP. The
|
||
|
|
reference occurs in statement STMT. Strips nonaddressable component
|
||
|
|
@@ -526,7 +699,17 @@ analyze_ref (class loop *loop, tree *ref_p, tree *base,
|
||
|
|
ar_data.stmt = stmt;
|
||
|
|
ar_data.step = step;
|
||
|
|
ar_data.delta = delta;
|
||
|
|
- return for_each_index (base, idx_analyze_ref, &ar_data);
|
||
|
|
+
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ {
|
||
|
|
+ dump_ar_data_details (dump_file, ref, ar_data);
|
||
|
|
+ }
|
||
|
|
+ bool idx_flag = for_each_index (base, idx_analyze_ref, &ar_data);
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, "idx_flag = %d \n\n", idx_flag);
|
||
|
|
+ }
|
||
|
|
+ return idx_flag;
|
||
|
|
}
|
||
|
|
|
||
|
|
/* Record a memory reference REF to the list REFS. The reference occurs in
|
||
|
|
@@ -601,6 +784,55 @@ gather_memory_references_ref (class loop *loop, struct mem_ref_group **refs,
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
+/* Determine whether to collect the memory references based on the
|
||
|
|
+ ranking of ref cache miss counts. */
|
||
|
|
+
|
||
|
|
+static bool
|
||
|
|
+should_gather_memory_references (gimple *stmt)
|
||
|
|
+{
|
||
|
|
+ if (!(profile_exist (CACHE_MISSES)))
|
||
|
|
+ {
|
||
|
|
+ return true;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, "stmt:");
|
||
|
|
+ print_gimple_stmt (dump_file, stmt, 0);
|
||
|
|
+ fprintf (dump_file, "\n");
|
||
|
|
+ }
|
||
|
|
+ if (ref_rank.count (gimple_location (stmt)) == 0)
|
||
|
|
+ {
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, "stmt location no found, skip prefetch "
|
||
|
|
+ "analysis\n");
|
||
|
|
+ }
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ gcov_type loc_count = event_get_loc_count (gimple_location (stmt), CACHE_MISSES);
|
||
|
|
+ if (ref_rank[gimple_location (stmt)] > PREFETCH_REF_TOPN)
|
||
|
|
+ {
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, "stmt loc %u counts is %lu:"
|
||
|
|
+ "rank %d exceed topn %d, skip prefetch "
|
||
|
|
+ "analysis\n",
|
||
|
|
+ gimple_location (stmt), loc_count,
|
||
|
|
+ ref_rank[gimple_location (stmt)], PREFETCH_REF_TOPN);
|
||
|
|
+ }
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, "stmt loc %u counts is %lu: rank %d in top %d,"
|
||
|
|
+ "continue prefetch analysis\n",
|
||
|
|
+ gimple_location (stmt), loc_count,
|
||
|
|
+ ref_rank[gimple_location (stmt)], PREFETCH_REF_TOPN);
|
||
|
|
+ }
|
||
|
|
+ return true;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
/* Record the suitable memory references in LOOP. NO_OTHER_REFS is set to
|
||
|
|
true if there are no other memory references inside the loop. */
|
||
|
|
|
||
|
|
@@ -626,6 +858,13 @@ gather_memory_references (class loop *loop, bool *no_other_refs, unsigned *ref_c
|
||
|
|
if (bb->loop_father != loop)
|
||
|
|
continue;
|
||
|
|
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, "==== the %dth loop bb body ====\n", i);
|
||
|
|
+ gimple_dump_bb (dump_file, bb, 0, dump_flags);
|
||
|
|
+ fprintf (dump_file, "\n");
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
|
||
|
|
{
|
||
|
|
stmt = gsi_stmt (bsi);
|
||
|
|
@@ -642,20 +881,31 @@ gather_memory_references (class loop *loop, bool *no_other_refs, unsigned *ref_c
|
||
|
|
if (! gimple_vuse (stmt))
|
||
|
|
continue;
|
||
|
|
|
||
|
|
+ if (!should_gather_memory_references (stmt))
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
lhs = gimple_assign_lhs (stmt);
|
||
|
|
rhs = gimple_assign_rhs1 (stmt);
|
||
|
|
|
||
|
|
if (REFERENCE_CLASS_P (rhs))
|
||
|
|
{
|
||
|
|
- *no_other_refs &= gather_memory_references_ref (loop, &refs,
|
||
|
|
- rhs, false, stmt);
|
||
|
|
- *ref_count += 1;
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, "====> the %dth ref \n", *ref_count);
|
||
|
|
+ }
|
||
|
|
+ *no_other_refs &= gather_memory_references_ref (loop, &refs, rhs,
|
||
|
|
+ false, stmt);
|
||
|
|
+ *ref_count += 1;
|
||
|
|
}
|
||
|
|
if (REFERENCE_CLASS_P (lhs))
|
||
|
|
{
|
||
|
|
- *no_other_refs &= gather_memory_references_ref (loop, &refs,
|
||
|
|
- lhs, true, stmt);
|
||
|
|
- *ref_count += 1;
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, "====> the %dth ref \n", *ref_count);
|
||
|
|
+ }
|
||
|
|
+ *no_other_refs &= gather_memory_references_ref (loop, &refs, lhs,
|
||
|
|
+ true, stmt);
|
||
|
|
+ *ref_count += 1;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
@@ -1168,9 +1418,9 @@ issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead)
|
||
|
|
bool nontemporal = ref->reuse_distance >= L2_CACHE_SIZE_BYTES;
|
||
|
|
|
||
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
- fprintf (dump_file, "Issued%s prefetch for reference %u:%u.\n",
|
||
|
|
- nontemporal ? " nontemporal" : "",
|
||
|
|
- ref->group->uid, ref->uid);
|
||
|
|
+ fprintf (dump_file, "Issued%s prefetch for reference %u:%u.\n",
|
||
|
|
+ nontemporal ? " nontemporal" : "",
|
||
|
|
+ ref->group->uid, ref->uid);
|
||
|
|
|
||
|
|
bsi = gsi_for_stmt (ref->stmt);
|
||
|
|
|
||
|
|
@@ -1875,6 +2125,306 @@ insn_to_prefetch_ratio_too_small_p (unsigned ninsns, unsigned prefetch_count,
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
+/* Return the probability of edge E as a float (1 if uninitialized). */
|
||
|
|
+
|
||
|
|
+static float
|
||
|
|
+get_edge_prob (edge e)
|
||
|
|
+{
|
||
|
|
+ /* Limit the minimum probability value. */
|
||
|
|
+ const float MINNUM_PROB = 0.00001f;
|
||
|
|
+ float fvalue = 1;
|
||
|
|
+
|
||
|
|
+ profile_probability probability = e->probability;
|
||
|
|
+ if (probability.initialized_p ())
|
||
|
|
+ {
|
||
|
|
+ fvalue = probability.to_reg_br_prob_base () / float (REG_BR_PROB_BASE);
|
||
|
|
+ if (fvalue < MINNUM_PROB && probability.to_reg_br_prob_base ())
|
||
|
|
+ {
|
||
|
|
+ fvalue = MINNUM_PROB;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ return fvalue;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+
|
||
|
|
+/* Dump the bb information in a loop. */
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+dump_loop_bb (struct loop *loop)
|
||
|
|
+{
|
||
|
|
+ basic_block *body = get_loop_body_in_dom_order (loop);
|
||
|
|
+ basic_block bb = NULL;
|
||
|
|
+
|
||
|
|
+ for (unsigned i = 0; i < loop->num_nodes; i++)
|
||
|
|
+ {
|
||
|
|
+ bb = body[i];
|
||
|
|
+ if (bb->loop_father != loop)
|
||
|
|
+ {
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, "===== the %dth loop bb body ======= \n", i);
|
||
|
|
+ gimple_dump_bb (dump_file, bb, 0, dump_flags);
|
||
|
|
+ fprintf (dump_file, "\n");
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ free (body);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+
|
||
|
|
+/* Obtain the branch probability information of each basic block
|
||
|
|
+ in the loop. */
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+get_bb_branch_prob (hash_map <basic_block, bb_bp> &bb_branch_prob,
|
||
|
|
+ struct loop *loop)
|
||
|
|
+{
|
||
|
|
+ basic_block *body = get_loop_body (loop);
|
||
|
|
+ basic_block bb = NULL;
|
||
|
|
+ for (unsigned i = 0; i < loop->num_nodes; i++)
|
||
|
|
+ {
|
||
|
|
+ bb = body[i];
|
||
|
|
+ if (bb->loop_father != loop)
|
||
|
|
+ {
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
+ bb_bp &branch_prob = bb_branch_prob.get_or_insert (bb);
|
||
|
|
+ branch_prob.bb = bb;
|
||
|
|
+ branch_prob.true_edge_bb = NULL;
|
||
|
|
+ branch_prob.false_edge_bb = NULL;
|
||
|
|
+ branch_prob.true_edge_prob = 0;
|
||
|
|
+ branch_prob.false_edge_prob = 0;
|
||
|
|
+ branch_prob.bb_prob = 0;
|
||
|
|
+
|
||
|
|
+ gimple *stmt = last_stmt (bb);
|
||
|
|
+ if (stmt && gimple_code (stmt) == GIMPLE_COND)
|
||
|
|
+ {
|
||
|
|
+ if (EDGE_COUNT (bb->succs) != 2)
|
||
|
|
+ {
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ fprintf (dump_file, "The number of successful edges of bb"
|
||
|
|
+ "is abnormal\n");
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
+ edge true_edge = NULL;
|
||
|
|
+ edge false_edge = NULL;
|
||
|
|
+ extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
|
||
|
|
+
|
||
|
|
+ /* If it is exiting bb, and the destination bb of the edge does not
|
||
|
|
+ belong to the current loop, the information of the edge is not
|
||
|
|
+ recorded. */
|
||
|
|
+ if (true_edge->dest->loop_father == loop)
|
||
|
|
+ {
|
||
|
|
+ branch_prob.true_edge_bb = true_edge->dest;
|
||
|
|
+ branch_prob.true_edge_prob = get_edge_prob (true_edge);
|
||
|
|
+ }
|
||
|
|
+ if (false_edge->dest->loop_father == loop)
|
||
|
|
+ {
|
||
|
|
+ branch_prob.false_edge_bb = false_edge->dest;
|
||
|
|
+ branch_prob.false_edge_prob = get_edge_prob (false_edge);
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ edge e = find_fallthru_edge (bb->succs);
|
||
|
|
+ if (e)
|
||
|
|
+ {
|
||
|
|
+ branch_prob.true_edge_bb = e->dest;
|
||
|
|
+ branch_prob.true_edge_prob = get_edge_prob (e);
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Traverse each bb in the loop and prune fake loops. */
|
||
|
|
+
|
||
|
|
+static bool
|
||
|
|
+traverse_prune_bb_branch (hash_map <basic_block, bb_bp> &bb_branch_prob,
|
||
|
|
+ int& max_path, hash_set <basic_block> &path_node,
|
||
|
|
+ basic_block current_bb, basic_block latch_bb)
|
||
|
|
+{
|
||
|
|
+ /* Limit the maximum number of analysis paths. */
|
||
|
|
+ if (max_path <= 0 || current_bb == NULL)
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ /* Do not join edges that do not form a complete loop. */
|
||
|
|
+ bb_bp *bb_bp_node = bb_branch_prob.get (current_bb);
|
||
|
|
+ if (bb_bp_node == NULL || (bb_bp_node->true_edge_bb == NULL
|
||
|
|
+ && bb_bp_node->false_edge_bb == NULL))
|
||
|
|
+ return false;
|
||
|
|
+
|
||
|
|
+ if (current_bb == latch_bb)
|
||
|
|
+ {
|
||
|
|
+ max_path--;
|
||
|
|
+ return true;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ /* Do not join edges that return to non-dominate nodes. */
|
||
|
|
+ if (path_node.contains (bb_bp_node->true_edge_bb)
|
||
|
|
+ || path_node.contains (bb_bp_node->false_edge_bb))
|
||
|
|
+ {
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ fprintf (dump_file, "fake loop: in bb%d\n", current_bb->index);
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ path_node.add (current_bb);
|
||
|
|
+ if (bb_bp_node->true_edge_bb)
|
||
|
|
+ {
|
||
|
|
+ if (traverse_prune_bb_branch (bb_branch_prob, max_path,
|
||
|
|
+ path_node, bb_bp_node->true_edge_bb, latch_bb) == false)
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ if (bb_bp_node->false_edge_bb)
|
||
|
|
+ {
|
||
|
|
+ if (traverse_prune_bb_branch (bb_branch_prob, max_path,
|
||
|
|
+ path_node, bb_bp_node->false_edge_bb, latch_bb) == false)
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ path_node.remove (current_bb);
|
||
|
|
+
|
||
|
|
+ max_path--;
|
||
|
|
+ return true;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Traverse and calculate the probability of basic block. */
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+traverse_calculate_bb_prob (hash_map <basic_block, bb_bp> &bb_branch_prob,
|
||
|
|
+ basic_block current_bb, basic_block latch_bb,
|
||
|
|
+ float prob)
|
||
|
|
+{
|
||
|
|
+ /* Limit bb block access probability, the probability is
|
||
|
|
+ less than 100% and include delta. */
|
||
|
|
+ const float MAX_BB_PROBABILITY = 1.001f;
|
||
|
|
+
|
||
|
|
+ if (current_bb == NULL)
|
||
|
|
+ {
|
||
|
|
+ return;
|
||
|
|
+ }
|
||
|
|
+ bb_bp *bb_bp_node = bb_branch_prob.get (current_bb);
|
||
|
|
+ bb_bp_node->bb_prob += prob;
|
||
|
|
+
|
||
|
|
+ gcc_assert (bb_bp_node->bb_prob <= MAX_BB_PROBABILITY);
|
||
|
|
+
|
||
|
|
+ if (bb_bp_node == NULL || (bb_bp_node->true_edge_bb == NULL
|
||
|
|
+ && bb_bp_node->false_edge_bb == NULL))
|
||
|
|
+ {
|
||
|
|
+ return;
|
||
|
|
+ }
|
||
|
|
+ if (current_bb == latch_bb)
|
||
|
|
+ {
|
||
|
|
+ return;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ bool assign = (bb_bp_node->true_edge_bb && bb_bp_node->false_edge_bb);
|
||
|
|
+ if (bb_bp_node->true_edge_bb)
|
||
|
|
+ {
|
||
|
|
+ float assign_prob = assign ? bb_bp_node->true_edge_prob * prob : prob;
|
||
|
|
+ traverse_calculate_bb_prob (bb_branch_prob,
|
||
|
|
+ bb_bp_node->true_edge_bb, latch_bb, assign_prob);
|
||
|
|
+ }
|
||
|
|
+ if (bb_bp_node->false_edge_bb)
|
||
|
|
+ {
|
||
|
|
+ float assign_prob = assign ? bb_bp_node->false_edge_prob * prob : prob;
|
||
|
|
+ traverse_calculate_bb_prob (bb_branch_prob,
|
||
|
|
+ bb_bp_node->false_edge_bb, latch_bb, assign_prob);
|
||
|
|
+ }
|
||
|
|
+ return;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Obtain the probability of basic block. */
|
||
|
|
+
|
||
|
|
+static bool
|
||
|
|
+get_bb_prob (hash_map <basic_block, bb_bp> &bb_branch_prob, struct loop *loop)
|
||
|
|
+{
|
||
|
|
+ /* The upper limit of the branch path in the loop is 10000. */
|
||
|
|
+ const int MAX_BB_BRANCH_PATH = 10000;
|
||
|
|
+
|
||
|
|
+ if (loop->header == NULL || loop->latch == NULL
|
||
|
|
+ || loop->header == loop->latch)
|
||
|
|
+ {
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ fprintf (dump_file, "get_bb_prob failed: without the header bb or "
|
||
|
|
+ "latch bb\n");
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ bb_bp *latch_branch_prob = bb_branch_prob.get (loop->latch);
|
||
|
|
+ bb_bp *header_branch_prob = bb_branch_prob.get (loop->header);
|
||
|
|
+ if (header_branch_prob == NULL || latch_branch_prob == NULL
|
||
|
|
+ || (latch_branch_prob->true_edge_bb != header_branch_prob->bb
|
||
|
|
+ && latch_branch_prob->false_edge_bb != header_branch_prob->bb))
|
||
|
|
+ {
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ fprintf (dump_file, "get_bb_prob failed: loop data exception\n");
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ hash_set <basic_block> path_node;
|
||
|
|
+ int max_path = MAX_BB_BRANCH_PATH;
|
||
|
|
+ if (traverse_prune_bb_branch (bb_branch_prob, max_path, path_node,
|
||
|
|
+ header_branch_prob->bb, loop->latch) == false)
|
||
|
|
+ {
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ fprintf (dump_file, "traverse_prune_bb_branch false.\n");
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ traverse_calculate_bb_prob (bb_branch_prob,
|
||
|
|
+ header_branch_prob->bb, loop->latch, 1);
|
||
|
|
+
|
||
|
|
+ return true;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Computes an estimated number of insns in LOOP, weighted by WEIGHTS. */
|
||
|
|
+
|
||
|
|
+static unsigned
|
||
|
|
+estimate_num_loop_insns (struct loop *loop, eni_weights *weights)
|
||
|
|
+{
|
||
|
|
+ basic_block *body = get_loop_body_in_dom_order (loop);
|
||
|
|
+ gimple_stmt_iterator gsi;
|
||
|
|
+ float size = 0;
|
||
|
|
+ basic_block bb = NULL;
|
||
|
|
+ hash_map <basic_block, bb_bp> bb_branch_prob;
|
||
|
|
+
|
||
|
|
+ if (prefetch_level >= BRANCH_WEIGHTED_AHEAD)
|
||
|
|
+ {
|
||
|
|
+ get_bb_branch_prob (bb_branch_prob, loop);
|
||
|
|
+ if (get_bb_prob (bb_branch_prob, loop) == false)
|
||
|
|
+ {
|
||
|
|
+ dump_loop_bb (loop);
|
||
|
|
+ return 0;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ for (unsigned i = 0; i < loop->num_nodes; i++)
|
||
|
|
+ {
|
||
|
|
+ bb = body[i];
|
||
|
|
+ /* For nested loops, the bb of the inner loop is not calculated. */
|
||
|
|
+ if (bb->loop_father != loop)
|
||
|
|
+ {
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ float size_tmp = 0;
|
||
|
|
+ for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
|
||
|
|
+ {
|
||
|
|
+ size_tmp += estimate_num_insns (gsi_stmt (gsi), weights);
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ if (prefetch_level >= BRANCH_WEIGHTED_AHEAD)
|
||
|
|
+ {
|
||
|
|
+ float bb_prob = bb_branch_prob.get (bb)->bb_prob;
|
||
|
|
+ size += size_tmp * bb_prob;
|
||
|
|
+ }
|
||
|
|
+ else
|
||
|
|
+ {
|
||
|
|
+ size += size_tmp;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ free (body);
|
||
|
|
+
|
||
|
|
+ return unsigned (size);
|
||
|
|
+}
|
||
|
|
|
||
|
|
/* Issue prefetch instructions for array references in LOOP. Returns
|
||
|
|
true if the LOOP was unrolled. */
|
||
|
|
@@ -1899,7 +2449,15 @@ loop_prefetch_arrays (class loop *loop)
|
||
|
|
|
||
|
|
/* FIXME: the time should be weighted by the probabilities of the blocks in
|
||
|
|
the loop body. */
|
||
|
|
- time = tree_num_loop_insns (loop, &eni_time_weights);
|
||
|
|
+
|
||
|
|
+ if (prefetch_level >= REFINE_BB_AHEAD)
|
||
|
|
+ {
|
||
|
|
+ time = estimate_num_loop_insns (loop, &eni_time_weights);
|
||
|
|
+ }
|
||
|
|
+ else
|
||
|
|
+ {
|
||
|
|
+ time = tree_num_loop_insns (loop, &eni_time_weights);
|
||
|
|
+ }
|
||
|
|
if (time == 0)
|
||
|
|
return false;
|
||
|
|
|
||
|
|
@@ -1913,7 +2471,14 @@ loop_prefetch_arrays (class loop *loop)
|
||
|
|
if (trip_count_to_ahead_ratio_too_small_p (ahead, est_niter))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
- ninsns = tree_num_loop_insns (loop, &eni_size_weights);
|
||
|
|
+ if (prefetch_level >= REFINE_BB_AHEAD)
|
||
|
|
+ {
|
||
|
|
+ ninsns = estimate_num_loop_insns (loop, &eni_size_weights);
|
||
|
|
+ }
|
||
|
|
+ else
|
||
|
|
+ {
|
||
|
|
+ ninsns = tree_num_loop_insns (loop, &eni_size_weights);
|
||
|
|
+ }
|
||
|
|
|
||
|
|
/* Step 1: gather the memory references. */
|
||
|
|
refs = gather_memory_references (loop, &no_other_refs, &mem_ref_count);
|
||
|
|
@@ -1978,10 +2543,49 @@ fail:
|
||
|
|
return unrolled;
|
||
|
|
}
|
||
|
|
|
||
|
|
+/* Determine if it is a high execution rate loop. */
|
||
|
|
+
|
||
|
|
+static bool
|
||
|
|
+is_high_exec_rate_loop (struct loop *loop)
|
||
|
|
+{
|
||
|
|
+ vec<edge> exit_edges = get_loop_exit_edges (loop);
|
||
|
|
+ if (exit_edges == vNULL)
|
||
|
|
+ {
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ unsigned i = 0;
|
||
|
|
+ gcov_type exit_count = 0;
|
||
|
|
+ edge e = NULL;
|
||
|
|
+ float loop_exec_rate = 0;
|
||
|
|
+ gcov_type header_bb_count = loop->header->count.to_gcov_type ();
|
||
|
|
+ FOR_EACH_VEC_ELT (exit_edges, i, e)
|
||
|
|
+ {
|
||
|
|
+ gcov_type exiting_bb_count = e->src->count.to_gcov_type ();
|
||
|
|
+ float exit_edge_prob = get_edge_prob (e);
|
||
|
|
+ exit_count += exit_edge_prob * exiting_bb_count;
|
||
|
|
+
|
||
|
|
+ loop_exec_rate = 1.0 - ((double) exit_count / header_bb_count);
|
||
|
|
+
|
||
|
|
+ if (loop_exec_rate < (float) LOOP_EXECUTION_RATE / 100.0)
|
||
|
|
+ {
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, "loop with high execution rate: %f >= %f\n\n",
|
||
|
|
+ loop_exec_rate, (float) LOOP_EXECUTION_RATE / 100.0);
|
||
|
|
+ dump_loop_bb (loop);
|
||
|
|
+ }
|
||
|
|
+ return true;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
/* Issue prefetch instructions for array references in loops. */
|
||
|
|
|
||
|
|
unsigned int
|
||
|
|
-tree_ssa_prefetch_arrays (void)
|
||
|
|
+tree_ssa_prefetch_arrays (function *fun)
|
||
|
|
{
|
||
|
|
class loop *loop;
|
||
|
|
bool unrolled = false;
|
||
|
|
@@ -2012,6 +2616,12 @@ tree_ssa_prefetch_arrays (void)
|
||
|
|
param_min_insn_to_prefetch_ratio);
|
||
|
|
fprintf (dump_file, " min insn-to-mem ratio: %d \n",
|
||
|
|
param_prefetch_min_insn_to_mem_ratio);
|
||
|
|
+ fprintf (dump_file, " prefetch_func_topn: %d \n",
|
||
|
|
+ param_prefetch_func_topn);
|
||
|
|
+ fprintf (dump_file, " prefetch_ref_topn: %d \n",
|
||
|
|
+ param_prefetch_ref_topn);
|
||
|
|
+ fprintf (dump_file, " high_loop_execution_rate: %d \n",
|
||
|
|
+ LOOP_EXECUTION_RATE);
|
||
|
|
fprintf (dump_file, "\n");
|
||
|
|
}
|
||
|
|
|
||
|
|
@@ -2028,13 +2638,42 @@ tree_ssa_prefetch_arrays (void)
|
||
|
|
set_builtin_decl (BUILT_IN_PREFETCH, decl, false);
|
||
|
|
}
|
||
|
|
|
||
|
|
- FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
|
||
|
|
+ enum li_flags LI = LI_FROM_INNERMOST;
|
||
|
|
+
|
||
|
|
+ if (profile_exist (CACHE_MISSES))
|
||
|
|
+ {
|
||
|
|
+ LI = LI_ONLY_INNERMOST;
|
||
|
|
+ }
|
||
|
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, "Processing model %d:\n", LI);
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ if (profile_exist (CACHE_MISSES))
|
||
|
|
+ {
|
||
|
|
+ sort_ref_by_event_count (fun, CACHE_MISSES);
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ FOR_EACH_LOOP (loop, LI)
|
||
|
|
{
|
||
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
- fprintf (dump_file, "Processing loop %d:\n", loop->num);
|
||
|
|
+ {
|
||
|
|
+ fprintf (dump_file, "======================================\n");
|
||
|
|
+ fprintf (dump_file, "Processing loop %d:\n", loop->num);
|
||
|
|
+ fprintf (dump_file, "======================================\n");
|
||
|
|
+ flow_loop_dump (loop, dump_file, NULL, 1);
|
||
|
|
+ fprintf (dump_file, "\n\n");
|
||
|
|
+ }
|
||
|
|
|
||
|
|
- unrolled |= loop_prefetch_arrays (loop);
|
||
|
|
+ if (profile_exist (CACHE_MISSES))
|
||
|
|
+ {
|
||
|
|
+ if (!is_high_exec_rate_loop (loop))
|
||
|
|
+ {
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
|
||
|
|
+ unrolled |= loop_prefetch_arrays (loop);
|
||
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
fprintf (dump_file, "\n\n");
|
||
|
|
}
|
||
|
|
@@ -2049,6 +2688,56 @@ tree_ssa_prefetch_arrays (void)
|
||
|
|
return todo_flags;
|
||
|
|
}
|
||
|
|
|
||
|
|
+/* Decide whether the current function gets cache-miss guided prefetch
+   analysis.  Return true only when the function has CACHE_MISSES rank
+   information, a nonzero event count that is not below
+   PREFETCH_FUNC_COUNTS_THRESHOLD and a rank within PREFETCH_FUNC_TOPN.
+   The decision is reported in the detailed dump.  */
+
+static bool
+should_analyze_func_p (void)
+{
+  gcov_type decl_uid = DECL_UID (current_function_decl);
+  bool details_p = dump_file && (dump_flags & TDF_DETAILS);
+
+  struct rank_info func_rank_info =
+    event_get_func_rank (decl_uid, CACHE_MISSES);
+  if (func_rank_info.total == 0)
+    return false;
+
+  gcov_type func_count = event_get_func_count (decl_uid, CACHE_MISSES);
+  if (func_count == 0)
+    {
+      /* The uid and the counts are gcov_type values, so they are cast to
+         int64_t and printed with PRId64 rather than with %d or %lu.  */
+      if (details_p)
+        fprintf (dump_file, "function uid %" PRId64 " cannot find profile "
+                 "data and skip prefetch analysis\n", (int64_t) decl_uid);
+      return false;
+    }
+
+  /* Keep the function only if neither rejection test fires: its rank is
+     beyond the top N, or its event count is below the threshold.  */
+  bool analyze_p = !(func_rank_info.rank > PREFETCH_FUNC_TOPN
+                     || func_count < PREFETCH_FUNC_COUNTS_THRESHOLD);
+  if (details_p && analyze_p)
+    fprintf (dump_file, "function uid %" PRId64 " total counts is %" PRId64
+             ": rank %d in topn %d, counts %" PRId64 " > threshold %" PRId64
+             " continue prefetch analysis\n",
+             (int64_t) decl_uid, (int64_t) func_count,
+             func_rank_info.rank, PREFETCH_FUNC_TOPN,
+             (int64_t) func_count, (int64_t) PREFETCH_FUNC_COUNTS_THRESHOLD);
+  else if (details_p)
+    fprintf (dump_file, "function uid %" PRId64 " total counts is %" PRId64
+             ": rank %d > topn %d, counts %" PRId64 " < threshold %" PRId64
+             " skip prefetch analysis\n",
+             (int64_t) decl_uid, (int64_t) func_count,
+             func_rank_info.rank, PREFETCH_FUNC_TOPN,
+             (int64_t) func_count, (int64_t) PREFETCH_FUNC_COUNTS_THRESHOLD);
+
+  return analyze_p;
+}
|
||
|
|
+
|
||
|
|
/* Prefetching. */
|
||
|
|
|
||
|
|
namespace {
|
||
|
|
@@ -2085,6 +2774,18 @@ pass_loop_prefetch::execute (function *fun)
|
||
|
|
if (number_of_loops (fun) <= 1)
|
||
|
|
return 0;
|
||
|
|
|
||
|
|
+  /* Filter functions only when a cache-miss profile is available.  If
+     should_analyze_func_p rejects the function (for example, because it
+     has no cache-miss count), skip prefetching for it entirely rather
+     than fall back to the native prefetch analysis.  */
|
||
|
|
+ if (profile_exist (CACHE_MISSES))
|
||
|
|
+ {
|
||
|
|
+ if (!should_analyze_func_p ())
|
||
|
|
+ {
|
||
|
|
+ return 0;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
if ((PREFETCH_BLOCK & (PREFETCH_BLOCK - 1)) != 0)
|
||
|
|
{
|
||
|
|
static bool warned = false;
|
||
|
|
@@ -2099,7 +2800,7 @@ pass_loop_prefetch::execute (function *fun)
|
||
|
|
return 0;
|
||
|
|
}
|
||
|
|
|
||
|
|
- return tree_ssa_prefetch_arrays ();
|
||
|
|
+ return tree_ssa_prefetch_arrays (fun);
|
||
|
|
}
|
||
|
|
|
||
|
|
} // anon namespace
|
||
|
|
--
|
||
|
|
2.27.0.windows.1
|
||
|
|
|