!137 [Sync] Sync patch from openeuler/gcc

From: @benniaobufeijiushiji 
Reviewed-by: @eastb233 
Signed-off-by: @eastb233
This commit is contained in:
openeuler-ci-bot 2022-02-24 01:29:08 +00:00 committed by Gitee
commit 26e766f11e
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
5 changed files with 2189 additions and 2 deletions

View File

@ -0,0 +1,669 @@
From 26e4ba63112f55c27b7dd3d5f8c4497ef9a2f459 Mon Sep 17 00:00:00 2001
From: benniaobufeijiushiji <linda7@huawei.com>
Date: Thu, 6 Jan 2022 15:33:29 +0800
Subject: [PATCH 25/28] [AutoPrefetch] Support cache misses profile
Add pass ex-afdo after pass afdo in auto-profile.c.
Add flag -fcache-misses-profile.
Read profile of different types of perf events and build maps for
function and gimple location to its count of each perf event.
Currently, instruction execution and cache misses are supported.
---
gcc/auto-profile.c | 415 +++++++++++++++++++++++++++++++++++++++++++++
gcc/auto-profile.h | 28 +++
gcc/common.opt | 14 ++
gcc/opts.c | 26 +++
gcc/passes.def | 1 +
gcc/timevar.def | 1 +
gcc/toplev.c | 6 +
gcc/tree-pass.h | 2 +
8 files changed, 493 insertions(+)
diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
index 7d09887c9..aced8fca5 100644
--- a/gcc/auto-profile.c
+++ b/gcc/auto-profile.c
@@ -49,6 +49,9 @@ along with GCC; see the file COPYING3. If not see
#include "auto-profile.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
+#include <map>
+#include <vector>
+#include <algorithm>
/* The following routines implements AutoFDO optimization.
@@ -95,6 +98,7 @@ along with GCC; see the file COPYING3. If not see
*/
#define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
+#define DEFAULT_CACHE_MISSES_PROFILE_FILE "cmsdata.gcov"
#define AUTO_PROFILE_VERSION 1
namespace autofdo
@@ -117,6 +121,14 @@ private:
bool annotated_;
};
+/* Comparator for std::sort over pair <function decl_uid, count>: orders
+   entries by descending count.  The operands are taken by const reference,
+   as a sort comparator must not modify what it compares.  */
+static bool
+event_count_cmp (const std::pair<unsigned, gcov_type> &a,
+                 const std::pair<unsigned, gcov_type> &b)
+{
+  return a.second > b.second;
+}
+
/* Represent a source location: (function_decl, lineno). */
typedef std::pair<tree, unsigned> decl_lineno;
@@ -338,6 +350,206 @@ static autofdo_source_profile *afdo_source_profile;
/* gcov_summary structure to store the profile_info. */
static gcov_summary *afdo_profile_info;
+/* Put the profile file name of each enabled event into EVENT_FILES, which
+   is indexed by event_type.  Entries of events that are not enabled are
+   left untouched.  Return false if neither -fauto-profile nor
+   -fcache-misses-profile is enabled.  */
+
+static bool
+get_all_profile_names (const char **event_files)
+{
+  if (!(flag_auto_profile || flag_cache_misses_profile))
+    {
+      return false;
+    }
+
+  event_files[INST_EXEC] = auto_profile_file;
+
+  /* Only schedule a cache misses profile when it was requested.  Otherwise
+     plain -fauto-profile would also try to open the default cmsdata.gcov.  */
+  if (flag_cache_misses_profile)
+    {
+      if (cache_misses_profile_file == NULL)
+        {
+          cache_misses_profile_file = DEFAULT_CACHE_MISSES_PROFILE_FILE;
+        }
+      event_files[CACHE_MISSES] = cache_misses_profile_file;
+    }
+
+  return true;
+}
+
+static void read_profile (void);
+
+/* Maintain multiple profile data of different events with event_loc_count_map
+   and event_func_count_map.  */
+
+class extend_auto_profile
+{
+public:
+  /* Return true if any count is recorded for the given event.  */
+  bool auto_profile_exist (enum event_type type);
+  /* Return the count recorded for a location under an event, 0 if none.  */
+  gcov_type get_loc_count (location_t, event_type);
+  /* Return the count recorded for a function decl_uid under an event,
+     0 if none.  */
+  gcov_type get_func_count (unsigned, event_type);
+  /* Return the rank information of a function decl_uid under an event,
+     {0, 0} if none.  */
+  struct rank_info get_func_rank (unsigned, enum event_type);
+  /* There should be only one instance of class EXTEND_AUTO_PROFILE.
+     Return a newly allocated instance, or NULL if read () fails.  */
+  static extend_auto_profile *create ()
+  {
+    extend_auto_profile *map = new extend_auto_profile ();
+    if (map->read ())
+      {
+        return map;
+      }
+    delete map;
+    return NULL;
+  }
+private:
+  /* Basic maps of extend_auto_profile.  */
+  typedef std::map<location_t, gcov_type> loc_count_map;
+  typedef std::map<unsigned, gcov_type> func_count_map;
+
+  /* Map of function_uid to its descending order rank of counts.  */
+  typedef std::map<unsigned, unsigned> rank_map;
+
+  /* Mapping hardware events to corresponding basic maps.  */
+  typedef std::map<event_type, loc_count_map> event_loc_count_map;
+  typedef std::map<event_type, func_count_map> event_func_count_map;
+  typedef std::map<event_type, rank_map> event_rank_map;
+
+  /* PROFILE_TYPE is reassigned by read () for every event it processes;
+     start from a defined value instead of leaving it indeterminate.  */
+  extend_auto_profile () : profile_type (INST_EXEC) {}
+  bool read ();
+  void set_loc_count ();
+  void process_extend_source_profile ();
+  void read_extend_afdo_file (const char*, event_type);
+  void rank_all_func ();
+  void dump_event ();
+  event_loc_count_map event_loc_map;
+  event_func_count_map event_func_map;
+  event_rank_map func_rank;
+  /* Event whose profile is currently being processed.  */
+  event_type profile_type;
+};
+
+/* Member functions for extend_auto_profile. */
+
+/* Return true when either the per-function or the per-location map holds
+   an entry for event TYPE.  Event types other than INST_EXEC and
+   CACHE_MISSES are never reported as existing.  */
+bool
+extend_auto_profile::auto_profile_exist (enum event_type type)
+{
+  if (type != INST_EXEC && type != CACHE_MISSES)
+    {
+      return false;
+    }
+  const bool has_func_data = event_func_map.count (type) != 0;
+  return has_func_data || event_loc_map.count (type) != 0;
+}
+
+/* Note in the dump file, if there is one, which event's profile is about
+   to be processed.  */
+void
+extend_auto_profile::dump_event ()
+{
+  if (!dump_file)
+    {
+      return;
+    }
+  if (profile_type == INST_EXEC)
+    {
+      fprintf (dump_file, "Processing event instruction execution.\n");
+    }
+  else if (profile_type == CACHE_MISSES)
+    {
+      fprintf (dump_file, "Processing event cache misses.\n");
+    }
+}
+
+/* Read the profile of every enabled event and record its counts.
+   Return false if no profile option is enabled, true otherwise.
+   NOTE(review): true is returned even when no profile file could be
+   read; the maps simply stay empty in that case.  */
+
+bool
+extend_auto_profile::read ()
+{
+  const char *event_files[EVENT_NUMBER] = {NULL};
+  if (!get_all_profile_names (event_files))
+    {
+      return false;
+    }
+
+  /* Backup AFDO_STRING_TABLE, AFDO_SOURCE_PROFILE and AUTO_PROFILE_FILE
+     since we will temporarily replace them for each event_type.  */
+  autofdo::string_table *string_table_afdo = afdo_string_table;
+  autofdo::autofdo_source_profile *source_profile_afdo = afdo_source_profile;
+  const char *profile_file_afdo = auto_profile_file;
+
+  for (unsigned i = 0; i < EVENT_NUMBER; i++)
+    {
+      if (event_files[i] == NULL)
+        {
+          continue;
+        }
+      profile_type = (enum event_type) i;
+      dump_event ();
+
+      /* Clear the globals first.  If read_profile () returns without
+         creating new tables, we must not process and then delete the
+         backed-up ones (or ones already deleted by an earlier
+         iteration).  */
+      afdo_string_table = NULL;
+      afdo_source_profile = NULL;
+
+      gcov_close ();
+      auto_profile_file = event_files[i];
+      read_profile ();
+      gcov_close ();
+
+      if (afdo_source_profile != NULL)
+        {
+          process_extend_source_profile ();
+        }
+
+      /* Deleting a null pointer is a no-op.  */
+      delete afdo_source_profile;
+      delete afdo_string_table;
+    }
+
+  /* Restore AFDO_STRING_TABLE, AFDO_SOURCE_PROFILE and AUTO_PROFILE_FILE.
+     Function END_AUTO_PROFILE will free the tables at the end of
+     compilation.  */
+  afdo_string_table = string_table_afdo;
+  afdo_source_profile = source_profile_afdo;
+  auto_profile_file = profile_file_afdo;
+  return true;
+}
+
+/* Helper functions. */
+
+/* Return the count recorded for LOC under event TYPE, or 0 when either
+   the event or the location has no entry.  */
+gcov_type
+extend_auto_profile::get_loc_count (location_t loc, event_type type)
+{
+  event_loc_count_map::iterator per_event = event_loc_map.find (type);
+  if (per_event == event_loc_map.end ())
+    {
+      return 0;
+    }
+  loc_count_map &counts = per_event->second;
+  loc_count_map::iterator hit = counts.find (loc);
+  if (hit == counts.end ())
+    {
+      return 0;
+    }
+  return hit->second;
+}
+
+/* Return the rank of function DECL_UID for event TYPE together with the
+   number of ranked functions of that event.  The result is {0, 0} when
+   the event or the function has no rank.  */
+struct rank_info
+extend_auto_profile::get_func_rank (unsigned decl_uid, enum event_type type)
+{
+  struct rank_info result = {0, 0};
+  event_rank_map::iterator per_event = func_rank.find (type);
+  if (per_event == func_rank.end ())
+    {
+      return result;
+    }
+  rank_map &ranks = per_event->second;
+  rank_map::iterator hit = ranks.find (decl_uid);
+  if (hit == ranks.end ())
+    {
+      return result;
+    }
+  result.rank = hit->second;
+  result.total = ranks.size ();
+  return result;
+}
+
+/* Return the count recorded for function DECL_UID under event TYPE, or 0
+   when either the event or the function has no entry.  */
+gcov_type
+extend_auto_profile::get_func_count (unsigned decl_uid, event_type type)
+{
+  event_func_count_map::iterator per_event = event_func_map.find (type);
+  if (per_event == event_func_map.end ())
+    {
+      return 0;
+    }
+  func_count_map &counts = per_event->second;
+  func_count_map::iterator hit = counts.find (decl_uid);
+  if (hit == counts.end ())
+    {
+      return 0;
+    }
+  return hit->second;
+}
+
+static extend_auto_profile *extend_profile;
+
/* Helper functions. */
/* Return the original name of NAME: strip the suffix that starts
@@ -1654,6 +1866,131 @@ auto_profile (void)
return TODO_rebuild_cgraph_edges;
}
+
+/* Rank the functions recorded for the current PROFILE_TYPE by descending
+   count and store their 1-based ranks in FUNC_RANK.  */
+void
+extend_auto_profile::rank_all_func ()
+{
+  event_func_count_map::iterator per_event
+    = event_func_map.find (profile_type);
+  if (per_event == event_func_map.end ())
+    {
+      return;
+    }
+
+  /* Copy the <decl_uid, count> entries into a vector so that they can be
+     sorted by count.  */
+  std::vector<std::pair<unsigned, gcov_type> > by_count
+    (per_event->second.begin (), per_event->second.end ());
+
+  std::sort (by_count.begin (), by_count.end (), event_count_cmp);
+
+  unsigned next_rank = 1;
+  std::vector<std::pair<unsigned, gcov_type> >::iterator entry;
+  for (entry = by_count.begin (); entry != by_count.end (); ++entry)
+    {
+      func_rank[profile_type][entry->first] = next_rank++;
+    }
+}
+
+/* Iterate stmts in cfun and accumulate the sample count of each stmt into
+   EVENT_LOC_MAP, keyed by the current PROFILE_TYPE and the stmt location.
+   Clobbers and debug stmts are ignored.  */
+
+void
+extend_auto_profile::set_loc_count ()
+{
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      gimple_stmt_iterator gsi;
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+        {
+          count_info info;
+          gimple *stmt = gsi_stmt (gsi);
+          if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
+            {
+              continue;
+            }
+          if (afdo_source_profile->get_count_info (stmt, &info))
+            {
+              location_t loc = gimple_location (stmt);
+              /* Look the slot up once and reuse it for the dump below.  */
+              gcov_type &loc_count = event_loc_map[profile_type][loc];
+              loc_count += info.count;
+              if (dump_file && (dump_flags & TDF_DETAILS))
+                {
+                  fprintf (dump_file, "stmt ");
+                  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+                  /* Print through int64_t/PRId64 so that the format matches
+                     the argument on hosts where long is not 64 bits.  */
+                  fprintf (dump_file, "counts %" PRId64 "\n",
+                           (int64_t) loc_count);
+                }
+            }
+        }
+    }
+}
+
+/* Walk all functions and, for each one that has an instance in
+   AFDO_SOURCE_PROFILE, record the data of the current PROFILE_TYPE in
+   two maps:
+   1. gimple_location to count (via set_loc_count).
+   2. function decl_uid to total count.
+   Finally rank the functions by count.  Nothing is done once the symbol
+   table state is FINISHED.  */
+void
+extend_auto_profile::process_extend_source_profile ()
+{
+  if (symtab->state == FINISHED)
+    {
+      return;
+    }
+
+  struct cgraph_node *node;
+  FOR_EACH_FUNCTION (node)
+    {
+      /* Skip functions without a gimple body, inlined clones, and
+         functions produced for builtin stuff.  */
+      if (!gimple_has_body_p (node->decl)
+          || node->inlined_to
+          || DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
+        {
+          continue;
+        }
+
+      push_cfun (DECL_STRUCT_FUNCTION (node->decl));
+
+      const function_instance *instance
+        = afdo_source_profile->get_function_instance_by_decl (
+            current_function_decl);
+
+      if (instance != NULL)
+        {
+          unsigned int uid = DECL_UID (current_function_decl);
+          gcov_type total = instance->total_count ();
+          if (dump_file)
+            {
+              fprintf (dump_file, "Extend auto-profile for function %s.\n",
+                       node->dump_name ());
+            }
+          event_func_map[profile_type][uid] += total;
+          set_loc_count ();
+        }
+
+      /* Balance the push_cfun above whether or not a profile was found.  */
+      pop_cfun ();
+    }
+  rank_all_func ();
+}
+
+/* Main entry of extend_auto_profile: build the global EXTEND_PROFILE and
+   report the outcome in the dump file, if there is one.  */
+
+static void
+extend_source_profile ()
+{
+  extend_profile = autofdo::extend_auto_profile::create ();
+  if (!dump_file)
+    {
+      return;
+    }
+  if (extend_profile != NULL)
+    {
+      fprintf (dump_file, "Extend profile info generated.\n");
+    }
+  else
+    {
+      fprintf (dump_file, "No profile file is found.\n");
+    }
+}
} /* namespace autofdo. */
/* Read the profile from the profile data file. */
@@ -1682,6 +2019,42 @@ end_auto_profile (void)
profile_info = NULL;
}
+/* Extern function to get profile info in other passes. */
+
+/* Return true if an extended profile has been built and it holds data
+   for event TYPE.  */
+bool
+profile_exist (enum event_type type)
+{
+  if (autofdo::extend_profile == NULL)
+    {
+      return false;
+    }
+  return autofdo::extend_profile->auto_profile_exist (type);
+}
+
+/* Return the count of event TYPE recorded for LOC.  The result is 0 when
+   no extended profile has been built or LOC has no recorded count.  */
+gcov_type
+event_get_loc_count (location_t loc, event_type type)
+{
+  /* EXTEND_PROFILE stays NULL when the profile could not be created.  */
+  if (autofdo::extend_profile == NULL)
+    {
+      return 0;
+    }
+  return autofdo::extend_profile->get_loc_count (loc, type);
+}
+
+/* Return the count of event TYPE recorded for function DECL_UID.  The
+   result is 0 when no extended profile has been built or the function has
+   no recorded count.  */
+gcov_type
+event_get_func_count (unsigned decl_uid, event_type type)
+{
+  /* EXTEND_PROFILE stays NULL when the profile could not be created.  */
+  if (autofdo::extend_profile == NULL)
+    {
+      return 0;
+    }
+  return autofdo::extend_profile->get_func_count (decl_uid, type);
+}
+
+/* Return the rank information of function DECL_UID for event TYPE.  The
+   result is {0, 0} when no extended profile has been built or the
+   function has no rank.  */
+struct rank_info
+event_get_func_rank (unsigned decl_uid, enum event_type type)
+{
+  /* EXTEND_PROFILE stays NULL when the profile could not be created.  */
+  if (autofdo::extend_profile == NULL)
+    {
+      struct rank_info none = {0, 0};
+      return none;
+    }
+  return autofdo::extend_profile->get_func_rank (decl_uid, type);
+}
+
+/* Free the extended profile, if any, and clear the global pointer so that
+   later queries such as profile_exist () do not touch freed memory.  */
+void
+free_extend_profile_info ()
+{
+  /* Deleting a null pointer is a no-op, so no guard is needed.  */
+  delete autofdo::extend_profile;
+  autofdo::extend_profile = NULL;
+}
+
/* Returns TRUE if EDGE is hot enough to be inlined early. */
bool
@@ -1743,8 +2116,50 @@ public:
} // anon namespace
+namespace
+{
+
+/* Descriptor of the "ex-afdo" simple IPA pass.  */
+const pass_data pass_data_ipa_extend_auto_profile =
+{
+  SIMPLE_IPA_PASS, /* type */
+  "ex-afdo", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_IPA_EXTEND_AUTO_PROFILE, /* tv_id */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+/* Pass that builds the extended auto profile.  */
+class pass_ipa_extend_auto_profile : public simple_ipa_opt_pass
+{
+public:
+  pass_ipa_extend_auto_profile (gcc::context *ctxt)
+    : simple_ipa_opt_pass (pass_data_ipa_extend_auto_profile, ctxt)
+  {}
+
+  /* opt_pass methods: */
+
+  /* Run only when flag_ipa_extend_auto_profile is positive.  */
+  virtual bool gate (function *)
+  {
+    return flag_ipa_extend_auto_profile > 0;
+  }
+
+  /* Build the extended profile; no TODO flags are requested.  */
+  virtual unsigned int execute (function *)
+  {
+    autofdo::extend_source_profile ();
+    return 0;
+  }
+};
+
+} // anon namespace
+
simple_ipa_opt_pass *
make_pass_ipa_auto_profile (gcc::context *ctxt)
{
return new pass_ipa_auto_profile (ctxt);
}
+
+/* Return a newly allocated instance of the ex-afdo pass for context
+   CTXT.  */
+simple_ipa_opt_pass *
+make_pass_ipa_extend_auto_profile (gcc::context *ctxt)
+{
+ return new pass_ipa_extend_auto_profile (ctxt);
+}
\ No newline at end of file
diff --git a/gcc/auto-profile.h b/gcc/auto-profile.h
index f5cff091d..230d7e68a 100644
--- a/gcc/auto-profile.h
+++ b/gcc/auto-profile.h
@@ -21,6 +21,13 @@ along with GCC; see the file COPYING3. If not see
#ifndef AUTO_PROFILE_H
#define AUTO_PROFILE_H
+/* Perf event kinds for which a sample profile can be read.  */
+enum event_type
+{
+ /* Instruction execution.  */
+ INST_EXEC = 0,
+ /* Cache misses.  */
+ CACHE_MISSES,
+ /* Number of event kinds; used as an array size and loop bound, not as
+    an event itself.  */
+ EVENT_NUMBER
+};
+
/* Read, process, finalize AutoFDO data structures. */
extern void read_autofdo_file (void);
extern void end_auto_profile (void);
@@ -28,4 +35,25 @@ extern void end_auto_profile (void);
/* Returns TRUE if EDGE is hot enough to be inlined early. */
extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *);
+/* Check if profile exists before using this profile. */
+extern bool profile_exist (enum event_type);
+
+/* Given func decl_uid or gimple location and event_type, return count.
+ Count is 0 if function or gimple is not sampled. */
+extern gcov_type event_get_func_count (unsigned, enum event_type);
+extern gcov_type event_get_loc_count (location_t, enum event_type);
+
+/* Rank of a function among the functions ranked for one event.  */
+struct rank_info
+{
+ /* Number of functions ranked for the event.  */
+ unsigned total;
+ /* 1-based position of the function when sorted by descending count.  */
+ unsigned rank;
+};
+
+/* Given function decl_uid and event type, return rank_info. Rank_info
+ is {0, 0} if function was not sampled. */
+extern struct rank_info event_get_func_rank (unsigned, enum event_type);
+
+/* Free memory allocated by autofdo::extend_profile. */
+extern void free_extend_profile_info ();
+
#endif /* AUTO_PROFILE_H */
diff --git a/gcc/common.opt b/gcc/common.opt
index 73c24f28d..37cbbd8c0 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1074,6 +1074,16 @@ Common Joined RejectNegative Var(auto_profile_file)
Use sample profile information for call graph node weights. The profile
file is specified in the argument.
+fcache-misses-profile
+Common Report Var(flag_cache_misses_profile)
+Use sample profile information for source code cache miss count. The default
+profile file is cmsdata.gcov in `pwd`.
+
+fcache-misses-profile=
+Common Joined RejectNegative Var(cache_misses_profile_file)
+Use sample profile information for source code cache miss count. The profile
+file is specified in the argument.
+
; -fcheck-bounds causes gcc to generate array bounds checks.
; For C, C++ and ObjC: defaults off.
; For Java: defaults to on.
@@ -1873,6 +1883,10 @@ fipa-struct-reorg
Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization
Perform structure layout optimizations.
+fipa-extend-auto-profile
+Common Report Var(flag_ipa_extend_auto_profile)
+Use sample profile information for source code.
+
fipa-vrp
Common Report Var(flag_ipa_vrp) Optimization
Perform IPA Value Range Propagation.
diff --git a/gcc/opts.c b/gcc/opts.c
index 6924a973a..642327296 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1742,6 +1742,13 @@ enable_fdo_optimizations (struct gcc_options *opts,
SET_OPTION_IF_UNSET (opts, opts_set, flag_tree_loop_distribution, value);
}
+/* Options implied by -fcache-misses-profile: turn on
+   flag_prefetch_loop_arrays in OPTS unless OPTS_SET records that it was
+   set explicitly.  */
+static void
+set_cache_misses_profile_params (struct gcc_options *opts,
+ struct gcc_options *opts_set)
+{
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_prefetch_loop_arrays, 1);
+}
+
/* -f{,no-}sanitize{,-recover}= suboptions. */
const struct sanitizer_opts_s sanitizer_opts[] =
{
@@ -2604,6 +2611,25 @@ common_handle_option (struct gcc_options *opts,
param_early_inliner_max_iterations, 10);
break;
+ case OPT_fipa_extend_auto_profile:
+ opts->x_flag_ipa_extend_auto_profile = opts->x_flag_cache_misses_profile
+ ? true : value;
+ break;
+
+ case OPT_fcache_misses_profile_:
+ opts->x_cache_misses_profile_file = xstrdup (arg);
+ opts->x_flag_cache_misses_profile = true;
+ value = true;
+ /* No break here - do -fcache-misses-profile processing. */
+ /* FALLTHRU */
+ case OPT_fcache_misses_profile:
+ opts->x_flag_ipa_extend_auto_profile = value;
+ if (value)
+ {
+ set_cache_misses_profile_params (opts, opts_set);
+ }
+ break;
+
case OPT_fprofile_generate_:
opts->x_profile_data_prefix = xstrdup (arg);
value = true;
diff --git a/gcc/passes.def b/gcc/passes.def
index 63303ab65..e9c91d26e 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -133,6 +133,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_target_clone);
NEXT_PASS (pass_ipa_auto_profile);
+ NEXT_PASS (pass_ipa_extend_auto_profile);
NEXT_PASS (pass_ipa_tree_profile);
PUSH_INSERT_PASSES_WITHIN (pass_ipa_tree_profile)
NEXT_PASS (pass_feedback_split_functions);
diff --git a/gcc/timevar.def b/gcc/timevar.def
index ee25eccbb..e873747a8 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -82,6 +82,7 @@ DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting")
DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats")
DEFTIMEVAR (TV_IPA_REORDER_FIELDS , "ipa struct reorder fields optimization")
DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization")
+DEFTIMEVAR (TV_IPA_EXTEND_AUTO_PROFILE, "ipa extend auto profile")
DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations")
DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream decompression")
DEFTIMEVAR (TV_IPA_LTO_COMPRESS , "lto stream compression")
diff --git a/gcc/toplev.c b/gcc/toplev.c
index eaed6f6c7..51e6bd400 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -577,6 +577,12 @@ compile_file (void)
targetm.asm_out.output_ident (ident_str);
}
+ /* Extend auto profile finalization. */
+ if (flag_ipa_extend_auto_profile)
+ {
+ free_extend_profile_info ();
+ }
+
/* Auto profile finalization. */
if (flag_auto_profile)
end_auto_profile ();
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index eb32c5d44..be6387768 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -511,6 +511,8 @@ extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_reorder_fields (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt);
+extern simple_ipa_opt_pass *make_pass_ipa_extend_auto_profile (gcc::context
+ *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_target_clone (gcc::context *ctxt);
--
2.27.0.windows.1

View File

@ -0,0 +1,353 @@
From eb58d920a95696d8d5a7db9a6d640d4494fb023f Mon Sep 17 00:00:00 2001
From: liyancheng <412998149@qq.com>
Date: Tue, 25 Jan 2022 16:57:28 +0800
Subject: [PATCH 26/28] [AutoFDO] Enable discriminator and MCF algorithm on
AutoFDO
1. Support discriminators to distinguish among several
basic blocks that share a common locus, allowing for
more accurate autofdo.
2. Use option -fprofile-correction to call the MCF algorithm
to smooth non-conservative BB counts.
---
gcc/auto-profile.c | 172 ++++++++++++++++++++++++++++++++++++++++++++-
gcc/cfghooks.c | 7 ++
gcc/ipa-cp.c | 21 ++++++
gcc/opts.c | 5 +-
gcc/tree-inline.c | 14 ++++
5 files changed, 215 insertions(+), 4 deletions(-)
diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
index aced8fca5..e6164b91b 100644
--- a/gcc/auto-profile.c
+++ b/gcc/auto-profile.c
@@ -678,6 +678,17 @@ string_table::get_index (const char *name) const
if (name == NULL)
return -1;
string_index_map::const_iterator iter = map_.find (name);
+ /* Function name may be duplicate. Try to distinguish by the
+ #file_name#function_name defined by the autofdo tool chain. */
+ if (iter == map_.end ())
+ {
+ char* file_name = get_original_name (lbasename (dump_base_name));
+ char* file_func_name
+ = concat ("#", file_name, "#", name, NULL);
+ iter = map_.find (file_func_name);
+ free (file_name);
+ free (file_func_name);
+ }
if (iter == map_.end ())
return -1;
@@ -866,7 +877,7 @@ function_instance::read_function_instance (function_instance_stack *stack,
for (unsigned i = 0; i < num_pos_counts; i++)
{
- unsigned offset = gcov_read_unsigned () & 0xffff0000;
+ unsigned offset = gcov_read_unsigned ();
unsigned num_targets = gcov_read_unsigned ();
gcov_type count = gcov_read_counter ();
s->pos_counts[offset].count = count;
@@ -945,6 +956,10 @@ autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
function_instance *s = get_function_instance_by_inline_stack (stack);
if (s == NULL)
return false;
+ if (s->get_count_info (stack[0].second + stmt->bb->discriminator, info))
+ {
+ return true;
+ }
return s->get_count_info (stack[0].second, info);
}
@@ -1583,6 +1598,68 @@ afdo_propagate (bb_set *annotated_bb)
}
}
+/* Handle the following scenario, in which afdo_propagate () would invert
+   the branch probability.  E.g.
+   BB_NUM (sample count)
+        BB1 (1000)
+        /        \
+   BB2 (10)    BB3 (0)
+        \        /
+           BB4
+   In afdo_propagate (), count of BB3 is calculated by
+   COUNT (BB3) = 990 (990 = COUNT (BB1) - COUNT (BB2) = 1000 - 10)
+
+   In fact, BB3 may be colder than BB2 by sample count.
+
+   This function allocates the source BB count to each succ BB by sample
+   rate, E.g.
+   BB2_COUNT = BB1_COUNT * (BB2_COUNT / (BB2_COUNT + BB3_COUNT))
+
+   Only successors whose count is an IPA count take part, both when the
+   sample total is accumulated and when the counts are redistributed.  */
+
+static void
+afdo_preprocess_bb_count ()
+{
+  basic_block bb;
+  FOR_ALL_BB_FN (bb, cfun)
+    {
+      if (bb->count.ipa_p () && EDGE_COUNT (bb->succs) > 1
+          && bb->count > profile_count::zero ().afdo ())
+        {
+          basic_block bb1 = EDGE_SUCC (bb, 0)->dest;
+          basic_block bb2 = EDGE_SUCC (bb, 1)->dest;
+          /* Only handle the diamond shape shown above: the first two
+             successors each have a single successor, and it is the same
+             block.  */
+          if (single_succ_p (bb1) && single_succ_p (bb2)
+              && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
+            {
+              gcov_type max_count = 0;
+              gcov_type total_count = 0;
+              edge e;
+              edge_iterator ei;
+              FOR_EACH_EDGE (e, ei, bb->succs)
+                {
+                  if (!e->dest->count.ipa_p ())
+                    {
+                      continue;
+                    }
+                  max_count = MAX (max_count, e->dest->count.to_gcov_type ());
+                  total_count += e->dest->count.to_gcov_type ();
+                }
+              /* The branch probability can only be inverted when
+                 bb_count > max_count * 2.  MAX_COUNT > 0 also guarantees
+                 TOTAL_COUNT > 0 for the division below.  */
+              if (max_count > 0
+                  && bb->count.to_gcov_type () > max_count * 2)
+                {
+                  FOR_EACH_EDGE (e, ei, bb->succs)
+                    {
+                      /* Successors skipped above did not contribute to
+                         TOTAL_COUNT; scaling by their count would give a
+                         meaningless result, so leave them alone.  */
+                      if (!e->dest->count.ipa_p ())
+                        {
+                          continue;
+                        }
+                      gcov_type target_count = bb->count.to_gcov_type ()
+                        * e->dest->count.to_gcov_type () / total_count;
+                      e->dest->count
+                        = profile_count::from_gcov_type (target_count).afdo ();
+                    }
+                }
+            }
+        }
+    }
+}
+
/* Propagate counts on control flow graph and calculate branch
probabilities. */
@@ -1608,6 +1685,7 @@ afdo_calculate_branch_prob (bb_set *annotated_bb)
}
afdo_find_equiv_class (annotated_bb);
+ afdo_preprocess_bb_count ();
afdo_propagate (annotated_bb);
FOR_EACH_BB_FN (bb, cfun)
@@ -1711,6 +1789,82 @@ afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
return false;
}
+/* Preparation before executing MCF algorithm: compact the basic blocks,
+   allocate the per-BB and per-edge gcov count storage and fill it from
+   the counts currently held in the CFG.  */
+
+static void
+afdo_init_mcf ()
+{
+ basic_block bb;
+ edge e;
+ edge_iterator ei;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n init calling mcf_smooth_cfg (). \n");
+ }
+
+ /* Step1: when using mcf, BB ids must be contiguous,
+ so we need compact_blocks (). */
+ compact_blocks ();
+
+ /* Step2: allocate memory for MCF input data. */
+ bb_gcov_counts.safe_grow_cleared (cfun->cfg->x_last_basic_block);
+ edge_gcov_counts = new hash_map<edge, gcov_type>;
+
+ /* Step3: init MCF input data from cfg. */
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ /* Init BB count for MCF. */
+ bb_gcov_count (bb) = bb->count.to_gcov_type ();
+
+ /* Sum of the counts of all successor blocks of BB. */
+ gcov_type total_count = 0;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ total_count += e->dest->count.to_gcov_type ();
+ }
+
+ /* If there is no sample in any successor block, source
+ BB samples are allocated to each edge by branch static prob.
+ Otherwise they are allocated in proportion to the successor
+ counts. */
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if (total_count == 0)
+ {
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
+ * e->probability.to_reg_br_prob_base () / REG_BR_PROB_BASE;
+ }
+ else
+ {
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
+ * e->dest->count.to_gcov_type () / total_count;
+ }
+ }
+ }
+}
+
+/* Copy the non-zero BB counts computed by MCF back into the CFG as AFDO
+   counts, then free the resources used by MCF.  The branch probability
+   has been updated in mcf_smooth_cfg ().  */
+
+static void
+afdo_process_after_mcf ()
+{
+  basic_block bb;
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      const gcov_type smoothed = bb_gcov_count (bb);
+      /* A zero MCF result leaves the existing BB count untouched.  */
+      if (smoothed == 0)
+        {
+          continue;
+        }
+      bb->count = profile_count::from_gcov_type (smoothed).afdo ();
+    }
+
+  /* Release the MCF input/output storage.  */
+  bb_gcov_counts.release ();
+  delete edge_gcov_counts;
+  edge_gcov_counts = NULL;
+}
+
/* Annotate auto profile to the control flow graph. Do not annotate value
profile for stmts in PROMOTED_STMTS. */
@@ -1762,8 +1916,20 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts)
afdo_source_profile->mark_annotated (cfun->function_end_locus);
if (max_count > profile_count::zero ())
{
- /* Calculate, propagate count and probability information on CFG. */
- afdo_calculate_branch_prob (&annotated_bb);
+ /* 1 means -fprofile-correction is enabled manually, and MCF
+ algorithm will be used to calculate count and probability.
+ Otherwise, use the default calculate algorithm. */
+ if (flag_profile_correction == 1)
+ {
+ afdo_init_mcf ();
+ mcf_smooth_cfg ();
+ afdo_process_after_mcf ();
+ }
+ else
+ {
+ /* Calculate, propagate count and probability information on CFG. */
+ afdo_calculate_branch_prob (&annotated_bb);
+ }
}
update_max_bb_count ();
profile_status_for_fn (cfun) = PROFILE_READ;
diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c
index ea558b469..4ea490a8a 100644
--- a/gcc/cfghooks.c
+++ b/gcc/cfghooks.c
@@ -526,6 +526,9 @@ split_block_1 (basic_block bb, void *i)
return NULL;
new_bb->count = bb->count;
+ /* Copy the discriminator from the original bb to distinguish among
+ several basic blocks that share a common locus, allowing for
+ more accurate autofdo. */
new_bb->discriminator = bb->discriminator;
if (dom_info_available_p (CDI_DOMINATORS))
@@ -1091,6 +1094,10 @@ duplicate_block (basic_block bb, edge e, basic_block after, copy_bb_data *id)
move_block_after (new_bb, after);
new_bb->flags = (bb->flags & ~BB_DUPLICATED);
+ /* Copy the discriminator from the original bb to distinguish among
+ several basic blocks that share a common locus, allowing for
+ more accurate autofdo. */
+ new_bb->discriminator = bb->discriminator;
FOR_EACH_EDGE (s, ei, bb->succs)
{
/* Since we are creating edges from a new block to successors
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index b1f0881bd..c208070c9 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -4365,6 +4365,27 @@ update_profiling_info (struct cgraph_node *orig_node,
orig_node_count.dump (dump_file);
fprintf (dump_file, "\n");
}
+
+ /* When autofdo uses PMU as the sampling unit, the count of
+ cgraph_node->count cannot be obtained directly and will
+ be zero. Using it for apply_scale would cause the node
+ count to be incorrectly overestimated. So set orig_new_node_count
+ equal to orig_node_count, which is the same as the known error
+ handling. */
+ if (orig_node->count == profile_count::zero ().afdo ()
+ && new_node->count == profile_count::zero ().global0adjusted ())
+ {
+ orig_new_node_count = (orig_sum + new_sum).apply_scale (12, 10);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, " node %s with zero count from afdo ",
+ new_node->dump_name ());
+ fprintf (dump_file, " proceeding by pretending it was ");
+ orig_new_node_count.dump (dump_file);
+ fprintf (dump_file, "\n");
+ }
+ }
}
remainder = orig_node_count.combine_with_ipa_count (orig_node_count.ipa ()
diff --git a/gcc/opts.c b/gcc/opts.c
index 642327296..7a39f618b 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -2606,7 +2606,10 @@ common_handle_option (struct gcc_options *opts,
/* FALLTHRU */
case OPT_fauto_profile:
enable_fdo_optimizations (opts, opts_set, value);
- SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction, value);
+ /* 2 is special and means flag_profile_correction is turned on by
+ -fauto-profile. */
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction,
+ (value ? 2 : 0));
SET_OPTION_IF_UNSET (opts, opts_set,
param_early_inliner_max_iterations, 10);
break;
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index efde5d158..8405a959c 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -2015,6 +2015,10 @@ copy_bb (copy_body_data *id, basic_block bb,
basic_block_info automatically. */
copy_basic_block = create_basic_block (NULL, (basic_block) prev->aux);
copy_basic_block->count = bb->count.apply_scale (num, den);
+ /* Copy the discriminator from the original bb to distinguish among
+ several basic blocks that share a common locus, allowing for
+ more accurate autofdo. */
+ copy_basic_block->discriminator = bb->discriminator;
copy_gsi = gsi_start_bb (copy_basic_block);
@@ -3028,6 +3032,16 @@ copy_cfg_body (copy_body_data * id,
den += e->count ();
ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = den;
}
+ /* When autofdo uses PMU as the sampling unit, the count of
+ ENTRY_BLOCK_PTR_FOR_FN cannot be obtained directly and will
+ be zero. Using it for adjust_for_ipa_scaling would cause the
+ inlined BB count to be incorrectly overestimated. So set den equal
+ to num, which is the source inline BB count, to avoid the
+ overestimation. */
+ if (den == profile_count::zero ().afdo ())
+ {
+ den = num;
+ }
profile_count::adjust_for_ipa_scaling (&num, &den);
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,151 @@
From 3d20b13bc2e5af8d52e221a33881423e38c3dfdd Mon Sep 17 00:00:00 2001
From: dingguangya <dingguangya1@huawei.com>
Date: Thu, 17 Feb 2022 21:53:31 +0800
Subject: [PATCH 28/28] [AutoPrefetch] Handle the case that the basic block
branch probability is invalid
When the node branch probability value is not initialized,
the branch probability must be set to 0 to ensure that
the calculation of the basic block execution probability
must be less than or equal to 100%.
---
.../gcc.dg/autoprefetch/autoprefetch.exp | 27 +++++++++++++++++++
.../autoprefetch/branch-weighted-prefetch.c | 22 +++++++++++++++
.../autoprefetch/get-edge-prob-non-init.c | 24 +++++++++++++++++
gcc/tree-ssa-loop-prefetch.c | 17 +++++++++++-
4 files changed, 89 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
create mode 100644 gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
create mode 100644 gcc/testsuite/gcc.dg/autoprefetch/get-edge-prob-non-init.c
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp b/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
new file mode 100644
index 000000000..a7408e338
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
@@ -0,0 +1,27 @@
+# Copyright (C) 1997-2022 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+load_lib gcc-dg.exp
+load_lib target-supports.exp
+
+# Initialize `dg'.
+dg-init
+
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
+ "" "-fprefetch-loop-arrays"
+
+# All done.
+dg-finish
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c b/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
new file mode 100644
index 000000000..c63c5e5cb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fprefetch-loop-arrays=2 --param min-insn-to-prefetch-ratio=5 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */
+#define N 10000000
+
+long long a[N];
+
+long long func ()
+{
+ long long i;
+ long long sum = 0;
+
+ for (i = 0; i < N; i+=1) {
+ if (i < 100000)
+ sum += a[i];
+ else
+ continue;
+ }
+
+ return sum;
+}
+/* { dg-final { scan-tree-dump-times "Ahead 40" 1 "aprefetch" } } */
+/* { dg-final { scan-tree-dump-times "builtin_prefetch" 1 "optimized" } } */
\ No newline at end of file
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/get-edge-prob-non-init.c b/gcc/testsuite/gcc.dg/autoprefetch/get-edge-prob-non-init.c
new file mode 100644
index 000000000..f55481008
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/autoprefetch/get-edge-prob-non-init.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -fprefetch-loop-arrays=2 -fdump-tree-aprefetch-details" } */
+
+int a, c, f;
+static int *b = &a;
+int *d;
+int e[0];
+void g() {
+ int h;
+ for (;;) {
+ h = 1;
+ for (; h >= 0; h--) {
+ c = 2;
+ for (; c; c--)
+ if (e[0])
+ if (e[c])
+ *b = 0;
+ f || (*d = 0);
+ }
+ }
+}
+int main() {}
+
+/* { dg-final } */
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
index 3a5aef0fc..673f453a4 100644
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@@ -2132,7 +2132,7 @@ get_edge_prob (edge e)
{
/* Limit the minimum probability value. */
const float MINNUM_PROB = 0.00001f;
- float fvalue = 1;
+ float fvalue = 0;
profile_probability probability = e->probability;
if (probability.initialized_p ())
@@ -2143,6 +2143,21 @@ get_edge_prob (edge e)
fvalue = MINNUM_PROB;
}
}
+ else
+ {
+ /* When the node branch probability value is not initialized, the branch
+ probability must be set to 0 to ensure that the calculation of the
+ basic block execution probability must be less than or equal to 100%.
+ i.e,
+ ...
+ <bb 3> [local count: 20000]
+ if (f_2 != 0)
+ goto <bb 6>; [INV]
+ else
+ goto <bb 7>; [100.00%]
+ ... */
+ fvalue = 0;
+ }
return fvalue;
}
--
2.27.0.windows.1

View File

@ -1,4 +1,4 @@
%global DATE 20220105
%global DATE 20220223
%global gcc_version 10.3.1
%global gcc_major 10.3.1
@ -63,7 +63,7 @@
Summary: Various compilers (C, C++, Objective-C, ...)
Name: gcc
Version: %{gcc_version}
Release: %{DATE}.6
Release: %{DATE}.7
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
URL: https://gcc.gnu.org
@ -142,6 +142,10 @@ Patch21: 0021-mcmodel-Bugfix-for-mcmodel-medium-on-x86.patch
Patch22: 0022-StructReorderFields-Fix-pointer-layer-check-bug.patch
Patch23: 0023-StructReorderFields-Add-pointer-offset-check.patch
Patch24: 0024-StructReorderFields-Add-lto-and-whole-program-gate.patch
Patch25: 0025-AutoPrefetch-Support-cache-misses-profile.patch
Patch26: 0026-AutoFDO-Enable-discriminator-and-MCF-algorithm-on-Au.patch
Patch27: 0027-Autoprefetch-Support-auto-feedback-prefetch.patch
Patch28: 0028-AutoPrefetch-Handle-the-case-that-the-basic-block-br.patch
%global gcc_target_platform %{_arch}-linux-gnu
@ -607,6 +611,10 @@ not stable, so plugins must be rebuilt any time GCC is updated.
%patch22 -p1
%patch23 -p1
%patch24 -p1
%patch25 -p1
%patch26 -p1
%patch27 -p1
%patch28 -p1
%build
@ -2569,6 +2577,12 @@ end
%doc rpm.doc/changelogs/libcc1/ChangeLog*
%changelog
* Wed Feb 23 2022 benniaobufeijiushiji <linda7@huawei.com> - 10.3.1-20220223.7
- Type:Sync
- ID:NA
- SUG:NA
- DESC:Sync patch from openeuler/gcc
* Wed Jan 05 2022 eastb233 <xiezhiheng@huawei.com> - 10.3.1-20220105.6
- Type:SPEC
- ID:NA