!137 [Sync] Sync patch from openeuler/gcc
From: @benniaobufeijiushiji Reviewed-by: @eastb233 Signed-off-by: @eastb233
This commit is contained in:
commit
26e766f11e
669
0025-AutoPrefetch-Support-cache-misses-profile.patch
Normal file
669
0025-AutoPrefetch-Support-cache-misses-profile.patch
Normal file
@ -0,0 +1,669 @@
|
|||||||
|
From 26e4ba63112f55c27b7dd3d5f8c4497ef9a2f459 Mon Sep 17 00:00:00 2001
|
||||||
|
From: benniaobufeijiushiji <linda7@huawei.com>
|
||||||
|
Date: Thu, 6 Jan 2022 15:33:29 +0800
|
||||||
|
Subject: [PATCH 25/28] [AutoPrefetch] Support cache misses profile
|
||||||
|
|
||||||
|
Add pass ex-afdo after pass afdo in auto-profile.c.
|
||||||
|
Add flag -fcache-misses-profile.
|
||||||
|
Read profile of different types of perf events and build maps for
|
||||||
|
function and gimple location to its count of each perf event.
|
||||||
|
Currently, instruction execution and cache misses are supported.
|
||||||
|
---
|
||||||
|
gcc/auto-profile.c | 415 +++++++++++++++++++++++++++++++++++++++++++++
|
||||||
|
gcc/auto-profile.h | 28 +++
|
||||||
|
gcc/common.opt | 14 ++
|
||||||
|
gcc/opts.c | 26 +++
|
||||||
|
gcc/passes.def | 1 +
|
||||||
|
gcc/timevar.def | 1 +
|
||||||
|
gcc/toplev.c | 6 +
|
||||||
|
gcc/tree-pass.h | 2 +
|
||||||
|
8 files changed, 493 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
|
||||||
|
index 7d09887c9..aced8fca5 100644
|
||||||
|
--- a/gcc/auto-profile.c
|
||||||
|
+++ b/gcc/auto-profile.c
|
||||||
|
@@ -49,6 +49,9 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#include "auto-profile.h"
|
||||||
|
#include "tree-pretty-print.h"
|
||||||
|
#include "gimple-pretty-print.h"
|
||||||
|
+#include <map>
|
||||||
|
+#include <vector>
|
||||||
|
+#include <algorithm>
|
||||||
|
|
||||||
|
/* The following routines implements AutoFDO optimization.
|
||||||
|
|
||||||
|
@@ -95,6 +98,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
|
||||||
|
+#define DEFAULT_CACHE_MISSES_PROFILE_FILE "cmsdata.gcov"
|
||||||
|
#define AUTO_PROFILE_VERSION 1
|
||||||
|
|
||||||
|
namespace autofdo
|
||||||
|
@@ -117,6 +121,14 @@ private:
|
||||||
|
bool annotated_;
|
||||||
|
};
|
||||||
|
|
||||||
|
+/* Compare pair <func_decl_uid, count> by count, in descending order. */
|
||||||
|
+static bool
|
||||||
|
+event_count_cmp (std::pair<unsigned, gcov_type> &a,
|
||||||
|
+ std::pair<unsigned, gcov_type> &b)
|
||||||
|
+{
|
||||||
|
+ return a.second > b.second;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Represent a source location: (function_decl, lineno). */
|
||||||
|
typedef std::pair<tree, unsigned> decl_lineno;
|
||||||
|
|
||||||
|
@@ -338,6 +350,206 @@ static autofdo_source_profile *afdo_source_profile;
|
||||||
|
/* gcov_summary structure to store the profile_info. */
|
||||||
|
static gcov_summary *afdo_profile_info;
|
||||||
|
|
||||||
|
+/* Check opts->x_flags and put file name into EVENT_FILES. */
|
||||||
|
+
|
||||||
|
+static bool
|
||||||
|
+get_all_profile_names (const char **event_files)
|
||||||
|
+{
|
||||||
|
+ if (!(flag_auto_profile || flag_cache_misses_profile))
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ event_files[INST_EXEC] = auto_profile_file;
|
||||||
|
+
|
||||||
|
+ if (cache_misses_profile_file == NULL)
|
||||||
|
+ {
|
||||||
|
+ cache_misses_profile_file = DEFAULT_CACHE_MISSES_PROFILE_FILE;
|
||||||
|
+ }
|
||||||
|
+ event_files[CACHE_MISSES] = cache_misses_profile_file;
|
||||||
|
+
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void read_profile (void);
|
||||||
|
+
|
||||||
|
+/* Maintain multiple profile data of different events with event_loc_count_map
|
||||||
|
+ and event_func_count_map. */
|
||||||
|
+
|
||||||
|
+class extend_auto_profile
|
||||||
|
+{
|
||||||
|
+public:
|
||||||
|
+ bool auto_profile_exist (enum event_type type);
|
||||||
|
+ gcov_type get_loc_count (location_t, event_type);
|
||||||
|
+ gcov_type get_func_count (unsigned, event_type);
|
||||||
|
+ struct rank_info get_func_rank (unsigned, enum event_type);
|
||||||
|
+ /* There should be only one instance of class EXTEND_AUTO_PROFILE. */
|
||||||
|
+ static extend_auto_profile *create ()
|
||||||
|
+ {
|
||||||
|
+ extend_auto_profile *map = new extend_auto_profile ();
|
||||||
|
+ if (map->read ())
|
||||||
|
+ {
|
||||||
|
+ return map;
|
||||||
|
+ }
|
||||||
|
+ delete map;
|
||||||
|
+ return NULL;
|
||||||
|
+ }
|
||||||
|
+private:
|
||||||
|
+ /* Basic maps of extend_auto_profile. */
|
||||||
|
+ typedef std::map<location_t, gcov_type> loc_count_map;
|
||||||
|
+ typedef std::map<unsigned, gcov_type> func_count_map;
|
||||||
|
+
|
||||||
|
+ /* Map of function_uid to its descending order rank of counts. */
|
||||||
|
+ typedef std::map<unsigned, unsigned> rank_map;
|
||||||
|
+
|
||||||
|
+ /* Mapping hardware events to corresponding basic maps. */
|
||||||
|
+ typedef std::map<event_type, loc_count_map> event_loc_count_map;
|
||||||
|
+ typedef std::map<event_type, func_count_map> event_func_count_map;
|
||||||
|
+ typedef std::map<event_type, rank_map> event_rank_map;
|
||||||
|
+
|
||||||
|
+ extend_auto_profile () {}
|
||||||
|
+ bool read ();
|
||||||
|
+ void set_loc_count ();
|
||||||
|
+ void process_extend_source_profile ();
|
||||||
|
+ void read_extend_afdo_file (const char*, event_type);
|
||||||
|
+ void rank_all_func ();
|
||||||
|
+ void dump_event ();
|
||||||
|
+ event_loc_count_map event_loc_map;
|
||||||
|
+ event_func_count_map event_func_map;
|
||||||
|
+ event_rank_map func_rank;
|
||||||
|
+ event_type profile_type;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+/* Member functions for extend_auto_profile. */
|
||||||
|
+
|
||||||
|
+bool
|
||||||
|
+extend_auto_profile::auto_profile_exist (enum event_type type)
|
||||||
|
+{
|
||||||
|
+ switch (type)
|
||||||
|
+ {
|
||||||
|
+ case INST_EXEC:
|
||||||
|
+ return event_func_map.count (INST_EXEC) != 0
|
||||||
|
+ || event_loc_map.count (INST_EXEC) != 0;
|
||||||
|
+ case CACHE_MISSES:
|
||||||
|
+ return event_func_map.count (CACHE_MISSES) != 0
|
||||||
|
+ || event_loc_map.count (CACHE_MISSES) != 0;
|
||||||
|
+ default:
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+extend_auto_profile::dump_event ()
|
||||||
|
+{
|
||||||
|
+ if (dump_file)
|
||||||
|
+ {
|
||||||
|
+ switch (profile_type)
|
||||||
|
+ {
|
||||||
|
+ case INST_EXEC:
|
||||||
|
+ fprintf (dump_file, "Processing event instruction execution.\n");
|
||||||
|
+ break;
|
||||||
|
+ case CACHE_MISSES:
|
||||||
|
+ fprintf (dump_file, "Processing event cache misses.\n");
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Return true if any profile data was read. */
|
||||||
|
+
|
||||||
|
+bool
|
||||||
|
+extend_auto_profile::read ()
|
||||||
|
+{
|
||||||
|
+ const char *event_files[EVENT_NUMBER] = {NULL};
|
||||||
|
+ if (!get_all_profile_names (event_files))
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Backup AFDO_STRING_TABLE and AFDO_SOURCE_PROFILE since we will create
|
||||||
|
+ new ones for each event_type. */
|
||||||
|
+ autofdo::string_table *string_table_afdo = afdo_string_table;
|
||||||
|
+ autofdo::autofdo_source_profile *source_profile_afdo = afdo_source_profile;
|
||||||
|
+
|
||||||
|
+ for (unsigned i = 0; i < EVENT_NUMBER; i++)
|
||||||
|
+ {
|
||||||
|
+ if (event_files[i] == NULL)
|
||||||
|
+ {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ profile_type = (enum event_type) i;
|
||||||
|
+ dump_event ();
|
||||||
|
+ gcov_close ();
|
||||||
|
+ auto_profile_file = event_files[i];
|
||||||
|
+ read_profile ();
|
||||||
|
+ gcov_close ();
|
||||||
|
+
|
||||||
|
+ process_extend_source_profile ();
|
||||||
|
+
|
||||||
|
+ delete afdo_source_profile;
|
||||||
|
+ delete afdo_string_table;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Restore AFDO_STRING_TABLE and AFDO_SOURCE_PROFILE. Function
|
||||||
|
+ END_AUTO_PROFILE will free them at the end of compilation. */
|
||||||
|
+ afdo_string_table = string_table_afdo;
|
||||||
|
+ afdo_source_profile = source_profile_afdo;
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Helper functions. */
|
||||||
|
+
|
||||||
|
+gcov_type
|
||||||
|
+extend_auto_profile::get_loc_count (location_t loc, event_type type)
|
||||||
|
+{
|
||||||
|
+ event_loc_count_map::iterator event_iter = event_loc_map.find (type);
|
||||||
|
+ if (event_iter != event_loc_map.end ())
|
||||||
|
+ {
|
||||||
|
+ loc_count_map::iterator loc_iter = event_iter->second.find (loc);
|
||||||
|
+ if (loc_iter != event_iter->second.end ())
|
||||||
|
+ {
|
||||||
|
+ return loc_iter->second;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+struct rank_info
|
||||||
|
+extend_auto_profile::get_func_rank (unsigned decl_uid, enum event_type type)
|
||||||
|
+{
|
||||||
|
+ struct rank_info info = {0, 0};
|
||||||
|
+ event_rank_map::iterator event_iter = func_rank.find (type);
|
||||||
|
+ if (event_iter != func_rank.end ())
|
||||||
|
+ {
|
||||||
|
+ rank_map::iterator func_iter = event_iter->second.find (decl_uid);
|
||||||
|
+ if (func_iter != event_iter->second.end ())
|
||||||
|
+ {
|
||||||
|
+ info.rank = func_iter->second;
|
||||||
|
+ info.total = event_iter->second.size ();
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ return info;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+gcov_type
|
||||||
|
+extend_auto_profile::get_func_count (unsigned decl_uid, event_type type)
|
||||||
|
+{
|
||||||
|
+ event_func_count_map::iterator event_iter = event_func_map.find (type);
|
||||||
|
+ if (event_iter != event_func_map.end ())
|
||||||
|
+ {
|
||||||
|
+ func_count_map::iterator func_iter = event_iter->second.find (decl_uid);
|
||||||
|
+ if (func_iter != event_iter->second.end ())
|
||||||
|
+ {
|
||||||
|
+ return func_iter->second;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static extend_auto_profile *extend_profile;
|
||||||
|
+
|
||||||
|
/* Helper functions. */
|
||||||
|
|
||||||
|
/* Return the original name of NAME: strip the suffix that starts
|
||||||
|
@@ -1654,6 +1866,131 @@ auto_profile (void)
|
||||||
|
|
||||||
|
return TODO_rebuild_cgraph_edges;
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+extend_auto_profile::rank_all_func ()
|
||||||
|
+{
|
||||||
|
+ std::vector<std::pair<unsigned, gcov_type> > func_sorted;
|
||||||
|
+ event_func_count_map::iterator event_iter
|
||||||
|
+ = event_func_map.find (profile_type);
|
||||||
|
+ if (event_iter != event_func_map.end ())
|
||||||
|
+ {
|
||||||
|
+ func_count_map::iterator func_iter;
|
||||||
|
+ for (func_iter = event_iter->second.begin ();
|
||||||
|
+ func_iter != event_iter->second.end (); func_iter++)
|
||||||
|
+ {
|
||||||
|
+ func_sorted.push_back (std::make_pair (func_iter->first,
|
||||||
|
+ func_iter->second));
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ std::sort (func_sorted.begin (), func_sorted.end (), event_count_cmp);
|
||||||
|
+
|
||||||
|
+ for (unsigned i = 0; i < func_sorted.size (); ++i)
|
||||||
|
+ {
|
||||||
|
+ func_rank[profile_type][func_sorted[i].first] = i + 1;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Iterate stmts in cfun and add their counts to EVENT_LOC_MAP. */
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+extend_auto_profile::set_loc_count ()
|
||||||
|
+{
|
||||||
|
+ basic_block bb;
|
||||||
|
+ FOR_EACH_BB_FN (bb, cfun)
|
||||||
|
+ {
|
||||||
|
+ gimple_stmt_iterator gsi;
|
||||||
|
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
||||||
|
+ {
|
||||||
|
+ count_info info;
|
||||||
|
+ gimple *stmt = gsi_stmt (gsi);
|
||||||
|
+ if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
|
||||||
|
+ {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ if (afdo_source_profile->get_count_info (stmt, &info))
|
||||||
|
+ {
|
||||||
|
+ location_t loc = gimple_location (stmt);
|
||||||
|
+ event_loc_map[profile_type][loc] += info.count;
|
||||||
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||||
|
+ {
|
||||||
|
+ fprintf (dump_file, "stmt ");
|
||||||
|
+ print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
|
||||||
|
+ fprintf (dump_file, "counts %ld\n",
|
||||||
|
+ event_loc_map[profile_type][loc]);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Process data in extend_auto_source_profile, save them into two maps.
|
||||||
|
+ 1. gimple_location to count.
|
||||||
|
+ 2. function decl_uid to count. */
|
||||||
|
+void
|
||||||
|
+extend_auto_profile::process_extend_source_profile ()
|
||||||
|
+{
|
||||||
|
+ struct cgraph_node *node;
|
||||||
|
+ if (symtab->state == FINISHED)
|
||||||
|
+ {
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+ FOR_EACH_FUNCTION (node)
|
||||||
|
+ {
|
||||||
|
+ if (!gimple_has_body_p (node->decl) || node->inlined_to)
|
||||||
|
+ {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Don't profile functions produced for builtin stuff. */
|
||||||
|
+ if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
|
||||||
|
+ {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ function *fn = DECL_STRUCT_FUNCTION (node->decl);
|
||||||
|
+ push_cfun (fn);
|
||||||
|
+
|
||||||
|
+ const function_instance *s
|
||||||
|
+ = afdo_source_profile->get_function_instance_by_decl (
|
||||||
|
+ current_function_decl);
|
||||||
|
+
|
||||||
|
+ if (s == NULL)
|
||||||
|
+ {
|
||||||
|
+ pop_cfun ();
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ unsigned int decl_uid = DECL_UID (current_function_decl);
|
||||||
|
+ gcov_type count = s->total_count ();
|
||||||
|
+ if (dump_file)
|
||||||
|
+ {
|
||||||
|
+ fprintf (dump_file, "Extend auto-profile for function %s.\n",
|
||||||
|
+ node->dump_name ());
|
||||||
|
+ }
|
||||||
|
+ event_func_map[profile_type][decl_uid] += count;
|
||||||
|
+ set_loc_count ();
|
||||||
|
+ pop_cfun ();
|
||||||
|
+ }
|
||||||
|
+ rank_all_func ();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Main entry of extend_auto_profile. */
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+extend_source_profile ()
|
||||||
|
+{
|
||||||
|
+ extend_profile = autofdo::extend_auto_profile::create ();
|
||||||
|
+ if (dump_file)
|
||||||
|
+ {
|
||||||
|
+ if (extend_profile == NULL)
|
||||||
|
+ {
|
||||||
|
+ fprintf (dump_file, "No profile file is found.\n");
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+ fprintf (dump_file, "Extend profile info generated.\n");
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
} /* namespace autofdo. */
|
||||||
|
|
||||||
|
/* Read the profile from the profile data file. */
|
||||||
|
@@ -1682,6 +2019,42 @@ end_auto_profile (void)
|
||||||
|
profile_info = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Extern function to get profile info in other passes. */
|
||||||
|
+
|
||||||
|
+bool
|
||||||
|
+profile_exist (enum event_type type)
|
||||||
|
+{
|
||||||
|
+ return autofdo::extend_profile != NULL
|
||||||
|
+ && autofdo::extend_profile->auto_profile_exist (type);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+gcov_type
|
||||||
|
+event_get_loc_count (location_t loc, event_type type)
|
||||||
|
+{
|
||||||
|
+ return autofdo::extend_profile->get_loc_count (loc, type);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+gcov_type
|
||||||
|
+event_get_func_count (unsigned decl_uid, event_type type)
|
||||||
|
+{
|
||||||
|
+ return autofdo::extend_profile->get_func_count (decl_uid, type);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+struct rank_info
|
||||||
|
+event_get_func_rank (unsigned decl_uid, enum event_type type)
|
||||||
|
+{
|
||||||
|
+ return autofdo::extend_profile->get_func_rank (decl_uid, type);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+free_extend_profile_info ()
|
||||||
|
+{
|
||||||
|
+ if (autofdo::extend_profile != NULL)
|
||||||
|
+ {
|
||||||
|
+ delete autofdo::extend_profile;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Returns TRUE if EDGE is hot enough to be inlined early. */
|
||||||
|
|
||||||
|
bool
|
||||||
|
@@ -1743,8 +2116,50 @@ public:
|
||||||
|
|
||||||
|
} // anon namespace
|
||||||
|
|
||||||
|
+namespace
|
||||||
|
+{
|
||||||
|
+const pass_data pass_data_ipa_extend_auto_profile =
|
||||||
|
+{
|
||||||
|
+ SIMPLE_IPA_PASS, /* type */
|
||||||
|
+ "ex-afdo", /* name */
|
||||||
|
+ OPTGROUP_NONE, /* optinfo_flags */
|
||||||
|
+ TV_IPA_EXTEND_AUTO_PROFILE, /* tv_id */
|
||||||
|
+ 0, /* properties_required */
|
||||||
|
+ 0, /* properties_provided */
|
||||||
|
+ 0, /* properties_destroyed */
|
||||||
|
+ 0, /* todo_flags_start */
|
||||||
|
+ 0, /* todo_flags_finish */
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+class pass_ipa_extend_auto_profile : public simple_ipa_opt_pass
|
||||||
|
+{
|
||||||
|
+public:
|
||||||
|
+ pass_ipa_extend_auto_profile (gcc::context *ctxt)
|
||||||
|
+ : simple_ipa_opt_pass (pass_data_ipa_extend_auto_profile, ctxt)
|
||||||
|
+ {}
|
||||||
|
+
|
||||||
|
+ /* opt_pass methods: */
|
||||||
|
+ virtual bool gate (function *) {return (flag_ipa_extend_auto_profile > 0);}
|
||||||
|
+ virtual unsigned int execute (function *);
|
||||||
|
+
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+unsigned int
|
||||||
|
+pass_ipa_extend_auto_profile::execute (function *fun)
|
||||||
|
+{
|
||||||
|
+ autofdo::extend_source_profile ();
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+} // anon namespace
|
||||||
|
+
|
||||||
|
simple_ipa_opt_pass *
|
||||||
|
make_pass_ipa_auto_profile (gcc::context *ctxt)
|
||||||
|
{
|
||||||
|
return new pass_ipa_auto_profile (ctxt);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+simple_ipa_opt_pass *
|
||||||
|
+make_pass_ipa_extend_auto_profile (gcc::context *ctxt)
|
||||||
|
+{
|
||||||
|
+ return new pass_ipa_extend_auto_profile (ctxt);
|
||||||
|
+}
|
||||||
|
\ No newline at end of file
|
||||||
|
diff --git a/gcc/auto-profile.h b/gcc/auto-profile.h
|
||||||
|
index f5cff091d..230d7e68a 100644
|
||||||
|
--- a/gcc/auto-profile.h
|
||||||
|
+++ b/gcc/auto-profile.h
|
||||||
|
@@ -21,6 +21,13 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#ifndef AUTO_PROFILE_H
|
||||||
|
#define AUTO_PROFILE_H
|
||||||
|
|
||||||
|
+enum event_type
|
||||||
|
+{
|
||||||
|
+ INST_EXEC = 0,
|
||||||
|
+ CACHE_MISSES,
|
||||||
|
+ EVENT_NUMBER
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
/* Read, process, finalize AutoFDO data structures. */
|
||||||
|
extern void read_autofdo_file (void);
|
||||||
|
extern void end_auto_profile (void);
|
||||||
|
@@ -28,4 +35,25 @@ extern void end_auto_profile (void);
|
||||||
|
/* Returns TRUE if EDGE is hot enough to be inlined early. */
|
||||||
|
extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *);
|
||||||
|
|
||||||
|
+/* Check if profile exists before using this profile. */
|
||||||
|
+extern bool profile_exist (enum event_type);
|
||||||
|
+
|
||||||
|
+/* Given func decl_uid or gimple location and event_type, return count.
|
||||||
|
+ Count is 0 if function or gimple is not sampled. */
|
||||||
|
+extern gcov_type event_get_func_count (unsigned, enum event_type);
|
||||||
|
+extern gcov_type event_get_loc_count (location_t, enum event_type);
|
||||||
|
+
|
||||||
|
+struct rank_info
|
||||||
|
+{
|
||||||
|
+ unsigned total;
|
||||||
|
+ unsigned rank;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+/* Given function decl_uid and event type, return rank_info. Rank_info
|
||||||
|
+ is {0, 0} if function was not sampled. */
|
||||||
|
+extern struct rank_info event_get_func_rank (unsigned, enum event_type);
|
||||||
|
+
|
||||||
|
+/* Free memory allocated by autofdo::extend_profile. */
|
||||||
|
+extern void free_extend_profile_info ();
|
||||||
|
+
|
||||||
|
#endif /* AUTO_PROFILE_H */
|
||||||
|
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||||
|
index 73c24f28d..37cbbd8c0 100644
|
||||||
|
--- a/gcc/common.opt
|
||||||
|
+++ b/gcc/common.opt
|
||||||
|
@@ -1074,6 +1074,16 @@ Common Joined RejectNegative Var(auto_profile_file)
|
||||||
|
Use sample profile information for call graph node weights. The profile
|
||||||
|
file is specified in the argument.
|
||||||
|
|
||||||
|
+fcache-misses-profile
|
||||||
|
+Common Report Var(flag_cache_misses_profile)
|
||||||
|
+Use sample profile information for source code cache miss count. The default
|
||||||
|
+profile file is cmsdata.gcov in `pwd`.
|
||||||
|
+
|
||||||
|
+fcache-misses-profile=
|
||||||
|
+Common Joined RejectNegative Var(cache_misses_profile_file)
|
||||||
|
+Use sample profile information for source code cache miss count. The profile
|
||||||
|
+file is specified in the argument.
|
||||||
|
+
|
||||||
|
; -fcheck-bounds causes gcc to generate array bounds checks.
|
||||||
|
; For C, C++ and ObjC: defaults off.
|
||||||
|
; For Java: defaults to on.
|
||||||
|
@@ -1873,6 +1883,10 @@ fipa-struct-reorg
|
||||||
|
Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization
|
||||||
|
Perform structure layout optimizations.
|
||||||
|
|
||||||
|
+fipa-extend-auto-profile
|
||||||
|
+Common Report Var(flag_ipa_extend_auto_profile)
|
||||||
|
+Use sample profile information for source code.
|
||||||
|
+
|
||||||
|
fipa-vrp
|
||||||
|
Common Report Var(flag_ipa_vrp) Optimization
|
||||||
|
Perform IPA Value Range Propagation.
|
||||||
|
diff --git a/gcc/opts.c b/gcc/opts.c
|
||||||
|
index 6924a973a..642327296 100644
|
||||||
|
--- a/gcc/opts.c
|
||||||
|
+++ b/gcc/opts.c
|
||||||
|
@@ -1742,6 +1742,13 @@ enable_fdo_optimizations (struct gcc_options *opts,
|
||||||
|
SET_OPTION_IF_UNSET (opts, opts_set, flag_tree_loop_distribution, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void
|
||||||
|
+set_cache_misses_profile_params (struct gcc_options *opts,
|
||||||
|
+ struct gcc_options *opts_set)
|
||||||
|
+{
|
||||||
|
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_prefetch_loop_arrays, 1);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* -f{,no-}sanitize{,-recover}= suboptions. */
|
||||||
|
const struct sanitizer_opts_s sanitizer_opts[] =
|
||||||
|
{
|
||||||
|
@@ -2604,6 +2611,25 @@ common_handle_option (struct gcc_options *opts,
|
||||||
|
param_early_inliner_max_iterations, 10);
|
||||||
|
break;
|
||||||
|
|
||||||
|
+ case OPT_fipa_extend_auto_profile:
|
||||||
|
+ opts->x_flag_ipa_extend_auto_profile = opts->x_flag_cache_misses_profile
|
||||||
|
+ ? true : value;
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ case OPT_fcache_misses_profile_:
|
||||||
|
+ opts->x_cache_misses_profile_file = xstrdup (arg);
|
||||||
|
+ opts->x_flag_cache_misses_profile = true;
|
||||||
|
+ value = true;
|
||||||
|
+ /* No break here - do -fcache-misses-profile processing. */
|
||||||
|
+ /* FALLTHRU */
|
||||||
|
+ case OPT_fcache_misses_profile:
|
||||||
|
+ opts->x_flag_ipa_extend_auto_profile = value;
|
||||||
|
+ if (value)
|
||||||
|
+ {
|
||||||
|
+ set_cache_misses_profile_params (opts, opts_set);
|
||||||
|
+ }
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
case OPT_fprofile_generate_:
|
||||||
|
opts->x_profile_data_prefix = xstrdup (arg);
|
||||||
|
value = true;
|
||||||
|
diff --git a/gcc/passes.def b/gcc/passes.def
|
||||||
|
index 63303ab65..e9c91d26e 100644
|
||||||
|
--- a/gcc/passes.def
|
||||||
|
+++ b/gcc/passes.def
|
||||||
|
@@ -133,6 +133,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
|
||||||
|
NEXT_PASS (pass_target_clone);
|
||||||
|
NEXT_PASS (pass_ipa_auto_profile);
|
||||||
|
+ NEXT_PASS (pass_ipa_extend_auto_profile);
|
||||||
|
NEXT_PASS (pass_ipa_tree_profile);
|
||||||
|
PUSH_INSERT_PASSES_WITHIN (pass_ipa_tree_profile)
|
||||||
|
NEXT_PASS (pass_feedback_split_functions);
|
||||||
|
diff --git a/gcc/timevar.def b/gcc/timevar.def
|
||||||
|
index ee25eccbb..e873747a8 100644
|
||||||
|
--- a/gcc/timevar.def
|
||||||
|
+++ b/gcc/timevar.def
|
||||||
|
@@ -82,6 +82,7 @@ DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting")
|
||||||
|
DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats")
|
||||||
|
DEFTIMEVAR (TV_IPA_REORDER_FIELDS , "ipa struct reorder fields optimization")
|
||||||
|
DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization")
|
||||||
|
+DEFTIMEVAR (TV_IPA_EXTEND_AUTO_PROFILE, "ipa extend auto profile")
|
||||||
|
DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations")
|
||||||
|
DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream decompression")
|
||||||
|
DEFTIMEVAR (TV_IPA_LTO_COMPRESS , "lto stream compression")
|
||||||
|
diff --git a/gcc/toplev.c b/gcc/toplev.c
|
||||||
|
index eaed6f6c7..51e6bd400 100644
|
||||||
|
--- a/gcc/toplev.c
|
||||||
|
+++ b/gcc/toplev.c
|
||||||
|
@@ -577,6 +577,12 @@ compile_file (void)
|
||||||
|
targetm.asm_out.output_ident (ident_str);
|
||||||
|
}
|
||||||
|
|
||||||
|
+ /* Extend auto profile finalization. */
|
||||||
|
+ if (flag_ipa_extend_auto_profile)
|
||||||
|
+ {
|
||||||
|
+ free_extend_profile_info ();
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
/* Auto profile finalization. */
|
||||||
|
if (flag_auto_profile)
|
||||||
|
end_auto_profile ();
|
||||||
|
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
|
||||||
|
index eb32c5d44..be6387768 100644
|
||||||
|
--- a/gcc/tree-pass.h
|
||||||
|
+++ b/gcc/tree-pass.h
|
||||||
|
@@ -511,6 +511,8 @@ extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt);
|
||||||
|
extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt);
|
||||||
|
extern simple_ipa_opt_pass *make_pass_ipa_reorder_fields (gcc::context *ctxt);
|
||||||
|
extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt);
|
||||||
|
+extern simple_ipa_opt_pass *make_pass_ipa_extend_auto_profile (gcc::context
|
||||||
|
+ *ctxt);
|
||||||
|
extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt);
|
||||||
|
extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt);
|
||||||
|
extern simple_ipa_opt_pass *make_pass_target_clone (gcc::context *ctxt);
|
||||||
|
--
|
||||||
|
2.27.0.windows.1
|
||||||
|
|
||||||
353
0026-AutoFDO-Enable-discriminator-and-MCF-algorithm-on-Au.patch
Normal file
353
0026-AutoFDO-Enable-discriminator-and-MCF-algorithm-on-Au.patch
Normal file
@ -0,0 +1,353 @@
|
|||||||
|
From eb58d920a95696d8d5a7db9a6d640d4494fb023f Mon Sep 17 00:00:00 2001
|
||||||
|
From: liyancheng <412998149@qq.com>
|
||||||
|
Date: Tue, 25 Jan 2022 16:57:28 +0800
|
||||||
|
Subject: [PATCH 26/28] [AutoFDO] Enable discriminator and MCF algorithm on
|
||||||
|
AutoFDO
|
||||||
|
|
||||||
|
1. Support discriminators to distinguish among several
|
||||||
|
basic blocks that share a common locus, allowing for
|
||||||
|
more accurate autofdo.
|
||||||
|
|
||||||
|
2. Use option -fprofile-correction to call the MCF algorithm
|
||||||
|
to smooth non conservative BB counts.
|
||||||
|
---
|
||||||
|
gcc/auto-profile.c | 172 ++++++++++++++++++++++++++++++++++++++++++++-
|
||||||
|
gcc/cfghooks.c | 7 ++
|
||||||
|
gcc/ipa-cp.c | 21 ++++++
|
||||||
|
gcc/opts.c | 5 +-
|
||||||
|
gcc/tree-inline.c | 14 ++++
|
||||||
|
5 files changed, 215 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c
|
||||||
|
index aced8fca5..e6164b91b 100644
|
||||||
|
--- a/gcc/auto-profile.c
|
||||||
|
+++ b/gcc/auto-profile.c
|
||||||
|
@@ -678,6 +678,17 @@ string_table::get_index (const char *name) const
|
||||||
|
if (name == NULL)
|
||||||
|
return -1;
|
||||||
|
string_index_map::const_iterator iter = map_.find (name);
|
||||||
|
+ /* Function names may be duplicated. Try to distinguish them by the
|
||||||
|
+ #file_name#function_name defined by the autofdo tool chain. */
|
||||||
|
+ if (iter == map_.end ())
|
||||||
|
+ {
|
||||||
|
+ char* file_name = get_original_name (lbasename (dump_base_name));
|
||||||
|
+ char* file_func_name
|
||||||
|
+ = concat ("#", file_name, "#", name, NULL);
|
||||||
|
+ iter = map_.find (file_func_name);
|
||||||
|
+ free (file_name);
|
||||||
|
+ free (file_func_name);
|
||||||
|
+ }
|
||||||
|
if (iter == map_.end ())
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
@@ -866,7 +877,7 @@ function_instance::read_function_instance (function_instance_stack *stack,
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < num_pos_counts; i++)
|
||||||
|
{
|
||||||
|
- unsigned offset = gcov_read_unsigned () & 0xffff0000;
|
||||||
|
+ unsigned offset = gcov_read_unsigned ();
|
||||||
|
unsigned num_targets = gcov_read_unsigned ();
|
||||||
|
gcov_type count = gcov_read_counter ();
|
||||||
|
s->pos_counts[offset].count = count;
|
||||||
|
@@ -945,6 +956,10 @@ autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
|
||||||
|
function_instance *s = get_function_instance_by_inline_stack (stack);
|
||||||
|
if (s == NULL)
|
||||||
|
return false;
|
||||||
|
+ if (s->get_count_info (stack[0].second + stmt->bb->discriminator, info))
|
||||||
|
+ {
|
||||||
|
+ return true;
|
||||||
|
+ }
|
||||||
|
return s->get_count_info (stack[0].second, info);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1583,6 +1598,68 @@ afdo_propagate (bb_set *annotated_bb)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Handle the following case, in which branch probability
|
||||||
|
+ inversion occurs in function afdo_propagate (). E.g.
|
||||||
|
+ BB_NUM (sample count)
|
||||||
|
+ BB1 (1000)
|
||||||
|
+ / \
|
||||||
|
+ BB2 (10) BB3 (0)
|
||||||
|
+ \ /
|
||||||
|
+ BB4
|
||||||
|
+ In afdo_propagate(), count of BB3 is calculated by
|
||||||
|
+ COUNT (BB3) = 990 (990 = COUNT (BB1) - COUNT (BB2) = 1000 - 10)
|
||||||
|
+
|
||||||
|
+ In fact, BB3 may be colder than BB2 by sample count.
|
||||||
|
+
|
||||||
|
+ This function allocates source BB count to each succ BB by sample
|
||||||
|
+ rate, E.g.
|
||||||
|
+ BB2_COUNT = BB1_COUNT * (BB2_COUNT / (BB2_COUNT + BB3_COUNT)) */
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+afdo_preprocess_bb_count ()
|
||||||
|
+{
|
||||||
|
+ basic_block bb;
|
||||||
|
+ FOR_ALL_BB_FN (bb, cfun)
|
||||||
|
+ {
|
||||||
|
+ if (bb->count.ipa_p () && EDGE_COUNT (bb->succs) > 1
|
||||||
|
+ && bb->count > profile_count::zero ().afdo ())
|
||||||
|
+ {
|
||||||
|
+ basic_block bb1 = EDGE_SUCC (bb, 0)->dest;
|
||||||
|
+ basic_block bb2 = EDGE_SUCC (bb, 1)->dest;
|
||||||
|
+ if (single_succ_p (bb1) && single_succ_p (bb2)
|
||||||
|
+ && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
|
||||||
|
+ {
|
||||||
|
+ gcov_type max_count = 0;
|
||||||
|
+ gcov_type total_count = 0;
|
||||||
|
+ edge e;
|
||||||
|
+ edge_iterator ei;
|
||||||
|
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||||||
|
+ {
|
||||||
|
+ if (!e->dest->count.ipa_p ())
|
||||||
|
+ {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ max_count = MAX(max_count, e->dest->count.to_gcov_type ());
|
||||||
|
+ total_count += e->dest->count.to_gcov_type ();
|
||||||
|
+ }
|
||||||
|
+ /* Only when bb_count > max_count * 2 can the branch
|
||||||
|
+ probability be inverted. */
|
||||||
|
+ if (max_count > 0
|
||||||
|
+ && bb->count.to_gcov_type () > max_count * 2)
|
||||||
|
+ {
|
||||||
|
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||||||
|
+ {
|
||||||
|
+ gcov_type target_count = bb->count.to_gcov_type ()
|
||||||
|
+ * e->dest->count.to_gcov_type () / total_count;
|
||||||
|
+ e->dest->count
|
||||||
|
+ = profile_count::from_gcov_type (target_count).afdo ();
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Propagate counts on control flow graph and calculate branch
|
||||||
|
probabilities. */
|
||||||
|
|
||||||
|
@@ -1608,6 +1685,7 @@ afdo_calculate_branch_prob (bb_set *annotated_bb)
|
||||||
|
}
|
||||||
|
|
||||||
|
afdo_find_equiv_class (annotated_bb);
|
||||||
|
+ afdo_preprocess_bb_count ();
|
||||||
|
afdo_propagate (annotated_bb);
|
||||||
|
|
||||||
|
FOR_EACH_BB_FN (bb, cfun)
|
||||||
|
@@ -1711,6 +1789,82 @@ afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Preparation before executing MCF algorithm. */
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+afdo_init_mcf ()
|
||||||
|
+{
|
||||||
|
+ basic_block bb;
|
||||||
|
+ edge e;
|
||||||
|
+ edge_iterator ei;
|
||||||
|
+
|
||||||
|
+ if (dump_file)
|
||||||
|
+ {
|
||||||
|
+ fprintf (dump_file, "\n init calling mcf_smooth_cfg (). \n");
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Step1: when using mcf, BB id must be continuous,
|
||||||
|
+ so we need compact_blocks (). */
|
||||||
|
+ compact_blocks ();
|
||||||
|
+
|
||||||
|
+ /* Step2: allocate memory for MCF input data. */
|
||||||
|
+ bb_gcov_counts.safe_grow_cleared (cfun->cfg->x_last_basic_block);
|
||||||
|
+ edge_gcov_counts = new hash_map<edge, gcov_type>;
|
||||||
|
+
|
||||||
|
+ /* Step3: init MCF input data from cfg. */
|
||||||
|
+ FOR_ALL_BB_FN (bb, cfun)
|
||||||
|
+ {
|
||||||
|
+ /* Init BB count for MCF. */
|
||||||
|
+ bb_gcov_count (bb) = bb->count.to_gcov_type ();
|
||||||
|
+
|
||||||
|
+ gcov_type total_count = 0;
|
||||||
|
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||||||
|
+ {
|
||||||
|
+ total_count += e->dest->count.to_gcov_type ();
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* If there are no samples in any successor block, the source
|
||||||
|
+ BB samples are allocated to each edge by static branch probability. */
|
||||||
|
+
|
||||||
|
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||||||
|
+ {
|
||||||
|
+ if (total_count == 0)
|
||||||
|
+ {
|
||||||
|
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
|
||||||
|
+ * e->probability.to_reg_br_prob_base () / REG_BR_PROB_BASE;
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
|
||||||
|
+ * e->dest->count.to_gcov_type () / total_count;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Free the resources used by MCF and reset BB count from MCF result,
|
||||||
|
+ branch probability has been updated in mcf_smooth_cfg (). */
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+afdo_process_after_mcf ()
|
||||||
|
+{
|
||||||
|
+ basic_block bb;
|
||||||
|
+ /* Reset BB count from MCF result. */
|
||||||
|
+ FOR_EACH_BB_FN (bb, cfun)
|
||||||
|
+ {
|
||||||
|
+ if (bb_gcov_count (bb))
|
||||||
|
+ {
|
||||||
|
+ bb->count
|
||||||
|
+ = profile_count::from_gcov_type (bb_gcov_count (bb)).afdo ();
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Clean up MCF resource. */
|
||||||
|
+ bb_gcov_counts.release ();
|
||||||
|
+ delete edge_gcov_counts;
|
||||||
|
+ edge_gcov_counts = NULL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Annotate auto profile to the control flow graph. Do not annotate value
|
||||||
|
profile for stmts in PROMOTED_STMTS. */
|
||||||
|
|
||||||
|
@@ -1762,8 +1916,20 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts)
|
||||||
|
afdo_source_profile->mark_annotated (cfun->function_end_locus);
|
||||||
|
if (max_count > profile_count::zero ())
|
||||||
|
{
|
||||||
|
- /* Calculate, propagate count and probability information on CFG. */
|
||||||
|
- afdo_calculate_branch_prob (&annotated_bb);
|
||||||
|
+ /* 1 means -fprofile-correction is enabled manually, and MCF
|
||||||
|
+ algorithm will be used to calculate count and probability.
|
||||||
|
+ Otherwise, use the default calculate algorithm. */
|
||||||
|
+ if (flag_profile_correction == 1)
|
||||||
|
+ {
|
||||||
|
+ afdo_init_mcf ();
|
||||||
|
+ mcf_smooth_cfg ();
|
||||||
|
+ afdo_process_after_mcf ();
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ /* Calculate, propagate count and probability information on CFG. */
|
||||||
|
+ afdo_calculate_branch_prob (&annotated_bb);
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
update_max_bb_count ();
|
||||||
|
profile_status_for_fn (cfun) = PROFILE_READ;
|
||||||
|
diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c
|
||||||
|
index ea558b469..4ea490a8a 100644
|
||||||
|
--- a/gcc/cfghooks.c
|
||||||
|
+++ b/gcc/cfghooks.c
|
||||||
|
@@ -526,6 +526,9 @@ split_block_1 (basic_block bb, void *i)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
new_bb->count = bb->count;
|
||||||
|
+ /* Copy discriminator from original bb to distinguish among
|
||||||
|
+ several basic blocks that share a common locus, allowing for
|
||||||
|
+ more accurate autofdo. */
|
||||||
|
new_bb->discriminator = bb->discriminator;
|
||||||
|
|
||||||
|
if (dom_info_available_p (CDI_DOMINATORS))
|
||||||
|
@@ -1091,6 +1094,10 @@ duplicate_block (basic_block bb, edge e, basic_block after, copy_bb_data *id)
|
||||||
|
move_block_after (new_bb, after);
|
||||||
|
|
||||||
|
new_bb->flags = (bb->flags & ~BB_DUPLICATED);
|
||||||
|
+ /* Copy discriminator from original bb to distinguish among
|
||||||
|
+ several basic blocks that share a common locus, allowing for
|
||||||
|
+ more accurate autofdo. */
|
||||||
|
+ new_bb->discriminator = bb->discriminator;
|
||||||
|
FOR_EACH_EDGE (s, ei, bb->succs)
|
||||||
|
{
|
||||||
|
/* Since we are creating edges from a new block to successors
|
||||||
|
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
|
||||||
|
index b1f0881bd..c208070c9 100644
|
||||||
|
--- a/gcc/ipa-cp.c
|
||||||
|
+++ b/gcc/ipa-cp.c
|
||||||
|
@@ -4365,6 +4365,27 @@ update_profiling_info (struct cgraph_node *orig_node,
|
||||||
|
orig_node_count.dump (dump_file);
|
||||||
|
fprintf (dump_file, "\n");
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ /* When autofdo uses PMU as the sampling unit, the count of
|
||||||
|
+ cgraph_node->count cannot be obtained directly and will
|
||||||
|
+ be zero. Using it for apply_scale will cause the node
|
||||||
|
+ count to be incorrectly overestimated. So set orig_new_node_count
|
||||||
|
+ equal to orig_node_count, which is same as known error
|
||||||
|
+ handling. */
|
||||||
|
+ if (orig_node->count == profile_count::zero ().afdo ()
|
||||||
|
+ && new_node->count == profile_count::zero ().global0adjusted ())
|
||||||
|
+ {
|
||||||
|
+ orig_new_node_count = (orig_sum + new_sum).apply_scale (12, 10);
|
||||||
|
+
|
||||||
|
+ if (dump_file)
|
||||||
|
+ {
|
||||||
|
+ fprintf (dump_file, " node %s with zero count from afdo ",
|
||||||
|
+ new_node->dump_name ());
|
||||||
|
+ fprintf (dump_file, " proceeding by pretending it was ");
|
||||||
|
+ orig_new_node_count.dump (dump_file);
|
||||||
|
+ fprintf (dump_file, "\n");
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
remainder = orig_node_count.combine_with_ipa_count (orig_node_count.ipa ()
|
||||||
|
diff --git a/gcc/opts.c b/gcc/opts.c
|
||||||
|
index 642327296..7a39f618b 100644
|
||||||
|
--- a/gcc/opts.c
|
||||||
|
+++ b/gcc/opts.c
|
||||||
|
@@ -2606,7 +2606,10 @@ common_handle_option (struct gcc_options *opts,
|
||||||
|
/* FALLTHRU */
|
||||||
|
case OPT_fauto_profile:
|
||||||
|
enable_fdo_optimizations (opts, opts_set, value);
|
||||||
|
- SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction, value);
|
||||||
|
+ /* 2 is special and means flag_profile_correction is turned on by
|
||||||
|
+ -fauto-profile. */
|
||||||
|
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction,
|
||||||
|
+ (value ? 2 : 0));
|
||||||
|
SET_OPTION_IF_UNSET (opts, opts_set,
|
||||||
|
param_early_inliner_max_iterations, 10);
|
||||||
|
break;
|
||||||
|
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
|
||||||
|
index efde5d158..8405a959c 100644
|
||||||
|
--- a/gcc/tree-inline.c
|
||||||
|
+++ b/gcc/tree-inline.c
|
||||||
|
@@ -2015,6 +2015,10 @@ copy_bb (copy_body_data *id, basic_block bb,
|
||||||
|
basic_block_info automatically. */
|
||||||
|
copy_basic_block = create_basic_block (NULL, (basic_block) prev->aux);
|
||||||
|
copy_basic_block->count = bb->count.apply_scale (num, den);
|
||||||
|
+ /* Copy discriminator from original bb to distinguish among
|
||||||
|
+ several basic blocks that share a common locus, allowing for
|
||||||
|
+ more accurate autofdo. */
|
||||||
|
+ copy_basic_block->discriminator = bb->discriminator;
|
||||||
|
|
||||||
|
copy_gsi = gsi_start_bb (copy_basic_block);
|
||||||
|
|
||||||
|
@@ -3028,6 +3032,16 @@ copy_cfg_body (copy_body_data * id,
|
||||||
|
den += e->count ();
|
||||||
|
ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = den;
|
||||||
|
}
|
||||||
|
+ /* When autofdo uses PMU as the sampling unit, the number of
|
||||||
|
+ ENTRY_BLOCK_PTR_FOR_FN cannot be obtained directly and will
|
||||||
|
+ be zero. Using it for adjust_for_ipa_scaling will cause the
|
||||||
|
+ inlined BB count to be incorrectly overestimated. So set den equal
|
||||||
|
+ to num, which is the source inline BB count, to avoid
|
||||||
|
+ overestimation. */
|
||||||
|
+ if (den == profile_count::zero ().afdo ())
|
||||||
|
+ {
|
||||||
|
+ den = num;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
profile_count::adjust_for_ipa_scaling (&num, &den);
|
||||||
|
|
||||||
|
--
|
||||||
|
2.27.0.windows.1
|
||||||
|
|
||||||
1000
0027-Autoprefetch-Support-auto-feedback-prefetch.patch
Normal file
1000
0027-Autoprefetch-Support-auto-feedback-prefetch.patch
Normal file
File diff suppressed because it is too large
Load Diff
151
0028-AutoPrefetch-Handle-the-case-that-the-basic-block-br.patch
Normal file
151
0028-AutoPrefetch-Handle-the-case-that-the-basic-block-br.patch
Normal file
@ -0,0 +1,151 @@
|
|||||||
|
From 3d20b13bc2e5af8d52e221a33881423e38c3dfdd Mon Sep 17 00:00:00 2001
|
||||||
|
From: dingguangya <dingguangya1@huawei.com>
|
||||||
|
Date: Thu, 17 Feb 2022 21:53:31 +0800
|
||||||
|
Subject: [PATCH 28/28] [AutoPrefetch] Handle the case that the basic block
|
||||||
|
branch probability is invalid
|
||||||
|
|
||||||
|
When the node branch probability value is not initialized,
|
||||||
|
the branch probability must be set to 0 to ensure that
|
||||||
|
the calculation of the basic block execution probability
|
||||||
|
must be less than or equal to 100%.
|
||||||
|
---
|
||||||
|
.../gcc.dg/autoprefetch/autoprefetch.exp | 27 +++++++++++++++++++
|
||||||
|
.../autoprefetch/branch-weighted-prefetch.c | 22 +++++++++++++++
|
||||||
|
.../autoprefetch/get-edge-prob-non-init.c | 24 +++++++++++++++++
|
||||||
|
gcc/tree-ssa-loop-prefetch.c | 17 +++++++++++-
|
||||||
|
4 files changed, 89 insertions(+), 1 deletion(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
|
||||||
|
create mode 100644 gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.dg/autoprefetch/get-edge-prob-non-init.c
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp b/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..a7408e338
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
|
||||||
|
@@ -0,0 +1,27 @@
|
||||||
|
+# Copyright (C) 1997-2022 Free Software Foundation, Inc.
|
||||||
|
+
|
||||||
|
+# This program is free software; you can redistribute it and/or modify
|
||||||
|
+# it under the terms of the GNU General Public License as published by
|
||||||
|
+# the Free Software Foundation; either version 3 of the License, or
|
||||||
|
+# (at your option) any later version.
|
||||||
|
+#
|
||||||
|
+# This program is distributed in the hope that it will be useful,
|
||||||
|
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
+# GNU General Public License for more details.
|
||||||
|
+#
|
||||||
|
+# You should have received a copy of the GNU General Public License
|
||||||
|
+# along with GCC; see the file COPYING3. If not see
|
||||||
|
+# <http://www.gnu.org/licenses/>.
|
||||||
|
+
|
||||||
|
+load_lib gcc-dg.exp
|
||||||
|
+load_lib target-supports.exp
|
||||||
|
+
|
||||||
|
+# Initialize `dg'.
|
||||||
|
+dg-init
|
||||||
|
+
|
||||||
|
+gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
|
||||||
|
+ "" "-fprefetch-loop-arrays"
|
||||||
|
+
|
||||||
|
+# All done.
|
||||||
|
+dg-finish
|
||||||
|
\ No newline at end of file
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c b/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..c63c5e5cb
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
|
||||||
|
@@ -0,0 +1,22 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -fprefetch-loop-arrays=2 --param min-insn-to-prefetch-ratio=5 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */
|
||||||
|
+#define N 10000000
|
||||||
|
+
|
||||||
|
+long long a[N];
|
||||||
|
+
|
||||||
|
+long long func ()
|
||||||
|
+{
|
||||||
|
+ long long i;
|
||||||
|
+ long long sum = 0;
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < N; i+=1) {
|
||||||
|
+ if (i < 100000)
|
||||||
|
+ sum += a[i];
|
||||||
|
+ else
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return sum;
|
||||||
|
+}
|
||||||
|
+/* { dg-final { scan-tree-dump-times "Ahead 40" 1 "aprefetch" } } */
|
||||||
|
+/* { dg-final { scan-tree-dump-times "builtin_prefetch" 1 "optimized" } } */
|
||||||
|
\ No newline at end of file
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/get-edge-prob-non-init.c b/gcc/testsuite/gcc.dg/autoprefetch/get-edge-prob-non-init.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..f55481008
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/autoprefetch/get-edge-prob-non-init.c
|
||||||
|
@@ -0,0 +1,24 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-Ofast -fprefetch-loop-arrays=2 -fdump-tree-aprefetch-details" } */
|
||||||
|
+
|
||||||
|
+int a, c, f;
|
||||||
|
+static int *b = &a;
|
||||||
|
+int *d;
|
||||||
|
+int e[0];
|
||||||
|
+void g() {
|
||||||
|
+ int h;
|
||||||
|
+ for (;;) {
|
||||||
|
+ h = 1;
|
||||||
|
+ for (; h >= 0; h--) {
|
||||||
|
+ c = 2;
|
||||||
|
+ for (; c; c--)
|
||||||
|
+ if (e[0])
|
||||||
|
+ if (e[c])
|
||||||
|
+ *b = 0;
|
||||||
|
+ f || (*d = 0);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+int main() {}
|
||||||
|
+
|
||||||
|
+/* { dg-final } */
|
||||||
|
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
|
||||||
|
index 3a5aef0fc..673f453a4 100644
|
||||||
|
--- a/gcc/tree-ssa-loop-prefetch.c
|
||||||
|
+++ b/gcc/tree-ssa-loop-prefetch.c
|
||||||
|
@@ -2132,7 +2132,7 @@ get_edge_prob (edge e)
|
||||||
|
{
|
||||||
|
/* Limit the minimum probability value. */
|
||||||
|
const float MINNUM_PROB = 0.00001f;
|
||||||
|
- float fvalue = 1;
|
||||||
|
+ float fvalue = 0;
|
||||||
|
|
||||||
|
profile_probability probability = e->probability;
|
||||||
|
if (probability.initialized_p ())
|
||||||
|
@@ -2143,6 +2143,21 @@ get_edge_prob (edge e)
|
||||||
|
fvalue = MINNUM_PROB;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ /* When the node branch probability value is not initialized, the branch
|
||||||
|
+ probability must be set to 0 to ensure that the calculation of the
|
||||||
|
+ basic block execution probability must be less than or equal to 100%.
|
||||||
|
+ e.g.,
|
||||||
|
+ ...
|
||||||
|
+ <bb 3> [local count: 20000]
|
||||||
|
+ if (f_2 != 0)
|
||||||
|
+ goto <bb 6>; [INV]
|
||||||
|
+ else
|
||||||
|
+ goto <bb 7>; [100.00%]
|
||||||
|
+ ... */
|
||||||
|
+ fvalue = 0;
|
||||||
|
+ }
|
||||||
|
return fvalue;
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.27.0.windows.1
|
||||||
|
|
||||||
18
gcc.spec
18
gcc.spec
@ -1,4 +1,4 @@
|
|||||||
%global DATE 20220105
|
%global DATE 20220223
|
||||||
|
|
||||||
%global gcc_version 10.3.1
|
%global gcc_version 10.3.1
|
||||||
%global gcc_major 10.3.1
|
%global gcc_major 10.3.1
|
||||||
@ -63,7 +63,7 @@
|
|||||||
Summary: Various compilers (C, C++, Objective-C, ...)
|
Summary: Various compilers (C, C++, Objective-C, ...)
|
||||||
Name: gcc
|
Name: gcc
|
||||||
Version: %{gcc_version}
|
Version: %{gcc_version}
|
||||||
Release: %{DATE}.6
|
Release: %{DATE}.7
|
||||||
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
|
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
|
||||||
URL: https://gcc.gnu.org
|
URL: https://gcc.gnu.org
|
||||||
|
|
||||||
@ -142,6 +142,10 @@ Patch21: 0021-mcmodel-Bugfix-for-mcmodel-medium-on-x86.patch
|
|||||||
Patch22: 0022-StructReorderFields-Fix-pointer-layer-check-bug.patch
|
Patch22: 0022-StructReorderFields-Fix-pointer-layer-check-bug.patch
|
||||||
Patch23: 0023-StructReorderFields-Add-pointer-offset-check.patch
|
Patch23: 0023-StructReorderFields-Add-pointer-offset-check.patch
|
||||||
Patch24: 0024-StructReorderFields-Add-lto-and-whole-program-gate.patch
|
Patch24: 0024-StructReorderFields-Add-lto-and-whole-program-gate.patch
|
||||||
|
Patch25: 0025-AutoPrefetch-Support-cache-misses-profile.patch
|
||||||
|
Patch26: 0026-AutoFDO-Enable-discriminator-and-MCF-algorithm-on-Au.patch
|
||||||
|
Patch27: 0027-Autoprefetch-Support-auto-feedback-prefetch.patch
|
||||||
|
Patch28: 0028-AutoPrefetch-Handle-the-case-that-the-basic-block-br.patch
|
||||||
|
|
||||||
%global gcc_target_platform %{_arch}-linux-gnu
|
%global gcc_target_platform %{_arch}-linux-gnu
|
||||||
|
|
||||||
@ -607,6 +611,10 @@ not stable, so plugins must be rebuilt any time GCC is updated.
|
|||||||
%patch22 -p1
|
%patch22 -p1
|
||||||
%patch23 -p1
|
%patch23 -p1
|
||||||
%patch24 -p1
|
%patch24 -p1
|
||||||
|
%patch25 -p1
|
||||||
|
%patch26 -p1
|
||||||
|
%patch27 -p1
|
||||||
|
%patch28 -p1
|
||||||
|
|
||||||
|
|
||||||
%build
|
%build
|
||||||
@ -2569,6 +2577,12 @@ end
|
|||||||
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Wed Feb 23 2022 benniaobufeijiushiji <linda7@huawei.com> - 10.3.1-20220223.7
|
||||||
|
- Type:Sync
|
||||||
|
- ID:NA
|
||||||
|
- SUG:NA
|
||||||
|
- DESC:Sync patch from openeuler/gcc
|
||||||
|
|
||||||
* Wed Jan 05 2022 eastb233 <xiezhiheng@huawei.com> - 10.3.1-20220105.6
|
* Wed Jan 05 2022 eastb233 <xiezhiheng@huawei.com> - 10.3.1-20220105.6
|
||||||
- Type:SPEC
|
- Type:SPEC
|
||||||
- ID:NA
|
- ID:NA
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user