gcc/0029-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch
benniaobufeijiushiji 635a0d4206 [Sync] Sync patch from openeuler/gcc
Sync patch from openeuler/gcc - 20220302
2022-03-02 14:52:11 +08:00

549 lines
14 KiB
Diff
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From c34a02199b1dfd362e81e78cb90fbd11e02eb93e Mon Sep 17 00:00:00 2001
From: liyancheng <412998149@qq.com>
Date: Mon, 14 Feb 2022 14:34:41 +0800
Subject: [PATCH 29/32] [AutoBOLT] Support saving feedback count info to ELF
segment 1/3
Add flag -fauto-bolt to save the feedback count info from PGO or
AutoFDO to segment .text.fdo. The bolt plugin will read and parse
it into the profile of llvm-bolt.
---
gcc/common.opt | 8 +
gcc/final.c | 400 +++++++++++++++++++++++++++++++++++++++++++++++++
gcc/opts.c | 61 ++++++++
3 files changed, 469 insertions(+)
diff --git a/gcc/common.opt b/gcc/common.opt
index 9488bd90f..5eaa667b3 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2403,6 +2403,14 @@ freorder-functions
Common Report Var(flag_reorder_functions) Optimization
Reorder functions to improve code placement.
+fauto-bolt
+Common Report Var(flag_auto_bolt)
+Generate profile from AutoFDO or PGO and do BOLT optimization after linkage.
+
+fauto-bolt=
+Common Joined RejectNegative
+Specify the feedback data directory required by BOLT-plugin. The default is the current directory.
+
frerun-cse-after-loop
Common Report Var(flag_rerun_cse_after_loop) Optimization
Add a common subexpression elimination pass after loop optimizations.
diff --git a/gcc/final.c b/gcc/final.c
index a3601964a..b9affd3a7 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see
#include "rtl-iter.h"
#include "print-rtl.h"
#include "function-abi.h"
+#include "insn-codes.h"
#ifdef XCOFF_DEBUGGING_INFO
#include "xcoffout.h" /* Needed for external data declarations. */
@@ -4640,6 +4641,399 @@ leaf_renumber_regs_insn (rtx in_rtx)
}
#endif
+
+#define ASM_FDO_SECTION_PREFIX ".text.fdo."
+
+#define ASM_FDO_CALLER_FLAG ".fdo.caller "
+#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size "
+#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind "
+
+#define ASM_FDO_CALLEE_FLAG ".fdo.callee "
+
+/* Return the relative offset address of the start instruction of BB,
+ return -1 if it is empty instruction. */
+
+static int
+get_bb_start_addr (basic_block bb)
+{
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!INSN_P (insn))
+ {
+ continue;
+ }
+
+ int insn_code = recog_memoized (insn);
+
+ /* The instruction NOP in llvm-bolt belongs to the previous
+ BB, so it needs to be skipped. */
+ if (insn_code != CODE_FOR_nop)
+ {
+ return INSN_ADDRESSES (INSN_UID (insn));
+ }
+ }
+ return -1;
+}
+
+/* Return the relative offset address of the end instruction of BB,
+ return -1 if it is empty or call instruction. */
+
+static int
+get_bb_end_addr (basic_block bb)
+{
+ rtx_insn *insn;
+ int num_succs = EDGE_COUNT (bb->succs);
+ FOR_BB_INSNS_REVERSE (bb, insn)
+ {
+ if (!INSN_P (insn))
+ {
+ continue;
+ }
+ /* The jump target of call is not in this function, so
+ it should be excluded. */
+ if (CALL_P (insn))
+ {
+ return -1;
+ }
+ if ((num_succs == 1)
+ || ((num_succs == 2) && any_condjump_p (insn)))
+ {
+ return INSN_ADDRESSES (INSN_UID (insn));
+ }
+ else
+ {
+ return -1;
+ }
+ }
+ return -1;
+}
+
+/* Return the end address of cfun. */
+
+static int
+get_function_end_addr ()
+{
+ rtx_insn *insn = get_last_insn ();
+ for (; insn != get_insns (); insn = PREV_INSN (insn))
+ {
+ if (!INSN_P (insn))
+ {
+ continue;
+ }
+ return INSN_ADDRESSES (INSN_UID (insn));
+ }
+
+ return -1;
+}
+
+/* Return the function profile status string. */
+
+static const char *
+get_function_profile_status ()
+{
+ const char *profile_status[] = {
+ "PROFILE_ABSENT",
+ "PROFILE_GUESSED",
+ "PROFILE_READ",
+ "PROFILE_LAST" /* Last value, used by profile streaming. */
+ };
+
+ return profile_status[profile_status_for_fn (cfun)];
+}
+
+/* Return the count from the feedback data, such as PGO or AFDO. */
+
+inline static gcov_type
+get_fdo_count (profile_count count)
+{
+ return count.quality () >= GUESSED
+ ? count.to_gcov_type () : 0;
+}
+
+/* Return the profile quality string. */
+
+static const char *
+get_fdo_count_quality (profile_count count)
+{
+ const char *profile_quality[] = {
+ "UNINITIALIZED_PROFILE",
+ "GUESSED_LOCAL",
+ "GUESSED_GLOBAL0",
+ "GUESSED_GLOBAL0_ADJUSTED",
+ "GUESSED",
+ "AFDO",
+ "ADJUSTED",
+ "PRECISE"
+ };
+
+ return profile_quality[count.quality ()];
+}
+
+static const char *
+alias_local_functions (const char *fnname)
+{
+ if (TREE_PUBLIC (cfun->decl))
+ {
+ return fnname;
+ }
+
+ return concat (fnname, "/", lbasename (dump_base_name), NULL);
+}
+
+/* Return function bind type string. */
+
+static const char *
+simple_get_function_bind ()
+{
+ const char *function_bind[] = {
+ "GLOBAL",
+ "WEAK",
+ "LOCAL",
+ "UNKNOWN"
+ };
+
+ if (TREE_PUBLIC (cfun->decl))
+ {
+ if (!(DECL_WEAK (cfun->decl)))
+ {
+ return function_bind[0];
+ }
+ else
+ {
+ return function_bind[1];
+ }
+ }
+ else
+ {
+ return function_bind[2];
+ }
+
+ return function_bind[3];
+}
+
+/* Dump the callee functions insn in bb by CALL_P (insn). */
+
+static void
+dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
+{
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (insn && CALL_P (insn))
+ {
+ tree callee = get_call_fndecl (insn);
+
+ if (callee)
+ {
+ fprintf (asm_out_file, "\t.string \"%x\"\n",
+ INSN_ADDRESSES (INSN_UID (insn)));
+
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+ ASM_FDO_CALLEE_FLAG,
+ alias_local_functions (get_fnname_from_decl (callee)));
+
+ fprintf (asm_out_file,
+ "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+ call_count);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "call: %x --> %s\n",
+ INSN_ADDRESSES (INSN_UID (insn)),
+ alias_local_functions
+ (get_fnname_from_decl (callee)));
+ }
+ }
+ }
+ }
+}
+
+/* Dump the edge info into asm. */
+
+static void
+dump_edge_jump_info_to_asm (basic_block bb, gcov_type bb_count)
+{
+ edge e;
+ edge_iterator ei;
+ gcov_type edge_total_count = 0;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ gcov_type edge_count = get_fdo_count (e->count ());
+ edge_total_count += edge_count;
+
+ int edge_start_addr = get_bb_end_addr (e->src);
+ int edge_end_addr = get_bb_start_addr (e->dest);
+
+ if (edge_start_addr == -1 || edge_end_addr == -1)
+ {
+ continue;
+ }
+
+ /* This is a reserved assert for the original design. If this
+ assert is found, use the address of the previous instruction
+ as edge_start_addr. */
+ gcc_assert (edge_start_addr != edge_end_addr);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "edge: %x --> %x = (%ld)\n",
+ edge_start_addr, edge_end_addr, edge_count);
+ }
+
+ if (edge_count > 0)
+ {
+ fprintf (asm_out_file, "\t.string \"%x\"\n", edge_start_addr);
+ fprintf (asm_out_file, "\t.string \"%x\"\n", edge_end_addr);
+ fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+ edge_count);
+ }
+ }
+
+ gcov_type call_count = MAX (edge_total_count, bb_count);
+ if (call_count > 0)
+ {
+ dump_direct_callee_info_to_asm (bb, call_count);
+ }
+}
+
+/* Dump the bb info into asm. */
+
+static void
+dump_bb_info_to_asm (basic_block bb, gcov_type bb_count)
+{
+ int bb_start_addr = get_bb_start_addr (bb);
+ if (bb_start_addr != -1)
+ {
+ fprintf (asm_out_file, "\t.string \"%x\"\n", bb_start_addr);
+ fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+ bb_count);
+ }
+}
+
+/* Dump the function info into asm. */
+
+static void
+dump_function_info_to_asm (const char *fnname)
+{
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+ ASM_FDO_CALLER_FLAG, alias_local_functions (fnname));
+ fprintf (asm_out_file, "\t.string \"%s%d\"\n",
+ ASM_FDO_CALLER_SIZE_FLAG, get_function_end_addr ());
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+ ASM_FDO_CALLER_BIND_FLAG, simple_get_function_bind ());
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n FUNC_NAME: %s\n",
+ alias_local_functions (fnname));
+ fprintf (dump_file, " file: %s\n",
+ dump_base_name);
+ fprintf (dump_file, " profile_status: %s\n",
+ get_function_profile_status ());
+ fprintf (dump_file, " size: %x\n",
+ get_function_end_addr ());
+ fprintf (dump_file, " function_bind: %s\n",
+ simple_get_function_bind ());
+ }
+}
+
+/* Dump function profile info form AutoFDO or PGO to asm. */
+
+static void
+dump_fdo_info_to_asm (const char *fnname)
+{
+ basic_block bb;
+
+ dump_function_info_to_asm (fnname);
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ gcov_type bb_count = get_fdo_count (bb->count);
+ if (bb_count == 0)
+ {
+ continue;
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "BB: %x --> %x = (%ld) [%s]\n",
+ get_bb_start_addr (bb), get_bb_end_addr (bb),
+ bb_count, get_fdo_count_quality (bb->count));
+ }
+
+ if (flag_profile_use)
+ {
+ dump_edge_jump_info_to_asm (bb, bb_count);
+ }
+ else if (flag_auto_profile)
+ {
+ dump_bb_info_to_asm (bb, bb_count);
+ }
+ }
+}
+
+/* When -fauto-bolt option is turned on, the .text.fdo. section
+ will be generated in the *.s file if there is feedback information
+ from PGO or AutoFDO. This section will parserd in BOLT-plugin. */
+
+static void
+dump_profile_to_elf_sections ()
+{
+ if (!flag_function_sections)
+ {
+ error ("-fauto-bolt should work with -ffunction-sections");
+ return;
+ }
+ if (!flag_ipa_ra)
+ {
+ error ("-fauto-bolt should work with -fipa-ra");
+ return;
+ }
+ if (flag_align_jumps)
+ {
+ error ("-fauto-bolt is not supported with -falign-jumps");
+ return;
+ }
+ if (flag_align_labels)
+ {
+ error ("-fauto-bolt is not supported with -falign-labels");
+ return;
+ }
+ if (flag_align_loops)
+ {
+ error ("-fauto-bolt is not supported with -falign-loops");
+ return;
+ }
+
+ /* Return if no feedback data. */
+ if (!flag_profile_use && !flag_auto_profile)
+ {
+ error ("-fauto-bolt should use with -fprofile-use or -fauto-profile");
+ return;
+ }
+
+ /* Avoid empty functions. */
+ if (TREE_CODE (cfun->decl) != FUNCTION_DECL)
+ {
+ return;
+ }
+ int flags = SECTION_DEBUG | SECTION_EXCLUDE;
+ const char *fnname = get_fnname_from_decl (current_function_decl);
+ char *profile_fnname = NULL;
+
+ asprintf (&profile_fnname,"%s%s", ASM_FDO_SECTION_PREFIX, fnname);
+ switch_to_section (get_section (profile_fnname, flags , NULL));
+ dump_fdo_info_to_asm (fnname);
+
+ if (profile_fnname)
+ {
+ free (profile_fnname);
+ profile_fnname = NULL;
+ }
+}
+
/* Turn the RTL into assembly. */
static unsigned int
rest_of_handle_final (void)
@@ -4707,6 +5101,12 @@ rest_of_handle_final (void)
targetm.asm_out.destructor (XEXP (DECL_RTL (current_function_decl), 0),
decl_fini_priority_lookup
(current_function_decl));
+
+ if (flag_auto_bolt)
+ {
+ dump_profile_to_elf_sections ();
+ }
+
return 0;
}
diff --git a/gcc/opts.c b/gcc/opts.c
index f49f5ee58..0b389ae1d 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1166,6 +1166,10 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
if (opts->x_flag_vtable_verify && opts->x_flag_lto)
sorry ("vtable verification is not supported with LTO");
+ /* Currently -fauto-bolt is not supported for LTO. */
+ if (opts->x_flag_auto_bolt && opts->x_flag_lto)
+ sorry ("%<-fauto-bolt%> is not supported with LTO");
+
/* Control IPA optimizations based on different -flive-patching level. */
if (opts->x_flag_live_patching)
control_options_for_live_patching (opts, opts_set,
@@ -1183,6 +1187,58 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
= (opts->x_flag_unroll_loops
|| opts->x_flag_peel_loops
|| opts->x_optimize >= 3);
+
+ if (opts->x_flag_auto_bolt)
+ {
+ /* Record the function section to facilitate the feedback
+ data storage. */
+ if (!opts->x_flag_function_sections)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should work with %<-ffunction-sections%>,"
+ " enabling %<-ffunction-sections%>");
+ opts->x_flag_function_sections = true;
+ }
+
+ /* Cancel the internal alignment of the function. The binary
+ optimizer bolt will cancel the internal alignment optimization
+ of the function, so the alignment is meaningless at this time,
+ and if not, it will bring trouble to the calculation of the
+ offset address of the instruction. */
+ if (opts->x_flag_align_jumps)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should not work with %<-falign-jumps%>,"
+ " disabling %<-falign-jumps%>");
+ opts->x_flag_align_jumps = false;
+ }
+
+ if (opts->x_flag_align_labels)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should not work with %<-falign-labels%>,"
+ " disabling %<-falign-labels%>");
+ opts->x_flag_align_labels = false;
+ }
+
+ if (opts->x_flag_align_loops)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should not work with %<-falign-loops%>,"
+ " disabling %<-falign-loops%>");
+ opts->x_flag_align_loops = false;
+ }
+
+ /* When parsing instructions in RTL phase, we need to know
+ the call information of instructions to avoid being optimized. */
+ if (!opts->x_flag_ipa_ra)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should work with %<-fipa-ra%>,"
+ " enabling %<-fipa-ra%>");
+ opts->x_flag_ipa_ra = true;
+ }
+ }
}
#define LEFT_COLUMN 27
@@ -2881,6 +2937,11 @@ common_handle_option (struct gcc_options *opts,
check_alignment_argument (loc, arg, "functions");
break;
+ case OPT_fauto_bolt_:
+ case OPT_fauto_bolt:
+ /* Deferred. */
+ break;
+
default:
/* If the flag was handled in a standard way, assume the lack of
processing here is intentional. */
--
2.27.0