gcc/0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch
2024-04-11 16:47:29 +08:00

551 lines
14 KiB
Diff
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 72531376df5ed93c2d945469368ba5514eca8407 Mon Sep 17 00:00:00 2001
From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
Date: Tue, 5 Dec 2023 15:33:08 +0800
Subject: [PATCH] [AutoBOLT] Support saving feedback count info to ELF segment
1/3
---
gcc/common.opt | 8 +
gcc/final.cc | 405 ++++++++++++++++++++++++++++++++++++++++++++++++-
gcc/opts.cc | 61 ++++++++
3 files changed, 473 insertions(+), 1 deletion(-)
diff --git a/gcc/common.opt b/gcc/common.opt
index b01df919e..e69947fc2 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2546,6 +2546,14 @@ freorder-functions
Common Var(flag_reorder_functions) Optimization
Reorder functions to improve code placement.
+fauto-bolt
+Common Var(flag_auto_bolt)
+Generate profile from AutoFDO or PGO and do BOLT optimization after linkage.
+
+fauto-bolt=
+Common Joined RejectNegative
+Specify the feedback data directory required by BOLT-plugin. The default is the current directory.
+
frerun-cse-after-loop
Common Var(flag_rerun_cse_after_loop) Optimization
Add a common subexpression elimination pass after loop optimizations.
diff --git a/gcc/final.cc b/gcc/final.cc
index a9868861b..d4c4fa08f 100644
--- a/gcc/final.cc
+++ b/gcc/final.cc
@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see
#include "rtl-iter.h"
#include "print-rtl.h"
#include "function-abi.h"
+#include "insn-codes.h"
#include "common/common-target.h"
#ifdef XCOFF_DEBUGGING_INFO
@@ -4266,7 +4267,403 @@ leaf_renumber_regs_insn (rtx in_rtx)
}
}
#endif
-
+
+#define ASM_FDO_SECTION_PREFIX ".text.fdo."
+
+#define ASM_FDO_CALLER_FLAG ".fdo.caller "
+#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size "
+#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind"
+
+#define ASM_FDO_CALLEE_FLAG ".fdo.callee"
+
+/* Return the relative offset address of the first instruction of BB
+   that is not a NOP.  Return -1 if BB has no such instruction or if
+   a call instruction is met first.  */
+
+static int
+get_bb_start_addr (basic_block bb)
+{
+  rtx_insn *cur;
+  FOR_BB_INSNS (bb, cur)
+    {
+      if (!INSN_P (cur))
+        continue;
+
+      /* The jump target of a call is not in this function, so it
+         should be excluded.  */
+      if (CALL_P (cur))
+        return -1;
+
+      /* llvm-bolt treats a NOP as part of the previous BB, so NOPs
+         are stepped over until another instruction is found.  */
+      if (recog_memoized (cur) != CODE_FOR_nop)
+        return INSN_ADDRESSES (INSN_UID (cur));
+    }
+  return -1;
+}
+
+/* Return the relative offset address of the last instruction of BB.
+   Return -1 if BB holds no instruction, if its last instruction is a
+   call, or if BB has neither exactly one successor nor two
+   successors reached through a conditional jump.  */
+
+static int
+get_bb_end_addr (basic_block bb)
+{
+  int succ_count = EDGE_COUNT (bb->succs);
+  rtx_insn *cur;
+
+  FOR_BB_INSNS_REVERSE (bb, cur)
+    {
+      if (!INSN_P (cur))
+        continue;
+
+      /* The jump target of a call is not in this function, so it
+         should be excluded.  */
+      if (CALL_P (cur))
+        return -1;
+
+      /* Only the last real instruction is inspected; every path
+         below leaves the loop.  */
+      bool usable = (succ_count == 1
+                     || (succ_count == 2 && any_condjump_p (cur)));
+      return usable ? INSN_ADDRESSES (INSN_UID (cur)) : -1;
+    }
+  return -1;
+}
+
+/* Return the address of the last instruction of cfun, or -1 if no
+   instruction is found.  The walk goes backwards from the last insn
+   and stops before the first insn of the chain, which is therefore
+   never examined.  */
+
+static int
+get_function_end_addr ()
+{
+  rtx_insn *cur = get_last_insn ();
+  while (cur != get_insns ())
+    {
+      if (INSN_P (cur))
+        return INSN_ADDRESSES (INSN_UID (cur));
+      cur = PREV_INSN (cur);
+    }
+
+  return -1;
+}
+
+/* Return the profile status of cfun as a string.  */
+
+static const char *
+get_function_profile_status ()
+{
+  /* Indexed by the value returned by profile_status_for_fn.  */
+  static const char *const status_names[] = {
+    "PROFILE_ABSENT",
+    "PROFILE_GUESSED",
+    "PROFILE_READ",
+    "PROFILE_LAST" /* Last value, used by profile streaming.  */
+  };
+
+  return status_names[profile_status_for_fn (cfun)];
+}
+
+/* Return COUNT converted to gcov_type when its quality is at least
+   GUESSED, and 0 otherwise.  Used for counts that come from feedback
+   data such as PGO or AutoFDO.  */
+
+static inline gcov_type
+get_fdo_count (profile_count count)
+{
+  if (count.quality () < GUESSED)
+    return 0;
+  return count.to_gcov_type ();
+}
+
+/* Return the quality of COUNT as a string.  */
+
+static const char *
+get_fdo_count_quality (profile_count count)
+{
+  /* Indexed by the value returned by profile_count::quality.  */
+  static const char *const quality_names[] = {
+    "UNINITIALIZED_PROFILE",
+    "GUESSED_LOCAL",
+    "GUESSED_GLOBAL0",
+    "GUESSED_GLOBAL0_ADJUSTED",
+    "GUESSED",
+    "AFDO",
+    "ADJUSTED",
+    "PRECISE"
+  };
+
+  return quality_names[count.quality ()];
+}
+
+/* Return the name under which FNNAME is recorded.  When cfun is
+   public, FNNAME is returned unchanged; otherwise the result is
+   FNNAME followed by "/" and the base name of dump_base_name, built
+   with concat.
+   NOTE(review): the visibility test looks at cfun->decl rather than
+   at the function FNNAME names, and no caller visible here frees the
+   concat result -- confirm both are intended.  */
+
+static const char *
+alias_local_functions (const char *fnname)
+{
+  return TREE_PUBLIC (cfun->decl)
+         ? fnname
+         : concat (fnname, "/", lbasename (dump_base_name), NULL);
+}
+
+/* Return the bind type of cfun as a string: "LOCAL" when cfun is not
+   public, "WEAK" when it is public and weak, "GLOBAL" otherwise.  */
+
+static const char *
+simple_get_function_bind ()
+{
+  if (!TREE_PUBLIC (cfun->decl))
+    return "LOCAL";
+
+  return DECL_WEAK (cfun->decl) ? "WEAK" : "GLOBAL";
+}
+
+/* Dump the direct callees of BB.  For every call insn in BB whose
+   callee declaration is known, emit three .string records to
+   asm_out_file: the address of the call in hex, the callee name
+   prefixed with ASM_FDO_CALLEE_FLAG, and CALL_COUNT.  The same call
+   is also reported in dump_file when one is open.  */
+
+static void
+dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
+{
+  rtx_insn *insn;
+  FOR_BB_INSNS (bb, insn)
+    {
+      if (!insn || !CALL_P (insn))
+        continue;
+
+      tree callee = get_call_fndecl (insn);
+      if (!callee)
+        continue;
+
+      /* Resolve the address and the recorded name once: the name may
+         be a freshly allocated string, so it is shared between the
+         asm output and the dump output instead of being rebuilt.  */
+      int call_addr = INSN_ADDRESSES (INSN_UID (insn));
+      const char *callee_name
+        = alias_local_functions (get_fnname_from_decl (callee));
+
+      fprintf (asm_out_file, "\t.string \"%x\"\n", call_addr);
+
+      fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+               ASM_FDO_CALLEE_FLAG, callee_name);
+
+      fprintf (asm_out_file,
+               "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+               call_count);
+
+      if (dump_file)
+        fprintf (dump_file, "call: %x --> %s \n", call_addr, callee_name);
+    }
+}
+
+/* Dump the outgoing edges of BB into asm.  For each successor edge
+   whose source and destination addresses are both usable and whose
+   count is positive, emit three .string records to asm_out_file:
+   the source address and the destination address in hex, then the
+   edge count.  Afterwards dump the direct callees of BB, using the
+   larger of BB_COUNT and the sum of the edge counts as call count,
+   when that value is positive.  */
+
+static void
+dump_edge_jump_info_to_asm (basic_block bb, gcov_type bb_count)
+{
+  edge e;
+  edge_iterator ei;
+  gcov_type edge_total_count = 0;
+
+  FOR_EACH_EDGE (e, ei, bb->succs)
+    {
+      gcov_type edge_count = get_fdo_count (e->count ());
+      edge_total_count += edge_count;
+
+      int edge_start_addr = get_bb_end_addr (e->src);
+      int edge_end_addr = get_bb_start_addr (e->dest);
+
+      /* Edges without a usable address still contribute to the
+         total above, but are not emitted.  */
+      if (edge_start_addr == -1 || edge_end_addr == -1)
+        continue;
+
+      /* This is a reserved assert for the original design.  If this
+         assert is found, use the address of the previous instruction
+         as edge_start_addr.  */
+      gcc_assert (edge_start_addr != edge_end_addr);
+
+      /* gcov_type is a 64-bit integer, so it is printed with PRId64
+         rather than %ld.  */
+      if (dump_file)
+        fprintf (dump_file, "edge: %x --> %x = (%" PRId64 ")\n",
+                 edge_start_addr, edge_end_addr, edge_count);
+
+      if (edge_count > 0)
+        {
+          fprintf (asm_out_file, "\t.string \"%x\"\n", edge_start_addr);
+          fprintf (asm_out_file, "\t.string \"%x\"\n", edge_end_addr);
+          fprintf (asm_out_file,
+                   "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+                   edge_count);
+        }
+    }
+
+  gcov_type call_count = MAX (edge_total_count, bb_count);
+  if (call_count > 0)
+    dump_direct_callee_info_to_asm (bb, call_count);
+}
+
+/* Dump BB into asm: emit the start address of BB in hex followed by
+   BB_COUNT, each as a .string record.  Nothing is emitted when BB
+   has no usable start address.  */
+
+static void
+dump_bb_info_to_asm (basic_block bb, gcov_type bb_count)
+{
+  int start_addr = get_bb_start_addr (bb);
+  if (start_addr == -1)
+    return;
+
+  fprintf (asm_out_file, "\t.string \"%x\"\n", start_addr);
+  fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+           bb_count);
+}
+
+/* Dump the function info into asm.  Emit three .string records to
+   asm_out_file: the recorded name of FNNAME prefixed with
+   ASM_FDO_CALLER_FLAG, the function end address prefixed with
+   ASM_FDO_CALLER_SIZE_FLAG, and the bind type prefixed with
+   ASM_FDO_CALLER_BIND_FLAG.  The same data, plus the source file and
+   the profile status, is written to dump_file when one is open.  */
+
+static void
+dump_function_info_to_asm (const char *fnname)
+{
+  /* Computed once and shared by both outputs; the recorded name may
+     be a freshly allocated string.  */
+  const char *recorded_name = alias_local_functions (fnname);
+  int end_addr = get_function_end_addr ();
+  const char *bind = simple_get_function_bind ();
+
+  fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+           ASM_FDO_CALLER_FLAG, recorded_name);
+  fprintf (asm_out_file, "\t.string \"%s%d\"\n",
+           ASM_FDO_CALLER_SIZE_FLAG, end_addr);
+  fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+           ASM_FDO_CALLER_BIND_FLAG, bind);
+
+  if (dump_file)
+    {
+      fprintf (dump_file, "\n FUNC_NAME: %s\n", recorded_name);
+      fprintf (dump_file, " file: %s\n", dump_base_name);
+      fprintf (dump_file, "profile_status: %s\n",
+               get_function_profile_status ());
+      fprintf (dump_file, " size: %x\n", end_addr);
+      fprintf (dump_file, " function_bind: %s\n", bind);
+    }
+}
+
+/* Dump the profile of cfun, coming from PGO or AutoFDO, into asm.
+   The function record for FNNAME is emitted first.  Then, for every
+   basic block with a non-zero feedback count, edge and callee
+   records are emitted under -fprofile-use, or a basic block record
+   under -fauto-profile.  */
+
+static void
+dump_fdo_info_to_asm (const char *fnname)
+{
+  basic_block bb;
+
+  dump_function_info_to_asm (fnname);
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      gcov_type bb_count = get_fdo_count (bb->count);
+      if (bb_count == 0)
+        continue;
+
+      /* gcov_type is a 64-bit integer, so it is printed with PRId64
+         rather than %ld.  */
+      if (dump_file)
+        fprintf (dump_file, "BB: %x --> %x = (%" PRId64 ") [%s]\n",
+                 get_bb_start_addr (bb), get_bb_end_addr (bb),
+                 bb_count, get_fdo_count_quality (bb->count));
+
+      if (flag_profile_use)
+        dump_edge_jump_info_to_asm (bb, bb_count);
+      else if (flag_auto_profile)
+        dump_bb_info_to_asm (bb, bb_count);
+    }
+}
+
+/* When the -fauto-bolt option is turned on, a .text.fdo.<function>
+   section is generated in the *.s file if there is feedback
+   information from PGO or AutoFDO.  This section will be parsed by
+   the BOLT-plugin.
+
+   An error is reported, and nothing is emitted, when the option
+   combination is unsupported: -ffunction-sections and -fipa-ra must
+   be on, -falign-jumps, -falign-labels and -falign-loops must be
+   off, and one of -fprofile-use or -fauto-profile must be given.  */
+
+static void
+dump_profile_to_elf_sections ()
+{
+  if (!flag_function_sections)
+    {
+      error ("-fauto-bolt should work with -ffunction-sections");
+      return;
+    }
+  if (!flag_ipa_ra)
+    {
+      error ("-fauto-bolt should work with -fipa-ra");
+      return;
+    }
+  if (flag_align_jumps)
+    {
+      error ("-fauto-bolt is not supported with -falign-jumps");
+      return;
+    }
+  if (flag_align_labels)
+    {
+      error ("-fauto-bolt is not supported with -falign-labels");
+      return;
+    }
+  if (flag_align_loops)
+    {
+      error ("-fauto-bolt is not supported with -falign-loops");
+      return;
+    }
+
+  /* Return if no feedback data.  */
+  if (!flag_profile_use && !flag_auto_profile)
+    {
+      error ("-fauto-bolt should use with -fprofile-use or -fauto-profile");
+      return;
+    }
+
+  /* Avoid empty functions.  */
+  if (TREE_CODE (cfun->decl) != FUNCTION_DECL)
+    return;
+
+  int flags = SECTION_DEBUG | SECTION_EXCLUDE;
+  const char *fnname = get_fnname_from_decl (current_function_decl);
+
+  /* Build the section name with concat, which allocates through
+     xmalloc, so there is no failure result to check before use.  */
+  char *profile_fnname = concat (ASM_FDO_SECTION_PREFIX, fnname, NULL);
+
+  switch_to_section (get_section (profile_fnname, flags, NULL));
+  dump_fdo_info_to_asm (fnname);
+
+  free (profile_fnname);
+}
+
/* Turn the RTL into assembly. */
static unsigned int
rest_of_handle_final (void)
@@ -4334,6 +4731,12 @@ rest_of_handle_final (void)
targetm.asm_out.destructor (XEXP (DECL_RTL (current_function_decl), 0),
decl_fini_priority_lookup
(current_function_decl));
+
+ if (flag_auto_bolt)
+ {
+ dump_profile_to_elf_sections ();
+ }
+
return 0;
}
diff --git a/gcc/opts.cc b/gcc/opts.cc
index b868d189e..6d57e7d69 100644
--- a/gcc/opts.cc
+++ b/gcc/opts.cc
@@ -1279,6 +1279,10 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
if (opts->x_flag_vtable_verify && opts->x_flag_lto)
sorry ("vtable verification is not supported with LTO");
+ /* Currently -fauto-bolt is not supported for LTO. */
+ if (opts->x_flag_auto_bolt && opts->x_flag_lto)
+ sorry ("%<-fauto-bolt%> is not supported with LTO");
+
/* Control IPA optimizations based on different -flive-patching level. */
if (opts->x_flag_live_patching)
control_options_for_live_patching (opts, opts_set,
@@ -1291,6 +1295,58 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
= (opts->x_flag_unroll_loops
|| opts->x_flag_peel_loops
|| opts->x_optimize >= 3);
+
+ if (opts->x_flag_auto_bolt)
+ {
+ /* Record the function section to facilitate the feedback
+ data storage. */
+ if (!opts->x_flag_function_sections)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should work with %<-ffunction-sections%>,"
+ " enabling %<-ffunction-sections%>");
+ opts->x_flag_function_sections = true;
+ }
+
+ /* Cancel the internal alignment of the function. The binary
+ optimizer bolt will cancel the internal alignment optimization
+ of the function, so the alignment is meaningless at this time,
+ and if not, it will bring trouble to the calculation of the
+ offset address of the instruction. */
+ if (opts->x_flag_align_jumps)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should not work with %<-falign-jumps%>,"
+ " disabling %<-falign-jumps%>");
+ opts->x_flag_align_jumps = false;
+ }
+
+ if (opts->x_flag_align_labels)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should not work with %<-falign-labels%>,"
+ " disabling %<-falign-labels%>");
+ opts->x_flag_align_labels = false;
+ }
+
+ if (opts->x_flag_align_loops)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should not work with %<-falign-loops%>,"
+ " disabling %<-falign-loops%>");
+ opts->x_flag_align_loops = false;
+ }
+
+ /* When parsing instructions in RTL phase, we need to know
+ the call information of instructions to avoid being optimized. */
+ if (!opts->x_flag_ipa_ra)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should work with %<-fipa-ra%>,"
+ " enabling %<-fipa-ra%>");
+ opts->x_flag_ipa_ra = true;
+ }
+ }
/* With -fcx-limited-range, we do cheap and quick complex arithmetic. */
if (opts->x_flag_cx_limited_range)
@@ -3226,6 +3282,11 @@ common_handle_option (struct gcc_options *opts,
&opts->x_flag_align_functions,
&opts->x_str_align_functions);
break;
+
+ case OPT_fauto_bolt_:
+ case OPT_fauto_bolt:
+ /* Deferred. */
+ break;
case OPT_ftabstop_:
/* It is documented that we silently ignore silly values. */
--
2.33.0