!407 [SYNC]Sync patch from openeuler/gcc
From: @zhenyu--zhao_admin Reviewed-by: @xiongzhou4 Signed-off-by: @xiongzhou4
This commit is contained in:
commit
5db544f251
1191
0029-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch
Normal file
1191
0029-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch
Normal file
File diff suppressed because it is too large
Load Diff
1232
0030-Struct-Reorg-Add-unsafe-structure-pointer-compressio.patch
Normal file
1232
0030-Struct-Reorg-Add-unsafe-structure-pointer-compressio.patch
Normal file
File diff suppressed because it is too large
Load Diff
550
0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch
Normal file
550
0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch
Normal file
@ -0,0 +1,550 @@
|
|||||||
|
From 72531376df5ed93c2d945469368ba5514eca8407 Mon Sep 17 00:00:00 2001
|
||||||
|
From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
|
||||||
|
Date: Tue, 5 Dec 2023 15:33:08 +0800
|
||||||
|
Subject: [PATCH] [AutoBOLT] Support saving feedback count info to ELF segment
|
||||||
|
1/3
|
||||||
|
|
||||||
|
---
|
||||||
|
gcc/common.opt | 8 +
|
||||||
|
gcc/final.cc | 405 ++++++++++++++++++++++++++++++++++++++++++++++++-
|
||||||
|
gcc/opts.cc | 61 ++++++++
|
||||||
|
3 files changed, 473 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||||
|
index b01df919e..e69947fc2 100644
|
||||||
|
--- a/gcc/common.opt
|
||||||
|
+++ b/gcc/common.opt
|
||||||
|
@@ -2546,6 +2546,14 @@ freorder-functions
|
||||||
|
Common Var(flag_reorder_functions) Optimization
|
||||||
|
Reorder functions to improve code placement.
|
||||||
|
|
||||||
|
+fauto-bolt
|
||||||
|
+Common Var(flag_auto_bolt)
|
||||||
|
+Generate profile from AutoFDO or PGO and do BOLT optimization after linkage.
|
||||||
|
+
|
||||||
|
+fauto-bolt=
|
||||||
|
+Common Joined RejectNegative
|
||||||
|
+Specify the feedback data directory required by BOLT-plugin. The default is the current directory.
|
||||||
|
+
|
||||||
|
frerun-cse-after-loop
|
||||||
|
Common Var(flag_rerun_cse_after_loop) Optimization
|
||||||
|
Add a common subexpression elimination pass after loop optimizations.
|
||||||
|
diff --git a/gcc/final.cc b/gcc/final.cc
|
||||||
|
index a9868861b..d4c4fa08f 100644
|
||||||
|
--- a/gcc/final.cc
|
||||||
|
+++ b/gcc/final.cc
|
||||||
|
@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#include "rtl-iter.h"
|
||||||
|
#include "print-rtl.h"
|
||||||
|
#include "function-abi.h"
|
||||||
|
+#include "insn-codes.h"
|
||||||
|
#include "common/common-target.h"
|
||||||
|
|
||||||
|
#ifdef XCOFF_DEBUGGING_INFO
|
||||||
|
@@ -4266,7 +4267,403 @@ leaf_renumber_regs_insn (rtx in_rtx)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
-
|
||||||
|
+
|
||||||
|
+#define ASM_FDO_SECTION_PREFIX ".text.fdo."
|
||||||
|
+
|
||||||
|
+#define ASM_FDO_CALLER_FLAG ".fdo.caller "
|
||||||
|
+#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size "
|
||||||
|
+#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind"
|
||||||
|
+
|
||||||
|
+#define ASM_FDO_CALLEE_FLAG ".fdo.callee"
|
||||||
|
+
|
||||||
|
+/* Return the relative offset address of the first instruction of BB
+   that is worth reporting.  Entries that are not real instructions and
+   NOPs are stepped over.  -1 is returned when a call is met before any
+   reportable instruction, or when BB holds no such instruction.  */
+
+static int
+get_bb_start_addr (basic_block bb)
+{
+  rtx_insn *cur;
+
+  FOR_BB_INSNS (bb, cur)
+    {
+      if (!INSN_P (cur))
+        continue;
+
+      /* The jump target of a call lies outside this function, so a
+         block that starts with a call is excluded.  */
+      if (CALL_P (cur))
+        return -1;
+
+      /* llvm-bolt attributes a NOP to the previous BB, so keep
+         scanning past it.  */
+      if (recog_memoized (cur) == CODE_FOR_nop)
+        continue;
+
+      return INSN_ADDRESSES (INSN_UID (cur));
+    }
+
+  return -1;
+}
|
||||||
|
+
|
||||||
|
+/* Return the relative offset address of the last real instruction of
+   BB.  -1 is returned when BB holds no real instruction, when that
+   instruction is a call, or when the successor edges match neither of
+   the two accepted shapes: exactly one successor, or exactly two
+   successors behind a conditional jump.  */
+
+static int
+get_bb_end_addr (basic_block bb)
+{
+  const int succ_count = EDGE_COUNT (bb->succs);
+  rtx_insn *cur;
+
+  FOR_BB_INSNS_REVERSE (bb, cur)
+    {
+      if (!INSN_P (cur))
+        continue;
+
+      /* Only the last real instruction is ever examined: every path
+         below leaves the function.  */
+
+      /* The jump target of a call lies outside this function, so it
+         is excluded.  */
+      if (CALL_P (cur))
+        return -1;
+
+      bool single_succ = succ_count == 1;
+      bool cond_branch = succ_count == 2 && any_condjump_p (cur);
+
+      return (single_succ || cond_branch)
+             ? INSN_ADDRESSES (INSN_UID (cur))
+             : -1;
+    }
+
+  return -1;
+}
|
||||||
|
+
|
||||||
|
+/* Return the address of the last real instruction of cfun, found by
+   walking backwards from the last insn.  The first insn of the chain
+   is never examined; -1 is returned when no real instruction is found
+   before reaching it.  */
+
+static int
+get_function_end_addr ()
+{
+  rtx_insn *cur = get_last_insn ();
+  rtx_insn *const first = get_insns ();
+
+  while (cur != first)
+    {
+      if (INSN_P (cur))
+        return INSN_ADDRESSES (INSN_UID (cur));
+      cur = PREV_INSN (cur);
+    }
+
+  return -1;
+}
|
||||||
|
+
|
||||||
|
+/* Return a printable name for the profile status of cfun.  The table
+   is indexed directly by the value of profile_status_for_fn.  */
+
+static const char *
+get_function_profile_status ()
+{
+  static const char *const status_names[] = {
+    "PROFILE_ABSENT",
+    "PROFILE_GUESSED",
+    "PROFILE_READ",
+    "PROFILE_LAST"  /* Last value, used by profile streaming.  */
+  };
+
+  return status_names[profile_status_for_fn (cfun)];
+}
|
||||||
|
+
|
||||||
|
+/* Convert COUNT to a gcov_type value.  Counts whose quality is below
+   GUESSED are reported as 0.  */
+
+inline static gcov_type
+get_fdo_count (profile_count count)
+{
+  if (count.quality () < GUESSED)
+    return 0;
+
+  return count.to_gcov_type ();
+}
|
||||||
|
+
|
||||||
|
+/* Return a printable name for the quality of COUNT.  The table is
+   indexed directly by the value of COUNT.quality ().  */
+
+static const char *
+get_fdo_count_quality (profile_count count)
+{
+  static const char *const quality_names[] = {
+    "UNINITIALIZED_PROFILE",
+    "GUESSED_LOCAL",
+    "GUESSED_GLOBAL0",
+    "GUESSED_GLOBAL0_ADJUSTED",
+    "GUESSED",
+    "AFDO",
+    "ADJUSTED",
+    "PRECISE"
+  };
+
+  return quality_names[count.quality ()];
+}
|
||||||
|
+
|
||||||
|
+/* Return the name under which FNNAME is recorded.  When the current
+   function (cfun) is public, FNNAME is returned unchanged; otherwise
+   the result is FNNAME, a "/" and the base name of dump_base_name.
+
+   NOTE(review): the public/local decision looks at cfun->decl, not at
+   the decl FNNAME was taken from -- confirm this is intended when
+   FNNAME names a callee.
+   NOTE(review): the string built by concat is not released in this
+   function -- confirm who owns it.  */
+
+static const char *
+alias_local_functions (const char *fnname)
+{
+  const bool is_public = TREE_PUBLIC (cfun->decl);
+
+  return is_public
+         ? fnname
+         : concat (fnname, "/", lbasename (dump_base_name), NULL);
+}
|
||||||
|
+
|
||||||
|
+/* Return the binding kind of cfun as a string: "LOCAL" when the decl
+   is not public, otherwise "WEAK" for a weak decl and "GLOBAL" for
+   any other public decl.  */
+
+static const char *
+simple_get_function_bind ()
+{
+  if (!TREE_PUBLIC (cfun->decl))
+    return "LOCAL";
+
+  return DECL_WEAK (cfun->decl) ? "WEAK" : "GLOBAL";
+}
|
||||||
|
+
|
||||||
|
+/* Dump one record for every call instruction in BB whose callee decl
+   can be determined.  Each record is three .string directives: the
+   address of the call in hex, the callee name prefixed with
+   ASM_FDO_CALLEE_FLAG, and CALL_COUNT in decimal.  The same call is
+   also logged to dump_file when one is open.  */
+
+static void
+dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
+{
+  rtx_insn *cur;
+
+  FOR_BB_INSNS (bb, cur)
+    {
+      if (!cur || !CALL_P (cur))
+        continue;
+
+      tree callee_decl = get_call_fndecl (cur);
+
+      /* Calls without a known callee decl are not recorded.  */
+      if (!callee_decl)
+        continue;
+
+      const char *callee_name = get_fnname_from_decl (callee_decl);
+
+      fprintf (asm_out_file, "\t.string \"%x\"\n",
+               INSN_ADDRESSES (INSN_UID (cur)));
+
+      fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+               ASM_FDO_CALLEE_FLAG,
+               alias_local_functions (callee_name));
+
+      fprintf (asm_out_file,
+               "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+               call_count);
+
+      if (dump_file)
+        fprintf (dump_file, "call: %x --> %s \n",
+                 INSN_ADDRESSES (INSN_UID (cur)),
+                 alias_local_functions (callee_name));
+    }
+}
|
||||||
|
+
|
||||||
|
+/* Dump the edge info into asm.
+
+   For every successor edge of BB whose source end address and
+   destination start address are both known, and whose count is
+   positive, three .string directives are written: the source address
+   and the destination address in hex, then the edge count in decimal.
+   Every such edge is also logged to dump_file when one is open,
+   whatever its count.
+
+   Afterwards the direct calls found in BB are dumped with a count of
+   MAX (sum of all successor edge counts, BB_COUNT), provided that
+   value is positive.
+
+   Always returns 0.  The value carries no information; it is returned
+   only because the function is declared int, and flowing off the end
+   of a non-void function is undefined behavior.  */
+
+static int
+dump_edge_jump_info_to_asm (basic_block bb, gcov_type bb_count)
+{
+  edge e;
+  edge_iterator ei;
+  gcov_type edge_total_count = 0;
+
+  FOR_EACH_EDGE (e, ei, bb->succs)
+    {
+      gcov_type edge_count = get_fdo_count (e->count ());
+      edge_total_count += edge_count;
+
+      int edge_start_addr = get_bb_end_addr (e->src);
+      int edge_end_addr = get_bb_start_addr (e->dest);
+
+      /* Skip edges for which either end has no usable address.  */
+      if (edge_start_addr == -1 || edge_end_addr == -1)
+        continue;
+
+      /* This is a reserved assert for the original design.  If this
+         assert triggers, use the address of the previous instruction
+         as edge_start_addr.  */
+      gcc_assert (edge_start_addr != edge_end_addr);
+
+      /* gcov_type is printed with HOST_WIDE_INT_PRINT_DEC, as in the
+         asm output below, rather than with a bare %ld.  */
+      if (dump_file)
+        fprintf (dump_file,
+                 "edge: %x --> %x = (" HOST_WIDE_INT_PRINT_DEC ")\n",
+                 edge_start_addr, edge_end_addr,
+                 (HOST_WIDE_INT) edge_count);
+
+      if (edge_count > 0)
+        {
+          fprintf (asm_out_file, "\t.string \"%x\"\n", edge_start_addr);
+          fprintf (asm_out_file, "\t.string \"%x\"\n", edge_end_addr);
+          fprintf (asm_out_file,
+                   "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+                   edge_count);
+        }
+    }
+
+  gcov_type call_count = MAX (edge_total_count, bb_count);
+  if (call_count > 0)
+    dump_direct_callee_info_to_asm (bb, call_count);
+
+  return 0;
+}
|
||||||
|
+
|
||||||
|
+/* Dump the start address of BB in hex, followed by BB_COUNT in
+   decimal, as two .string directives.  Nothing is written when BB has
+   no usable start address.  */
+
+static void
+dump_bb_info_to_asm (basic_block bb, gcov_type bb_count)
+{
+  const int start_addr = get_bb_start_addr (bb);
+
+  if (start_addr == -1)
+    return;
+
+  fprintf (asm_out_file, "\t.string \"%x\"\n", start_addr);
+  fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+           bb_count);
+}
|
||||||
|
+
|
||||||
|
+/* Dump the header record of the current function as three .string
+   directives: its (possibly aliased) name, its end address in
+   decimal, and its binding kind, each prefixed with the matching
+   ASM_FDO_CALLER_* flag.  The same facts, plus the dump base name and
+   the profile status, are logged to dump_file when one is open; there
+   the end address is printed in hex.  */
+
+static void
+dump_function_info_to_asm (const char *fnname)
+{
+  const int end_addr = get_function_end_addr ();
+  const char *const bind = simple_get_function_bind ();
+
+  fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+           ASM_FDO_CALLER_FLAG, alias_local_functions (fnname));
+  fprintf (asm_out_file, "\t.string \"%s%d\"\n",
+           ASM_FDO_CALLER_SIZE_FLAG, end_addr);
+  fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+           ASM_FDO_CALLER_BIND_FLAG, bind);
+
+  if (!dump_file)
+    return;
+
+  fprintf (dump_file, "\n FUNC_NAME: %s\n",
+           alias_local_functions (fnname));
+  fprintf (dump_file, " file: %s\n", dump_base_name);
+  fprintf (dump_file, "profile_status: %s\n",
+           get_function_profile_status ());
+  fprintf (dump_file, " size: %x\n", end_addr);
+  fprintf (dump_file, " function_bind: %s\n", bind);
+}
|
||||||
|
+
|
||||||
|
+/* Dump the profile of the current function to asm.
+
+   The function header record is written first.  Then every basic
+   block of cfun with a non-zero count is processed: with
+   -fprofile-use (flag_profile_use) its edge and call records are
+   dumped, otherwise with -fauto-profile (flag_auto_profile) its
+   per-block record is dumped.  When neither flag is set only the
+   header record is written.
+
+   FNNAME is the name of the current function, as passed on to
+   dump_function_info_to_asm.  */
+
+static void
+dump_fdo_info_to_asm (const char *fnname)
+{
+  basic_block bb;
+
+  dump_function_info_to_asm (fnname);
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      gcov_type bb_count = get_fdo_count (bb->count);
+
+      /* Blocks without any recorded execution are not dumped.  */
+      if (bb_count == 0)
+        continue;
+
+      /* gcov_type is printed with HOST_WIDE_INT_PRINT_DEC, as in the
+         asm output of the dump helpers, rather than with a bare %ld.  */
+      if (dump_file)
+        fprintf (dump_file,
+                 "BB: %x --> %x = (" HOST_WIDE_INT_PRINT_DEC ") [%s]\n",
+                 get_bb_start_addr (bb), get_bb_end_addr (bb),
+                 (HOST_WIDE_INT) bb_count,
+                 get_fdo_count_quality (bb->count));
+
+      if (flag_profile_use)
+        dump_edge_jump_info_to_asm (bb, bb_count);
+      else if (flag_auto_profile)
+        dump_bb_info_to_asm (bb, bb_count);
+    }
+}
|
||||||
|
+
|
||||||
|
+/* When the -fauto-bolt option is turned on, a .text.fdo.<function>
+   section is generated in the *.s file if there is feedback
+   information from PGO or AutoFDO.  This section will be parsed by the
+   BOLT-plugin.
+
+   An error is reported and nothing is dumped when the options this
+   dump depends on are not in the required state: -ffunction-sections
+   and -fipa-ra must be on, -falign-jumps, -falign-labels and
+   -falign-loops must be off, and one of -fprofile-use or
+   -fauto-profile must be given.  */
+
+static void
+dump_profile_to_elf_sections ()
+{
+  if (!flag_function_sections)
+    {
+      error ("-fauto-bolt should work with -ffunction-sections");
+      return;
+    }
+  if (!flag_ipa_ra)
+    {
+      error ("-fauto-bolt should work with -fipa-ra");
+      return;
+    }
+  if (flag_align_jumps)
+    {
+      error ("-fauto-bolt is not supported with -falign-jumps");
+      return;
+    }
+  if (flag_align_labels)
+    {
+      /* This message used to name -falign-loops by mistake.  */
+      error ("-fauto-bolt is not supported with -falign-labels");
+      return;
+    }
+  if (flag_align_loops)
+    {
+      error ("-fauto-bolt is not supported with -falign-loops");
+      return;
+    }
+
+  /* Return if no feedback data.  */
+  if (!flag_profile_use && !flag_auto_profile)
+    {
+      error ("-fauto-bolt should use with -fprofile-use or -fauto-profile");
+      return;
+    }
+
+  /* Only proceed when the decl of cfun really is a FUNCTION_DECL.  */
+  if (TREE_CODE (cfun->decl) != FUNCTION_DECL)
+    return;
+
+  int flags = SECTION_DEBUG | SECTION_EXCLUDE;
+  const char *fnname = get_fnname_from_decl (current_function_decl);
+
+  /* Build the section name with concat rather than asprintf: the
+     result of asprintf was not checked, and on failure it leaves the
+     output pointer indeterminate.  */
+  char *profile_fnname = concat (ASM_FDO_SECTION_PREFIX, fnname, NULL);
+
+  switch_to_section (get_section (profile_fnname, flags, NULL));
+  dump_fdo_info_to_asm (fnname);
+
+  free (profile_fnname);
+}
|
||||||
|
+
|
||||||
|
/* Turn the RTL into assembly. */
|
||||||
|
static unsigned int
|
||||||
|
rest_of_handle_final (void)
|
||||||
|
@@ -4334,6 +4731,12 @@ rest_of_handle_final (void)
|
||||||
|
targetm.asm_out.destructor (XEXP (DECL_RTL (current_function_decl), 0),
|
||||||
|
decl_fini_priority_lookup
|
||||||
|
(current_function_decl));
|
||||||
|
+
|
||||||
|
+ if (flag_auto_bolt)
|
||||||
|
+ {
|
||||||
|
+ dump_profile_to_elf_sections ();
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/gcc/opts.cc b/gcc/opts.cc
|
||||||
|
index b868d189e..6d57e7d69 100644
|
||||||
|
--- a/gcc/opts.cc
|
||||||
|
+++ b/gcc/opts.cc
|
||||||
|
@@ -1279,6 +1279,10 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
|
||||||
|
if (opts->x_flag_vtable_verify && opts->x_flag_lto)
|
||||||
|
sorry ("vtable verification is not supported with LTO");
|
||||||
|
|
||||||
|
+ /* Currently -fauto-bolt is not supported for LTO. */
|
||||||
|
+ if (opts->x_flag_auto_bolt && opts->x_flag_lto)
|
||||||
|
+ sorry ("%<-fauto-bolt%> is not supported with LTO");
|
||||||
|
+
|
||||||
|
/* Control IPA optimizations based on different -flive-patching level. */
|
||||||
|
if (opts->x_flag_live_patching)
|
||||||
|
control_options_for_live_patching (opts, opts_set,
|
||||||
|
@@ -1291,6 +1295,58 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
|
||||||
|
= (opts->x_flag_unroll_loops
|
||||||
|
|| opts->x_flag_peel_loops
|
||||||
|
|| opts->x_optimize >= 3);
|
||||||
|
+
|
||||||
|
+ if (opts->x_flag_auto_bolt)
|
||||||
|
+ {
|
||||||
|
+ /* Record the function section to facilitate the feedback
|
||||||
|
+ data storage. */
|
||||||
|
+ if (!opts->x_flag_function_sections)
|
||||||
|
+ {
|
||||||
|
+ inform (loc,
|
||||||
|
+ "%<-fauto-bolt%> should work with %<-ffunction-sections%>,"
|
||||||
|
+ " enabling %<-ffunction-sections%>");
|
||||||
|
+ opts->x_flag_function_sections = true;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Cancel the internal alignment of the function. The binary
|
||||||
|
+ optimizer bolt will cancel the internal alignment optimization
|
||||||
|
+ of the function, so the alignment is meaningless at this time,
|
||||||
|
+ and if not, it will bring trouble to the calculation of the
|
||||||
|
+ offset address of the instruction. */
|
||||||
|
+ if (opts->x_flag_align_jumps)
|
||||||
|
+ {
|
||||||
|
+ inform (loc,
|
||||||
|
+ "%<-fauto-bolt%> should not work with %<-falign-jumps%>,"
|
||||||
|
+ " disabling %<-falign-jumps%>");
|
||||||
|
+ opts->x_flag_align_jumps = false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (opts->x_flag_align_labels)
|
||||||
|
+ {
|
||||||
|
+ inform (loc,
|
||||||
|
+ "%<-fauto-bolt%> should not work with %<-falign-labels%>,"
|
||||||
|
+ " disabling %<-falign-labels%>");
|
||||||
|
+ opts->x_flag_align_labels = false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (opts->x_flag_align_loops)
|
||||||
|
+ {
|
||||||
|
+ inform (loc,
|
||||||
|
+ "%<-fauto-bolt%> should not work with %<-falign-loops%>,"
|
||||||
|
+ " disabling %<-falign-loops%>");
|
||||||
|
+ opts->x_flag_align_loops = false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* When parsing instructions in RTL phase, we need to know
|
||||||
|
+ the call information of instructions to avoid being optimized. */
|
||||||
|
+ if (!opts->x_flag_ipa_ra)
|
||||||
|
+ {
|
||||||
|
+ inform (loc,
|
||||||
|
+ "%<-fauto-bolt%> should work with %<-fipa-ra%>,"
|
||||||
|
+ " enabling %<-fipa-ra%>");
|
||||||
|
+ opts->x_flag_ipa_ra = true;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
|
||||||
|
/* With -fcx-limited-range, we do cheap and quick complex arithmetic. */
|
||||||
|
if (opts->x_flag_cx_limited_range)
|
||||||
|
@@ -3226,6 +3282,11 @@ common_handle_option (struct gcc_options *opts,
|
||||||
|
&opts->x_flag_align_functions,
|
||||||
|
&opts->x_str_align_functions);
|
||||||
|
break;
|
||||||
|
+
|
||||||
|
+ case OPT_fauto_bolt_:
|
||||||
|
+ case OPT_fauto_bolt:
|
||||||
|
+ /* Deferred. */
|
||||||
|
+ break;
|
||||||
|
|
||||||
|
case OPT_ftabstop_:
|
||||||
|
/* It is documented that we silently ignore silly values. */
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
34094
0032-AutoBOLT-Add-bolt-linker-plugin-2-3.patch
Normal file
34094
0032-AutoBOLT-Add-bolt-linker-plugin-2-3.patch
Normal file
File diff suppressed because it is too large
Load Diff
345
0033-AutoBOLT-Enable-BOLT-linker-plugin-on-aarch64-3-3.patch
Normal file
345
0033-AutoBOLT-Enable-BOLT-linker-plugin-on-aarch64-3-3.patch
Normal file
@ -0,0 +1,345 @@
|
|||||||
|
From 94242286383a80e6ab83d824a4d7ea23ea311f75 Mon Sep 17 00:00:00 2001
|
||||||
|
From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
|
||||||
|
Date: Mon, 22 Jan 2024 15:38:24 +0800
|
||||||
|
Subject: [PATCH] [AutoBOLT] Enable BOLT linker plugin on aarch64 3/3
|
||||||
|
|
||||||
|
---
|
||||||
|
Makefile.def | 10 ++++++++++
|
||||||
|
configure | 27 ++++++++++++++++++++++++++-
|
||||||
|
configure.ac | 22 +++++++++++++++++++++-
|
||||||
|
gcc/config.host | 1 +
|
||||||
|
gcc/config.in | 13 +++++++++++++
|
||||||
|
gcc/configure | 10 ++++++++--
|
||||||
|
gcc/configure.ac | 4 ++++
|
||||||
|
gcc/gcc.cc | 23 +++++++++++++++++++++++
|
||||||
|
8 files changed, 106 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/Makefile.def b/Makefile.def
|
||||||
|
index 72d585496..0ba868890 100644
|
||||||
|
--- a/Makefile.def
|
||||||
|
+++ b/Makefile.def
|
||||||
|
@@ -145,6 +145,9 @@ host_modules= { module= gnattools; };
|
||||||
|
host_modules= { module= lto-plugin; bootstrap=true;
|
||||||
|
extra_configure_flags='--enable-shared @extra_linker_plugin_flags@ @extra_linker_plugin_configure_flags@';
|
||||||
|
extra_make_flags='@extra_linker_plugin_flags@'; };
|
||||||
|
+host_modules= { module= bolt-plugin; bootstrap=true;
|
||||||
|
+ extra_configure_flags='--enable-shared @extra_linker_plugin_flags@ @extra_linker_plugin_configure_flags@';
|
||||||
|
+ extra_make_flags='@extra_linker_plugin_flags@'; };
|
||||||
|
host_modules= { module= libcc1; extra_configure_flags=--enable-shared; };
|
||||||
|
host_modules= { module= gotools; };
|
||||||
|
host_modules= { module= libctf; bootstrap=true; };
|
||||||
|
@@ -349,6 +352,7 @@ dependencies = { module=configure-gcc; on=all-mpfr; };
|
||||||
|
dependencies = { module=configure-gcc; on=all-mpc; };
|
||||||
|
dependencies = { module=configure-gcc; on=all-isl; };
|
||||||
|
dependencies = { module=configure-gcc; on=all-lto-plugin; };
|
||||||
|
+dependencies = { module=configure-gcc; on=all-bolt-plugin; };
|
||||||
|
dependencies = { module=configure-gcc; on=all-binutils; };
|
||||||
|
dependencies = { module=configure-gcc; on=all-gas; };
|
||||||
|
dependencies = { module=configure-gcc; on=all-ld; };
|
||||||
|
@@ -374,6 +378,7 @@ dependencies = { module=all-gcc; on=all-libdecnumber; hard=true; };
|
||||||
|
dependencies = { module=all-gcc; on=all-libiberty; };
|
||||||
|
dependencies = { module=all-gcc; on=all-fixincludes; };
|
||||||
|
dependencies = { module=all-gcc; on=all-lto-plugin; };
|
||||||
|
+dependencies = { module=all-gcc; on=all-bolt-plugin; };
|
||||||
|
dependencies = { module=all-gcc; on=all-libiconv; };
|
||||||
|
dependencies = { module=info-gcc; on=all-build-libiberty; };
|
||||||
|
dependencies = { module=dvi-gcc; on=all-build-libiberty; };
|
||||||
|
@@ -381,8 +386,10 @@ dependencies = { module=pdf-gcc; on=all-build-libiberty; };
|
||||||
|
dependencies = { module=html-gcc; on=all-build-libiberty; };
|
||||||
|
dependencies = { module=install-gcc ; on=install-fixincludes; };
|
||||||
|
dependencies = { module=install-gcc ; on=install-lto-plugin; };
|
||||||
|
+dependencies = { module=install-gcc ; on=install-bolt-plugin; };
|
||||||
|
dependencies = { module=install-strip-gcc ; on=install-strip-fixincludes; };
|
||||||
|
dependencies = { module=install-strip-gcc ; on=install-strip-lto-plugin; };
|
||||||
|
+dependencies = { module=install-strip-gcc ; on=install-strip-bolt-plugin; };
|
||||||
|
|
||||||
|
dependencies = { module=configure-libcpp; on=configure-libiberty; hard=true; };
|
||||||
|
dependencies = { module=configure-libcpp; on=configure-intl; };
|
||||||
|
@@ -401,6 +408,9 @@ dependencies = { module=all-gnattools; on=all-target-libstdc++-v3; };
|
||||||
|
dependencies = { module=all-lto-plugin; on=all-libiberty; };
|
||||||
|
dependencies = { module=all-lto-plugin; on=all-libiberty-linker-plugin; };
|
||||||
|
|
||||||
|
+dependencies = { module=all-bolt-plugin; on=all-libiberty; };
|
||||||
|
+dependencies = { module=all-bolt-plugin; on=all-libiberty-linker-plugin; };
|
||||||
|
+
|
||||||
|
dependencies = { module=configure-libcc1; on=configure-gcc; };
|
||||||
|
dependencies = { module=all-libcc1; on=all-gcc; };
|
||||||
|
|
||||||
|
diff --git a/configure b/configure
|
||||||
|
index 5dcaab14a..aff62c464 100755
|
||||||
|
--- a/configure
|
||||||
|
+++ b/configure
|
||||||
|
@@ -826,6 +826,7 @@ with_isl
|
||||||
|
with_isl_include
|
||||||
|
with_isl_lib
|
||||||
|
enable_isl_version_check
|
||||||
|
+enable_bolt
|
||||||
|
enable_lto
|
||||||
|
enable_linker_plugin_configure_flags
|
||||||
|
enable_linker_plugin_flags
|
||||||
|
@@ -1550,6 +1551,7 @@ Optional Features:
|
||||||
|
enable the PGO build
|
||||||
|
--disable-isl-version-check
|
||||||
|
disable check for isl version
|
||||||
|
+ --enable-bolt enable bolt optimization support
|
||||||
|
--enable-lto enable link time optimization support
|
||||||
|
--enable-linker-plugin-configure-flags=FLAGS
|
||||||
|
additional flags for configuring linker plugins
|
||||||
|
@@ -8564,6 +8566,15 @@ fi
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
+# Check for BOLT support.
|
||||||
|
+# Check whether --enable-bolt was given.
|
||||||
|
+if test "${enable_bolt+set}" = set; then :
|
||||||
|
+ enableval=$enable_bolt; enable_bolt=$enableval
|
||||||
|
+else
|
||||||
|
+ enable_bolt=no; default_enable_bolt=no
|
||||||
|
+fi
|
||||||
|
+
|
||||||
|
+
|
||||||
|
# Check for LTO support.
|
||||||
|
# Check whether --enable-lto was given.
|
||||||
|
if test "${enable_lto+set}" = set; then :
|
||||||
|
@@ -8593,6 +8604,16 @@ if test $target_elf = yes; then :
|
||||||
|
# ELF platforms build the lto-plugin always.
|
||||||
|
build_lto_plugin=yes
|
||||||
|
|
||||||
|
+ # ELF platforms can build the bolt-plugin.
|
||||||
|
+ # NOT BUILD BOLT BY DEFAULT.
|
||||||
|
+ case $target in
|
||||||
|
+ aarch64*-*-linux*)
|
||||||
|
+ if test $enable_bolt = yes; then :
|
||||||
|
+ build_bolt_plugin=yes
|
||||||
|
+ fi
|
||||||
|
+ ;;
|
||||||
|
+ esac
|
||||||
|
+
|
||||||
|
else
|
||||||
|
if test x"$default_enable_lto" = x"yes" ; then
|
||||||
|
case $target in
|
||||||
|
@@ -8780,6 +8801,10 @@ if test -d ${srcdir}/gcc; then
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
+ if test "${build_bolt_plugin}" = "yes" ; then
|
||||||
|
+ configdirs="$configdirs bolt-plugin"
|
||||||
|
+ fi
|
||||||
|
+
|
||||||
|
# If we're building an offloading compiler, add the LTO front end.
|
||||||
|
if test x"$enable_as_accelerator_for" != x ; then
|
||||||
|
case ,${enable_languages}, in
|
||||||
|
@@ -9202,7 +9227,7 @@ fi
|
||||||
|
extra_host_libiberty_configure_flags=
|
||||||
|
extra_host_zlib_configure_flags=
|
||||||
|
case " $configdirs " in
|
||||||
|
- *" lto-plugin "* | *" libcc1 "*)
|
||||||
|
+ *" lto-plugin "* | *" libcc1 "* | *" bolt-plugin "*)
|
||||||
|
# When these are to be built as shared libraries, the same applies to
|
||||||
|
# libiberty.
|
||||||
|
extra_host_libiberty_configure_flags=--enable-shared
|
||||||
|
diff --git a/configure.ac b/configure.ac
|
||||||
|
index 85977482a..f310d75ca 100644
|
||||||
|
--- a/configure.ac
|
||||||
|
+++ b/configure.ac
|
||||||
|
@@ -1863,6 +1863,12 @@ fi
|
||||||
|
AC_SUBST(isllibs)
|
||||||
|
AC_SUBST(islinc)
|
||||||
|
|
||||||
|
+# Check for BOLT support.
|
||||||
|
+AC_ARG_ENABLE(bolt,
|
||||||
|
+[AS_HELP_STRING([--enable-bolt], [enable bolt optimization support])],
|
||||||
|
+enable_bolt=$enableval,
|
||||||
|
+enable_bolt=no; default_enable_bolt=no)
|
||||||
|
+
|
||||||
|
# Check for LTO support.
|
||||||
|
AC_ARG_ENABLE(lto,
|
||||||
|
[AS_HELP_STRING([--enable-lto], [enable link time optimization support])],
|
||||||
|
@@ -1871,6 +1877,16 @@ enable_lto=yes; default_enable_lto=yes)
|
||||||
|
|
||||||
|
ACX_ELF_TARGET_IFELSE([# ELF platforms build the lto-plugin always.
|
||||||
|
build_lto_plugin=yes
|
||||||
|
+
|
||||||
|
+ # ELF platforms can build the bolt-plugin.
|
||||||
|
+ # NOT BUILD BOLT BY DEFAULT.
|
||||||
|
+ case $target in
|
||||||
|
+ aarch64*-*-linux*)
|
||||||
|
+ if test $enable_bolt = yes; then :
|
||||||
|
+ build_bolt_plugin=yes
|
||||||
|
+ fi
|
||||||
|
+ ;;
|
||||||
|
+ esac
|
||||||
|
],[if test x"$default_enable_lto" = x"yes" ; then
|
||||||
|
case $target in
|
||||||
|
*-apple-darwin[[912]]* | *-cygwin* | *-mingw* | *djgpp*) ;;
|
||||||
|
@@ -2049,6 +2065,10 @@ if test -d ${srcdir}/gcc; then
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
+ if test "${build_bolt_plugin}" = "yes" ; then
|
||||||
|
+ configdirs="$configdirs bolt-plugin"
|
||||||
|
+ fi
|
||||||
|
+
|
||||||
|
# If we're building an offloading compiler, add the LTO front end.
|
||||||
|
if test x"$enable_as_accelerator_for" != x ; then
|
||||||
|
case ,${enable_languages}, in
|
||||||
|
@@ -2457,7 +2477,7 @@ fi
|
||||||
|
extra_host_libiberty_configure_flags=
|
||||||
|
extra_host_zlib_configure_flags=
|
||||||
|
case " $configdirs " in
|
||||||
|
- *" lto-plugin "* | *" libcc1 "*)
|
||||||
|
+ *" lto-plugin "* | *" libcc1 "* | *" bolt-plugin "*)
|
||||||
|
# When these are to be built as shared libraries, the same applies to
|
||||||
|
# libiberty.
|
||||||
|
extra_host_libiberty_configure_flags=--enable-shared
|
||||||
|
diff --git a/gcc/config.host b/gcc/config.host
|
||||||
|
index 4ca300f11..bf7dcb4cc 100644
|
||||||
|
--- a/gcc/config.host
|
||||||
|
+++ b/gcc/config.host
|
||||||
|
@@ -75,6 +75,7 @@ out_host_hook_obj=host-default.o
|
||||||
|
host_can_use_collect2=yes
|
||||||
|
use_long_long_for_widest_fast_int=no
|
||||||
|
host_lto_plugin_soname=liblto_plugin.so
|
||||||
|
+host_bolt_plugin_soname=libbolt_plugin.so
|
||||||
|
|
||||||
|
# Unsupported hosts list. Generally, only include hosts known to fail here,
|
||||||
|
# since we allow hosts not listed to be supported generically.
|
||||||
|
diff --git a/gcc/config.in b/gcc/config.in
|
||||||
|
index 64c27c9cf..6bb25b25b 100644
|
||||||
|
--- a/gcc/config.in
|
||||||
|
+++ b/gcc/config.in
|
||||||
|
@@ -24,6 +24,13 @@
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
+/* Define to the name of the BOLT plugin DSO that must be passed to the
|
||||||
|
+ linker's -plugin=LIB option. */
|
||||||
|
+#ifndef USED_FOR_TARGET
|
||||||
|
+#undef BOLTPLUGINSONAME
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+
|
||||||
|
/* Define to the root for URLs about GCC changes. */
|
||||||
|
#ifndef USED_FOR_TARGET
|
||||||
|
#undef CHANGES_ROOT_URL
|
||||||
|
@@ -2208,6 +2215,12 @@
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
+/* Define which stat syscall is able to handle 64bit inodes. */
|
||||||
|
+#ifndef USED_FOR_TARGET
|
||||||
|
+#undef HOST_STAT_FOR_64BIT_INODES
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+
|
||||||
|
/* Define as const if the declaration of iconv() needs const. */
|
||||||
|
#ifndef USED_FOR_TARGET
|
||||||
|
#undef ICONV_CONST
|
||||||
|
diff --git a/gcc/configure b/gcc/configure
|
||||||
|
index 98bbf0f85..30f386789 100755
|
||||||
|
--- a/gcc/configure
|
||||||
|
+++ b/gcc/configure
|
||||||
|
@@ -13578,6 +13578,12 @@ case $use_collect2 in
|
||||||
|
esac
|
||||||
|
|
||||||
|
|
||||||
|
+cat >>confdefs.h <<_ACEOF
|
||||||
|
+#define BOLTPLUGINSONAME "${host_bolt_plugin_soname}"
|
||||||
|
+_ACEOF
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+
|
||||||
|
cat >>confdefs.h <<_ACEOF
|
||||||
|
#define LTOPLUGINSONAME "${host_lto_plugin_soname}"
|
||||||
|
_ACEOF
|
||||||
|
@@ -19668,7 +19674,7 @@ else
|
||||||
|
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
|
||||||
|
lt_status=$lt_dlunknown
|
||||||
|
cat > conftest.$ac_ext <<_LT_EOF
|
||||||
|
-#line 19671 "configure"
|
||||||
|
+#line 19677 "configure"
|
||||||
|
#include "confdefs.h"
|
||||||
|
|
||||||
|
#if HAVE_DLFCN_H
|
||||||
|
@@ -19774,7 +19780,7 @@ else
|
||||||
|
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
|
||||||
|
lt_status=$lt_dlunknown
|
||||||
|
cat > conftest.$ac_ext <<_LT_EOF
|
||||||
|
-#line 19777 "configure"
|
||||||
|
+#line 19783 "configure"
|
||||||
|
#include "confdefs.h"
|
||||||
|
|
||||||
|
#if HAVE_DLFCN_H
|
||||||
|
diff --git a/gcc/configure.ac b/gcc/configure.ac
|
||||||
|
index c74f4b555..dd6cd60f8 100644
|
||||||
|
--- a/gcc/configure.ac
|
||||||
|
+++ b/gcc/configure.ac
|
||||||
|
@@ -2531,6 +2531,10 @@ case $use_collect2 in
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
+AC_DEFINE_UNQUOTED(BOLTPLUGINSONAME,"${host_bolt_plugin_soname}",
|
||||||
|
+[Define to the name of the BOLT plugin DSO that must be
|
||||||
|
+ passed to the linker's -plugin=LIB option.])
|
||||||
|
+
|
||||||
|
AC_DEFINE_UNQUOTED(LTOPLUGINSONAME,"${host_lto_plugin_soname}",
|
||||||
|
[Define to the name of the LTO plugin DSO that must be
|
||||||
|
passed to the linker's -plugin=LIB option.])
|
||||||
|
diff --git a/gcc/gcc.cc b/gcc/gcc.cc
|
||||||
|
index fbcc9d033..b0d03430e 100644
|
||||||
|
--- a/gcc/gcc.cc
|
||||||
|
+++ b/gcc/gcc.cc
|
||||||
|
@@ -1156,6 +1156,8 @@ proper position among the other output files. */
|
||||||
|
%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\
|
||||||
|
%(linker) " \
|
||||||
|
LINK_PLUGIN_SPEC \
|
||||||
|
+ "%{fauto-bolt|fauto-bolt=*|fbolt-use|fbolt-use=*: \
|
||||||
|
+ -plugin %(linker_auto_bolt_plugin_file) }"\
|
||||||
|
"%{flto|flto=*:%<fcompare-debug*} \
|
||||||
|
%{flto} %{fno-lto} %{flto=*} %l " LINK_PIE_SPEC \
|
||||||
|
"%{fuse-ld=*:-fuse-ld=%*} " LINK_COMPRESS_DEBUG_SPEC \
|
||||||
|
@@ -1210,6 +1212,7 @@ static const char *endfile_spec = ENDFILE_SPEC;
|
||||||
|
static const char *startfile_spec = STARTFILE_SPEC;
|
||||||
|
static const char *linker_name_spec = LINKER_NAME;
|
||||||
|
static const char *linker_plugin_file_spec = "";
|
||||||
|
+static const char *linker_auto_bolt_plugin_file_spec = "";
|
||||||
|
static const char *lto_wrapper_spec = "";
|
||||||
|
static const char *lto_gcc_spec = "";
|
||||||
|
static const char *post_link_spec = POST_LINK_SPEC;
|
||||||
|
@@ -1723,6 +1726,8 @@ static struct spec_list static_specs[] =
|
||||||
|
INIT_STATIC_SPEC ("multilib_reuse", &multilib_reuse),
|
||||||
|
INIT_STATIC_SPEC ("linker", &linker_name_spec),
|
||||||
|
INIT_STATIC_SPEC ("linker_plugin_file", &linker_plugin_file_spec),
|
||||||
|
+ INIT_STATIC_SPEC ("linker_auto_bolt_plugin_file",
|
||||||
|
+ &linker_auto_bolt_plugin_file_spec),
|
||||||
|
 INIT_STATIC_SPEC ("lto_wrapper", &lto_wrapper_spec),
|
||||||
|
 INIT_STATIC_SPEC ("lto_gcc", &lto_gcc_spec),
|
||||||
|
INIT_STATIC_SPEC ("post_link", &post_link_spec),
|
||||||
|
@@ -9118,6 +9123,24 @@ driver::maybe_run_linker (const char *argv0) const
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
 set_static_spec_shared (&lto_gcc_spec, argv0);
|
||||||
|
+
|
||||||
|
+ /* Set bolt-plugin. */
|
||||||
|
+ const char *fauto_bolt = "fauto-bolt";
|
||||||
|
+ const char *fbolt_use = "fbolt-use";
|
||||||
|
+ if (switch_matches (fauto_bolt, fauto_bolt + strlen (fauto_bolt), 1)
|
||||||
|
+ || switch_matches (fbolt_use, fbolt_use + strlen (fbolt_use), 1))
|
||||||
|
+ {
|
||||||
|
+ linker_auto_bolt_plugin_file_spec = find_a_file (&exec_prefixes,
|
||||||
|
+ BOLTPLUGINSONAME, X_OK, false);
|
||||||
|
+ if (!linker_auto_bolt_plugin_file_spec)
|
||||||
|
+ {
|
||||||
|
+ fatal_error (input_location,
|
||||||
|
+ "-fauto-bolt or -fbolt-use is used, but %s is not found",
|
||||||
|
+ BOLTPLUGINSONAME);
|
||||||
|
+
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Rebuild the COMPILER_PATH and LIBRARY_PATH environment variables
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
312
0034-Autofdo-Enable-discrimibator-and-MCF-algorithm-on-Au.patch
Normal file
312
0034-Autofdo-Enable-discrimibator-and-MCF-algorithm-on-Au.patch
Normal file
@ -0,0 +1,312 @@
|
|||||||
|
From b020447c840c6e22440a9b9063298a06333fd2f1 Mon Sep 17 00:00:00 2001
|
||||||
|
From: zhenyu--zhao <zhaozhenyu17@huawei.com>
|
||||||
|
Date: Sat, 23 Mar 2024 22:56:09 +0800
|
||||||
|
Subject: [PATCH] [Autofdo]Enable discrimibator and MCF algorithm on Autofdo
|
||||||
|
|
||||||
|
---
|
||||||
|
gcc/auto-profile.cc | 171 +++++++++++++++++++++++++++++++++++++++++++-
|
||||||
|
gcc/cfghooks.cc | 7 ++
|
||||||
|
gcc/opts.cc | 5 +-
|
||||||
|
gcc/tree-inline.cc | 14 ++++
|
||||||
|
4 files changed, 193 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
|
||||||
|
index 2b34b80b8..f45f0ec66 100644
|
||||||
|
--- a/gcc/auto-profile.cc
|
||||||
|
+++ b/gcc/auto-profile.cc
|
||||||
|
@@ -466,6 +466,17 @@ string_table::get_index (const char *name) const
|
||||||
|
if (name == NULL)
|
||||||
|
return -1;
|
||||||
|
string_index_map::const_iterator iter = map_.find (name);
|
||||||
|
+ /* Function name may be duplicate. Try to distinguish by the
|
||||||
|
+ #file_name#function_name defined by the autofdo tool chain. */
|
||||||
|
+ if (iter == map_.end ())
|
||||||
|
+ {
|
||||||
|
+ char* file_name = get_original_name (lbasename (dump_base_name));
|
||||||
|
+ char* file_func_name
|
||||||
|
+ = concat ("#", file_name, "#", name, NULL);
|
||||||
|
+ iter = map_.find (file_func_name);
|
||||||
|
+ free (file_name);
|
||||||
|
+ free (file_func_name);
|
||||||
|
+ }
|
||||||
|
if (iter == map_.end ())
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
@@ -654,7 +665,7 @@ function_instance::read_function_instance (function_instance_stack *stack,
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < num_pos_counts; i++)
|
||||||
|
{
|
||||||
|
- unsigned offset = gcov_read_unsigned () & 0xffff0000;
|
||||||
|
+ unsigned offset = gcov_read_unsigned ();
|
||||||
|
unsigned num_targets = gcov_read_unsigned ();
|
||||||
|
gcov_type count = gcov_read_counter ();
|
||||||
|
s->pos_counts[offset].count = count;
|
||||||
|
@@ -733,6 +744,10 @@ autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
|
||||||
|
function_instance *s = get_function_instance_by_inline_stack (stack);
|
||||||
|
if (s == NULL)
|
||||||
|
return false;
|
||||||
|
+ if (s->get_count_info (stack[0].second + stmt->bb->discriminator, info))
|
||||||
|
+ {
|
||||||
|
+ return true;
|
||||||
|
+ }
|
||||||
|
return s->get_count_info (stack[0].second, info);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1395,6 +1410,66 @@ afdo_propagate (bb_set *annotated_bb)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Handle the following scenario, in which branch probabilities are
|
||||||
|
+ inverted by afdo_propagate (). E.g.
|
||||||
|
+ BB_NUM (sample count)
|
||||||
|
+ BB1 (1000)
|
||||||
|
+ / \
|
||||||
|
+ BB2 (10) BB3 (0)
|
||||||
|
+ \ /
|
||||||
|
+ BB4
|
||||||
|
+ In afdo_propagate (), the count of BB3 is calculated by
|
||||||
|
+ COUNT (BB3) = 990 (990 = COUNT (BB1) - COUNT (BB2) = 1000 - 10)
|
||||||
|
+ In fact, BB3 may be colder than BB2 by sample count.
|
||||||
|
+ This function allocates source BB count to each succ BB by sample
|
||||||
|
+ rate, E.g.
|
||||||
|
+ BB2_COUNT = BB1_COUNT * (BB2_COUNT / (BB2_COUNT + BB3_COUNT)) */
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+afdo_preprocess_bb_count ()
|
||||||
|
+{
|
||||||
|
+ basic_block bb;
|
||||||
|
+ FOR_ALL_BB_FN (bb, cfun)
|
||||||
|
+ {
|
||||||
|
+ if (bb->count.ipa_p () && EDGE_COUNT (bb->succs) > 1
|
||||||
|
+ && bb->count > profile_count::zero ().afdo ())
|
||||||
|
+ {
|
||||||
|
+ basic_block bb1 = EDGE_SUCC (bb, 0)->dest;
|
||||||
|
+ basic_block bb2 = EDGE_SUCC (bb, 1)->dest;
|
||||||
|
+ if (single_succ_edge (bb1) && single_succ_edge (bb2)
|
||||||
|
+ && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
|
||||||
|
+ {
|
||||||
|
+ gcov_type max_count = 0;
|
||||||
|
+ gcov_type total_count = 0;
|
||||||
|
+ edge e;
|
||||||
|
+ edge_iterator ei;
|
||||||
|
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||||||
|
+ {
|
||||||
|
+ if (!e->dest->count.ipa_p ())
|
||||||
|
+ {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+ max_count = MAX (max_count, e->dest->count.to_gcov_type ());
|
||||||
|
+ total_count += e->dest->count.to_gcov_type ();
|
||||||
|
+ }
|
||||||
|
+ /* Only bb_count > max_count * 2, branch probability will
|
||||||
|
+ be inverted. */
|
||||||
|
+ if (max_count > 0 && bb->count.to_gcov_type () > max_count * 2)
|
||||||
|
+ {
|
||||||
|
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||||||
|
+ {
|
||||||
|
+ gcov_type target_count = bb->count.to_gcov_type ()
|
||||||
|
+ * e->dest->count.to_gcov_type ()/ total_count;
|
||||||
|
+ e->dest->count
|
||||||
|
+ = profile_count::from_gcov_type
|
||||||
|
+ (target_count).afdo ();
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Propagate counts on control flow graph and calculate branch
|
||||||
|
probabilities. */
|
||||||
|
|
||||||
|
@@ -1420,6 +1495,7 @@ afdo_calculate_branch_prob (bb_set *annotated_bb)
|
||||||
|
}
|
||||||
|
|
||||||
|
afdo_find_equiv_class (annotated_bb);
|
||||||
|
+ afdo_preprocess_bb_count ();
|
||||||
|
afdo_propagate (annotated_bb);
|
||||||
|
|
||||||
|
FOR_EACH_BB_FN (bb, cfun)
|
||||||
|
@@ -1523,6 +1599,83 @@ afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Preparation before executing MCF algorithm. */
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+afdo_init_mcf ()
|
||||||
|
+{
|
||||||
|
+ basic_block bb;
|
||||||
|
+ edge e;
|
||||||
|
+ edge_iterator ei;
|
||||||
|
+
|
||||||
|
+ if (dump_file)
|
||||||
|
+ {
|
||||||
|
+ fprintf (dump_file, "\n init calling mcf_smooth_cfg (). \n");
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Step1: when using mcf, BB id must be continuous,
|
||||||
|
+ so we need compact_blocks (). */
|
||||||
|
+ compact_blocks ();
|
||||||
|
+
|
||||||
|
+ /* Step2: allocate memory for MCF input data. */
|
||||||
|
+ bb_gcov_counts.safe_grow_cleared (cfun->cfg->x_last_basic_block);
|
||||||
|
+ edge_gcov_counts = new hash_map<edge, gcov_type>;
|
||||||
|
+
|
||||||
|
+ /* Step3: init MCF input data from cfg. */
|
||||||
|
+ FOR_ALL_BB_FN (bb, cfun)
|
||||||
|
+ {
|
||||||
|
+ /* Init BB count for MCF. */
|
||||||
|
+ bb_gcov_count (bb) = bb->count.to_gcov_type ();
|
||||||
|
+
|
||||||
|
+ gcov_type total_count = 0;
|
||||||
|
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||||||
|
+ {
|
||||||
|
+ total_count += e->dest->count.to_gcov_type ();
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* If there is no sample in each successor blocks, source
|
||||||
|
+ BB samples are allocated to each edge by branch static prob. */
|
||||||
|
+
|
||||||
|
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||||||
|
+ {
|
||||||
|
+ if (total_count == 0)
|
||||||
|
+ {
|
||||||
|
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
|
||||||
|
+ * e->probability.to_reg_br_prob_base () / REG_BR_PROB_BASE;
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
|
||||||
|
+ * e->dest->count.to_gcov_type () / total_count;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+/* Free the resources used by MCF and reset BB count from MCF result.
|
||||||
|
+ Branch probability has been updated in mcf_smooth_cfg (). */
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+afdo_process_after_mcf ()
|
||||||
|
+{
|
||||||
|
+ basic_block bb;
|
||||||
|
+ /* Reset BB count from MCF result. */
|
||||||
|
+ FOR_EACH_BB_FN (bb, cfun)
|
||||||
|
+ {
|
||||||
|
+ if (bb_gcov_count (bb))
|
||||||
|
+ {
|
||||||
|
+ bb->count
|
||||||
|
+ = profile_count::from_gcov_type (bb_gcov_count (bb)).afdo ();
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Clean up MCF resource. */
|
||||||
|
+ bb_gcov_counts.release ();
|
||||||
|
+ delete edge_gcov_counts;
|
||||||
|
+ edge_gcov_counts = NULL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Annotate auto profile to the control flow graph. Do not annotate value
|
||||||
|
profile for stmts in PROMOTED_STMTS. */
|
||||||
|
|
||||||
|
@@ -1574,8 +1727,20 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts)
|
||||||
|
afdo_source_profile->mark_annotated (cfun->function_end_locus);
|
||||||
|
if (max_count > profile_count::zero ())
|
||||||
|
{
|
||||||
|
- /* Calculate, propagate count and probability information on CFG. */
|
||||||
|
- afdo_calculate_branch_prob (&annotated_bb);
|
||||||
|
+ /* 1 means -fprofile-correction is enabled manually, and MCF
|
||||||
|
+ algorithm will be used to calculate count and probability.
|
||||||
|
+ Otherwise, use the default calculate algorithm. */
|
||||||
|
+ if (flag_profile_correction == 1)
|
||||||
|
+ {
|
||||||
|
+ afdo_init_mcf ();
|
||||||
|
+ mcf_smooth_cfg ();
|
||||||
|
+ afdo_process_after_mcf ();
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ /* Calculate, propagate count and probability information on CFG. */
|
||||||
|
+ afdo_calculate_branch_prob (&annotated_bb);
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
update_max_bb_count ();
|
||||||
|
profile_status_for_fn (cfun) = PROFILE_READ;
|
||||||
|
diff --git a/gcc/cfghooks.cc b/gcc/cfghooks.cc
|
||||||
|
index c0b7bdcd9..323663010 100644
|
||||||
|
--- a/gcc/cfghooks.cc
|
||||||
|
+++ b/gcc/cfghooks.cc
|
||||||
|
@@ -542,6 +542,9 @@ split_block_1 (basic_block bb, void *i)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
new_bb->count = bb->count;
|
||||||
|
+ /* Copy discriminator from original bb to distinguish among
|
||||||
|
+ several basic blocks that share a common locus, allowing for
|
||||||
|
+ more accurate autofdo. */
|
||||||
|
new_bb->discriminator = bb->discriminator;
|
||||||
|
|
||||||
|
if (dom_info_available_p (CDI_DOMINATORS))
|
||||||
|
@@ -1113,6 +1116,10 @@ duplicate_block (basic_block bb, edge e, basic_block after, copy_bb_data *id)
|
||||||
|
move_block_after (new_bb, after);
|
||||||
|
|
||||||
|
new_bb->flags = (bb->flags & ~BB_DUPLICATED);
|
||||||
|
+ /* Copy discriminator from original bb to distinguish among
|
||||||
|
+ several basic blocks that share a common locus, allowing for
|
||||||
|
+ more accurate autofdo. */
|
||||||
|
+ new_bb->discriminator = bb->discriminator;
|
||||||
|
FOR_EACH_EDGE (s, ei, bb->succs)
|
||||||
|
{
|
||||||
|
/* Since we are creating edges from a new block to successors
|
||||||
|
diff --git a/gcc/opts.cc b/gcc/opts.cc
|
||||||
|
index 2bba88140..4b4925331 100644
|
||||||
|
--- a/gcc/opts.cc
|
||||||
|
+++ b/gcc/opts.cc
|
||||||
|
@@ -3014,7 +3014,10 @@ common_handle_option (struct gcc_options *opts,
|
||||||
|
/* FALLTHRU */
|
||||||
|
case OPT_fauto_profile:
|
||||||
|
enable_fdo_optimizations (opts, opts_set, value);
|
||||||
|
- SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction, value);
|
||||||
|
+ /* 2 is special and means flag_profile_correction is turned on by
|
||||||
|
+ -fauto-profile. */
|
||||||
|
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction,
|
||||||
|
+ (value ? 2 : 0));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case OPT_fipa_struct_reorg_:
|
||||||
|
diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc
|
||||||
|
index f892cee3f..f50dbbc52 100644
|
||||||
|
--- a/gcc/tree-inline.cc
|
||||||
|
+++ b/gcc/tree-inline.cc
|
||||||
|
@@ -2038,6 +2038,10 @@ copy_bb (copy_body_data *id, basic_block bb,
|
||||||
|
basic_block_info automatically. */
|
||||||
|
copy_basic_block = create_basic_block (NULL, (basic_block) prev->aux);
|
||||||
|
copy_basic_block->count = bb->count.apply_scale (num, den);
|
||||||
|
+ /* Copy discriminator from original bb to distinguish among
|
||||||
|
+ several basic blocks that share a common locus, allowing for
|
||||||
|
+ more accurate autofdo. */
|
||||||
|
+ copy_basic_block->discriminator = bb->discriminator;
|
||||||
|
|
||||||
|
copy_gsi = gsi_start_bb (copy_basic_block);
|
||||||
|
|
||||||
|
@@ -3058,6 +3062,16 @@ copy_cfg_body (copy_body_data * id,
|
||||||
|
den += e->count ();
|
||||||
|
ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = den;
|
||||||
|
}
|
||||||
|
+ /* When autofdo uses PMU as the sampling unit, the number of
|
||||||
|
+ ENTRY_BLOCK_PTR_FOR_FN cannot be obtained directly and will
|
||||||
|
+ be zero. Using it for adjust_for_ipa_scaling will cause the
|
||||||
|
+ inlined BB count to be incorrectly overestimated. So set den equal
|
||||||
|
+ to num, which is the source inline BB count to avoid
|
||||||
|
+ overestimation. */
|
||||||
|
+ if (den == profile_count::zero ().afdo ())
|
||||||
|
+ {
|
||||||
|
+ den = num;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
profile_count::adjust_for_ipa_scaling (&num, &den);
|
||||||
|
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
18
gcc.spec
18
gcc.spec
@ -2,7 +2,7 @@
|
|||||||
%global gcc_major 12
|
%global gcc_major 12
|
||||||
# Note, gcc_release must be integer, if you want to add suffixes to
|
# Note, gcc_release must be integer, if you want to add suffixes to
|
||||||
# %%{release}, append them after %%{gcc_release} on Release: line.
|
# %%{release}, append them after %%{gcc_release} on Release: line.
|
||||||
%global gcc_release 19
|
%global gcc_release 20
|
||||||
|
|
||||||
%global _unpackaged_files_terminate_build 0
|
%global _unpackaged_files_terminate_build 0
|
||||||
%global _performance_build 1
|
%global _performance_build 1
|
||||||
@ -166,6 +166,12 @@ Patch25: 0025-AArch64-Rewrite-the-tsv110-option.patch
|
|||||||
Patch26: 0026-GOMP-Enabling-moutline-atomics-improves-libgomp-perf.patch
|
Patch26: 0026-GOMP-Enabling-moutline-atomics-improves-libgomp-perf.patch
|
||||||
Patch27: 0027-LoopElim-Redundant-loop-elimination-optimization.patch
|
Patch27: 0027-LoopElim-Redundant-loop-elimination-optimization.patch
|
||||||
Patch28: 0028-Array-widen-compare-Fix-the-return-value-match-after.patch
|
Patch28: 0028-Array-widen-compare-Fix-the-return-value-match-after.patch
|
||||||
|
Patch29: 0029-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch
|
||||||
|
Patch30: 0030-Struct-Reorg-Add-unsafe-structure-pointer-compressio.patch
|
||||||
|
Patch31: 0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch
|
||||||
|
Patch32: 0032-AutoBOLT-Add-bolt-linker-plugin-2-3.patch
|
||||||
|
Patch33: 0033-AutoBOLT-Enable-BOLT-linker-plugin-on-aarch64-3-3.patch
|
||||||
|
Patch34: 0034-Autofdo-Enable-discrimibator-and-MCF-algorithm-on-Au.patch
|
||||||
|
|
||||||
# Part 3000 ~ 4999
|
# Part 3000 ~ 4999
|
||||||
%ifarch loongarch64
|
%ifarch loongarch64
|
||||||
@ -789,6 +795,12 @@ not stable, so plugins must be rebuilt any time GCC is updated.
|
|||||||
%patch26 -p1
|
%patch26 -p1
|
||||||
%patch27 -p1
|
%patch27 -p1
|
||||||
%patch28 -p1
|
%patch28 -p1
|
||||||
|
%patch29 -p1
|
||||||
|
%patch30 -p1
|
||||||
|
%patch31 -p1
|
||||||
|
%patch32 -p1
|
||||||
|
%patch33 -p1
|
||||||
|
%patch34 -p1
|
||||||
|
|
||||||
%ifarch loongarch64
|
%ifarch loongarch64
|
||||||
%patch3001 -p1
|
%patch3001 -p1
|
||||||
@ -3174,6 +3186,10 @@ end
|
|||||||
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Thu Apr 11 2024 Zhenyu Zhao <zhaozhenyu17@huawei.com> - 12.3.1-20
|
||||||
|
- Type: Sync
|
||||||
|
- DESC: Sync patch from openeuler/gcc
|
||||||
|
|
||||||
* Mon Apr 1 2024 Peng Fan <fanpeng@loongson.cn> 12.3.1-19
|
* Mon Apr 1 2024 Peng Fan <fanpeng@loongson.cn> 12.3.1-19
|
||||||
- Type: SPEC
|
- Type: SPEC
|
||||||
- DESC: fix libcc1 file path for LoongArch.
|
- DESC: fix libcc1 file path for LoongArch.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user