[SYNC] Sync patch from openeuler/gcc

This commit is contained in:
zhenyu--zhao_admin 2024-04-11 14:48:33 +08:00
parent fa340d47b2
commit cda59b581e
7 changed files with 37741 additions and 1 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,550 @@
From 72531376df5ed93c2d945469368ba5514eca8407 Mon Sep 17 00:00:00 2001
From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
Date: Tue, 5 Dec 2023 15:33:08 +0800
Subject: [PATCH] [AutoBOLT] Support saving feedback count info to ELF segment
1/3
---
gcc/common.opt | 8 +
gcc/final.cc | 405 ++++++++++++++++++++++++++++++++++++++++++++++++-
gcc/opts.cc | 61 ++++++++
3 files changed, 473 insertions(+), 1 deletion(-)
diff --git a/gcc/common.opt b/gcc/common.opt
index b01df919e..e69947fc2 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2546,6 +2546,14 @@ freorder-functions
Common Var(flag_reorder_functions) Optimization
Reorder functions to improve code placement.
+fauto-bolt
+Common Var(flag_auto_bolt)
+Generate profile from AutoFDO or PGO and do BOLT optimization after linkage.
+
+fauto-bolt=
+Common Joined RejectNegative
+Specify the feedback data directory required by BOLT-plugin. The default is the current directory.
+
frerun-cse-after-loop
Common Var(flag_rerun_cse_after_loop) Optimization
Add a common subexpression elimination pass after loop optimizations.
diff --git a/gcc/final.cc b/gcc/final.cc
index a9868861b..d4c4fa08f 100644
--- a/gcc/final.cc
+++ b/gcc/final.cc
@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see
#include "rtl-iter.h"
#include "print-rtl.h"
#include "function-abi.h"
+#include "insn-codes.h"
#include "common/common-target.h"
#ifdef XCOFF_DEBUGGING_INFO
@@ -4266,7 +4267,403 @@ leaf_renumber_regs_insn (rtx in_rtx)
}
}
#endif
-
+
+#define ASM_FDO_SECTION_PREFIX ".text.fdo."
+
+#define ASM_FDO_CALLER_FLAG ".fdo.caller "
+#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size "
+#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind"
+
+#define ASM_FDO_CALLEE_FLAG ".fdo.callee"
+
+/* Return the INSN_ADDRESSES value of the first real instruction of BB that
+ is not a nop. Return -1 if a call is reached first or no such insn exists. */
+
+static int
+get_bb_start_addr (basic_block bb)
+{
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!INSN_P (insn))
+ {
+ continue;
+ }
+ /* The jump target of call is not in this function, so
+ it should be excluded. */
+ if (CALL_P (insn))
+ {
+ return -1;
+ }
+
+ int insn_code = recog_memoized (insn);
+
+ /* The instruction NOP in llvm-bolt belongs to the previous
+ BB, so it needs to be skipped. */
+ if (insn_code != CODE_FOR_nop)
+ {
+ return INSN_ADDRESSES (INSN_UID (insn));
+ }
+ }
+ return -1;
+}
+
+/* Return the address of the last real instruction of BB if BB has one successor,
+ or two successors and that instruction is a conditional jump; else return -1. */
+
+static int
+get_bb_end_addr (basic_block bb)
+{
+ rtx_insn *insn;
+ int num_succs = EDGE_COUNT (bb->succs);
+ FOR_BB_INSNS_REVERSE (bb, insn)
+ {
+ if (!INSN_P (insn))
+ {
+ continue;
+ }
+ /* The jump target of call is not in this function, so
+ it should be excluded. */
+ if (CALL_P (insn))
+ {
+ return -1;
+ }
+ if ((num_succs == 1)
+ || ((num_succs == 2) && any_condjump_p (insn)))
+ {
+ return INSN_ADDRESSES (INSN_UID (insn));
+ }
+ else
+ {
+ return -1;
+ }
+ }
+ return -1;
+}
+
+/* Return the address of the last real insn of cfun, or -1 if none is found. */
+
+static int
+get_function_end_addr ()
+{
+ rtx_insn *insn = get_last_insn ();
+ for (; insn != get_insns (); insn = PREV_INSN (insn)) /* Walks backwards; the first insn is not examined. */
+ {
+ if (!INSN_P (insn))
+ {
+ continue;
+ }
+ return INSN_ADDRESSES (INSN_UID (insn));
+ }
+
+ return -1;
+}
+
+/* Return the name of cfun's profile status as a string. */
+
+static const char *
+get_function_profile_status ()
+{
+ const char *profile_status[] = {
+ "PROFILE_ABSENT",
+ "PROFILE_GUESSED",
+ "PROFILE_READ",
+ "PROFILE_LAST" /* Last value, used by profile streaming. */
+ };
+
+ return profile_status[profile_status_for_fn (cfun)]; /* NOTE(review): table order presumably mirrors the status enum - confirm. */
+}
+
+/* Return COUNT (e.g. from PGO or AutoFDO) if its quality is >= GUESSED, else 0. */
+
+inline static gcov_type
+get_fdo_count (profile_count count)
+{
+ return count.quality () >= GUESSED
+ ? count.to_gcov_type () : 0;
+}
+
+/* Return the name of COUNT's profile quality as a string. */
+
+static const char *
+get_fdo_count_quality (profile_count count)
+{
+ const char *profile_quality[] = {
+ "UNINITIALIZED_PROFILE",
+ "GUESSED_LOCAL",
+ "GUESSED_GLOBAL0",
+ "GUESSED_GLOBAL0_ADJUSTED",
+ "GUESSED",
+ "AFDO",
+ "ADJUSTED",
+ "PRECISE"
+ };
+
+ return profile_quality[count.quality ()]; /* NOTE(review): table order presumably mirrors the quality enum - confirm. */
+}
+
+static const char *
+alias_local_functions (const char *fnname)
+{ /* Return FNNAME unchanged if cfun is public, else FNNAME "/" lbasename (dump_base_name). */
+ if (TREE_PUBLIC (cfun->decl)) /* NOTE(review): tests cfun even when FNNAME names a callee - confirm intent. */
+ {
+ return fnname;
+ }
+ return concat (fnname, "/", lbasename (dump_base_name), NULL); /* Freshly allocated; not freed here. */
+}
+
+/* Return cfun's binding as a string: GLOBAL, WEAK or LOCAL. */
+
+static const char *
+simple_get_function_bind ()
+{
+ const char *function_bind[] = {
+ "GLOBAL",
+ "WEAK",
+ "LOCAL",
+ "UNKNOWN"
+ };
+
+ if (TREE_PUBLIC (cfun->decl))
+ {
+ if (!(DECL_WEAK (cfun->decl)))
+ {
+ return function_bind[0];
+ }
+ else
+ {
+ return function_bind[1];
+ }
+ }
+ else
+ {
+ return function_bind[2];
+ }
+
+ return function_bind[3]; /* Not reached: every branch above returns. */
+}
+
+/* Dump the address, callee name and CALL_COUNT of each direct call insn in BB to asm. */
+
+static void
+dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
+{
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (insn && CALL_P (insn))
+ {
+ tree callee = get_call_fndecl (insn);
+
+ if (callee) /* Calls without a known fndecl are skipped. */
+ {
+ fprintf (asm_out_file, "\t.string \"%x\"\n",
+ INSN_ADDRESSES (INSN_UID (insn)));
+
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+ ASM_FDO_CALLEE_FLAG,
+ alias_local_functions (get_fnname_from_decl (callee)));
+
+ fprintf (asm_out_file,
+ "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+ call_count);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "call: %x --> %s \n",
+ INSN_ADDRESSES (INSN_UID (insn)),
+ alias_local_functions
+ (get_fnname_from_decl (callee)));
+ }
+ }
+ }
+ }
+}
+
+/* Dump start address, end address and count of each successor edge of BB with a nonzero count to asm, then BB's direct calls. Returns nothing. */
+static void
+dump_edge_jump_info_to_asm (basic_block bb, gcov_type bb_count)
+{
+ edge e;
+ edge_iterator ei;
+ gcov_type edge_total_count = 0;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ gcov_type edge_count = get_fdo_count (e->count ());
+ edge_total_count += edge_count;
+
+ int edge_start_addr = get_bb_end_addr (e->src);
+ int edge_end_addr = get_bb_start_addr (e->dest);
+
+ if (edge_start_addr == -1 || edge_end_addr == -1)
+ {
+ continue;
+ }
+
+ /* This is a reserved assert for the original design. If this
+ assert is found, use the address of the previous instruction
+ as edge_start_addr. */
+ gcc_assert (edge_start_addr != edge_end_addr);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "edge: %x --> %x = (" HOST_WIDE_INT_PRINT_DEC ")\n",
+ edge_start_addr, edge_end_addr, edge_count);
+ }
+
+ if (edge_count > 0)
+ {
+ fprintf (asm_out_file, "\t.string \"%x\"\n", edge_start_addr);
+ fprintf (asm_out_file, "\t.string \"%x\"\n", edge_end_addr);
+ fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+ edge_count);
+ }
+ }
+
+ gcov_type call_count = MAX (edge_total_count, bb_count); /* Calls get the larger of the edge sum and BB_COUNT. */
+ if (call_count > 0)
+ {
+ dump_direct_callee_info_to_asm (bb, call_count);
+ }
+}
+
+/* Dump BB's start address and BB_COUNT to asm; emit nothing if the address is -1. */
+
+static void
+dump_bb_info_to_asm (basic_block bb, gcov_type bb_count)
+{
+ int bb_start_addr = get_bb_start_addr (bb);
+ if (bb_start_addr != -1)
+ {
+ fprintf (asm_out_file, "\t.string \"%x\"\n", bb_start_addr);
+ fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+ bb_count);
+ }
+}
+
+/* Dump cfun's name, size and binding to asm, plus a summary to dump_file if it is set. */
+
+static void
+dump_function_info_to_asm (const char *fnname)
+{
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+ ASM_FDO_CALLER_FLAG, alias_local_functions (fnname));
+ fprintf (asm_out_file, "\t.string \"%s%d\"\n", /* NOTE(review): size is decimal here but hex in dump_file below. */
+ ASM_FDO_CALLER_SIZE_FLAG, get_function_end_addr ());
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+ ASM_FDO_CALLER_BIND_FLAG, simple_get_function_bind ());
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n FUNC_NAME: %s\n",
+ alias_local_functions (fnname));
+ fprintf (dump_file, " file: %s\n",
+ dump_base_name);
+ fprintf (dump_file, "profile_status: %s\n",
+ get_function_profile_status ());
+ fprintf (dump_file, " size: %x\n",
+ get_function_end_addr ());
+ fprintf (dump_file, " function_bind: %s\n",
+ simple_get_function_bind ());
+ }
+}
+
+/* Dump the profile of cfun (named FNNAME), from AutoFDO or PGO, to asm. */
+
+static void
+dump_fdo_info_to_asm (const char *fnname)
+{
+ basic_block bb;
+
+ dump_function_info_to_asm (fnname);
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ gcov_type bb_count = get_fdo_count (bb->count);
+ if (bb_count == 0) /* Blocks without a usable count are not dumped. */
+ {
+ continue;
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "BB: %x --> %x = (" HOST_WIDE_INT_PRINT_DEC ") [%s]\n",
+ get_bb_start_addr (bb), get_bb_end_addr (bb),
+ bb_count, get_fdo_count_quality (bb->count));
+ }
+
+ if (flag_profile_use) /* PGO: dump edges and calls. */
+ {
+ dump_edge_jump_info_to_asm (bb, bb_count);
+ }
+ else if (flag_auto_profile) /* AutoFDO: dump block counts only. */
+ {
+ dump_bb_info_to_asm (bb, bb_count);
+ }
+ }
+}
+
+/* When the -fauto-bolt option is turned on, the .text.fdo section
+ will be generated in the *.s file if there is feedback information
+ from PGO or AutoFDO. This section will be parsed by the BOLT-plugin. */
+
+static void
+dump_profile_to_elf_sections ()
+{
+ if (!flag_function_sections)
+ {
+ error ("-fauto-bolt should work with -ffunction-sections");
+ return;
+ }
+ if (!flag_ipa_ra)
+ {
+ error ("-fauto-bolt should work with -fipa-ra");
+ return;
+ }
+ if (flag_align_jumps)
+ {
+ error ("-fauto-bolt is not supported with -falign-jumps");
+ return;
+ }
+ if (flag_align_labels)
+ {
+ error ("-fauto-bolt is not supported with -falign-labels");
+ return;
+ }
+ if (flag_align_loops)
+ {
+ error ("-fauto-bolt is not supported with -falign-loops");
+ return;
+ }
+
+ /* Return if no feedback data. */
+ if (!flag_profile_use && !flag_auto_profile)
+ {
+ error ("-fauto-bolt should be used with -fprofile-use or -fauto-profile");
+ return;
+ }
+
+ /* Avoid empty functions. */
+ if (TREE_CODE (cfun->decl) != FUNCTION_DECL)
+ {
+ return;
+ }
+ int flags = SECTION_DEBUG | SECTION_EXCLUDE;
+ const char *fnname = get_fnname_from_decl (current_function_decl);
+ /* Build the section name; concat handles allocation failure itself. */
+ char *profile_fnname = concat (ASM_FDO_SECTION_PREFIX, fnname, NULL);
+
+ switch_to_section (get_section (profile_fnname, flags, NULL));
+ dump_fdo_info_to_asm (fnname);
+
+ if (profile_fnname)
+ {
+ free (profile_fnname);
+ profile_fnname = NULL;
+ }
+}
+
/* Turn the RTL into assembly. */
static unsigned int
rest_of_handle_final (void)
@@ -4334,6 +4731,12 @@ rest_of_handle_final (void)
targetm.asm_out.destructor (XEXP (DECL_RTL (current_function_decl), 0),
decl_fini_priority_lookup
(current_function_decl));
+
+ if (flag_auto_bolt)
+ {
+ dump_profile_to_elf_sections ();
+ }
+
return 0;
}
diff --git a/gcc/opts.cc b/gcc/opts.cc
index b868d189e..6d57e7d69 100644
--- a/gcc/opts.cc
+++ b/gcc/opts.cc
@@ -1279,6 +1279,10 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
if (opts->x_flag_vtable_verify && opts->x_flag_lto)
sorry ("vtable verification is not supported with LTO");
+ /* Currently -fauto-bolt is not supported for LTO. */
+ if (opts->x_flag_auto_bolt && opts->x_flag_lto)
+ sorry ("%<-fauto-bolt%> is not supported with LTO");
+
/* Control IPA optimizations based on different -flive-patching level. */
if (opts->x_flag_live_patching)
control_options_for_live_patching (opts, opts_set,
@@ -1291,6 +1295,58 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
= (opts->x_flag_unroll_loops
|| opts->x_flag_peel_loops
|| opts->x_optimize >= 3);
+
+ if (opts->x_flag_auto_bolt)
+ {
+ /* Record the function section to facilitate the feedback
+ data storage. */
+ if (!opts->x_flag_function_sections)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should work with %<-ffunction-sections%>,"
+ " enabling %<-ffunction-sections%>");
+ opts->x_flag_function_sections = true;
+ }
+
+ /* Cancel the internal alignment of the function. The binary
+ optimizer bolt will cancel the internal alignment optimization
+ of the function, so the alignment is meaningless at this time,
+ and if not, it will bring trouble to the calculation of the
+ offset address of the instruction. */
+ if (opts->x_flag_align_jumps)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should not work with %<-falign-jumps%>,"
+ " disabling %<-falign-jumps%>");
+ opts->x_flag_align_jumps = false;
+ }
+
+ if (opts->x_flag_align_labels)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should not work with %<-falign-labels%>,"
+ " disabling %<-falign-labels%>");
+ opts->x_flag_align_labels = false;
+ }
+
+ if (opts->x_flag_align_loops)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should not work with %<-falign-loops%>,"
+ " disabling %<-falign-loops%>");
+ opts->x_flag_align_loops = false;
+ }
+
+ /* When parsing instructions in RTL phase, we need to know
+ the call information of instructions to avoid being optimized. */
+ if (!opts->x_flag_ipa_ra)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should work with %<-fipa-ra%>,"
+ " enabling %<-fipa-ra%>");
+ opts->x_flag_ipa_ra = true;
+ }
+ }
/* With -fcx-limited-range, we do cheap and quick complex arithmetic. */
if (opts->x_flag_cx_limited_range)
@@ -3226,6 +3282,11 @@ common_handle_option (struct gcc_options *opts,
&opts->x_flag_align_functions,
&opts->x_str_align_functions);
break;
+
+ case OPT_fauto_bolt_:
+ case OPT_fauto_bolt:
+ /* Deferred. */
+ break;
case OPT_ftabstop_:
/* It is documented that we silently ignore silly values. */
--
2.33.0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,345 @@
From 94242286383a80e6ab83d824a4d7ea23ea311f75 Mon Sep 17 00:00:00 2001
From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
Date: Mon, 22 Jan 2024 15:38:24 +0800
Subject: [PATCH] [AutoBOLT] Enable BOLT linker plugin on aarch64 3/3
---
Makefile.def | 10 ++++++++++
configure | 27 ++++++++++++++++++++++++++-
configure.ac | 22 +++++++++++++++++++++-
gcc/config.host | 1 +
gcc/config.in | 13 +++++++++++++
gcc/configure | 10 ++++++++--
gcc/configure.ac | 4 ++++
gcc/gcc.cc | 23 +++++++++++++++++++++++
8 files changed, 106 insertions(+), 4 deletions(-)
diff --git a/Makefile.def b/Makefile.def
index 72d585496..0ba868890 100644
--- a/Makefile.def
+++ b/Makefile.def
@@ -145,6 +145,9 @@ host_modules= { module= gnattools; };
host_modules= { module= lto-plugin; bootstrap=true;
extra_configure_flags='--enable-shared @extra_linker_plugin_flags@ @extra_linker_plugin_configure_flags@';
extra_make_flags='@extra_linker_plugin_flags@'; };
+host_modules= { module= bolt-plugin; bootstrap=true;
+ extra_configure_flags='--enable-shared @extra_linker_plugin_flags@ @extra_linker_plugin_configure_flags@';
+ extra_make_flags='@extra_linker_plugin_flags@'; };
host_modules= { module= libcc1; extra_configure_flags=--enable-shared; };
host_modules= { module= gotools; };
host_modules= { module= libctf; bootstrap=true; };
@@ -349,6 +352,7 @@ dependencies = { module=configure-gcc; on=all-mpfr; };
dependencies = { module=configure-gcc; on=all-mpc; };
dependencies = { module=configure-gcc; on=all-isl; };
dependencies = { module=configure-gcc; on=all-lto-plugin; };
+dependencies = { module=configure-gcc; on=all-bolt-plugin; };
dependencies = { module=configure-gcc; on=all-binutils; };
dependencies = { module=configure-gcc; on=all-gas; };
dependencies = { module=configure-gcc; on=all-ld; };
@@ -374,6 +378,7 @@ dependencies = { module=all-gcc; on=all-libdecnumber; hard=true; };
dependencies = { module=all-gcc; on=all-libiberty; };
dependencies = { module=all-gcc; on=all-fixincludes; };
dependencies = { module=all-gcc; on=all-lto-plugin; };
+dependencies = { module=all-gcc; on=all-bolt-plugin; };
dependencies = { module=all-gcc; on=all-libiconv; };
dependencies = { module=info-gcc; on=all-build-libiberty; };
dependencies = { module=dvi-gcc; on=all-build-libiberty; };
@@ -381,8 +386,10 @@ dependencies = { module=pdf-gcc; on=all-build-libiberty; };
dependencies = { module=html-gcc; on=all-build-libiberty; };
dependencies = { module=install-gcc ; on=install-fixincludes; };
dependencies = { module=install-gcc ; on=install-lto-plugin; };
+dependencies = { module=install-gcc ; on=install-bolt-plugin; };
dependencies = { module=install-strip-gcc ; on=install-strip-fixincludes; };
dependencies = { module=install-strip-gcc ; on=install-strip-lto-plugin; };
+dependencies = { module=install-strip-gcc ; on=install-strip-bolt-plugin; };
dependencies = { module=configure-libcpp; on=configure-libiberty; hard=true; };
dependencies = { module=configure-libcpp; on=configure-intl; };
@@ -401,6 +408,9 @@ dependencies = { module=all-gnattools; on=all-target-libstdc++-v3; };
dependencies = { module=all-lto-plugin; on=all-libiberty; };
dependencies = { module=all-lto-plugin; on=all-libiberty-linker-plugin; };
+dependencies = { module=all-bolt-plugin; on=all-libiberty; };
+dependencies = { module=all-bolt-plugin; on=all-libiberty-linker-plugin; };
+
dependencies = { module=configure-libcc1; on=configure-gcc; };
dependencies = { module=all-libcc1; on=all-gcc; };
diff --git a/configure b/configure
index 5dcaab14a..aff62c464 100755
--- a/configure
+++ b/configure
@@ -826,6 +826,7 @@ with_isl
with_isl_include
with_isl_lib
enable_isl_version_check
+enable_bolt
enable_lto
enable_linker_plugin_configure_flags
enable_linker_plugin_flags
@@ -1550,6 +1551,7 @@ Optional Features:
enable the PGO build
--disable-isl-version-check
disable check for isl version
+ --enable-bolt enable bolt optimization support
--enable-lto enable link time optimization support
--enable-linker-plugin-configure-flags=FLAGS
additional flags for configuring linker plugins
@@ -8564,6 +8566,15 @@ fi
+# Check for BOLT support.
+# Check whether --enable-bolt was given.
+if test "${enable_bolt+set}" = set; then :
+ enableval=$enable_bolt; enable_bolt=$enableval
+else
+ enable_bolt=no; default_enable_bolt=no
+fi
+
+
# Check for LTO support.
# Check whether --enable-lto was given.
if test "${enable_lto+set}" = set; then :
@@ -8593,6 +8604,16 @@ if test $target_elf = yes; then :
# ELF platforms build the lto-plugin always.
build_lto_plugin=yes
+ # ELF platforms can build the bolt-plugin.
+ # NOT BUILD BOLT BY DEFAULT.
+ case $target in
+ aarch64*-*-linux*)
+ if test $enable_bolt = yes; then :
+ build_bolt_plugin=yes
+ fi
+ ;;
+ esac
+
else
if test x"$default_enable_lto" = x"yes" ; then
case $target in
@@ -8780,6 +8801,10 @@ if test -d ${srcdir}/gcc; then
fi
fi
+ if test "${build_bolt_plugin}" = "yes" ; then
+ configdirs="$configdirs bolt-plugin"
+ fi
+
# If we're building an offloading compiler, add the LTO front end.
if test x"$enable_as_accelerator_for" != x ; then
case ,${enable_languages}, in
@@ -9202,7 +9227,7 @@ fi
extra_host_libiberty_configure_flags=
extra_host_zlib_configure_flags=
case " $configdirs " in
- *" lto-plugin "* | *" libcc1 "*)
+ *" lto-plugin "* | *" libcc1 "* | *" bolt-plugin "*)
# When these are to be built as shared libraries, the same applies to
# libiberty.
extra_host_libiberty_configure_flags=--enable-shared
diff --git a/configure.ac b/configure.ac
index 85977482a..f310d75ca 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1863,6 +1863,12 @@ fi
AC_SUBST(isllibs)
AC_SUBST(islinc)
+# Check for BOLT support.
+AC_ARG_ENABLE(bolt,
+[AS_HELP_STRING([--enable-bolt], [enable bolt optimization support])],
+enable_bolt=$enableval,
+enable_bolt=no; default_enable_bolt=no)
+
# Check for LTO support.
AC_ARG_ENABLE(lto,
[AS_HELP_STRING([--enable-lto], [enable link time optimization support])],
@@ -1871,6 +1877,16 @@ enable_lto=yes; default_enable_lto=yes)
ACX_ELF_TARGET_IFELSE([# ELF platforms build the lto-plugin always.
build_lto_plugin=yes
+
+ # ELF platforms can build the bolt-plugin.
+ # NOT BUILD BOLT BY DEFAULT.
+ case $target in
+ aarch64*-*-linux*)
+ if test $enable_bolt = yes; then :
+ build_bolt_plugin=yes
+ fi
+ ;;
+ esac
],[if test x"$default_enable_lto" = x"yes" ; then
case $target in
*-apple-darwin[[912]]* | *-cygwin* | *-mingw* | *djgpp*) ;;
@@ -2049,6 +2065,10 @@ if test -d ${srcdir}/gcc; then
fi
fi
+ if test "${build_bolt_plugin}" = "yes" ; then
+ configdirs="$configdirs bolt-plugin"
+ fi
+
# If we're building an offloading compiler, add the LTO front end.
if test x"$enable_as_accelerator_for" != x ; then
case ,${enable_languages}, in
@@ -2457,7 +2477,7 @@ fi
extra_host_libiberty_configure_flags=
extra_host_zlib_configure_flags=
case " $configdirs " in
- *" lto-plugin "* | *" libcc1 "*)
+ *" lto-plugin "* | *" libcc1 "* | *" bolt-plugin "*)
# When these are to be built as shared libraries, the same applies to
# libiberty.
extra_host_libiberty_configure_flags=--enable-shared
diff --git a/gcc/config.host b/gcc/config.host
index 4ca300f11..bf7dcb4cc 100644
--- a/gcc/config.host
+++ b/gcc/config.host
@@ -75,6 +75,7 @@ out_host_hook_obj=host-default.o
host_can_use_collect2=yes
use_long_long_for_widest_fast_int=no
host_lto_plugin_soname=liblto_plugin.so
+host_bolt_plugin_soname=libbolt_plugin.so
# Unsupported hosts list. Generally, only include hosts known to fail here,
# since we allow hosts not listed to be supported generically.
diff --git a/gcc/config.in b/gcc/config.in
index 64c27c9cf..6bb25b25b 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -24,6 +24,13 @@
#endif
+/* Define to the name of the BOLT plugin DSO that must be passed to the
+ linker's -plugin=LIB option. */
+#ifndef USED_FOR_TARGET
+#undef BOLTPLUGINSONAME
+#endif
+
+
/* Define to the root for URLs about GCC changes. */
#ifndef USED_FOR_TARGET
#undef CHANGES_ROOT_URL
@@ -2208,6 +2215,12 @@
#endif
+/* Define which stat syscall is able to handle 64bit indodes. */
+#ifndef USED_FOR_TARGET
+#undef HOST_STAT_FOR_64BIT_INODES
+#endif
+
+
/* Define as const if the declaration of iconv() needs const. */
#ifndef USED_FOR_TARGET
#undef ICONV_CONST
diff --git a/gcc/configure b/gcc/configure
index 98bbf0f85..30f386789 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -13578,6 +13578,12 @@ case $use_collect2 in
esac
+cat >>confdefs.h <<_ACEOF
+#define BOLTPLUGINSONAME "${host_bolt_plugin_soname}"
+_ACEOF
+
+
+
cat >>confdefs.h <<_ACEOF
#define LTOPLUGINSONAME "${host_lto_plugin_soname}"
_ACEOF
@@ -19668,7 +19674,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 19671 "configure"
+#line 19677 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -19774,7 +19780,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 19777 "configure"
+#line 19783 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
diff --git a/gcc/configure.ac b/gcc/configure.ac
index c74f4b555..dd6cd60f8 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -2531,6 +2531,10 @@ case $use_collect2 in
;;
esac
+AC_DEFINE_UNQUOTED(BOLTPLUGINSONAME,"${host_bolt_plugin_soname}",
+[Define to the name of the BOLT plugin DSO that must be
+ passed to the linker's -plugin=LIB option.])
+
AC_DEFINE_UNQUOTED(LTOPLUGINSONAME,"${host_lto_plugin_soname}",
[Define to the name of the LTO plugin DSO that must be
passed to the linker's -plugin=LIB option.])
diff --git a/gcc/gcc.cc b/gcc/gcc.cc
index fbcc9d033..b0d03430e 100644
--- a/gcc/gcc.cc
+++ b/gcc/gcc.cc
@@ -1156,6 +1156,8 @@ proper position among the other output files. */
%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\
%(linker) " \
LINK_PLUGIN_SPEC \
+ "%{fauto-bolt|fauto-bolt=*|fbolt-use|fbolt-use=*: \
+ -plugin %(linker_auto_bolt_plugin_file) }"\
"%{flto|flto=*:%<fcompare-debug*} \
%{flto} %{fno-lto} %{flto=*} %l " LINK_PIE_SPEC \
"%{fuse-ld=*:-fuse-ld=%*} " LINK_COMPRESS_DEBUG_SPEC \
@@ -1210,6 +1212,7 @@ static const char *endfile_spec = ENDFILE_SPEC;
static const char *startfile_spec = STARTFILE_SPEC;
static const char *linker_name_spec = LINKER_NAME;
static const char *linker_plugin_file_spec = "";
+static const char *linker_auto_bolt_plugin_file_spec = "";
static const char *lto_wrapper_spec = "";
static const char *lto_gcc_spec = "";
static const char *post_link_spec = POST_LINK_SPEC;
@@ -1723,6 +1726,8 @@ static struct spec_list static_specs[] =
INIT_STATIC_SPEC ("multilib_reuse", &multilib_reuse),
INIT_STATIC_SPEC ("linker", &linker_name_spec),
INIT_STATIC_SPEC ("linker_plugin_file", &linker_plugin_file_spec),
+ INIT_STATIC_SPEC ("linker_auto_bolt_plugin_file",
+ &linker_auto_bolt_plugin_file_spec),
INIT_STATIC_SPEC ("lto_wrapper", &lto_wrapper_spec),
INIT_STATIC_SPEC ("lto_gcc", &lto_gcc_spec),
INIT_STATIC_SPEC ("post_link", &post_link_spec),
@@ -9118,6 +9123,24 @@ driver::maybe_run_linker (const char *argv0) const
}
#endif
set_static_spec_shared (&lto_gcc_spec, argv0);
+
+ /* Set bolt-plugin. */
+ const char *fauto_bolt = "fauto-bolt";
+ const char *fbolt_use = "fbolt-use";
+ if (switch_matches (fauto_bolt, fauto_bolt + strlen (fauto_bolt), 1)
+ || switch_matches (fbolt_use, fbolt_use + strlen (fbolt_use), 1))
+ {
+ linker_auto_bolt_plugin_file_spec = find_a_file (&exec_prefixes,
+ BOLTPLUGINSONAME, X_OK, false);
+ if (!linker_auto_bolt_plugin_file_spec)
+ {
+ fatal_error (input_location,
+ "-fauto-bolt or -fbolt-use is used, but %s is not found",
+ BOLTPLUGINSONAME);
+
+ }
+ }
+
}
/* Rebuild the COMPILER_PATH and LIBRARY_PATH environment variables
--
2.33.0

View File

@ -0,0 +1,312 @@
From b020447c840c6e22440a9b9063298a06333fd2f1 Mon Sep 17 00:00:00 2001
From: zhenyu--zhao <zhaozhenyu17@huawei.com>
Date: Sat, 23 Mar 2024 22:56:09 +0800
Subject: [PATCH] [Autofdo]Enable discrimibator and MCF algorithm on Autofdo
---
gcc/auto-profile.cc | 171 +++++++++++++++++++++++++++++++++++++++++++-
gcc/cfghooks.cc | 7 ++
gcc/opts.cc | 5 +-
gcc/tree-inline.cc | 14 ++++
4 files changed, 193 insertions(+), 4 deletions(-)
diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
index 2b34b80b8..f45f0ec66 100644
--- a/gcc/auto-profile.cc
+++ b/gcc/auto-profile.cc
@@ -466,6 +466,17 @@ string_table::get_index (const char *name) const
if (name == NULL)
return -1;
string_index_map::const_iterator iter = map_.find (name);
+ /* Function name may be duplicate. Try to distinguish by the
+ #file_name#function_name defined by the autofdo tool chain. */
+ if (iter == map_.end ())
+ {
+ char* file_name = get_original_name (lbasename (dump_base_name));
+ char* file_func_name
+ = concat ("#", file_name, "#", name, NULL);
+ iter = map_.find (file_func_name);
+ free (file_name);
+ free (file_func_name);
+ }
if (iter == map_.end ())
return -1;
@@ -654,7 +665,7 @@ function_instance::read_function_instance (function_instance_stack *stack,
for (unsigned i = 0; i < num_pos_counts; i++)
{
- unsigned offset = gcov_read_unsigned () & 0xffff0000;
+ unsigned offset = gcov_read_unsigned ();
unsigned num_targets = gcov_read_unsigned ();
gcov_type count = gcov_read_counter ();
s->pos_counts[offset].count = count;
@@ -733,6 +744,10 @@ autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
function_instance *s = get_function_instance_by_inline_stack (stack);
if (s == NULL)
return false;
+ if (s->get_count_info (stack[0].second + stmt->bb->discriminator, info))
+ {
+ return true;
+ }
return s->get_count_info (stack[0].second, info);
}
@@ -1395,6 +1410,66 @@ afdo_propagate (bb_set *annotated_bb)
}
}
+/* Handle the case where afdo_propagate () would invert the branch
+ probability. E.g.
+ BB_NUM (sample count)
+ BB1 (1000)
+ / \
+ BB2 (10) BB3 (0)
+ \ /
+ BB4
+ In afdo_propagate () the count of BB3 is calculated as
+ COUNT (BB3) = 990 (990 = COUNT (BB1) - COUNT (BB2) = 1000 - 10)
+ In fact, BB3 may be colder than BB2 by sample count.
+ This function allocates the source BB count to each succ BB by sample
+ rate, e.g.
+ BB2_COUNT = BB1_COUNT * (BB2_COUNT / (BB2_COUNT + BB3_COUNT)) */
+
+static void
+afdo_preprocess_bb_count ()
+{
+ basic_block bb;
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ if (bb->count.ipa_p () && EDGE_COUNT (bb->succs) > 1
+ && bb->count > profile_count::zero ().afdo ())
+ {
+ basic_block bb1 = EDGE_SUCC (bb, 0)->dest;
+ basic_block bb2 = EDGE_SUCC (bb, 1)->dest;
+ if (single_succ_p (bb1) && single_succ_p (bb2)
+ && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
+ { /* BB1 and BB2 each have exactly one successor, and it is the same block. */
+ gcov_type max_count = 0;
+ gcov_type total_count = 0;
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if (!e->dest->count.ipa_p ())
+ {
+ continue;
+ }
+ max_count = MAX (max_count, e->dest->count.to_gcov_type ());
+ total_count += e->dest->count.to_gcov_type ();
+ }
+ /* Only when bb_count > max_count * 2 can the branch probability
+ be inverted. */
+ if (max_count > 0 && bb->count.to_gcov_type () > max_count * 2)
+ {
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ gcov_type target_count = bb->count.to_gcov_type ()
+ * e->dest->count.to_gcov_type ()/ total_count;
+ e->dest->count
+ = profile_count::from_gcov_type
+ (target_count).afdo ();
+ }
+ }
+ }
+ }
+ }
+}
+
/* Propagate counts on control flow graph and calculate branch
probabilities. */
@@ -1420,6 +1495,7 @@ afdo_calculate_branch_prob (bb_set *annotated_bb)
}
afdo_find_equiv_class (annotated_bb);
+ afdo_preprocess_bb_count ();
afdo_propagate (annotated_bb);
FOR_EACH_BB_FN (bb, cfun)
@@ -1523,6 +1599,83 @@ afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
return false;
}
+/* Prepare bb_gcov_counts and edge_gcov_counts before running the MCF algorithm. */
+
+static void
+afdo_init_mcf ()
+{
+ basic_block bb;
+ edge e;
+ edge_iterator ei;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n init calling mcf_smooth_cfg (). \n");
+ }
+
+ /* Step1: when using MCF, BB ids must be contiguous,
+ so we need compact_blocks (). */
+ compact_blocks ();
+
+ /* Step2: allocate memory for MCF input data. */
+ bb_gcov_counts.safe_grow_cleared (cfun->cfg->x_last_basic_block);
+ edge_gcov_counts = new hash_map<edge, gcov_type>;
+
+ /* Step3: init MCF input data from cfg. */
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ /* Init BB count for MCF. */
+ bb_gcov_count (bb) = bb->count.to_gcov_type ();
+
+ gcov_type total_count = 0;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ total_count += e->dest->count.to_gcov_type ();
+ }
+
+ /* If no successor block has samples, the source BB count is split over the
+ edges by static branch probability; else in proportion to successor counts. */
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if (total_count == 0)
+ {
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
+ * e->probability.to_reg_br_prob_base () / REG_BR_PROB_BASE;
+ }
+ else
+ {
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
+ * e->dest->count.to_gcov_type () / total_count;
+ }
+ }
+ }
+}
+
+
+/* Reset BB counts from the MCF result, then free the resources used by MCF.
+ Branch probability has been updated in mcf_smooth_cfg (). */
+
+static void
+afdo_process_after_mcf ()
+{
+ basic_block bb;
+ /* Reset BB count from MCF result. */
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ if (bb_gcov_count (bb)) /* A zero result leaves bb->count untouched. */
+ {
+ bb->count
+ = profile_count::from_gcov_type (bb_gcov_count (bb)).afdo ();
+ }
+ }
+
+ /* Clean up MCF resource. */
+ bb_gcov_counts.release ();
+ delete edge_gcov_counts;
+ edge_gcov_counts = NULL;
+}
+
/* Annotate auto profile to the control flow graph. Do not annotate value
profile for stmts in PROMOTED_STMTS. */
@@ -1574,8 +1727,20 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts)
afdo_source_profile->mark_annotated (cfun->function_end_locus);
if (max_count > profile_count::zero ())
{
- /* Calculate, propagate count and probability information on CFG. */
- afdo_calculate_branch_prob (&annotated_bb);
+ /* 1 means -fprofile-correction is enabled manually, and the MCF
+ algorithm will be used to calculate count and probability.
+ Otherwise, use the default calculation algorithm. */
+ if (flag_profile_correction == 1)
+ {
+ afdo_init_mcf ();
+ mcf_smooth_cfg ();
+ afdo_process_after_mcf ();
+ }
+ else
+ {
+ /* Calculate, propagate count and probability information on CFG. */
+ afdo_calculate_branch_prob (&annotated_bb);
+ }
}
update_max_bb_count ();
profile_status_for_fn (cfun) = PROFILE_READ;
diff --git a/gcc/cfghooks.cc b/gcc/cfghooks.cc
index c0b7bdcd9..323663010 100644
--- a/gcc/cfghooks.cc
+++ b/gcc/cfghooks.cc
@@ -542,6 +542,9 @@ split_block_1 (basic_block bb, void *i)
return NULL;
new_bb->count = bb->count;
+ /* Copy the discriminator from the original bb to distinguish among
+ several basic blocks that share a common locus, allowing for
+ more accurate autofdo. */
new_bb->discriminator = bb->discriminator;
if (dom_info_available_p (CDI_DOMINATORS))
@@ -1113,6 +1116,10 @@ duplicate_block (basic_block bb, edge e, basic_block after, copy_bb_data *id)
move_block_after (new_bb, after);
new_bb->flags = (bb->flags & ~BB_DUPLICATED);
+ /* Copy the discriminator from the original bb to distinguish among
+ several basic blocks that share a common locus, allowing for
+ more accurate autofdo. */
+ new_bb->discriminator = bb->discriminator;
FOR_EACH_EDGE (s, ei, bb->succs)
{
/* Since we are creating edges from a new block to successors
diff --git a/gcc/opts.cc b/gcc/opts.cc
index 2bba88140..4b4925331 100644
--- a/gcc/opts.cc
+++ b/gcc/opts.cc
@@ -3014,7 +3014,10 @@ common_handle_option (struct gcc_options *opts,
/* FALLTHRU */
case OPT_fauto_profile:
enable_fdo_optimizations (opts, opts_set, value);
- SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction, value);
+ /* 2 is special and means flag_profile_correction was turned on by
+ -fauto-profile. */
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction,
+ (value ? 2 : 0));
break;
case OPT_fipa_struct_reorg_:
diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc
index f892cee3f..f50dbbc52 100644
--- a/gcc/tree-inline.cc
+++ b/gcc/tree-inline.cc
@@ -2038,6 +2038,10 @@ copy_bb (copy_body_data *id, basic_block bb,
basic_block_info automatically. */
copy_basic_block = create_basic_block (NULL, (basic_block) prev->aux);
copy_basic_block->count = bb->count.apply_scale (num, den);
+ /* Copy the discriminator from the original bb to distinguish among
+ several basic blocks that share a common locus, allowing for
+ more accurate autofdo. */
+ copy_basic_block->discriminator = bb->discriminator;
copy_gsi = gsi_start_bb (copy_basic_block);
@@ -3058,6 +3062,16 @@ copy_cfg_body (copy_body_data * id,
den += e->count ();
ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = den;
}
+ /* When autofdo uses PMU as the sampling unit, the count of
+ ENTRY_BLOCK_PTR_FOR_FN cannot be obtained directly and will
+ be zero. Using it for adjust_for_ipa_scaling would cause the
+ inlined BB count to be overestimated. So set den equal
+ to num, which is the source inline BB count, to avoid the
+ overestimation. */
+ if (den == profile_count::zero ().afdo ())
+ {
+ den = num;
+ }
profile_count::adjust_for_ipa_scaling (&num, &den);
--
2.33.0

View File

@ -2,7 +2,7 @@
%global gcc_major 12
# Note, gcc_release must be integer, if you want to add suffixes to
# %%{release}, append them after %%{gcc_release} on Release: line.
%global gcc_release 19
%global gcc_release 20
%global _unpackaged_files_terminate_build 0
%global _performance_build 1
@ -166,6 +166,12 @@ Patch25: 0025-AArch64-Rewrite-the-tsv110-option.patch
Patch26: 0026-GOMP-Enabling-moutline-atomics-improves-libgomp-perf.patch
Patch27: 0027-LoopElim-Redundant-loop-elimination-optimization.patch
Patch28: 0028-Array-widen-compare-Fix-the-return-value-match-after.patch
Patch29: 0029-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch
Patch30: 0030-Struct-Reorg-Add-unsafe-structure-pointer-compressio.patch
Patch31: 0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch
Patch32: 0032-AutoBOLT-Add-bolt-linker-plugin-2-3.patch
Patch33: 0033-AutoBOLT-Enable-BOLT-linker-plugin-on-aarch64-3-3.patch
Patch34: 0034-Autofdo-Enable-discrimibator-and-MCF-algorithm-on-Au.patch
# Part 3000 ~ 4999
%ifarch loongarch64
@ -789,6 +795,12 @@ not stable, so plugins must be rebuilt any time GCC is updated.
%patch26 -p1
%patch27 -p1
%patch28 -p1
%patch29 -p1
%patch30 -p1
%patch31 -p1
%patch32 -p1
%patch33 -p1
%patch34 -p1
%ifarch loongarch64
%patch3001 -p1
@ -3174,6 +3186,10 @@ end
%doc rpm.doc/changelogs/libcc1/ChangeLog*
%changelog
* Thu Apr 11 2024 Zhenyu Zhao <zhaozhenyu17@huawei.com> - 12.3.1-20
- Type: Sync
- DESC: Sync patch from openeuler/gcc
* Mon Apr 1 2024 Peng Fan <fanpeng@loongson.cn> 12.3.1-19
- Type: SPEC
- DESC: fix libcc1 file path for LoongArch.