[Sync] Sync patch from openeuler/gcc
Sync patch from openeuler/gcc - 20220302
This commit is contained in:
parent
fd39bb4457
commit
635a0d4206
548
0029-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch
Normal file
548
0029-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch
Normal file
@ -0,0 +1,548 @@
|
||||
From c34a02199b1dfd362e81e78cb90fbd11e02eb93e Mon Sep 17 00:00:00 2001
|
||||
From: liyancheng <412998149@qq.com>
|
||||
Date: Mon, 14 Feb 2022 14:34:41 +0800
|
||||
Subject: [PATCH 29/32] [AutoBOLT] Support saving feedback count info to ELF
|
||||
segment 1/3
|
||||
|
||||
Add flag -fauto-bolt to save the feedback count info from PGO or
|
||||
AutoFDO to section .text.fdo. The bolt plugin will read and parse
|
||||
it into the profile of llvm-bolt.
|
||||
---
|
||||
gcc/common.opt | 8 +
|
||||
gcc/final.c | 400 +++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
gcc/opts.c | 61 ++++++++
|
||||
3 files changed, 469 insertions(+)
|
||||
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index 9488bd90f..5eaa667b3 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -2403,6 +2403,14 @@ freorder-functions
|
||||
Common Report Var(flag_reorder_functions) Optimization
|
||||
Reorder functions to improve code placement.
|
||||
|
||||
+fauto-bolt
|
||||
+Common Report Var(flag_auto_bolt)
|
||||
+Generate profile from AutoFDO or PGO and do BOLT optimization after linkage.
|
||||
+
|
||||
+fauto-bolt=
|
||||
+Common Joined RejectNegative
|
||||
+Specify the feedback data directory required by BOLT-plugin. The default is the current directory.
|
||||
+
|
||||
frerun-cse-after-loop
|
||||
Common Report Var(flag_rerun_cse_after_loop) Optimization
|
||||
Add a common subexpression elimination pass after loop optimizations.
|
||||
diff --git a/gcc/final.c b/gcc/final.c
|
||||
index a3601964a..b9affd3a7 100644
|
||||
--- a/gcc/final.c
|
||||
+++ b/gcc/final.c
|
||||
@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "rtl-iter.h"
|
||||
#include "print-rtl.h"
|
||||
#include "function-abi.h"
|
||||
+#include "insn-codes.h"
|
||||
|
||||
#ifdef XCOFF_DEBUGGING_INFO
|
||||
#include "xcoffout.h" /* Needed for external data declarations. */
|
||||
@@ -4640,6 +4641,399 @@ leaf_renumber_regs_insn (rtx in_rtx)
|
||||
}
|
||||
#endif
|
||||
|
||||
+
|
||||
+#define ASM_FDO_SECTION_PREFIX ".text.fdo."
|
||||
+
|
||||
+#define ASM_FDO_CALLER_FLAG ".fdo.caller "
|
||||
+#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size "
|
||||
+#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind "
|
||||
+
|
||||
+#define ASM_FDO_CALLEE_FLAG ".fdo.callee "
|
||||
+
|
||||
+/* Return the relative offset address of the start instruction of BB,
|
||||
+ return -1 if BB has no instruction other than NOPs. */
|
||||
+
|
||||
+static int
|
||||
+get_bb_start_addr (basic_block bb)
|
||||
+{
|
||||
+ rtx_insn *insn;
|
||||
+ FOR_BB_INSNS (bb, insn)
|
||||
+ {
|
||||
+ if (!INSN_P (insn))
|
||||
+ {
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ int insn_code = recog_memoized (insn);
|
||||
+
|
||||
+ /* The instruction NOP in llvm-bolt belongs to the previous
|
||||
+ BB, so it needs to be skipped. */
|
||||
+ if (insn_code != CODE_FOR_nop)
|
||||
+ {
|
||||
+ return INSN_ADDRESSES (INSN_UID (insn));
|
||||
+ }
|
||||
+ }
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+/* Return the relative offset address of the end instruction of BB,
|
||||
+ return -1 if it is empty or a call instruction. */
|
||||
+
|
||||
+static int
|
||||
+get_bb_end_addr (basic_block bb)
|
||||
+{
|
||||
+ rtx_insn *insn;
|
||||
+ int num_succs = EDGE_COUNT (bb->succs);
|
||||
+ FOR_BB_INSNS_REVERSE (bb, insn)
|
||||
+ {
|
||||
+ if (!INSN_P (insn))
|
||||
+ {
|
||||
+ continue;
|
||||
+ }
|
||||
+ /* The jump target of call is not in this function, so
|
||||
+ it should be excluded. */
|
||||
+ if (CALL_P (insn))
|
||||
+ {
|
||||
+ return -1;
|
||||
+ }
|
||||
+ if ((num_succs == 1)
|
||||
+ || ((num_succs == 2) && any_condjump_p (insn)))
|
||||
+ {
|
||||
+ return INSN_ADDRESSES (INSN_UID (insn));
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+/* Return the end address of cfun. */
|
||||
+
|
||||
+static int
|
||||
+get_function_end_addr ()
|
||||
+{
|
||||
+ rtx_insn *insn = get_last_insn ();
|
||||
+ for (; insn != get_insns (); insn = PREV_INSN (insn))
|
||||
+ {
|
||||
+ if (!INSN_P (insn))
|
||||
+ {
|
||||
+ continue;
|
||||
+ }
|
||||
+ return INSN_ADDRESSES (INSN_UID (insn));
|
||||
+ }
|
||||
+
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+/* Return the function profile status string. */
|
||||
+
|
||||
+static const char *
|
||||
+get_function_profile_status ()
|
||||
+{
|
||||
+ const char *profile_status[] = {
|
||||
+ "PROFILE_ABSENT",
|
||||
+ "PROFILE_GUESSED",
|
||||
+ "PROFILE_READ",
|
||||
+ "PROFILE_LAST" /* Last value, used by profile streaming. */
|
||||
+ };
|
||||
+
|
||||
+ return profile_status[profile_status_for_fn (cfun)];
|
||||
+}
|
||||
+
|
||||
+/* Return the count from the feedback data, such as PGO or AFDO. */
|
||||
+
|
||||
+inline static gcov_type
|
||||
+get_fdo_count (profile_count count)
|
||||
+{
|
||||
+ return count.quality () >= GUESSED
|
||||
+ ? count.to_gcov_type () : 0;
|
||||
+}
|
||||
+
|
||||
+/* Return the profile quality string. */
|
||||
+
|
||||
+static const char *
|
||||
+get_fdo_count_quality (profile_count count)
|
||||
+{
|
||||
+ const char *profile_quality[] = {
|
||||
+ "UNINITIALIZED_PROFILE",
|
||||
+ "GUESSED_LOCAL",
|
||||
+ "GUESSED_GLOBAL0",
|
||||
+ "GUESSED_GLOBAL0_ADJUSTED",
|
||||
+ "GUESSED",
|
||||
+ "AFDO",
|
||||
+ "ADJUSTED",
|
||||
+ "PRECISE"
|
||||
+ };
|
||||
+
|
||||
+ return profile_quality[count.quality ()];
|
||||
+}
|
||||
+
|
||||
+static const char *
|
||||
+alias_local_functions (const char *fnname)
|
||||
+{
|
||||
+ if (TREE_PUBLIC (cfun->decl))
|
||||
+ {
|
||||
+ return fnname;
|
||||
+ }
|
||||
+
|
||||
+ return concat (fnname, "/", lbasename (dump_base_name), NULL);
|
||||
+}
|
||||
+
|
||||
+/* Return function bind type string. */
|
||||
+
|
||||
+static const char *
|
||||
+simple_get_function_bind ()
|
||||
+{
|
||||
+ const char *function_bind[] = {
|
||||
+ "GLOBAL",
|
||||
+ "WEAK",
|
||||
+ "LOCAL",
|
||||
+ "UNKNOWN"
|
||||
+ };
|
||||
+
|
||||
+ if (TREE_PUBLIC (cfun->decl))
|
||||
+ {
|
||||
+ if (!(DECL_WEAK (cfun->decl)))
|
||||
+ {
|
||||
+ return function_bind[0];
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ return function_bind[1];
|
||||
+ }
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ return function_bind[2];
|
||||
+ }
|
||||
+
|
||||
+ return function_bind[3];
|
||||
+}
|
||||
+
|
||||
+/* Dump the direct callees of the call insns (CALL_P) in BB. */
|
||||
+
|
||||
+static void
|
||||
+dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
|
||||
+{
|
||||
+ rtx_insn *insn;
|
||||
+ FOR_BB_INSNS (bb, insn)
|
||||
+ {
|
||||
+ if (insn && CALL_P (insn))
|
||||
+ {
|
||||
+ tree callee = get_call_fndecl (insn);
|
||||
+
|
||||
+ if (callee)
|
||||
+ {
|
||||
+ fprintf (asm_out_file, "\t.string \"%x\"\n",
|
||||
+ INSN_ADDRESSES (INSN_UID (insn)));
|
||||
+
|
||||
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
|
||||
+ ASM_FDO_CALLEE_FLAG,
|
||||
+ alias_local_functions (get_fnname_from_decl (callee)));
|
||||
+
|
||||
+ fprintf (asm_out_file,
|
||||
+ "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
|
||||
+ call_count);
|
||||
+
|
||||
+ if (dump_file)
|
||||
+ {
|
||||
+ fprintf (dump_file, "call: %x --> %s\n",
|
||||
+ INSN_ADDRESSES (INSN_UID (insn)),
|
||||
+ alias_local_functions
|
||||
+ (get_fnname_from_decl (callee)));
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Dump the edge info into asm. */
|
||||
+
|
||||
+static void
|
||||
+dump_edge_jump_info_to_asm (basic_block bb, gcov_type bb_count)
|
||||
+{
|
||||
+ edge e;
|
||||
+ edge_iterator ei;
|
||||
+ gcov_type edge_total_count = 0;
|
||||
+
|
||||
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||||
+ {
|
||||
+ gcov_type edge_count = get_fdo_count (e->count ());
|
||||
+ edge_total_count += edge_count;
|
||||
+
|
||||
+ int edge_start_addr = get_bb_end_addr (e->src);
|
||||
+ int edge_end_addr = get_bb_start_addr (e->dest);
|
||||
+
|
||||
+ if (edge_start_addr == -1 || edge_end_addr == -1)
|
||||
+ {
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ /* This is a reserved assert for the original design. If this
|
||||
+ assert fires, use the address of the previous instruction
|
||||
+ as edge_start_addr. */
|
||||
+ gcc_assert (edge_start_addr != edge_end_addr);
|
||||
+
|
||||
+ if (dump_file)
|
||||
+ {
|
||||
+ fprintf (dump_file, "edge: %x --> %x = (%ld)\n",
|
||||
+ edge_start_addr, edge_end_addr, edge_count);
|
||||
+ }
|
||||
+
|
||||
+ if (edge_count > 0)
|
||||
+ {
|
||||
+ fprintf (asm_out_file, "\t.string \"%x\"\n", edge_start_addr);
|
||||
+ fprintf (asm_out_file, "\t.string \"%x\"\n", edge_end_addr);
|
||||
+ fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
|
||||
+ edge_count);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ gcov_type call_count = MAX (edge_total_count, bb_count);
|
||||
+ if (call_count > 0)
|
||||
+ {
|
||||
+ dump_direct_callee_info_to_asm (bb, call_count);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Dump the bb info into asm. */
|
||||
+
|
||||
+static void
|
||||
+dump_bb_info_to_asm (basic_block bb, gcov_type bb_count)
|
||||
+{
|
||||
+ int bb_start_addr = get_bb_start_addr (bb);
|
||||
+ if (bb_start_addr != -1)
|
||||
+ {
|
||||
+ fprintf (asm_out_file, "\t.string \"%x\"\n", bb_start_addr);
|
||||
+ fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
|
||||
+ bb_count);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Dump the function info into asm. */
|
||||
+
|
||||
+static void
|
||||
+dump_function_info_to_asm (const char *fnname)
|
||||
+{
|
||||
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
|
||||
+ ASM_FDO_CALLER_FLAG, alias_local_functions (fnname));
|
||||
+ fprintf (asm_out_file, "\t.string \"%s%d\"\n",
|
||||
+ ASM_FDO_CALLER_SIZE_FLAG, get_function_end_addr ());
|
||||
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
|
||||
+ ASM_FDO_CALLER_BIND_FLAG, simple_get_function_bind ());
|
||||
+
|
||||
+ if (dump_file)
|
||||
+ {
|
||||
+ fprintf (dump_file, "\n FUNC_NAME: %s\n",
|
||||
+ alias_local_functions (fnname));
|
||||
+ fprintf (dump_file, " file: %s\n",
|
||||
+ dump_base_name);
|
||||
+ fprintf (dump_file, " profile_status: %s\n",
|
||||
+ get_function_profile_status ());
|
||||
+ fprintf (dump_file, " size: %x\n",
|
||||
+ get_function_end_addr ());
|
||||
+ fprintf (dump_file, " function_bind: %s\n",
|
||||
+ simple_get_function_bind ());
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Dump function profile info from AutoFDO or PGO to asm. */
|
||||
+
|
||||
+static void
|
||||
+dump_fdo_info_to_asm (const char *fnname)
|
||||
+{
|
||||
+ basic_block bb;
|
||||
+
|
||||
+ dump_function_info_to_asm (fnname);
|
||||
+
|
||||
+ FOR_EACH_BB_FN (bb, cfun)
|
||||
+ {
|
||||
+ gcov_type bb_count = get_fdo_count (bb->count);
|
||||
+ if (bb_count == 0)
|
||||
+ {
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ if (dump_file)
|
||||
+ {
|
||||
+ fprintf (dump_file, "BB: %x --> %x = (%ld) [%s]\n",
|
||||
+ get_bb_start_addr (bb), get_bb_end_addr (bb),
|
||||
+ bb_count, get_fdo_count_quality (bb->count));
|
||||
+ }
|
||||
+
|
||||
+ if (flag_profile_use)
|
||||
+ {
|
||||
+ dump_edge_jump_info_to_asm (bb, bb_count);
|
||||
+ }
|
||||
+ else if (flag_auto_profile)
|
||||
+ {
|
||||
+ dump_bb_info_to_asm (bb, bb_count);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* When -fauto-bolt option is turned on, the .text.fdo. section
|
||||
+ will be generated in the *.s file if there is feedback information
|
||||
+ from PGO or AutoFDO. This section will be parsed by the BOLT-plugin. */
|
||||
+
|
||||
+static void
|
||||
+dump_profile_to_elf_sections ()
|
||||
+{
|
||||
+ if (!flag_function_sections)
|
||||
+ {
|
||||
+ error ("-fauto-bolt should work with -ffunction-sections");
|
||||
+ return;
|
||||
+ }
|
||||
+ if (!flag_ipa_ra)
|
||||
+ {
|
||||
+ error ("-fauto-bolt should work with -fipa-ra");
|
||||
+ return;
|
||||
+ }
|
||||
+ if (flag_align_jumps)
|
||||
+ {
|
||||
+ error ("-fauto-bolt is not supported with -falign-jumps");
|
||||
+ return;
|
||||
+ }
|
||||
+ if (flag_align_labels)
|
||||
+ {
|
||||
+ error ("-fauto-bolt is not supported with -falign-labels");
|
||||
+ return;
|
||||
+ }
|
||||
+ if (flag_align_loops)
|
||||
+ {
|
||||
+ error ("-fauto-bolt is not supported with -falign-loops");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /* Return if no feedback data. */
|
||||
+ if (!flag_profile_use && !flag_auto_profile)
|
||||
+ {
|
||||
+ error ("-fauto-bolt should use with -fprofile-use or -fauto-profile");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /* Avoid empty functions. */
|
||||
+ if (TREE_CODE (cfun->decl) != FUNCTION_DECL)
|
||||
+ {
|
||||
+ return;
|
||||
+ }
|
||||
+ int flags = SECTION_DEBUG | SECTION_EXCLUDE;
|
||||
+ const char *fnname = get_fnname_from_decl (current_function_decl);
|
||||
+ char *profile_fnname = NULL;
|
||||
+
|
||||
+ asprintf (&profile_fnname,"%s%s", ASM_FDO_SECTION_PREFIX, fnname);
|
||||
+ switch_to_section (get_section (profile_fnname, flags , NULL));
|
||||
+ dump_fdo_info_to_asm (fnname);
|
||||
+
|
||||
+ if (profile_fnname)
|
||||
+ {
|
||||
+ free (profile_fnname);
|
||||
+ profile_fnname = NULL;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/* Turn the RTL into assembly. */
|
||||
static unsigned int
|
||||
rest_of_handle_final (void)
|
||||
@@ -4707,6 +5101,12 @@ rest_of_handle_final (void)
|
||||
targetm.asm_out.destructor (XEXP (DECL_RTL (current_function_decl), 0),
|
||||
decl_fini_priority_lookup
|
||||
(current_function_decl));
|
||||
+
|
||||
+ if (flag_auto_bolt)
|
||||
+ {
|
||||
+ dump_profile_to_elf_sections ();
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
diff --git a/gcc/opts.c b/gcc/opts.c
|
||||
index f49f5ee58..0b389ae1d 100644
|
||||
--- a/gcc/opts.c
|
||||
+++ b/gcc/opts.c
|
||||
@@ -1166,6 +1166,10 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
|
||||
if (opts->x_flag_vtable_verify && opts->x_flag_lto)
|
||||
sorry ("vtable verification is not supported with LTO");
|
||||
|
||||
+ /* Currently -fauto-bolt is not supported for LTO. */
|
||||
+ if (opts->x_flag_auto_bolt && opts->x_flag_lto)
|
||||
+ sorry ("%<-fauto-bolt%> is not supported with LTO");
|
||||
+
|
||||
/* Control IPA optimizations based on different -flive-patching level. */
|
||||
if (opts->x_flag_live_patching)
|
||||
control_options_for_live_patching (opts, opts_set,
|
||||
@@ -1183,6 +1187,58 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
|
||||
= (opts->x_flag_unroll_loops
|
||||
|| opts->x_flag_peel_loops
|
||||
|| opts->x_optimize >= 3);
|
||||
+
|
||||
+ if (opts->x_flag_auto_bolt)
|
||||
+ {
|
||||
+ /* Record the function section to facilitate the feedback
|
||||
+ data storage. */
|
||||
+ if (!opts->x_flag_function_sections)
|
||||
+ {
|
||||
+ inform (loc,
|
||||
+ "%<-fauto-bolt%> should work with %<-ffunction-sections%>,"
|
||||
+ " enabling %<-ffunction-sections%>");
|
||||
+ opts->x_flag_function_sections = true;
|
||||
+ }
|
||||
+
|
||||
+ /* Cancel the internal alignment of the function. The binary
|
||||
+ optimizer bolt will cancel the internal alignment optimization
|
||||
+ of the function, so the alignment is meaningless at this time,
|
||||
+ and if not, it will bring trouble to the calculation of the
|
||||
+ offset address of the instruction. */
|
||||
+ if (opts->x_flag_align_jumps)
|
||||
+ {
|
||||
+ inform (loc,
|
||||
+ "%<-fauto-bolt%> should not work with %<-falign-jumps%>,"
|
||||
+ " disabling %<-falign-jumps%>");
|
||||
+ opts->x_flag_align_jumps = false;
|
||||
+ }
|
||||
+
|
||||
+ if (opts->x_flag_align_labels)
|
||||
+ {
|
||||
+ inform (loc,
|
||||
+ "%<-fauto-bolt%> should not work with %<-falign-labels%>,"
|
||||
+ " disabling %<-falign-labels%>");
|
||||
+ opts->x_flag_align_labels = false;
|
||||
+ }
|
||||
+
|
||||
+ if (opts->x_flag_align_loops)
|
||||
+ {
|
||||
+ inform (loc,
|
||||
+ "%<-fauto-bolt%> should not work with %<-falign-loops%>,"
|
||||
+ " disabling %<-falign-loops%>");
|
||||
+ opts->x_flag_align_loops = false;
|
||||
+ }
|
||||
+
|
||||
+ /* When parsing instructions in RTL phase, we need to know
|
||||
+ the call information of instructions to avoid being optimized. */
|
||||
+ if (!opts->x_flag_ipa_ra)
|
||||
+ {
|
||||
+ inform (loc,
|
||||
+ "%<-fauto-bolt%> should work with %<-fipa-ra%>,"
|
||||
+ " enabling %<-fipa-ra%>");
|
||||
+ opts->x_flag_ipa_ra = true;
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
|
||||
#define LEFT_COLUMN 27
|
||||
@@ -2881,6 +2937,11 @@ common_handle_option (struct gcc_options *opts,
|
||||
check_alignment_argument (loc, arg, "functions");
|
||||
break;
|
||||
|
||||
+ case OPT_fauto_bolt_:
|
||||
+ case OPT_fauto_bolt:
|
||||
+ /* Deferred. */
|
||||
+ break;
|
||||
+
|
||||
default:
|
||||
/* If the flag was handled in a standard way, assume the lack of
|
||||
processing here is intentional. */
|
||||
--
|
||||
2.27.0
|
||||
|
||||
21748
0030-AutoBOLT-Add-bolt-linker-plugin-2-3.patch
Normal file
21748
0030-AutoBOLT-Add-bolt-linker-plugin-2-3.patch
Normal file
File diff suppressed because it is too large
Load Diff
1945
0031-AutoBOLT-Enable-BOLT-linker-plugin-on-aarch64-3-3.patch
Normal file
1945
0031-AutoBOLT-Enable-BOLT-linker-plugin-on-aarch64-3-3.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,82 @@
|
||||
From 071d19832d788422034a3b052ff7ce91e1010344 Mon Sep 17 00:00:00 2001
|
||||
From: dingguangya <dingguangya1@huawei.com>
|
||||
Date: Mon, 28 Feb 2022 16:52:58 +0800
|
||||
Subject: [PATCH 32/32] [Autoprefetch] Prune invalid loops containing edges whose
|
||||
probability exceeds 1
|
||||
|
||||
Skip auto prefetch analysis if the loop contains the bb in which the sum
|
||||
of its outgoing edge probabilities is greater than 1.
|
||||
---
|
||||
gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp | 2 +-
|
||||
.../gcc.dg/autoprefetch/branch-weighted-prefetch.c | 8 ++++----
|
||||
gcc/tree-ssa-loop-prefetch.c | 12 ++++++++++++
|
||||
3 files changed, 17 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp b/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
|
||||
index a7408e338..7cae630a2 100644
|
||||
--- a/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
|
||||
+++ b/gcc/testsuite/gcc.dg/autoprefetch/autoprefetch.exp
|
||||
@@ -20,7 +20,7 @@ load_lib target-supports.exp
|
||||
# Initialize `dg'.
|
||||
dg-init
|
||||
|
||||
-gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
|
||||
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
|
||||
"" "-fprefetch-loop-arrays"
|
||||
|
||||
# All done.
|
||||
diff --git a/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c b/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
|
||||
index c63c5e5cb..ab537cb29 100644
|
||||
--- a/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
|
||||
+++ b/gcc/testsuite/gcc.dg/autoprefetch/branch-weighted-prefetch.c
|
||||
@@ -1,5 +1,5 @@
|
||||
-/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -fprefetch-loop-arrays=2 --param min-insn-to-prefetch-ratio=5 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O2 -fprefetch-loop-arrays=2 --param min-insn-to-prefetch-ratio=5 --param simultaneous-prefetches=100 --param l1-cache-size=64 --param l1-cache-line-size=32 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */
|
||||
#define N 10000000
|
||||
|
||||
long long a[N];
|
||||
@@ -18,5 +18,5 @@ long long func ()
|
||||
|
||||
return sum;
|
||||
}
|
||||
-/* { dg-final { scan-tree-dump-times "Ahead 40" 1 "aprefetch" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "builtin_prefetch" 1 "optimized" } } */
|
||||
\ No newline at end of file
|
||||
+/* { dg-final { scan-tree-dump "Calculating prefetch distance using bb branch weighting method" "aprefetch" } } */
|
||||
+/* { dg-final { scan-tree-dump "builtin_prefetch" "optimized" } } */
|
||||
\ No newline at end of file
|
||||
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
|
||||
index 673f453a4..0d992d8f6 100644
|
||||
--- a/gcc/tree-ssa-loop-prefetch.c
|
||||
+++ b/gcc/tree-ssa-loop-prefetch.c
|
||||
@@ -2267,6 +2267,15 @@ traverse_prune_bb_branch (hash_map <basic_block, bb_bp> &bb_branch_prob,
|
||||
&& bb_bp_node->false_edge_bb == NULL))
|
||||
return false;
|
||||
|
||||
+ /* Do not process the loop with a bb branch probability of an abnormal
|
||||
+ value. */
|
||||
+ if (bb_bp_node->true_edge_prob + bb_bp_node->false_edge_prob > 1)
|
||||
+ {
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file, "bb branch probability is abnormal\n");
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
if (current_bb == latch_bb)
|
||||
{
|
||||
max_path--;
|
||||
@@ -2409,6 +2418,9 @@ estimate_num_loop_insns (struct loop *loop, eni_weights *weights)
|
||||
dump_loop_bb (loop);
|
||||
return 0;
|
||||
}
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file, "Calculating prefetch distance using bb branch "
|
||||
+ "weighting method\n");
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < loop->num_nodes; i++)
|
||||
--
|
||||
2.27.0
|
||||
|
||||
22
gcc.spec
22
gcc.spec
@ -61,7 +61,7 @@
|
||||
Summary: Various compilers (C, C++, Objective-C, ...)
|
||||
Name: gcc
|
||||
Version: %{gcc_version}
|
||||
Release: 8
|
||||
Release: 9
|
||||
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
|
||||
URL: https://gcc.gnu.org
|
||||
|
||||
@ -144,6 +144,10 @@ Patch25: 0025-AutoPrefetch-Support-cache-misses-profile.patch
|
||||
Patch26: 0026-AutoFDO-Enable-discriminator-and-MCF-algorithm-on-Au.patch
|
||||
Patch27: 0027-Autoprefetch-Support-auto-feedback-prefetch.patch
|
||||
Patch28: 0028-AutoPrefetch-Handle-the-case-that-the-basic-block-br.patch
|
||||
Patch29: 0029-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch
|
||||
Patch30: 0030-AutoBOLT-Add-bolt-linker-plugin-2-3.patch
|
||||
Patch31: 0031-AutoBOLT-Enable-BOLT-linker-plugin-on-aarch64-3-3.patch
|
||||
Patch32: 0032-Autoprefetch-Prune-invaild-loops-containing-edges-wh.patch
|
||||
|
||||
%global gcc_target_platform %{_arch}-linux-gnu
|
||||
|
||||
@ -613,6 +617,10 @@ not stable, so plugins must be rebuilt any time GCC is updated.
|
||||
%patch26 -p1
|
||||
%patch27 -p1
|
||||
%patch28 -p1
|
||||
%patch29 -p1
|
||||
%patch30 -p1
|
||||
%patch31 -p1
|
||||
%patch32 -p1
|
||||
|
||||
|
||||
%build
|
||||
@ -689,7 +697,8 @@ CC="$CC" CFLAGS="$OPT_FLAGS" \
|
||||
--disable-multilib
|
||||
%endif
|
||||
%ifarch aarch64
|
||||
--with-multilib-list=lp64
|
||||
--with-multilib-list=lp64 \
|
||||
--enable-bolt
|
||||
%endif
|
||||
%ifarch riscv64
|
||||
--with-arch=rv64g --with-abi=lp64d \
|
||||
@ -1641,6 +1650,9 @@ end
|
||||
%{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major}/lto1
|
||||
%{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major}/lto-wrapper
|
||||
%{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major}/liblto_plugin.so*
|
||||
%ifarch aarch64
|
||||
%{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major}/libbolt_plugin.so*
|
||||
%endif
|
||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/rpmver
|
||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/stddef.h
|
||||
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/stdarg.h
|
||||
@ -2575,6 +2587,12 @@ end
|
||||
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
||||
|
||||
%changelog
|
||||
* Wed Mar 2 2022 benniaobufeijiushiji <linda7@huawei.com> - 10.3.1-9
|
||||
- Type:Sync
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC:Sync patch from openeuler/gcc
|
||||
|
||||
* Tue Mar 1 2022 benniaobufeijiushiji <linda7@huawei.com> - 10.3.1-8
|
||||
- Type:SPEC
|
||||
- ID:NA
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user