- avoid-cycling-on-vertain-subreg-reloads.patch: Add patch source comment - change-gcc-BASE-VER.patch: Likewise - dont-generate-IF_THEN_ELSE.patch: Likewise - fix-ICE-in-compute_live_loop_exits.patch: Likewise - fix-ICE-in-eliminate_stmt.patch: Likewise - fix-ICE-in-vect_create_epilog_for_reduction.patch: Likewise - fix-ICE-in-vect_stmt_to_vectorize.patch: Likewise - fix-ICE-in-verify_ssa.patch: Likewise - fix-ICE-when-vectorizing-nested-cycles.patch: Likewise - fix-cost-of-plus.patch: Likewise - ipa-const-prop-self-recursion-bugfix.patch: Likewise - simplify-removing-subregs.patch: Likewise - medium-code-mode.patch: Bugfix - fix-when-peeling-for-alignment.patch: Move to ... - fix-PR-92351-When-peeling-for-alignment.patch: ... this - AArch64-Fix-constraints-for-CPY-M.patch: New file - Apply-maximum-nunits-for-BB-SLP.patch: New file - Fix-EXTRACT_LAST_REDUCTION-segfault.patch: New file - Fix-up-push_partial_def-little-endian-bitfield.patch: New file - Fix-zero-masking-for-vcvtps2ph.patch: New file - IRA-Handle-fully-tied-destinations.patch: New file - SLP-VECT-Add-check-to-fix-96837.patch: New file - aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch: New file - aarch64-Fix-bf16-and-matrix-g++-gfortran.patch: New file - aarch64-Fix-mismatched-SVE-predicate-modes.patch: New file - aarch64-fix-sve-acle-error.patch: New file - adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch: New file - bf16-and-matrix-characteristic.patch: New file - fix-ICE-IPA-compare-VRP-types.patch: New file - fix-ICE-in-affine-combination.patch: New file - fix-ICE-in-pass-vect.patch: New file - fix-ICE-in-vect_update_misalignment_for_peel.patch: New file - fix-addlosymdi-ICE-in-pass-reload.patch: New file - fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch: New file - fix-avx512vl-vcvttpd2dq-2-fail.patch: New file - fix-issue499-add-nop-convert.patch: New file - fix-issue604-ldist-dependency-fixup.patch: New file - modulo-sched-Carefully-process-loop-counter-initiali.patch: New 
file - re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch: New file - reduction-paths-with-unhandled-live-stmt.patch: New file - redundant-loop-elimination.patch: New file - sccvn-Improve-handling-of-load-masked-with-integer.patch: New file - speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch: New file - store-merging-Consider-also-overlapping-stores-earlier.patch: New file - tree-optimization-96920-another-ICE-when-vectorizing.patch: New file - tree-optimization-97812-fix-range-query-in-VRP-asser.patch: New file - vectorizable-comparison-Swap-operands-only-once.patch: New file - x86-Fix-bf16-and-matrix.patch: New file
252 lines
7.6 KiB
Diff
252 lines
7.6 KiB
Diff
This backport contains 1 patch from the GCC mainstream tree.
|
|
The commit ID of this patch is listed below (patches are listed in chronological order).
|
|
|
|
0001-modulo-sched-Carefully-process-loop-counter-initiali.patch
|
|
4eb8f93d026eaa1de9b4820337069f3ce3465cd0
|
|
|
|
diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
|
|
index 6f699a874e3..4568674aa6c 100644
|
|
--- a/gcc/modulo-sched.c
|
|
+++ b/gcc/modulo-sched.c
|
|
@@ -210,8 +210,6 @@ static int sms_order_nodes (ddg_ptr, int, int *, int *);
|
|
static void set_node_sched_params (ddg_ptr);
|
|
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
|
|
static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
|
|
-static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
|
|
- rtx, rtx);
|
|
static int calculate_stage_count (partial_schedule_ptr, int);
|
|
static void calculate_must_precede_follow (ddg_node_ptr, int, int,
|
|
int, int, sbitmap, sbitmap, sbitmap);
|
|
@@ -391,30 +389,40 @@ doloop_register_get (rtx_insn *head, rtx_insn *tail)
|
|
this constant. Otherwise return 0. */
|
|
static rtx_insn *
|
|
const_iteration_count (rtx count_reg, basic_block pre_header,
|
|
- int64_t * count)
|
|
+ int64_t *count, bool* adjust_inplace)
|
|
{
|
|
rtx_insn *insn;
|
|
rtx_insn *head, *tail;
|
|
|
|
+ *adjust_inplace = false;
|
|
+ bool read_after = false;
|
|
+
|
|
if (! pre_header)
|
|
return NULL;
|
|
|
|
get_ebb_head_tail (pre_header, pre_header, &head, &tail);
|
|
|
|
for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
|
|
- if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
|
|
- rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
|
|
+ if (single_set (insn) && rtx_equal_p (count_reg,
|
|
+ SET_DEST (single_set (insn))))
|
|
{
|
|
rtx pat = single_set (insn);
|
|
|
|
if (CONST_INT_P (SET_SRC (pat)))
|
|
{
|
|
*count = INTVAL (SET_SRC (pat));
|
|
+ *adjust_inplace = !read_after;
|
|
return insn;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
+ else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn))
|
|
+ {
|
|
+ read_after = true;
|
|
+ if (reg_set_p (count_reg, insn))
|
|
+ break;
|
|
+ }
|
|
|
|
return NULL;
|
|
}
|
|
@@ -1126,7 +1134,7 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
|
|
/* Generate the instructions (including reg_moves) for prolog & epilog. */
|
|
static void
|
|
generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
|
|
- rtx count_reg, rtx count_init)
|
|
+ rtx count_reg, bool adjust_init)
|
|
{
|
|
int i;
|
|
int last_stage = PS_STAGE_COUNT (ps) - 1;
|
|
@@ -1135,12 +1143,12 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
|
|
/* Generate the prolog, inserting its insns on the loop-entry edge. */
|
|
start_sequence ();
|
|
|
|
- if (!count_init)
|
|
+ if (adjust_init)
|
|
{
|
|
/* Generate instructions at the beginning of the prolog to
|
|
- adjust the loop count by STAGE_COUNT. If loop count is constant
|
|
- (count_init), this constant is adjusted by STAGE_COUNT in
|
|
- generate_prolog_epilog function. */
|
|
+ adjust the loop count by STAGE_COUNT. If loop count is constant
|
|
+ and it not used anywhere in prologue, this constant is adjusted by
|
|
+ STAGE_COUNT outside of generate_prolog_epilog function. */
|
|
rtx sub_reg = NULL_RTX;
|
|
|
|
sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
|
|
@@ -1528,7 +1536,8 @@ sms_schedule (void)
|
|
rtx_insn *count_init;
|
|
int mii, rec_mii, stage_count, min_cycle;
|
|
int64_t loop_count = 0;
|
|
- bool opt_sc_p;
|
|
+ bool opt_sc_p, adjust_inplace = false;
|
|
+ basic_block pre_header;
|
|
|
|
if (! (g = g_arr[loop->num]))
|
|
continue;
|
|
@@ -1569,19 +1578,13 @@ sms_schedule (void)
|
|
}
|
|
|
|
|
|
- /* In case of th loop have doloop register it gets special
|
|
- handling. */
|
|
- count_init = NULL;
|
|
- if ((count_reg = doloop_register_get (head, tail)))
|
|
- {
|
|
- basic_block pre_header;
|
|
-
|
|
- pre_header = loop_preheader_edge (loop)->src;
|
|
- count_init = const_iteration_count (count_reg, pre_header,
|
|
- &loop_count);
|
|
- }
|
|
+ count_reg = doloop_register_get (head, tail);
|
|
gcc_assert (count_reg);
|
|
|
|
+ pre_header = loop_preheader_edge (loop)->src;
|
|
+ count_init = const_iteration_count (count_reg, pre_header, &loop_count,
|
|
+ &adjust_inplace);
|
|
+
|
|
if (dump_file && count_init)
|
|
{
|
|
fprintf (dump_file, "SMS const-doloop ");
|
|
@@ -1701,9 +1704,20 @@ sms_schedule (void)
|
|
print_partial_schedule (ps, dump_file);
|
|
}
|
|
|
|
- /* case the BCT count is not known , Do loop-versioning */
|
|
- if (count_reg && ! count_init)
|
|
+ if (count_init)
|
|
+ {
|
|
+ if (adjust_inplace)
|
|
+ {
|
|
+ /* When possible, set new iteration count of loop kernel in
|
|
+ place. Otherwise, generate_prolog_epilog creates an insn
|
|
+ to adjust. */
|
|
+ SET_SRC (single_set (count_init)) = GEN_INT (loop_count
|
|
+ - stage_count + 1);
|
|
+ }
|
|
+ }
|
|
+ else
|
|
{
|
|
+ /* case the BCT count is not known , Do loop-versioning */
|
|
rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
|
|
gen_int_mode (stage_count,
|
|
GET_MODE (count_reg)));
|
|
@@ -1713,12 +1727,7 @@ sms_schedule (void)
|
|
loop_version (loop, comp_rtx, &condition_bb,
|
|
prob, prob.invert (),
|
|
prob, prob.invert (), true);
|
|
- }
|
|
-
|
|
- /* Set new iteration count of loop kernel. */
|
|
- if (count_reg && count_init)
|
|
- SET_SRC (single_set (count_init)) = GEN_INT (loop_count
|
|
- - stage_count + 1);
|
|
+ }
|
|
|
|
/* Now apply the scheduled kernel to the RTL of the loop. */
|
|
permute_partial_schedule (ps, g->closing_branch->first_note);
|
|
@@ -1735,7 +1744,7 @@ sms_schedule (void)
|
|
if (dump_file)
|
|
print_node_sched_params (dump_file, g->num_nodes, ps);
|
|
/* Generate prolog and epilog. */
|
|
- generate_prolog_epilog (ps, loop, count_reg, count_init);
|
|
+ generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace);
|
|
break;
|
|
}
|
|
|
|
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c
|
|
new file mode 100644
|
|
index 00000000000..e32fb129f18
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c
|
|
@@ -0,0 +1,23 @@
|
|
+/* PR rtl-optimization/97421 */
|
|
+/* { dg-additional-options "-fmodulo-sched" } */
|
|
+
|
|
+int a, b, d, e;
|
|
+int *volatile c = &a;
|
|
+
|
|
+__attribute__((noinline))
|
|
+void f(void)
|
|
+{
|
|
+ for (int g = 2; g >= 0; g--) {
|
|
+ d = 0;
|
|
+ for (b = 0; b <= 2; b++)
|
|
+ ;
|
|
+ e = *c;
|
|
+ }
|
|
+}
|
|
+
|
|
+int main(void)
|
|
+{
|
|
+ f();
|
|
+ if (b != 3)
|
|
+ __builtin_abort();
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c
|
|
new file mode 100644
|
|
index 00000000000..142bcbcee91
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c
|
|
@@ -0,0 +1,18 @@
|
|
+/* PR rtl-optimization/97421 */
|
|
+/* { dg-additional-options "-fmodulo-sched -fno-dce -fno-strict-aliasing" } */
|
|
+
|
|
+static int a, b, c;
|
|
+int *d = &c;
|
|
+int **e = &d;
|
|
+int ***f = &e;
|
|
+int main()
|
|
+{
|
|
+ int h;
|
|
+ for (a = 2; a; a--)
|
|
+ for (h = 0; h <= 2; h++)
|
|
+ for (b = 0; b <= 2; b++)
|
|
+ ***f = 6;
|
|
+
|
|
+ if (b != 3)
|
|
+ __builtin_abort();
|
|
+}
|
|
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c
|
|
new file mode 100644
|
|
index 00000000000..3f1485a4a3d
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c
|
|
@@ -0,0 +1,22 @@
|
|
+/* PR rtl-optimization/97421 */
|
|
+/* { dg-additional-options "-fmodulo-sched" } */
|
|
+
|
|
+int a, b, c;
|
|
+short d;
|
|
+void e(void) {
|
|
+ unsigned f = 0;
|
|
+ for (; f <= 2; f++) {
|
|
+ int g[1];
|
|
+ int h = (long)g;
|
|
+ c = 0;
|
|
+ for (; c < 10; c++)
|
|
+ g[0] = a = 0;
|
|
+ for (; a <= 2; a++)
|
|
+ b = d;
|
|
+ }
|
|
+}
|
|
+int main(void) {
|
|
+ e();
|
|
+ if (a != 3)
|
|
+ __builtin_abort();
|
|
+}
|