gcc/fix-ICE-in-affine-combination.patch

This backport contains 2 patches from the GCC mainstream tree.
The commit IDs of these patches are listed below in chronological order.
0001-tree-affine.c-expr_to_aff_combination-New-function-s.patch
5120e0d8d48f4590a275e60565de6c5a4e772fc1
0001-PR-tree-optimization-94574-aarch64-ICE-during-GIMPLE.patch
0447929f11e6a3e1b076841712b90a8b6bc7d33a
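
The first commit splits the expression decomposition out of tree_to_aff_combination
into a new expr_to_aff_combination helper that works directly on a tree code and
operands, so aff_combination_expand can reuse it on a definition statement without
building scratch trees. The second commit fixes PR tree-optimization/94574 (an ICE
during GIMPLE on aarch64): the fold (T1)(X *+- CST) -> (T1)X *+- (T1)CST is now
guarded by explicit wi::add/wi::mul/wi::sub overflow checks on the value range of X,
obtained via determine_value_range, instead of the old wi::geu_p test that excluded
MULT_EXPR. As a rough standalone sketch of that guard in plain C (a hypothetical
helper, not GCC source; it assumes a 32-bit unsigned inner type):

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical stand-in for the wi:: overflow checks added by the patch:
   folding (T1)(X op CST) to (T1)X op (T1)CST is safe only when X op CST
   cannot wrap for any X in the known range [minv, maxv].  */
static bool
range_fold_is_safe (uint32_t minv, uint32_t maxv, uint32_t cst, int op)
{
  switch (op)
    {
    case '+': /* mirrors wi::add (maxv, cst, UNSIGNED, &overflow) */
      return maxv + cst >= maxv;
    case '*': /* mirrors wi::mul (maxv, cst, UNSIGNED, &overflow) */
      return cst == 0 || maxv <= UINT32_MAX / cst;
    case '-': /* mirrors wi::sub (minv, cst, UNSIGNED, &overflow) */
      return minv >= cst;
    default:
      return false;
    }
}

When any of the three checks reports overflow, the fold is declined and the
conversion is kept as an opaque element of the affine combination instead.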
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c 2020-12-08 14:54:11.467633230 +0800
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE unsigned int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c 2020-12-08 14:54:11.467633230 +0800
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h 2020-12-08 14:54:11.467633230 +0800
@@ -0,0 +1,30 @@
+__attribute__ ((noinline)) void
+calculate (const double *__restrict__ A, const double *__restrict__ B,
+ double *__restrict__ C)
+{
+ TYPE m = 0;
+ TYPE n = 0;
+ TYPE k = 0;
+
+ A = (const double *) __builtin_assume_aligned (A, 16);
+ B = (const double *) __builtin_assume_aligned (B, 16);
+ C = (double *) __builtin_assume_aligned (C, 16);
+
+ for (n = 0; n < 9; n++)
+ {
+ for (m = 0; m < 10; m++)
+ {
+ C[(n * 10) + m] = 0.0;
+ }
+
+ for (k = 0; k < 17; k++)
+ {
+#pragma simd
+ for (m = 0; m < 10; m++)
+ {
+ C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
+ }
+ }
+ }
+}
+
diff -Nurp a/gcc/tree-affine.c b/gcc/tree-affine.c
--- a/gcc/tree-affine.c 2020-12-09 09:01:13.179633230 +0800
+++ b/gcc/tree-affine.c 2020-12-08 14:54:11.467633230 +0800
@@ -259,104 +259,66 @@ aff_combination_convert (aff_tree *comb,
}
}
-/* Splits EXPR into an affine combination of parts. */
+/* Tries to handle OP0 CODE OP1 as affine combination of parts. Returns
+ true when that was successful and returns the combination in COMB. */
-void
-tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
+static bool
+expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
+ tree op0, tree op1 = NULL_TREE)
{
aff_tree tmp;
- enum tree_code code;
- tree cst, core, toffset;
poly_int64 bitpos, bitsize, bytepos;
- machine_mode mode;
- int unsignedp, reversep, volatilep;
-
- STRIP_NOPS (expr);
- code = TREE_CODE (expr);
switch (code)
{
case POINTER_PLUS_EXPR:
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
- tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp);
+ tree_to_aff_combination (op0, type, comb);
+ tree_to_aff_combination (op1, sizetype, &tmp);
aff_combination_add (comb, &tmp);
- return;
+ return true;
case PLUS_EXPR:
case MINUS_EXPR:
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
- tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
+ tree_to_aff_combination (op0, type, comb);
+ tree_to_aff_combination (op1, type, &tmp);
if (code == MINUS_EXPR)
aff_combination_scale (&tmp, -1);
aff_combination_add (comb, &tmp);
- return;
+ return true;
case MULT_EXPR:
- cst = TREE_OPERAND (expr, 1);
- if (TREE_CODE (cst) != INTEGER_CST)
+ if (TREE_CODE (op1) != INTEGER_CST)
break;
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
- aff_combination_scale (comb, wi::to_widest (cst));
- return;
+ tree_to_aff_combination (op0, type, comb);
+ aff_combination_scale (comb, wi::to_widest (op1));
+ return true;
case NEGATE_EXPR:
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
+ tree_to_aff_combination (op0, type, comb);
aff_combination_scale (comb, -1);
- return;
+ return true;
case BIT_NOT_EXPR:
/* ~x = -x - 1 */
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
+ tree_to_aff_combination (op0, type, comb);
aff_combination_scale (comb, -1);
aff_combination_add_cst (comb, -1);
- return;
-
- case ADDR_EXPR:
- /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */
- if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF)
- {
- expr = TREE_OPERAND (expr, 0);
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
- tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp);
- aff_combination_add (comb, &tmp);
- return;
- }
- core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
- &toffset, &mode, &unsignedp, &reversep,
- &volatilep);
- if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
- break;
- aff_combination_const (comb, type, bytepos);
- if (TREE_CODE (core) == MEM_REF)
- {
- tree mem_offset = TREE_OPERAND (core, 1);
- aff_combination_add_cst (comb, wi::to_poly_widest (mem_offset));
- core = TREE_OPERAND (core, 0);
- }
- else
- core = build_fold_addr_expr (core);
-
- if (TREE_CODE (core) == ADDR_EXPR)
- aff_combination_add_elt (comb, core, 1);
- else
- {
- tree_to_aff_combination (core, type, &tmp);
- aff_combination_add (comb, &tmp);
- }
- if (toffset)
- {
- tree_to_aff_combination (toffset, type, &tmp);
- aff_combination_add (comb, &tmp);
- }
- return;
+ return true;
CASE_CONVERT:
{
- tree otype = TREE_TYPE (expr);
- tree inner = TREE_OPERAND (expr, 0);
+ tree otype = type;
+ tree inner = op0;
tree itype = TREE_TYPE (inner);
enum tree_code icode = TREE_CODE (inner);
+ /* STRIP_NOPS */
+ if (tree_nop_conversion_p (otype, itype))
+ {
+ tree_to_aff_combination (op0, type, comb);
+ return true;
+ }
+
/* In principle this is a valid folding, but it isn't necessarily
an optimization, so do it here and not in fold_unary. */
if ((icode == PLUS_EXPR || icode == MINUS_EXPR || icode == MULT_EXPR)
@@ -376,38 +338,127 @@ tree_to_aff_combination (tree expr, tree
{
op0 = fold_convert (otype, op0);
op1 = fold_convert (otype, op1);
- expr = fold_build2 (icode, otype, op0, op1);
- tree_to_aff_combination (expr, type, comb);
- return;
+ return expr_to_aff_combination (comb, icode, otype, op0, op1);
}
wide_int minv, maxv;
/* If inner type has wrapping overflow behavior, fold conversion
for below case:
- (T1)(X - CST) -> (T1)X - (T1)CST
- if X - CST doesn't overflow by range information. Also handle
- (T1)(X + CST) as (T1)(X - (-CST)). */
+ (T1)(X *+- CST) -> (T1)X *+- (T1)CST
+ if X *+- CST doesn't overflow by range information. */
if (TYPE_UNSIGNED (itype)
&& TYPE_OVERFLOW_WRAPS (itype)
- && TREE_CODE (op0) == SSA_NAME
&& TREE_CODE (op1) == INTEGER_CST
- && icode != MULT_EXPR
- && get_range_info (op0, &minv, &maxv) == VR_RANGE)
+ && determine_value_range (op0, &minv, &maxv) == VR_RANGE)
{
+ wi::overflow_type overflow = wi::OVF_NONE;
+ signop sign = UNSIGNED;
if (icode == PLUS_EXPR)
- op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
- if (wi::geu_p (minv, wi::to_wide (op1)))
+ wi::add (maxv, wi::to_wide (op1), sign, &overflow);
+ else if (icode == MULT_EXPR)
+ wi::mul (maxv, wi::to_wide (op1), sign, &overflow);
+ else
+ wi::sub (minv, wi::to_wide (op1), sign, &overflow);
+
+ if (overflow == wi::OVF_NONE)
{
op0 = fold_convert (otype, op0);
op1 = fold_convert (otype, op1);
- expr = fold_build2 (MINUS_EXPR, otype, op0, op1);
- tree_to_aff_combination (expr, type, comb);
- return;
+ return expr_to_aff_combination (comb, icode, otype, op0,
+ op1);
}
}
}
}
break;
+ default:;
+ }
+
+ return false;
+}
+
+/* Splits EXPR into an affine combination of parts. */
+
+void
+tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
+{
+ aff_tree tmp;
+ enum tree_code code;
+ tree core, toffset;
+ poly_int64 bitpos, bitsize, bytepos;
+ machine_mode mode;
+ int unsignedp, reversep, volatilep;
+
+ STRIP_NOPS (expr);
+
+ code = TREE_CODE (expr);
+ switch (code)
+ {
+ case POINTER_PLUS_EXPR:
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ case MULT_EXPR:
+ if (expr_to_aff_combination (comb, code, type, TREE_OPERAND (expr, 0),
+ TREE_OPERAND (expr, 1)))
+ return;
+ break;
+
+ case NEGATE_EXPR:
+ case BIT_NOT_EXPR:
+ if (expr_to_aff_combination (comb, code, type, TREE_OPERAND (expr, 0)))
+ return;
+ break;
+
+ CASE_CONVERT:
+ /* ??? TREE_TYPE (expr) should be equal to type here, but IVOPTS
+ calls this with not showing an outer widening cast. */
+ if (expr_to_aff_combination (comb, code,
+ TREE_TYPE (expr), TREE_OPERAND (expr, 0)))
+ {
+ aff_combination_convert (comb, type);
+ return;
+ }
+ break;
+
+ case ADDR_EXPR:
+ /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */
+ if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF)
+ {
+ expr = TREE_OPERAND (expr, 0);
+ tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
+ tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp);
+ aff_combination_add (comb, &tmp);
+ return;
+ }
+ core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
+ &toffset, &mode, &unsignedp, &reversep,
+ &volatilep);
+ if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
+ break;
+ aff_combination_const (comb, type, bytepos);
+ if (TREE_CODE (core) == MEM_REF)
+ {
+ tree mem_offset = TREE_OPERAND (core, 1);
+ aff_combination_add_cst (comb, wi::to_poly_widest (mem_offset));
+ core = TREE_OPERAND (core, 0);
+ }
+ else
+ core = build_fold_addr_expr (core);
+
+ if (TREE_CODE (core) == ADDR_EXPR)
+ aff_combination_add_elt (comb, core, 1);
+ else
+ {
+ tree_to_aff_combination (core, type, &tmp);
+ aff_combination_add (comb, &tmp);
+ }
+ if (toffset)
+ {
+ tree_to_aff_combination (toffset, type, &tmp);
+ aff_combination_add (comb, &tmp);
+ }
+ return;
+
default:
{
if (poly_int_tree_p (expr))
@@ -665,7 +716,7 @@ aff_combination_expand (aff_tree *comb A
{
unsigned i;
aff_tree to_add, current, curre;
- tree e, rhs;
+ tree e;
gimple *def;
widest_int scale;
struct name_expansion *exp;
@@ -715,20 +766,38 @@ aff_combination_expand (aff_tree *comb A
case PLUS_EXPR:
case MINUS_EXPR:
case MULT_EXPR:
+ if (!expr_to_aff_combination (&current, code, TREE_TYPE (name),
+ gimple_assign_rhs1 (def),
+ gimple_assign_rhs2 (def)))
+ continue;
+ break;
case NEGATE_EXPR:
case BIT_NOT_EXPR:
+ if (!expr_to_aff_combination (&current, code, TREE_TYPE (name),
+ gimple_assign_rhs1 (def)))
+ continue;
+ break;
CASE_CONVERT:
- rhs = gimple_assign_rhs_to_tree (def);
+ if (!expr_to_aff_combination (&current, code, TREE_TYPE (name),
+ gimple_assign_rhs1 (def)))
+ /* This makes us always expand conversions which we did
+ in the past and makes gcc.dg/tree-ssa/ivopts-lt-2.c
+ PASS, eliminating one induction variable in IVOPTs.
+ ??? But it is really excessive and we should try
+ harder to do without it. */
+ aff_combination_elt (&current, TREE_TYPE (name),
+ fold_convert (TREE_TYPE (name),
+ gimple_assign_rhs1 (def)));
break;
case ADDR_EXPR:
case INTEGER_CST:
case POLY_INT_CST:
- rhs = gimple_assign_rhs1 (def);
+ tree_to_aff_combination (gimple_assign_rhs1 (def),
+ TREE_TYPE (name), &current);
break;
default:
continue;
}
- tree_to_aff_combination (rhs, TREE_TYPE (name), &current);
exp = XNEW (struct name_expansion);
exp->in_progress = 1;
if (!*cache)