This backport contains 2 patches from the upstream GCC tree.
The commit IDs of these patches are listed below, in chronological order.

0001-tree-affine.c-expr_to_aff_combination-New-function-s.patch
5120e0d8d48f4590a275e60565de6c5a4e772fc1

0001-PR-tree-optimization-94574-aarch64-ICE-during-GIMPLE.patch
0447929f11e6a3e1b076841712b90a8b6bc7d33a

diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c 2020-12-08 14:54:11.467633230 +0800
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE unsigned int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c 2020-12-08 14:54:11.467633230 +0800
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h 2020-12-08 14:54:11.467633230 +0800
@@ -0,0 +1,30 @@
+__attribute__ ((noinline)) void
+calculate (const double *__restrict__ A, const double *__restrict__ B,
+ double *__restrict__ C)
+{
+ TYPE m = 0;
+ TYPE n = 0;
+ TYPE k = 0;
+
+ A = (const double *) __builtin_assume_aligned (A, 16);
+ B = (const double *) __builtin_assume_aligned (B, 16);
+ C = (double *) __builtin_assume_aligned (C, 16);
+
+ for (n = 0; n < 9; n++)
+ {
+ for (m = 0; m < 10; m++)
+ {
+ C[(n * 10) + m] = 0.0;
+ }
+
+ for (k = 0; k < 17; k++)
+ {
+#pragma simd
+ for (m = 0; m < 10; m++)
+ {
+ C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
+ }
+ }
+ }
+}
+
diff -Nurp a/gcc/tree-affine.c b/gcc/tree-affine.c
--- a/gcc/tree-affine.c 2020-12-09 09:01:13.179633230 +0800
+++ b/gcc/tree-affine.c 2020-12-08 14:54:11.467633230 +0800
@@ -259,104 +259,66 @@ aff_combination_convert (aff_tree *comb,
}
}

-/* Splits EXPR into an affine combination of parts. */
+/* Tries to handle OP0 CODE OP1 as affine combination of parts. Returns
+ true when that was successful and returns the combination in COMB. */

-void
-tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
+static bool
+expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
+ tree op0, tree op1 = NULL_TREE)
{
aff_tree tmp;
- enum tree_code code;
- tree cst, core, toffset;
poly_int64 bitpos, bitsize, bytepos;
- machine_mode mode;
- int unsignedp, reversep, volatilep;
-
- STRIP_NOPS (expr);

- code = TREE_CODE (expr);
switch (code)
{
case POINTER_PLUS_EXPR:
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
- tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp);
+ tree_to_aff_combination (op0, type, comb);
+ tree_to_aff_combination (op1, sizetype, &tmp);
aff_combination_add (comb, &tmp);
- return;
+ return true;

case PLUS_EXPR:
case MINUS_EXPR:
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
- tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
+ tree_to_aff_combination (op0, type, comb);
+ tree_to_aff_combination (op1, type, &tmp);
if (code == MINUS_EXPR)
aff_combination_scale (&tmp, -1);
aff_combination_add (comb, &tmp);
- return;
+ return true;

case MULT_EXPR:
- cst = TREE_OPERAND (expr, 1);
- if (TREE_CODE (cst) != INTEGER_CST)
+ if (TREE_CODE (op1) != INTEGER_CST)
break;
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
- aff_combination_scale (comb, wi::to_widest (cst));
- return;
+ tree_to_aff_combination (op0, type, comb);
+ aff_combination_scale (comb, wi::to_widest (op1));
+ return true;

case NEGATE_EXPR:
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
+ tree_to_aff_combination (op0, type, comb);
aff_combination_scale (comb, -1);
- return;
+ return true;

case BIT_NOT_EXPR:
/* ~x = -x - 1 */
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
+ tree_to_aff_combination (op0, type, comb);
aff_combination_scale (comb, -1);
aff_combination_add_cst (comb, -1);
- return;
-
- case ADDR_EXPR:
- /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */
- if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF)
- {
- expr = TREE_OPERAND (expr, 0);
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
- tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp);
- aff_combination_add (comb, &tmp);
- return;
- }
- core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
- &toffset, &mode, &unsignedp, &reversep,
- &volatilep);
- if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
- break;
- aff_combination_const (comb, type, bytepos);
- if (TREE_CODE (core) == MEM_REF)
- {
- tree mem_offset = TREE_OPERAND (core, 1);
- aff_combination_add_cst (comb, wi::to_poly_widest (mem_offset));
- core = TREE_OPERAND (core, 0);
- }
- else
- core = build_fold_addr_expr (core);
-
- if (TREE_CODE (core) == ADDR_EXPR)
- aff_combination_add_elt (comb, core, 1);
- else
- {
- tree_to_aff_combination (core, type, &tmp);
- aff_combination_add (comb, &tmp);
- }
- if (toffset)
- {
- tree_to_aff_combination (toffset, type, &tmp);
- aff_combination_add (comb, &tmp);
- }
- return;
+ return true;

CASE_CONVERT:
{
- tree otype = TREE_TYPE (expr);
- tree inner = TREE_OPERAND (expr, 0);
+ tree otype = type;
+ tree inner = op0;
tree itype = TREE_TYPE (inner);
enum tree_code icode = TREE_CODE (inner);

+ /* STRIP_NOPS */
+ if (tree_nop_conversion_p (otype, itype))
+ {
+ tree_to_aff_combination (op0, type, comb);
+ return true;
+ }
+
/* In principle this is a valid folding, but it isn't necessarily
an optimization, so do it here and not in fold_unary. */
if ((icode == PLUS_EXPR || icode == MINUS_EXPR || icode == MULT_EXPR)
@@ -376,38 +338,127 @@ tree_to_aff_combination (tree expr, tree
{
op0 = fold_convert (otype, op0);
op1 = fold_convert (otype, op1);
- expr = fold_build2 (icode, otype, op0, op1);
- tree_to_aff_combination (expr, type, comb);
- return;
+ return expr_to_aff_combination (comb, icode, otype, op0, op1);
}
wide_int minv, maxv;
/* If inner type has wrapping overflow behavior, fold conversion
for below case:
- (T1)(X - CST) -> (T1)X - (T1)CST
- if X - CST doesn't overflow by range information. Also handle
- (T1)(X + CST) as (T1)(X - (-CST)). */
+ (T1)(X *+- CST) -> (T1)X *+- (T1)CST
+ if X *+- CST doesn't overflow by range information. */
if (TYPE_UNSIGNED (itype)
&& TYPE_OVERFLOW_WRAPS (itype)
- && TREE_CODE (op0) == SSA_NAME
&& TREE_CODE (op1) == INTEGER_CST
- && icode != MULT_EXPR
- && get_range_info (op0, &minv, &maxv) == VR_RANGE)
+ && determine_value_range (op0, &minv, &maxv) == VR_RANGE)
{
+ wi::overflow_type overflow = wi::OVF_NONE;
+ signop sign = UNSIGNED;
if (icode == PLUS_EXPR)
- op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
- if (wi::geu_p (minv, wi::to_wide (op1)))
+ wi::add (maxv, wi::to_wide (op1), sign, &overflow);
+ else if (icode == MULT_EXPR)
+ wi::mul (maxv, wi::to_wide (op1), sign, &overflow);
+ else
+ wi::sub (minv, wi::to_wide (op1), sign, &overflow);
+
+ if (overflow == wi::OVF_NONE)
{
op0 = fold_convert (otype, op0);
op1 = fold_convert (otype, op1);
- expr = fold_build2 (MINUS_EXPR, otype, op0, op1);
- tree_to_aff_combination (expr, type, comb);
- return;
+ return expr_to_aff_combination (comb, icode, otype, op0,
+ op1);
}
}
}
}
break;

+ default:;
+ }
+
+ return false;
+}
+
+/* Splits EXPR into an affine combination of parts. */
+
+void
+tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
+{
+ aff_tree tmp;
+ enum tree_code code;
+ tree core, toffset;
+ poly_int64 bitpos, bitsize, bytepos;
+ machine_mode mode;
+ int unsignedp, reversep, volatilep;
+
+ STRIP_NOPS (expr);
+
+ code = TREE_CODE (expr);
+ switch (code)
+ {
+ case POINTER_PLUS_EXPR:
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ case MULT_EXPR:
+ if (expr_to_aff_combination (comb, code, type, TREE_OPERAND (expr, 0),
+ TREE_OPERAND (expr, 1)))
+ return;
+ break;
+
+ case NEGATE_EXPR:
+ case BIT_NOT_EXPR:
+ if (expr_to_aff_combination (comb, code, type, TREE_OPERAND (expr, 0)))
+ return;
+ break;
+
+ CASE_CONVERT:
+ /* ??? TREE_TYPE (expr) should be equal to type here, but IVOPTS
+ calls this with not showing an outer widening cast. */
+ if (expr_to_aff_combination (comb, code,
+ TREE_TYPE (expr), TREE_OPERAND (expr, 0)))
+ {
+ aff_combination_convert (comb, type);
+ return;
+ }
+ break;
+
+ case ADDR_EXPR:
+ /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */
+ if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF)
+ {
+ expr = TREE_OPERAND (expr, 0);
+ tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
+ tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp);
+ aff_combination_add (comb, &tmp);
+ return;
+ }
+ core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
+ &toffset, &mode, &unsignedp, &reversep,
+ &volatilep);
+ if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
+ break;
+ aff_combination_const (comb, type, bytepos);
+ if (TREE_CODE (core) == MEM_REF)
+ {
+ tree mem_offset = TREE_OPERAND (core, 1);
+ aff_combination_add_cst (comb, wi::to_poly_widest (mem_offset));
+ core = TREE_OPERAND (core, 0);
+ }
+ else
+ core = build_fold_addr_expr (core);
+
+ if (TREE_CODE (core) == ADDR_EXPR)
+ aff_combination_add_elt (comb, core, 1);
+ else
+ {
+ tree_to_aff_combination (core, type, &tmp);
+ aff_combination_add (comb, &tmp);
+ }
+ if (toffset)
+ {
+ tree_to_aff_combination (toffset, type, &tmp);
+ aff_combination_add (comb, &tmp);
+ }
+ return;
+
default:
{
if (poly_int_tree_p (expr))
@@ -665,7 +716,7 @@ aff_combination_expand (aff_tree *comb A
{
unsigned i;
aff_tree to_add, current, curre;
- tree e, rhs;
+ tree e;
gimple *def;
widest_int scale;
struct name_expansion *exp;
@@ -715,20 +766,38 @@ aff_combination_expand (aff_tree *comb A
case PLUS_EXPR:
case MINUS_EXPR:
case MULT_EXPR:
+ if (!expr_to_aff_combination (&current, code, TREE_TYPE (name),
+ gimple_assign_rhs1 (def),
+ gimple_assign_rhs2 (def)))
+ continue;
+ break;
case NEGATE_EXPR:
case BIT_NOT_EXPR:
+ if (!expr_to_aff_combination (&current, code, TREE_TYPE (name),
+ gimple_assign_rhs1 (def)))
+ continue;
+ break;
CASE_CONVERT:
- rhs = gimple_assign_rhs_to_tree (def);
+ if (!expr_to_aff_combination (&current, code, TREE_TYPE (name),
+ gimple_assign_rhs1 (def)))
+ /* This makes us always expand conversions which we did
+ in the past and makes gcc.dg/tree-ssa/ivopts-lt-2.c
+ PASS, eliminating one induction variable in IVOPTs.
+ ??? But it is really excessive and we should try
+ harder to do without it. */
+ aff_combination_elt (&current, TREE_TYPE (name),
+ fold_convert (TREE_TYPE (name),
+ gimple_assign_rhs1 (def)));
break;
case ADDR_EXPR:
case INTEGER_CST:
case POLY_INT_CST:
- rhs = gimple_assign_rhs1 (def);
+ tree_to_aff_combination (gimple_assign_rhs1 (def),
+ TREE_TYPE (name), &current);
break;
default:
continue;
}
- tree_to_aff_combination (rhs, TREE_TYPE (name), &current);
exp = XNEW (struct name_expansion);
exp->in_progress = 1;
if (!*cache)
|