This backport contains 2 patches from the GCC mainstream tree. The commit IDs
of these patches are listed below in the order they were committed.

0001-tree-affine.c-expr_to_aff_combination-New-function-s.patch
    5120e0d8d48f4590a275e60565de6c5a4e772fc1
0001-PR-tree-optimization-94574-aarch64-ICE-during-GIMPLE.patch
    0447929f11e6a3e1b076841712b90a8b6bc7d33a

diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c 2020-12-08 14:54:11.467633230 +0800
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE unsigned int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c 2020-12-08 14:54:11.467633230 +0800
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h 2020-12-08 14:54:11.467633230 +0800
@@ -0,0 +1,30 @@
+__attribute__ ((noinline)) void
+calculate (const double *__restrict__ A, const double *__restrict__ B,
+           double *__restrict__ C)
+{
+  TYPE m = 0;
+  TYPE n = 0;
+  TYPE k = 0;
+
+  A = (const double *) __builtin_assume_aligned (A, 16);
+  B = (const double *) __builtin_assume_aligned (B, 16);
+  C = (double *) __builtin_assume_aligned (C, 16);
+
+  for (n = 0; n < 9; n++)
+    {
+      for (m = 0; m < 10; m++)
+        {
+          C[(n * 10) + m] = 0.0;
+        }
+
+      for (k = 0; k < 17; k++)
+        {
+#pragma simd
+          for (m = 0; m < 10; m++)
+            {
+              C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
+            }
+        }
+    }
+}
+
diff -Nurp a/gcc/tree-affine.c b/gcc/tree-affine.c
--- a/gcc/tree-affine.c 2020-12-09 09:01:13.179633230 +0800
+++ b/gcc/tree-affine.c 2020-12-08 14:54:11.467633230 +0800
@@ -259,104 +259,66 @@ aff_combination_convert (aff_tree *comb,
     {
     }
 }
 
-/* Splits EXPR into an affine combination of parts.  */
+/* Tries to handle OP0 CODE OP1 as affine combination of parts.  Returns
+   true when that was successful and returns the combination in COMB.  */
-void
-tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
+static bool
+expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
+                         tree op0, tree op1 = NULL_TREE)
 {
   aff_tree tmp;
-  enum tree_code code;
-  tree cst, core, toffset;
   poly_int64 bitpos, bitsize, bytepos;
-  machine_mode mode;
-  int unsignedp, reversep, volatilep;
-
-  STRIP_NOPS (expr);
 
-  code = TREE_CODE (expr);
   switch (code)
     {
     case POINTER_PLUS_EXPR:
-      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
-      tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp);
+      tree_to_aff_combination (op0, type, comb);
+      tree_to_aff_combination (op1, sizetype, &tmp);
       aff_combination_add (comb, &tmp);
-      return;
+      return true;
 
     case PLUS_EXPR:
    case MINUS_EXPR:
-      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
-      tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
+      tree_to_aff_combination (op0, type, comb);
+      tree_to_aff_combination (op1, type, &tmp);
       if (code == MINUS_EXPR)
        aff_combination_scale (&tmp, -1);
       aff_combination_add (comb, &tmp);
-      return;
+      return true;
 
     case MULT_EXPR:
-      cst = TREE_OPERAND (expr, 1);
-      if (TREE_CODE (cst) != INTEGER_CST)
+      if (TREE_CODE (op1) != INTEGER_CST)
        break;
-      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
-      aff_combination_scale (comb, wi::to_widest (cst));
-      return;
+      tree_to_aff_combination (op0, type, comb);
+      aff_combination_scale (comb, wi::to_widest (op1));
+      return true;
 
     case NEGATE_EXPR:
-      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
+      tree_to_aff_combination (op0, type, comb);
       aff_combination_scale (comb, -1);
-      return;
+      return true;
 
     case BIT_NOT_EXPR:
       /* ~x = -x - 1 */
-      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
+      tree_to_aff_combination (op0, type, comb);
       aff_combination_scale (comb, -1);
       aff_combination_add_cst (comb, -1);
-      return;
-
-    case ADDR_EXPR:
-      /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR.  */
-      if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF)
-        {
-          expr = TREE_OPERAND (expr, 0);
-          tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
-          tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp);
-          aff_combination_add (comb, &tmp);
-          return;
-        }
-      core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
-                                  &toffset, &mode, &unsignedp, &reversep,
-                                  &volatilep);
-      if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
-        break;
-      aff_combination_const (comb, type, bytepos);
-      if (TREE_CODE (core) == MEM_REF)
-        {
-          tree mem_offset = TREE_OPERAND (core, 1);
-          aff_combination_add_cst (comb, wi::to_poly_widest (mem_offset));
-          core = TREE_OPERAND (core, 0);
-        }
-      else
-        core = build_fold_addr_expr (core);
-
-      if (TREE_CODE (core) == ADDR_EXPR)
-        aff_combination_add_elt (comb, core, 1);
-      else
-        {
-          tree_to_aff_combination (core, type, &tmp);
-          aff_combination_add (comb, &tmp);
-        }
-      if (toffset)
-        {
-          tree_to_aff_combination (toffset, type, &tmp);
-          aff_combination_add (comb, &tmp);
-        }
-      return;
+      return true;
 
     CASE_CONVERT:
       {
-        tree otype = TREE_TYPE (expr);
-        tree inner = TREE_OPERAND (expr, 0);
+        tree otype = type;
+        tree inner = op0;
         tree itype = TREE_TYPE (inner);
         enum tree_code icode = TREE_CODE (inner);
 
+        /* STRIP_NOPS */
+        if (tree_nop_conversion_p (otype, itype))
+          {
+            tree_to_aff_combination (op0, type, comb);
+            return true;
+          }
+
         /* In principle this is a valid folding, but it isn't necessarily
            an optimization, so do it here and not in fold_unary.  */
         if ((icode == PLUS_EXPR || icode == MINUS_EXPR || icode == MULT_EXPR)
@@ -376,38 +338,127 @@ tree_to_aff_combination (tree expr, tree
               {
                 op0 = fold_convert (otype, op0);
                 op1 = fold_convert (otype, op1);
-                expr = fold_build2 (icode, otype, op0, op1);
-                tree_to_aff_combination (expr, type, comb);
-                return;
+                return expr_to_aff_combination (comb, icode, otype, op0, op1);
               }
             wide_int minv, maxv;
             /* If inner type has wrapping overflow behavior, fold conversion
                for below case:
-                 (T1)(X - CST) -> (T1)X - (T1)CST
-               if X - CST doesn't overflow by range information.  Also handle
-               (T1)(X + CST) as (T1)(X - (-CST)).  */
+                 (T1)(X *+- CST) -> (T1)X *+- (T1)CST
+               if X *+- CST doesn't overflow by range information.  */
             if (TYPE_UNSIGNED (itype)
                 && TYPE_OVERFLOW_WRAPS (itype)
-                && TREE_CODE (op0) == SSA_NAME
                 && TREE_CODE (op1) == INTEGER_CST
-                && icode != MULT_EXPR
-                && get_range_info (op0, &minv, &maxv) == VR_RANGE)
+                && determine_value_range (op0, &minv, &maxv) == VR_RANGE)
               {
+                wi::overflow_type overflow = wi::OVF_NONE;
+                signop sign = UNSIGNED;
                 if (icode == PLUS_EXPR)
-                  op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
-                if (wi::geu_p (minv, wi::to_wide (op1)))
+                  wi::add (maxv, wi::to_wide (op1), sign, &overflow);
+                else if (icode == MULT_EXPR)
+                  wi::mul (maxv, wi::to_wide (op1), sign, &overflow);
+                else
+                  wi::sub (minv, wi::to_wide (op1), sign, &overflow);
+
+                if (overflow == wi::OVF_NONE)
                   {
                     op0 = fold_convert (otype, op0);
                     op1 = fold_convert (otype, op1);
-                    expr = fold_build2 (MINUS_EXPR, otype, op0, op1);
-                    tree_to_aff_combination (expr, type, comb);
-                    return;
+                    return expr_to_aff_combination (comb, icode, otype, op0,
+                                                    op1);
                   }
               }
           }
       }
       break;
 
+    default:;
+    }
+
+  return false;
+}
+
+/* Splits EXPR into an affine combination of parts.  */
+
+void
+tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
+{
+  aff_tree tmp;
+  enum tree_code code;
+  tree core, toffset;
+  poly_int64 bitpos, bitsize, bytepos;
+  machine_mode mode;
+  int unsignedp, reversep, volatilep;
+
+  STRIP_NOPS (expr);
+
+  code = TREE_CODE (expr);
+  switch (code)
+    {
+    case POINTER_PLUS_EXPR:
+    case PLUS_EXPR:
+    case MINUS_EXPR:
+    case MULT_EXPR:
+      if (expr_to_aff_combination (comb, code, type, TREE_OPERAND (expr, 0),
+                                   TREE_OPERAND (expr, 1)))
+        return;
+      break;
+
+    case NEGATE_EXPR:
+    case BIT_NOT_EXPR:
+      if (expr_to_aff_combination (comb, code, type, TREE_OPERAND (expr, 0)))
+        return;
+      break;
+
+    CASE_CONVERT:
+      /* ??? TREE_TYPE (expr) should be equal to type here, but IVOPTS
+         calls this with not showing an outer widening cast.  */
+      if (expr_to_aff_combination (comb, code,
+                                   TREE_TYPE (expr), TREE_OPERAND (expr, 0)))
+        {
+          aff_combination_convert (comb, type);
+          return;
+        }
+      break;
+
+    case ADDR_EXPR:
+      /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR.  */
+      if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF)
+        {
+          expr = TREE_OPERAND (expr, 0);
+          tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
+          tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp);
+          aff_combination_add (comb, &tmp);
+          return;
+        }
+      core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
+                                  &toffset, &mode, &unsignedp, &reversep,
+                                  &volatilep);
+      if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
+        break;
+      aff_combination_const (comb, type, bytepos);
+      if (TREE_CODE (core) == MEM_REF)
+        {
+          tree mem_offset = TREE_OPERAND (core, 1);
+          aff_combination_add_cst (comb, wi::to_poly_widest (mem_offset));
+          core = TREE_OPERAND (core, 0);
+        }
+      else
+        core = build_fold_addr_expr (core);
+
+      if (TREE_CODE (core) == ADDR_EXPR)
+        aff_combination_add_elt (comb, core, 1);
+      else
+        {
+          tree_to_aff_combination (core, type, &tmp);
+          aff_combination_add (comb, &tmp);
+        }
+      if (toffset)
+        {
+          tree_to_aff_combination (toffset, type, &tmp);
+          aff_combination_add (comb, &tmp);
+        }
+      return;
+
     default:
       {
         if (poly_int_tree_p (expr))
@@ -665,7 +716,7 @@ aff_combination_expand (aff_tree *comb A
 {
   unsigned i;
   aff_tree to_add, current, curre;
-  tree e, rhs;
+  tree e;
   gimple *def;
   widest_int scale;
   struct name_expansion *exp;
@@ -715,20 +766,38 @@ aff_combination_expand (aff_tree *comb A
             case PLUS_EXPR:
             case MINUS_EXPR:
             case MULT_EXPR:
+              if (!expr_to_aff_combination (&current, code, TREE_TYPE (name),
+                                            gimple_assign_rhs1 (def),
+                                            gimple_assign_rhs2 (def)))
+                continue;
+              break;
             case NEGATE_EXPR:
             case BIT_NOT_EXPR:
+              if (!expr_to_aff_combination (&current, code, TREE_TYPE (name),
+                                            gimple_assign_rhs1 (def)))
+                continue;
+              break;
             CASE_CONVERT:
-              rhs = gimple_assign_rhs_to_tree (def);
+              if (!expr_to_aff_combination (&current, code, TREE_TYPE (name),
+                                            gimple_assign_rhs1 (def)))
+                /* This makes us always expand conversions which we did
+                   in the past and makes gcc.dg/tree-ssa/ivopts-lt-2.c
+                   PASS, eliminating one induction variable in IVOPTs.
+                   ??? But it is really excessive and we should try
+                   harder to do without it.  */
+                aff_combination_elt (&current, TREE_TYPE (name),
+                                     fold_convert (TREE_TYPE (name),
+                                                   gimple_assign_rhs1 (def)));
              break;
             case ADDR_EXPR:
             case INTEGER_CST:
             case POLY_INT_CST:
-              rhs = gimple_assign_rhs1 (def);
+              tree_to_aff_combination (gimple_assign_rhs1 (def),
+                                       TREE_TYPE (name), &current);
              break;
            default:
              continue;
            }
-          tree_to_aff_combination (rhs, TREE_TYPE (name), &current);
          exp = XNEW (struct name_expansion);
          exp->in_progress = 1;
          if (!*cache)
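
Reviewer note: the reduced example below is illustration added for this backport
description, not part of the applied patch. It sketches the code shape from
pr83403.h that the two upstream commits target: with an unsigned (wrapping)
index, the store address of C[(n * 10) + m] contains a widening conversion of
the index arithmetic, and tree-affine can only split that address into an
affine combination once the conversion may be distributed over the inner
PLUS/MULT, which the new determine_value_range overflow check proves safe. The
function name accumulate and the dropped zero-initialization loop are
simplifications of the original testcase.

/* Illustration only, reduced from gcc/testsuite/gcc.dg/tree-ssa/pr83403.h.
   Under -O3 -funroll-loops the innermost loop is unrolled and the testcase
   expects lim2 to report "Executing store motion of" ten times, once per
   C[...] accumulator.  That requires splitting the store address
   C + (sizetype)(n * 10 + m) * sizeof (double) as an affine combination,
   which is only valid because the loop bounds (n < 9, m < 10, hence
   n * 10 + m <= 89) show the unsigned index arithmetic cannot wrap.  */
void
accumulate (const double *A, const double *B, double *C)
{
  unsigned int m, n, k;

  for (n = 0; n < 9; n++)
    for (k = 0; k < 17; k++)
      for (m = 0; m < 10; m++)
        C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
}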