187 lines
7.1 KiB
Diff
187 lines
7.1 KiB
Diff
|
|
From c690da762e873d0f5c66ea084e420ba4842354a6 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Jakub Jelinek <jakub@redhat.com>
|
||
|
|
Date: Wed, 4 Nov 2020 11:55:29 +0100
|
||
|
|
Subject: [PATCH 02/35] [Backport] phiopt: Optimize x ? 1024 : 0 to (int) x <<
|
||
|
|
10 [PR97690]
|
||
|
|
|
||
|
|
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=3e190757fa332d327bee27495f37beb01155cfab
|
||
|
|
|
||
|
|
The following patch generalizes the x ? 1 : 0 -> (int) x optimization
|
||
|
|
to handle also left shifts by constant.
|
||
|
|
|
||
|
|
During x86_64-linux and i686-linux bootstraps + regtests it triggered
|
||
|
|
in 1514 unique non-LTO -m64 cases (sort -u on log mentioning
|
||
|
|
filename, function name and shift count) and 1866 -m32 cases.
|
||
|
|
|
||
|
|
Unfortunately, the patch regresses (before the tests have been adjusted):
|
||
|
|
+FAIL: gcc.dg/tree-ssa/ssa-ccp-11.c scan-tree-dump-times optimized "if " 0
|
||
|
|
+FAIL: gcc.dg/vect/bb-slp-pattern-2.c -flto -ffat-lto-objects scan-tree-dump-times slp1 "optimized: basic block" 1
|
||
|
|
+FAIL: gcc.dg/vect/bb-slp-pattern-2.c scan-tree-dump-times slp1 "optimized: basic block" 1
|
||
|
|
and in both cases it actually results in worse code.
|
||
|
|
|
||
|
|
> > We'd need some optimization that would go through all PHI edges and
|
||
|
|
> > compute if some use of the phi results doesn't actually compute a constant
|
||
|
|
> > across all the PHI edges - 1 & 0 and 0 & 1 is always 0.
|
||
|
|
|
||
|
|
> PRE should do this, IMHO only optimizing it at -O2 is fine.
|
||
|
|
|
||
|
|
> > Similarly, in the slp vectorization test there is:
|
||
|
|
> > a[0] = b[0] ? 1 : 7;
|
||
|
|
|
||
|
|
> note this, carefully avoiding the already "optimized" b[0] ? 1 : 0 ...
|
||
|
|
|
||
|
|
> So the option is to put : 7 in the 2, 4 and 8 case as well. The testcase
|
||
|
|
> wasn't added for any real-world case but is artificial I guess for
|
||
|
|
> COND_EXPR handling of invariants.
|
||
|
|
|
||
|
|
> But yeah, for things like SLP it means we eventually have to
|
||
|
|
> implement reverse transforms for all of this to make the lanes
|
||
|
|
> matching. But that's true anyway for things like x + 1 vs. x + 0
|
||
|
|
> or x / 3 vs. x / 2 or other simplifications we do.
|
||
|
|
|
||
|
|
2020-11-04 Jakub Jelinek <jakub@redhat.com>
|
||
|
|
|
||
|
|
PR tree-optimization/97690
|
||
|
|
* tree-ssa-phiopt.c (conditional_replacement): Also optimize
|
||
|
|
cond ? pow2p_cst : 0 as ((type) cond) << cst.
|
||
|
|
|
||
|
|
* gcc.dg/tree-ssa/phi-opt-22.c: New test.
|
||
|
|
* gcc.dg/tree-ssa/ssa-ccp-11.c: Use -O2 instead of -O1.
|
||
|
|
* gcc.dg/vect/bb-slp-pattern-2.c (foo): Use ? 2 : 7, ? 4 : 7 and
|
||
|
|
? 8 : 7 instead of ? 2 : 0, ? 4 : 0, ? 8 : 0.
|
||
|
|
---
|
||
|
|
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c | 11 ++++++
|
||
|
|
gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c | 2 +-
|
||
|
|
gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c | 6 ++--
|
||
|
|
gcc/tree-ssa-phiopt.c | 38 ++++++++++++++------
|
||
|
|
4 files changed, 43 insertions(+), 14 deletions(-)
|
||
|
|
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
|
||
|
|
|
||
|
|
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..fd3706666
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
|
||
|
|
@@ -0,0 +1,11 @@
|
||
|
|
+/* PR tree-optimization/97690 */
|
||
|
|
+/* { dg-do compile } */
|
||
|
|
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
|
||
|
|
+
|
||
|
|
+int foo (_Bool d) { return d ? 2 : 0; }
|
||
|
|
+int bar (_Bool d) { return d ? 1 : 0; }
|
||
|
|
+int baz (_Bool d) { return d ? -__INT_MAX__ - 1 : 0; }
|
||
|
|
+int qux (_Bool d) { return d ? 1024 : 0; }
|
||
|
|
+
|
||
|
|
+/* { dg-final { scan-tree-dump-not "if" "phiopt2" } } */
|
||
|
|
+/* { dg-final { scan-tree-dump-times " << " 3 "phiopt2" } } */
|
||
|
|
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
|
||
|
|
index 36b8e7fc8..d70ea5a01 100644
|
||
|
|
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
|
||
|
|
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
|
||
|
|
@@ -1,5 +1,5 @@
|
||
|
|
/* { dg-do compile } */
|
||
|
|
-/* { dg-options "-O1 -fdump-tree-optimized" } */
|
||
|
|
+/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||
|
|
|
||
|
|
/* Test for CPROP across a DAG. */
|
||
|
|
|
||
|
|
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
|
||
|
|
index d32cb7585..e64f0115a 100644
|
||
|
|
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
|
||
|
|
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
|
||
|
|
@@ -13,13 +13,13 @@ foo (short * __restrict__ a, int * __restrict__ b, int stride)
|
||
|
|
for (i = 0; i < N/stride; i++, a += stride, b += stride)
|
||
|
|
{
|
||
|
|
a[0] = b[0] ? 1 : 7;
|
||
|
|
- a[1] = b[1] ? 2 : 0;
|
||
|
|
+ a[1] = b[1] ? 2 : 7;
|
||
|
|
a[2] = b[2] ? 3 : 0;
|
||
|
|
- a[3] = b[3] ? 4 : 0;
|
||
|
|
+ a[3] = b[3] ? 4 : 7;
|
||
|
|
a[4] = b[4] ? 5 : 0;
|
||
|
|
a[5] = b[5] ? 6 : 0;
|
||
|
|
a[6] = b[6] ? 7 : 0;
|
||
|
|
- a[7] = b[7] ? 8 : 0;
|
||
|
|
+ a[7] = b[7] ? 8 : 7;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||
|
|
index 591b6435f..85587e8d1 100644
|
||
|
|
--- a/gcc/tree-ssa-phiopt.c
|
||
|
|
+++ b/gcc/tree-ssa-phiopt.c
|
||
|
|
@@ -753,7 +753,9 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|
||
|
|
gimple_stmt_iterator gsi;
|
||
|
|
edge true_edge, false_edge;
|
||
|
|
tree new_var, new_var2;
|
||
|
|
- bool neg;
|
||
|
|
+ bool neg = false;
|
||
|
|
+ int shift = 0;
|
||
|
|
+ tree nonzero_arg;
|
||
|
|
|
||
|
|
/* FIXME: Gimplification of complex type is too hard for now. */
|
||
|
|
/* We aren't prepared to handle vectors either (and it is a question
|
||
|
|
@@ -764,14 +766,22 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|
||
|
|
|| POINTER_TYPE_P (TREE_TYPE (arg1))))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
- /* The PHI arguments have the constants 0 and 1, or 0 and -1, then
|
||
|
|
- convert it to the conditional. */
|
||
|
|
- if ((integer_zerop (arg0) && integer_onep (arg1))
|
||
|
|
- || (integer_zerop (arg1) && integer_onep (arg0)))
|
||
|
|
- neg = false;
|
||
|
|
- else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
|
||
|
|
- || (integer_zerop (arg1) && integer_all_onesp (arg0)))
|
||
|
|
+ /* The PHI arguments have the constants 0 and 1, or 0 and -1 or
|
||
|
|
+ 0 and (1 << cst), then convert it to the conditional. */
|
||
|
|
+ if (integer_zerop (arg0))
|
||
|
|
+ nonzero_arg = arg1;
|
||
|
|
+ else if (integer_zerop (arg1))
|
||
|
|
+ nonzero_arg = arg0;
|
||
|
|
+ else
|
||
|
|
+ return false;
|
||
|
|
+ if (integer_all_onesp (nonzero_arg))
|
||
|
|
neg = true;
|
||
|
|
+ else if (integer_pow2p (nonzero_arg))
|
||
|
|
+ {
|
||
|
|
+ shift = tree_log2 (nonzero_arg);
|
||
|
|
+ if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg)))
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
else
|
||
|
|
return false;
|
||
|
|
|
||
|
|
@@ -783,12 +793,12 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|
||
|
|
falls through into BB.
|
||
|
|
|
||
|
|
There is a single PHI node at the join point (BB) and its arguments
|
||
|
|
- are constants (0, 1) or (0, -1).
|
||
|
|
+ are constants (0, 1) or (0, -1) or (0, (1 << shift)).
|
||
|
|
|
||
|
|
So, given the condition COND, and the two PHI arguments, we can
|
||
|
|
rewrite this PHI into non-branching code:
|
||
|
|
|
||
|
|
- dest = (COND) or dest = COND'
|
||
|
|
+ dest = (COND) or dest = COND' or dest = (COND) << shift
|
||
|
|
|
||
|
|
We use the condition as-is if the argument associated with the
|
||
|
|
true edge has the value one or the argument associated with the
|
||
|
|
@@ -823,6 +833,14 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|
||
|
|
cond = fold_build1_loc (gimple_location (stmt),
|
||
|
|
NEGATE_EXPR, TREE_TYPE (cond), cond);
|
||
|
|
}
|
||
|
|
+ else if (shift)
|
||
|
|
+ {
|
||
|
|
+ cond = fold_convert_loc (gimple_location (stmt),
|
||
|
|
+ TREE_TYPE (result), cond);
|
||
|
|
+ cond = fold_build2_loc (gimple_location (stmt),
|
||
|
|
+ LSHIFT_EXPR, TREE_TYPE (cond), cond,
|
||
|
|
+ build_int_cst (integer_type_node, shift));
|
||
|
|
+ }
|
||
|
|
|
||
|
|
/* Insert our new statements at the end of conditional block before the
|
||
|
|
COND_STMT. */
|
||
|
|
--
|
||
|
|
2.27.0.windows.1
|
||
|
|
|