[Sync] Sync patch from openeuler/gcc

Sync patch from openeuler/gcc - 20221201

(cherry picked from commit 5487e8942c694fd317f2cbf1662e9eaf33f2f612)
This commit is contained in:
benniaobufeijiushiji 2022-12-01 11:33:51 +08:00 committed by openeuler-sync-bot
parent 404b6b59b8
commit a41360f2fb
36 changed files with 14684 additions and 2 deletions

View File

@@ -0,0 +1,21 @@
From 7dffda64fcbbd522616d7dc9c70530d146f4fed6 Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Tue, 1 Nov 2022 16:38:38 +0800
Subject: [PATCH 01/35] [build] Add some file right to executable
---
libgcc/mkheader.sh | 0
move-if-change | 0
2 files changed, 0 insertions(+), 0 deletions(-)
mode change 100644 => 100755 libgcc/mkheader.sh
mode change 100644 => 100755 move-if-change
diff --git a/libgcc/mkheader.sh b/libgcc/mkheader.sh
old mode 100644
new mode 100755
diff --git a/move-if-change b/move-if-change
old mode 100644
new mode 100755
--
2.27.0.windows.1

View File

@@ -0,0 +1,186 @@
From c690da762e873d0f5c66ea084e420ba4842354a6 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Wed, 4 Nov 2020 11:55:29 +0100
Subject: [PATCH 02/35] [Backport] phiopt: Optimize x ? 1024 : 0 to (int) x <<
10 [PR97690]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=3e190757fa332d327bee27495f37beb01155cfab
The following patch generalizes the x ? 1 : 0 -> (int) x optimization
to handle also left shifts by constant.
During x86_64-linux and i686-linux bootstraps + regtests it triggered
in 1514 unique non-LTO -m64 cases (sort -u on log mentioning
filename, function name and shift count) and 1866 -m32 cases.
Unfortunately, the patch regresses (before the tests have been adjusted):
+FAIL: gcc.dg/tree-ssa/ssa-ccp-11.c scan-tree-dump-times optimized "if " 0
+FAIL: gcc.dg/vect/bb-slp-pattern-2.c -flto -ffat-lto-objects scan-tree-dump-times slp1 "optimized: basic block" 1
+FAIL: gcc.dg/vect/bb-slp-pattern-2.c scan-tree-dump-times slp1 "optimized: basic block" 1
and in both cases it actually results in worse code.
> > We'd need some optimization that would go through all PHI edges and
> > compute if some use of the phi results don't actually compute a constant
> > across all the PHI edges - 1 & 0 and 0 & 1 is always 0.
> PRE should do this, IMHO only optimizing it at -O2 is fine.
> > Similarly, in the slp vectorization test there is:
> > a[0] = b[0] ? 1 : 7;
> note this, carefully avoiding the already "optimized" b[0] ? 1 : 0 ...
> So the option is to put : 7 in the 2, 4 an 8 case as well. The testcase
> wasn't added for any real-world case but is artificial I guess for
> COND_EXPR handling of invariants.
> But yeah, for things like SLP it means we eventually have to
> implement reverse transforms for all of this to make the lanes
> matching. But that's true anyway for things like x + 1 vs. x + 0
> or x / 3 vs. x / 2 or other simplifications we do.
2020-11-04 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/97690
* tree-ssa-phiopt.c (conditional_replacement): Also optimize
cond ? pow2p_cst : 0 as ((type) cond) << cst.
* gcc.dg/tree-ssa/phi-opt-22.c: New test.
* gcc.dg/tree-ssa/ssa-ccp-11.c: Use -O2 instead of -O1.
* gcc.dg/vect/bb-slp-pattern-2.c (foo): Use ? 2 : 7, ? 4 : 7 and
? 8 : 7 instead of ? 2 : 0, ? 4 : 0, ? 8 : 0.
---
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c | 11 ++++++
gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c | 2 +-
gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c | 6 ++--
gcc/tree-ssa-phiopt.c | 38 ++++++++++++++------
4 files changed, 43 insertions(+), 14 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
new file mode 100644
index 000000000..fd3706666
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
@@ -0,0 +1,11 @@
+/* PR tree-optimization/97690 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+
+int foo (_Bool d) { return d ? 2 : 0; }
+int bar (_Bool d) { return d ? 1 : 0; }
+int baz (_Bool d) { return d ? -__INT_MAX__ - 1 : 0; }
+int qux (_Bool d) { return d ? 1024 : 0; }
+
+/* { dg-final { scan-tree-dump-not "if" "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " << " 3 "phiopt2" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
index 36b8e7fc8..d70ea5a01 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O1 -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
/* Test for CPROP across a DAG. */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
index d32cb7585..e64f0115a 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
@@ -13,13 +13,13 @@ foo (short * __restrict__ a, int * __restrict__ b, int stride)
for (i = 0; i < N/stride; i++, a += stride, b += stride)
{
a[0] = b[0] ? 1 : 7;
- a[1] = b[1] ? 2 : 0;
+ a[1] = b[1] ? 2 : 7;
a[2] = b[2] ? 3 : 0;
- a[3] = b[3] ? 4 : 0;
+ a[3] = b[3] ? 4 : 7;
a[4] = b[4] ? 5 : 0;
a[5] = b[5] ? 6 : 0;
a[6] = b[6] ? 7 : 0;
- a[7] = b[7] ? 8 : 0;
+ a[7] = b[7] ? 8 : 7;
}
}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 591b6435f..85587e8d1 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -753,7 +753,9 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
gimple_stmt_iterator gsi;
edge true_edge, false_edge;
tree new_var, new_var2;
- bool neg;
+ bool neg = false;
+ int shift = 0;
+ tree nonzero_arg;
/* FIXME: Gimplification of complex type is too hard for now. */
/* We aren't prepared to handle vectors either (and it is a question
@@ -764,14 +766,22 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|| POINTER_TYPE_P (TREE_TYPE (arg1))))
return false;
- /* The PHI arguments have the constants 0 and 1, or 0 and -1, then
- convert it to the conditional. */
- if ((integer_zerop (arg0) && integer_onep (arg1))
- || (integer_zerop (arg1) && integer_onep (arg0)))
- neg = false;
- else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
- || (integer_zerop (arg1) && integer_all_onesp (arg0)))
+ /* The PHI arguments have the constants 0 and 1, or 0 and -1 or
+ 0 and (1 << cst), then convert it to the conditional. */
+ if (integer_zerop (arg0))
+ nonzero_arg = arg1;
+ else if (integer_zerop (arg1))
+ nonzero_arg = arg0;
+ else
+ return false;
+ if (integer_all_onesp (nonzero_arg))
neg = true;
+ else if (integer_pow2p (nonzero_arg))
+ {
+ shift = tree_log2 (nonzero_arg);
+ if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg)))
+ return false;
+ }
else
return false;
@@ -783,12 +793,12 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
falls through into BB.
There is a single PHI node at the join point (BB) and its arguments
- are constants (0, 1) or (0, -1).
+ are constants (0, 1) or (0, -1) or (0, (1 << shift)).
So, given the condition COND, and the two PHI arguments, we can
rewrite this PHI into non-branching code:
- dest = (COND) or dest = COND'
+ dest = (COND) or dest = COND' or dest = (COND) << shift
We use the condition as-is if the argument associated with the
true edge has the value one or the argument associated with the
@@ -823,6 +833,14 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
cond = fold_build1_loc (gimple_location (stmt),
NEGATE_EXPR, TREE_TYPE (cond), cond);
}
+ else if (shift)
+ {
+ cond = fold_convert_loc (gimple_location (stmt),
+ TREE_TYPE (result), cond);
+ cond = fold_build2_loc (gimple_location (stmt),
+ LSHIFT_EXPR, TREE_TYPE (cond), cond,
+ build_int_cst (integer_type_node, shift));
+ }
/* Insert our new statements at the end of conditional block before the
COND_STMT. */
--
2.27.0.windows.1

View File

@@ -0,0 +1,92 @@
From 79a974bc7bb67cf425a7839f3c1f5689e41c7ee8 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Tue, 9 Mar 2021 19:13:11 +0100
Subject: [PATCH 03/35] [Backport] phiopt: Fix up conditional_replacement
[PR99305]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b610c30453d8e4cc88693d85a5a100d089640be5
Before my PR97690 changes, conditional_replacement would not set neg
when the nonzero arg was boolean true.
I've simplified the testing, so that it first finds the zero argument
and then checks the other argument for all the handled cases
(1, -1 and 1 << X, where the last case is what the patch added support for).
But, unfortunately I've placed the integer_all_onesp test first.
For unsigned precision 1 types such as bool integer_all_onesp, integer_onep
and integer_pow2p can all be true and the code set neg to true in that case,
which is undesirable.
The following patch tests integer_pow2p first (which is trivially true
for integer_onep too and tree_log2 in that case gives shift == 0)
and only if that isn't the case, integer_all_onesp.
2021-03-09 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/99305
* tree-ssa-phiopt.c (conditional_replacement): Test integer_pow2p
before integer_all_onesp instead of vice versa.
* g++.dg/opt/pr99305.C: New test.
---
gcc/testsuite/g++.dg/opt/pr99305.C | 26 ++++++++++++++++++++++++++
gcc/tree-ssa-phiopt.c | 6 +++---
2 files changed, 29 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/opt/pr99305.C
diff --git a/gcc/testsuite/g++.dg/opt/pr99305.C b/gcc/testsuite/g++.dg/opt/pr99305.C
new file mode 100644
index 000000000..8a91277e7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr99305.C
@@ -0,0 +1,26 @@
+// PR tree-optimization/99305
+// { dg-do compile }
+// { dg-options "-O3 -fno-ipa-icf -fdump-tree-optimized" }
+// { dg-final { scan-tree-dump-times " = \\\(unsigned char\\\) c_\[0-9]*\\\(D\\\);" 3 "optimized" } }
+// { dg-final { scan-tree-dump-times " = \[^\n\r]* \\+ \[0-9]*;" 3 "optimized" } }
+// { dg-final { scan-tree-dump-times " = \[^\n\r]* <= 9;" 3 "optimized" } }
+// { dg-final { scan-tree-dump-not "if \\\(c_\[0-9]*\\\(D\\\) \[!=]= 0\\\)" "optimized" } }
+// { dg-final { scan-tree-dump-not " = PHI <" "optimized" } }
+
+bool
+foo (char c)
+{
+ return c >= 48 && c <= 57;
+}
+
+bool
+bar (char c)
+{
+ return c != 0 && foo (c);
+}
+
+bool
+baz (char c)
+{
+ return c != 0 && c >= 48 && c <= 57;
+}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 85587e8d1..b9be28474 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -774,14 +774,14 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
nonzero_arg = arg0;
else
return false;
- if (integer_all_onesp (nonzero_arg))
- neg = true;
- else if (integer_pow2p (nonzero_arg))
+ if (integer_pow2p (nonzero_arg))
{
shift = tree_log2 (nonzero_arg);
if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg)))
return false;
}
+ else if (integer_all_onesp (nonzero_arg))
+ neg = true;
else
return false;
--
2.27.0.windows.1

View File

@@ -0,0 +1,122 @@
From 09263d5ed4d81a008ca8ffcc2883dc766e7874d5 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Sun, 6 Dec 2020 10:58:10 +0100
Subject: [PATCH 04/35] [Backport] phiopt: Handle bool in two_value_replacement
[PR96232]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=8c23434fdadcf4caa1f0e966294c5f67ccf4bcf9
The following patch improves code generation on the included testcase by
enabling two_value_replacement on booleans. It does that only for arg0/arg1
values that conditional_replacement doesn't handle. Additionally
it limits two_value_replacement optimization to the late phiopt like
conditional_replacement.
2020-12-06 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/96232
* tree-ssa-phiopt.c (two_value_replacement): Optimize even boolean lhs
cases as long as arg0 has wider precision and conditional_replacement
doesn't handle that case.
(tree_ssa_phiopt_worker): Don't call two_value_replacement during
early phiopt.
* gcc.dg/tree-ssa/pr96232-2.c: New test.
* gcc.dg/tree-ssa/pr88676-2.c: Check phiopt2 dump rather than phiopt1.
---
gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c | 4 ++--
gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c | 18 ++++++++++++++++++
gcc/tree-ssa-phiopt.c | 23 +++++++++++++++++++----
3 files changed, 39 insertions(+), 6 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c
index 0e616365b..ea88407b6 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c
@@ -1,7 +1,7 @@
/* PR tree-optimization/88676 */
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-phiopt1" } */
-/* { dg-final { scan-tree-dump-not " = PHI <" "phiopt1" { target le } } } */
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+/* { dg-final { scan-tree-dump-not " = PHI <" "phiopt2" { target le } } } */
struct foo1 {
int i:1;
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c
new file mode 100644
index 000000000..9f51820ed
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c
@@ -0,0 +1,18 @@
+/* PR tree-optimization/96232 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump " 38 - " "optimized" } } */
+/* { dg-final { scan-tree-dump " \\+ 97;" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" } } */
+
+int
+foo (_Bool x)
+{
+ return x ? 37 : 38;
+}
+
+int
+bar (_Bool x)
+{
+ return x ? 98 : 97;
+}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index b9be28474..0623d740d 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -339,7 +339,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
}
/* Do the replacement of conditional if it can be done. */
- if (two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
+ if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
cfgchanged = true;
else if (!early_p
&& conditional_replacement (bb, bb1, e1, e2, phi,
@@ -636,7 +636,6 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
if (TREE_CODE (lhs) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (lhs))
- || TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
|| TREE_CODE (rhs) != INTEGER_CST)
return false;
@@ -649,9 +648,25 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
return false;
}
+ /* Defer boolean x ? 0 : {1,-1} or x ? {1,-1} : 0 to
+ conditional_replacement. */
+ if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
+ && (integer_zerop (arg0)
+ || integer_zerop (arg1)
+ || TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE
+ || (TYPE_PRECISION (TREE_TYPE (arg0))
+ <= TYPE_PRECISION (TREE_TYPE (lhs)))))
+ return false;
+
wide_int min, max;
- if (get_range_info (lhs, &min, &max) != VR_RANGE
- || min + 1 != max
+ if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE)
+ {
+ min = wi::to_wide (boolean_false_node);
+ max = wi::to_wide (boolean_true_node);
+ }
+ else if (get_range_info (lhs, &min, &max) != VR_RANGE)
+ return false;
+ if (min + 1 != max
|| (wi::to_wide (rhs) != min
&& wi::to_wide (rhs) != max))
return false;
--
2.27.0.windows.1

View File

@@ -0,0 +1,256 @@
From a92cf465f10585350f7cd5739457c3f2852cfc86 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Wed, 21 Oct 2020 10:51:33 +0200
Subject: [PATCH 05/35] [Backport] phiopt: Optimize x ? __builtin_clz (x) : 32
in GIMPLE [PR97503]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=5244b4af5e47bc98a2a9cf36f048981583a1b163
While we have at the RTL level noce_try_ifelse_collapse combined with
simplify_cond_clz_ctz, that optimization doesn't always trigger because
e.g. on powerpc there is an define_insn to compare a reg against zero and
copy that register to another one and so we end up with a different pseudo
in the simplify_cond_clz_ctz test and punt.
For targets that define C?Z_DEFINED_VALUE_AT_ZERO to 2 for certain modes,
we can optimize it already in phiopt though, just need to ensure that
we transform the __builtin_c?z* calls into .C?Z ifns because my recent
VRP changes codified that the builtin calls are always undefined at zero,
while ifns honor C?Z_DEFINED_VALUE_AT_ZERO equal to 2.
And, in phiopt we already have popcount handling that does pretty much the
same thing, except for always using a zero value rather than the one set
by C?Z_DEFINED_VALUE_AT_ZERO.
So, this patch extends that function to handle not just popcount, but also
clz and ctz.
2020-10-21 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/97503
* tree-ssa-phiopt.c: Include internal-fn.h.
(cond_removal_in_popcount_pattern): Rename to ...
(cond_removal_in_popcount_clz_ctz_pattern): ... this. Handle not just
popcount, but also clz and ctz if it has C?Z_DEFINED_VALUE_AT_ZERO 2.
* gcc.dg/tree-ssa/pr97503.c: New test.
---
gcc/testsuite/gcc.dg/tree-ssa/pr97503.c | 19 +++++
gcc/tree-ssa-phiopt.c | 100 ++++++++++++++++++------
2 files changed, 95 insertions(+), 24 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr97503.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c b/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c
new file mode 100644
index 000000000..3a3dae6c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c
@@ -0,0 +1,19 @@
+/* PR tree-optimization/97503 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-additional-options "-mbmi -mlzcnt" { target i?86-*-* x86_64-*-* } } */
+/* { dg-final { scan-tree-dump-times "\.CLZ" 2 "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-* } && lp64 } } } } */
+/* { dg-final { scan-tree-dump-not "__builtin_clz" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
+
+int
+foo (int x)
+{
+ return x ? __builtin_clz (x) : 32;
+}
+
+int
+bar (unsigned long long x)
+{
+ return x ? __builtin_clzll (x) : 64;
+}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 0623d740d..c1e11916e 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-inline.h"
#include "case-cfn-macros.h"
#include "tree-eh.h"
+#include "internal-fn.h"
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
@@ -60,8 +61,9 @@ static bool minmax_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
static bool abs_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
-static bool cond_removal_in_popcount_pattern (basic_block, basic_block,
- edge, edge, gimple *, tree, tree);
+static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
+ edge, edge, gimple *,
+ tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
hash_set<tree> *);
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
@@ -348,8 +350,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
else if (!early_p
- && cond_removal_in_popcount_pattern (bb, bb1, e1, e2,
- phi, arg0, arg1))
+ && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
+ e2, phi, arg0,
+ arg1))
cfgchanged = true;
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
@@ -1771,16 +1774,20 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb,
<bb 4>
c_12 = PHI <_9(2)>
-*/
+
+ Similarly for __builtin_clz or __builtin_ctz if
+ C?Z_DEFINED_VALUE_AT_ZERO is 2, optab is present and
+ instead of 0 above it uses the value from that macro. */
static bool
-cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
- edge e1, edge e2,
- gimple *phi, tree arg0, tree arg1)
+cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
+ basic_block middle_bb,
+ edge e1, edge e2, gimple *phi,
+ tree arg0, tree arg1)
{
gimple *cond;
gimple_stmt_iterator gsi, gsi_from;
- gimple *popcount;
+ gimple *call;
gimple *cast = NULL;
tree lhs, arg;
@@ -1798,35 +1805,67 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
gsi_next_nondebug (&gsi);
if (!gsi_end_p (gsi))
{
- popcount = gsi_stmt (gsi);
+ call = gsi_stmt (gsi);
gsi_next_nondebug (&gsi);
if (!gsi_end_p (gsi))
return false;
}
else
{
- popcount = cast;
+ call = cast;
cast = NULL;
}
- /* Check that we have a popcount builtin. */
- if (!is_gimple_call (popcount))
+ /* Check that we have a popcount/clz/ctz builtin. */
+ if (!is_gimple_call (call) || gimple_call_num_args (call) != 1)
+ return false;
+
+ arg = gimple_call_arg (call, 0);
+ lhs = gimple_get_lhs (call);
+
+ if (lhs == NULL_TREE)
return false;
- combined_fn cfn = gimple_call_combined_fn (popcount);
+
+ combined_fn cfn = gimple_call_combined_fn (call);
+ internal_fn ifn = IFN_LAST;
+ int val = 0;
switch (cfn)
{
CASE_CFN_POPCOUNT:
break;
+ CASE_CFN_CLZ:
+ if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
+ {
+ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
+ if (direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (arg),
+ OPTIMIZE_FOR_BOTH)
+ && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
+ {
+ ifn = IFN_CLZ;
+ break;
+ }
+ }
+ return false;
+ CASE_CFN_CTZ:
+ if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
+ {
+ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
+ if (direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (arg),
+ OPTIMIZE_FOR_BOTH)
+ && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
+ {
+ ifn = IFN_CTZ;
+ break;
+ }
+ }
+ return false;
default:
return false;
}
- arg = gimple_call_arg (popcount, 0);
- lhs = gimple_get_lhs (popcount);
-
if (cast)
{
- /* We have a cast stmt feeding popcount builtin. */
+ /* We have a cast stmt feeding popcount/clz/ctz builtin. */
/* Check that we have a cast prior to that. */
if (gimple_code (cast) != GIMPLE_ASSIGN
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (cast)))
@@ -1839,7 +1878,7 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
cond = last_stmt (cond_bb);
- /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount
+ /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount/clz/ctz
builtin. */
if (gimple_code (cond) != GIMPLE_COND
|| (gimple_cond_code (cond) != NE_EXPR
@@ -1859,10 +1898,13 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
}
/* Check PHI arguments. */
- if (lhs != arg0 || !integer_zerop (arg1))
+ if (lhs != arg0
+ || TREE_CODE (arg1) != INTEGER_CST
+ || wi::to_wide (arg1) != val)
return false;
- /* And insert the popcount builtin and cast stmt before the cond_bb. */
+ /* And insert the popcount/clz/ctz builtin and cast stmt before the
+ cond_bb. */
gsi = gsi_last_bb (cond_bb);
if (cast)
{
@@ -1870,9 +1912,19 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
gsi_move_before (&gsi_from, &gsi);
reset_flow_sensitive_info (gimple_get_lhs (cast));
}
- gsi_from = gsi_for_stmt (popcount);
- gsi_move_before (&gsi_from, &gsi);
- reset_flow_sensitive_info (gimple_get_lhs (popcount));
+ gsi_from = gsi_for_stmt (call);
+ if (ifn == IFN_LAST || gimple_call_internal_p (call))
+ gsi_move_before (&gsi_from, &gsi);
+ else
+ {
+ /* For __builtin_c[lt]z* force .C[LT]Z ifn, because only
+ the latter is well defined at zero. */
+ call = gimple_build_call_internal (ifn, 1, gimple_call_arg (call, 0));
+ gimple_call_set_lhs (call, lhs);
+ gsi_insert_before (&gsi, call, GSI_SAME_STMT);
+ gsi_remove (&gsi_from, true);
+ }
+ reset_flow_sensitive_info (lhs);
/* Now update the PHI and remove unneeded bbs. */
replace_phi_edge_with_variable (cond_bb, e2, phi, lhs);
--
2.27.0.windows.1

View File

@@ -0,0 +1,69 @@
From 7d5d2ab082ce9986db4f3313013b44faa46bc412 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Thu, 22 Oct 2020 09:34:28 +0200
Subject: [PATCH 06/35] [Backport] phiopt: Optimize x ? __builtin_clz (x) : 32
in GIMPLE fallout [PR97503]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=ef2d3ec325b1b720df5da20784eba46249af2294
> this broke sparc-sun-solaris2.11 bootstrap
>
> /vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c: In function 'bool cond_removal_in_popcount_clz_ctz_pattern(basic_block, basic_block, edge, edge, gimple*, tree, tree)':
> /vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c:1858:27: error: variable 'mode' set but not used [-Werror=unused-but-set-variable]
> 1858 | scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
> | ^~~~
>
>
> and doubtlessly several other targets that use the defaults.h definition of
>
> #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) 0
Ugh, seems many of those macros do not evaluate the first argument.
This got broken by the change to direct_internal_fn_supported_p, previously
it used mode also in the optab test.
2020-10-22 Jakub Jelinek <jakub@redhat.com>
* tree-ssa-phiopt.c (cond_removal_in_popcount_clz_ctz_pattern):
For CLZ and CTZ tests, use type temporary instead of mode.
---
gcc/tree-ssa-phiopt.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index c1e11916e..707a5882e 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -1836,10 +1836,10 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
CASE_CFN_CLZ:
if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
{
- scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
- if (direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (arg),
- OPTIMIZE_FOR_BOTH)
- && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
+ tree type = TREE_TYPE (arg);
+ if (direct_internal_fn_supported_p (IFN_CLZ, type, OPTIMIZE_FOR_BOTH)
+ && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type),
+ val) == 2)
{
ifn = IFN_CLZ;
break;
@@ -1849,10 +1849,10 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
CASE_CFN_CTZ:
if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
{
- scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
- if (direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (arg),
- OPTIMIZE_FOR_BOTH)
- && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
+ tree type = TREE_TYPE (arg);
+ if (direct_internal_fn_supported_p (IFN_CTZ, type, OPTIMIZE_FOR_BOTH)
+ && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type),
+ val) == 2)
{
ifn = IFN_CTZ;
break;
--
2.27.0.windows.1

View File

@@ -0,0 +1,218 @@
From 018523df11698dd0e2d42326c57bdf724a7a1aa5 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Tue, 5 Jan 2021 16:35:22 +0100
Subject: [PATCH 07/35] [Backport] phiopt: Optimize x < 0 ? ~y : y to (x >> 31)
^ y [PR96928]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=576714b309b330df0e80e34114bcdf0bba35e146
As requested in the PR, the one's complement abs can be done more
efficiently without cmov or branching.
Had to change the ifcvt-onecmpl-abs-1.c testcase, we no longer optimize
it in ifcvt, on x86_64 with -m32 we generate in the end the exact same
code, but with -m64:
movl %edi, %eax
- notl %eax
- cmpl %edi, %eax
- cmovl %edi, %eax
+ sarl $31, %eax
+ xorl %edi, %eax
ret
2021-01-05 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/96928
* tree-ssa-phiopt.c (xor_replacement): New function.
(tree_ssa_phiopt_worker): Call it.
* gcc.dg/tree-ssa/pr96928.c: New test.
* gcc.target/i386/ifcvt-onecmpl-abs-1.c: Remove -fdump-rtl-ce1,
instead of scanning rtl dump for ifcvt message check assembly
for xor instruction.
---
gcc/testsuite/gcc.dg/tree-ssa/pr96928.c | 38 +++++++++
gcc/tree-ssa-phiopt.c | 108 ++++++++++++++++++++++++
2 files changed, 146 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
new file mode 100644
index 000000000..209135726
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
@@ -0,0 +1,38 @@
+/* PR tree-optimization/96928 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
+
+int
+foo (int a)
+{
+ return a < 0 ? ~a : a;
+}
+
+int
+bar (int a, int b)
+{
+ return a < 0 ? ~b : b;
+}
+
+unsigned
+baz (int a, unsigned int b)
+{
+ return a < 0 ? ~b : b;
+}
+
+unsigned
+qux (int a, unsigned int c)
+{
+ return a >= 0 ? ~c : c;
+}
+
+int
+corge (int a, int b)
+{
+ return a >= 0 ? b : ~b;
+}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 707a5882e..b9cd07a60 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -61,6 +61,8 @@ static bool minmax_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
static bool abs_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
+static bool xor_replacement (basic_block, basic_block,
+ edge, edge, gimple *, tree, tree);
static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
edge, edge, gimple *,
tree, tree);
@@ -349,6 +351,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
cfgchanged = true;
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
+ else if (!early_p
+ && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+ cfgchanged = true;
else if (!early_p
&& cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
e2, phi, arg0,
@@ -2059,6 +2064,109 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
return true;
}
+/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */
+
+static bool
+xor_replacement (basic_block cond_bb, basic_block middle_bb,
+ edge e0 ATTRIBUTE_UNUSED, edge e1,
+ gimple *phi, tree arg0, tree arg1)
+{
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
+ return false;
+
+ /* OTHER_BLOCK must have only one executable statement which must have the
+ form arg0 = ~arg1 or arg1 = ~arg0. */
+
+ gimple *assign = last_and_only_stmt (middle_bb);
+ /* If we did not find the proper one's complement assignment, then we cannot
+ optimize. */
+ if (assign == NULL)
+ return false;
+
+ /* If we got here, then we have found the only executable statement
+ in OTHER_BLOCK. If it is anything other than arg = ~arg1 or
+ arg1 = ~arg0, then we cannot optimize. */
+ if (!is_gimple_assign (assign))
+ return false;
+
+ if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR)
+ return false;
+
+ tree lhs = gimple_assign_lhs (assign);
+ tree rhs = gimple_assign_rhs1 (assign);
+
+ /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */
+ if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0))
+ return false;
+
+ gimple *cond = last_stmt (cond_bb);
+ tree result = PHI_RESULT (phi);
+
+ /* Only relationals comparing arg[01] against zero are interesting. */
+ enum tree_code cond_code = gimple_cond_code (cond);
+ if (cond_code != LT_EXPR && cond_code != GE_EXPR)
+ return false;
+
+ /* Make sure the conditional is x OP 0. */
+ tree clhs = gimple_cond_lhs (cond);
+ if (TREE_CODE (clhs) != SSA_NAME
+ || !INTEGRAL_TYPE_P (TREE_TYPE (clhs))
+ || TYPE_UNSIGNED (TREE_TYPE (clhs))
+ || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1))
+ || !integer_zerop (gimple_cond_rhs (cond)))
+ return false;
+
+ /* We need to know which is the true edge and which is the false
+ edge so that we know if have xor or inverted xor. */
+ edge true_edge, false_edge;
+ extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
+
+ /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
+ will need to invert the result. Similarly for LT_EXPR if
+ the false edge goes to OTHER_BLOCK. */
+ edge e;
+ if (cond_code == GE_EXPR)
+ e = true_edge;
+ else
+ e = false_edge;
+
+ bool invert = e->dest == middle_bb;
+
+ result = duplicate_ssa_name (result, NULL);
+
+ gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
+
+ int prec = TYPE_PRECISION (TREE_TYPE (clhs));
+ gimple *new_stmt
+ = gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs,
+ build_int_cst (integer_type_node, prec - 1));
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+
+ if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (clhs)))
+ {
+ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
+ NOP_EXPR, gimple_assign_lhs (new_stmt));
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+ }
+ lhs = gimple_assign_lhs (new_stmt);
+
+ if (invert)
+ {
+ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
+ BIT_NOT_EXPR, rhs);
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
+ rhs = gimple_assign_lhs (new_stmt);
+ }
+
+ new_stmt = gimple_build_assign (result, BIT_XOR_EXPR, lhs, rhs);
+ gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
+
+ replace_phi_edge_with_variable (cond_bb, e1, phi, result);
+
+ /* Note that we optimized this PHI. */
+ return true;
+}
+
/* Auxiliary functions to determine the set of memory accesses which
can't trap because they are preceded by accesses to the same memory
portion. We do that for MEM_REFs, so we only need to track
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,253 @@
From 96afd5b761a74e9eef40a2e843810c503c669de8 Mon Sep 17 00:00:00 2001
From: Eric Botcazou <ebotcazou@gcc.gnu.org>
Date: Thu, 28 May 2020 00:31:15 +0200
Subject: [PATCH 09/35] [Backport] Add support for __builtin_bswap128
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=fe7ebef7fe4f9acb79658ed9db0749b07efc3105
This patch introduces a new builtin named __builtin_bswap128 on targets
where TImode is supported, i.e. 64-bit targets only in practice. The
implementation simply reuses the existing double word path in optab, so
no routine is added to libgcc (which means that you get two calls to
_bswapdi2 in the worst case).
gcc/ChangeLog:
* builtin-types.def (BT_UINT128): New primitive type.
(BT_FN_UINT128_UINT128): New function type.
* builtins.def (BUILT_IN_BSWAP128): New GCC builtin.
* doc/extend.texi (__builtin_bswap128): Document it.
* builtins.c (expand_builtin): Deal with BUILT_IN_BSWAP128.
(is_inexpensive_builtin): Likewise.
* fold-const-call.c (fold_const_call_ss): Likewise.
* fold-const.c (tree_call_nonnegative_warnv_p): Likewise.
* tree-ssa-ccp.c (evaluate_stmt): Likewise.
* tree-vect-stmts.c (vect_get_data_ptr_increment): Likewise.
(vectorizable_call): Likewise.
* optabs.c (expand_unop): Always use the double word path for it.
* tree-core.h (enum tree_index): Add TI_UINT128_TYPE.
* tree.h (uint128_type_node): New global type.
* tree.c (build_common_tree_nodes): Build it if TImode is supported.
gcc/testsuite/ChangeLog:
* gcc.dg/builtin-bswap-10.c: New test.
* gcc.dg/builtin-bswap-11.c: Likewise.
* gcc.dg/builtin-bswap-12.c: Likewise.
* gcc.target/i386/builtin-bswap-5.c: Likewise.
---
gcc/builtin-types.def | 4 ++++
gcc/builtins.c | 2 ++
gcc/builtins.def | 2 ++
gcc/doc/extend.texi | 10 ++++++++--
gcc/fold-const-call.c | 1 +
gcc/fold-const.c | 2 ++
gcc/optabs.c | 5 ++++-
gcc/tree-core.h | 1 +
gcc/tree-ssa-ccp.c | 1 +
gcc/tree-vect-stmts.c | 5 +++--
gcc/tree.c | 2 ++
gcc/tree.h | 1 +
12 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
index c7aa691b2..c46b1bc5c 100644
--- a/gcc/builtin-types.def
+++ b/gcc/builtin-types.def
@@ -73,6 +73,9 @@ DEF_PRIMITIVE_TYPE (BT_UINT8, unsigned_char_type_node)
DEF_PRIMITIVE_TYPE (BT_UINT16, uint16_type_node)
DEF_PRIMITIVE_TYPE (BT_UINT32, uint32_type_node)
DEF_PRIMITIVE_TYPE (BT_UINT64, uint64_type_node)
+DEF_PRIMITIVE_TYPE (BT_UINT128, uint128_type_node
+ ? uint128_type_node
+ : error_mark_node)
DEF_PRIMITIVE_TYPE (BT_WORD, (*lang_hooks.types.type_for_mode) (word_mode, 1))
DEF_PRIMITIVE_TYPE (BT_UNWINDWORD, (*lang_hooks.types.type_for_mode)
(targetm.unwind_word_mode (), 1))
@@ -300,6 +303,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_UINT8_FLOAT, BT_UINT8, BT_FLOAT)
DEF_FUNCTION_TYPE_1 (BT_FN_UINT16_UINT16, BT_UINT16, BT_UINT16)
DEF_FUNCTION_TYPE_1 (BT_FN_UINT32_UINT32, BT_UINT32, BT_UINT32)
DEF_FUNCTION_TYPE_1 (BT_FN_UINT64_UINT64, BT_UINT64, BT_UINT64)
+DEF_FUNCTION_TYPE_1 (BT_FN_UINT128_UINT128, BT_UINT128, BT_UINT128)
DEF_FUNCTION_TYPE_1 (BT_FN_UINT64_FLOAT, BT_UINT64, BT_FLOAT)
DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_INT, BT_BOOL, BT_INT)
DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_PTR, BT_BOOL, BT_PTR)
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 10b6fd3bb..1b1c75cc1 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -8015,6 +8015,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
case BUILT_IN_BSWAP16:
case BUILT_IN_BSWAP32:
case BUILT_IN_BSWAP64:
+ case BUILT_IN_BSWAP128:
target = expand_builtin_bswap (target_mode, exp, target, subtarget);
if (target)
return target;
@@ -11732,6 +11733,7 @@ is_inexpensive_builtin (tree decl)
case BUILT_IN_BSWAP16:
case BUILT_IN_BSWAP32:
case BUILT_IN_BSWAP64:
+ case BUILT_IN_BSWAP128:
case BUILT_IN_CLZ:
case BUILT_IN_CLZIMAX:
case BUILT_IN_CLZL:
diff --git a/gcc/builtins.def b/gcc/builtins.def
index fa8b0641a..ee67ac15d 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -834,6 +834,8 @@ DEF_GCC_BUILTIN (BUILT_IN_APPLY_ARGS, "apply_args", BT_FN_PTR_VAR, ATTR_L
DEF_GCC_BUILTIN (BUILT_IN_BSWAP16, "bswap16", BT_FN_UINT16_UINT16, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_BSWAP32, "bswap32", BT_FN_UINT32_UINT32, ATTR_CONST_NOTHROW_LEAF_LIST)
DEF_GCC_BUILTIN (BUILT_IN_BSWAP64, "bswap64", BT_FN_UINT64_UINT64, ATTR_CONST_NOTHROW_LEAF_LIST)
+DEF_GCC_BUILTIN (BUILT_IN_BSWAP128, "bswap128", BT_FN_UINT128_UINT128, ATTR_CONST_NOTHROW_LEAF_LIST)
+
DEF_EXT_LIB_BUILTIN (BUILT_IN_CLEAR_CACHE, "__clear_cache", BT_FN_VOID_PTR_PTR, ATTR_NOTHROW_LEAF_LIST)
/* [trans-mem]: Adjust BUILT_IN_TM_CALLOC if BUILT_IN_CALLOC is changed. */
DEF_LIB_BUILTIN (BUILT_IN_CALLOC, "calloc", BT_FN_PTR_SIZE_SIZE, ATTR_MALLOC_WARN_UNUSED_RESULT_SIZE_1_2_NOTHROW_LEAF_LIST)
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 9c7345959..a7bd772de 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -13727,14 +13727,20 @@ exactly 8 bits.
@deftypefn {Built-in Function} uint32_t __builtin_bswap32 (uint32_t x)
Similar to @code{__builtin_bswap16}, except the argument and return types
-are 32 bit.
+are 32-bit.
@end deftypefn
@deftypefn {Built-in Function} uint64_t __builtin_bswap64 (uint64_t x)
Similar to @code{__builtin_bswap32}, except the argument and return types
-are 64 bit.
+are 64-bit.
@end deftypefn
+@deftypefn {Built-in Function} uint128_t __builtin_bswap128 (uint128_t x)
+Similar to @code{__builtin_bswap64}, except the argument and return types
+are 128-bit. Only supported on targets when 128-bit types are supported.
+@end deftypefn
+
+
@deftypefn {Built-in Function} Pmode __builtin_extend_pointer (void * x)
On targets where the user visible pointer size is smaller than the size
of an actual hardware address this function returns the extended user
diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c
index 6150d7ada..da01759d9 100644
--- a/gcc/fold-const-call.c
+++ b/gcc/fold-const-call.c
@@ -1032,6 +1032,7 @@ fold_const_call_ss (wide_int *result, combined_fn fn, const wide_int_ref &arg,
case CFN_BUILT_IN_BSWAP16:
case CFN_BUILT_IN_BSWAP32:
case CFN_BUILT_IN_BSWAP64:
+ case CFN_BUILT_IN_BSWAP128:
*result = wide_int::from (arg, precision, TYPE_SIGN (arg_type)).bswap ();
return true;
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 6e635382f..78227a83d 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -13889,8 +13889,10 @@ tree_call_nonnegative_warnv_p (tree type, combined_fn fn, tree arg0, tree arg1,
CASE_CFN_POPCOUNT:
CASE_CFN_CLZ:
CASE_CFN_CLRSB:
+ case CFN_BUILT_IN_BSWAP16:
case CFN_BUILT_IN_BSWAP32:
case CFN_BUILT_IN_BSWAP64:
+ case CFN_BUILT_IN_BSWAP128:
/* Always true. */
return true;
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 049a18ceb..c3751fdf7 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -2896,8 +2896,11 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target,
if (temp)
return temp;
+ /* We do not provide a 128-bit bswap in libgcc so force the use of
+ a double bswap for 64-bit targets. */
if (GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
- && optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
+ && (UNITS_PER_WORD == 64
+ || optab_handler (unoptab, word_mode) != CODE_FOR_nothing))
{
temp = expand_doubleword_bswap (mode, op0, target);
if (temp)
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index eb01c2434..058e046aa 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -600,6 +600,7 @@ enum tree_index {
TI_UINT16_TYPE,
TI_UINT32_TYPE,
TI_UINT64_TYPE,
+ TI_UINT128_TYPE,
TI_VOID,
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index 952fd9cd4..dcdf10369 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -2005,6 +2005,7 @@ evaluate_stmt (gimple *stmt)
case BUILT_IN_BSWAP16:
case BUILT_IN_BSWAP32:
case BUILT_IN_BSWAP64:
+ case BUILT_IN_BSWAP128:
val = get_value_for_expr (gimple_call_arg (stmt, 0), true);
if (val.lattice_val == UNDEFINED)
break;
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index b872cfc8d..4636b7ba2 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -3085,7 +3085,7 @@ vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
return iv_step;
}
-/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
+/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
static bool
vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
@@ -3454,7 +3454,8 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
else if (modifier == NONE
&& (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
|| gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
- || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
+ || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
+ || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
vectype_in, cost_vec);
else
diff --git a/gcc/tree.c b/gcc/tree.c
index 84a440b35..3e6647ae0 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -10394,6 +10394,8 @@ build_common_tree_nodes (bool signed_char)
uint16_type_node = make_or_reuse_type (16, 1);
uint32_type_node = make_or_reuse_type (32, 1);
uint64_type_node = make_or_reuse_type (64, 1);
+ if (targetm.scalar_mode_supported_p (TImode))
+ uint128_type_node = make_or_reuse_type (128, 1);
/* Decimal float types. */
if (targetm.decimal_float_supported_p ())
diff --git a/gcc/tree.h b/gcc/tree.h
index 328a2d5d2..bddc6e528 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -4035,6 +4035,7 @@ tree_strip_any_location_wrapper (tree exp)
#define uint16_type_node global_trees[TI_UINT16_TYPE]
#define uint32_type_node global_trees[TI_UINT32_TYPE]
#define uint64_type_node global_trees[TI_UINT64_TYPE]
+#define uint128_type_node global_trees[TI_UINT128_TYPE]
#define void_node global_trees[TI_VOID]
--
2.27.0.windows.1

View File

@ -0,0 +1,113 @@
From b9ac0cc69aab3c8d662d5b0a9ed43d971c13ac70 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Fri, 29 May 2020 09:25:53 +0200
Subject: [PATCH 10/35] [Backport] tree-optimization/95393 - fold MIN/MAX_EXPR
generated by phiopt
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=07852a81f58532c63a57631d7c3757fc6bcea17d
This makes sure to fold generated stmts so they do not survive
until RTL expansion and cause awkward code generation.
2020-05-29 Richard Biener <rguenther@suse.de>
PR tree-optimization/95393
* tree-ssa-phiopt.c (minmax_replacement): Use gimple_build
to build the min/max expression so we simplify cases like
MAX(0, s) immediately.
* gcc.dg/tree-ssa/phi-opt-21.c: New testcase.
* g++.dg/vect/slp-pr87105.cc: Adjust.
---
gcc/testsuite/g++.dg/vect/slp-pr87105.cc | 2 +-
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c | 15 +++++++++++++
gcc/tree-ssa-phiopt.c | 25 +++++++++++-----------
3 files changed, 29 insertions(+), 13 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c
diff --git a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
index 5518f319b..d07b1cd46 100644
--- a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
+++ b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
@@ -102,4 +102,4 @@ void quadBoundingBoxA(const Point bez[3], Box& bBox) noexcept {
// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } }
// It's a bit awkward to detect that all stores were vectorized but the
// following more or less does the trick
-// { dg-final { scan-tree-dump "vect_iftmp\[^\r\m\]* = MIN" "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } }
+// { dg-final { scan-tree-dump "vect_\[^\r\m\]* = MIN" "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } }
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c
new file mode 100644
index 000000000..9f3d56957
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-phiopt4-details" } */
+
+int f(unsigned s)
+{
+ int i;
+ for (i = 0; i < s; ++i)
+ ;
+
+ return i;
+}
+
+/* { dg-final { scan-tree-dump "converted to straightline code" "phiopt4" } } */
+/* Make sure we fold the detected MAX<s, 0>. */
+/* { dg-final { scan-tree-dump-not "MAX" "phiopt4" } } */
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index fca32222f..269eda21c 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-inline.h"
#include "case-cfn-macros.h"
#include "tree-eh.h"
+#include "gimple-fold.h"
#include "internal-fn.h"
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
@@ -1414,7 +1415,6 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb,
{
tree result, type, rhs;
gcond *cond;
- gassign *new_stmt;
edge true_edge, false_edge;
enum tree_code cmp, minmax, ass_code;
tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false;
@@ -1738,19 +1738,20 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb,
gsi_move_before (&gsi_from, &gsi);
}
- /* Create an SSA var to hold the min/max result. If we're the only
- things setting the target PHI, then we can clone the PHI
- variable. Otherwise we must create a new one. */
- result = PHI_RESULT (phi);
- if (EDGE_COUNT (gimple_bb (phi)->preds) == 2)
- result = duplicate_ssa_name (result, NULL);
- else
- result = make_ssa_name (TREE_TYPE (result));
-
/* Emit the statement to compute min/max. */
- new_stmt = gimple_build_assign (result, minmax, arg0, arg1);
+ gimple_seq stmts = NULL;
+ tree phi_result = PHI_RESULT (phi);
+ result = gimple_build (&stmts, minmax, TREE_TYPE (phi_result), arg0, arg1);
+ /* Duplicate range info if we're the only things setting the target PHI. */
+ if (!gimple_seq_empty_p (stmts)
+ && EDGE_COUNT (gimple_bb (phi)->preds) == 2
+ && !POINTER_TYPE_P (TREE_TYPE (phi_result))
+ && SSA_NAME_RANGE_INFO (phi_result))
+ duplicate_ssa_name_range_info (result, SSA_NAME_RANGE_TYPE (phi_result),
+ SSA_NAME_RANGE_INFO (phi_result));
+
gsi = gsi_last_bb (cond_bb);
- gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
+ gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT);
replace_phi_edge_with_variable (cond_bb, e1, phi, result);
--
2.27.0.windows.1

View File

@ -0,0 +1,91 @@
From 9f3a8c600abe16f172b36d8113862e8f7aea940c Mon Sep 17 00:00:00 2001
From: Andrew Pinski <apinski@marvell.com>
Date: Sun, 16 May 2021 13:07:06 -0700
Subject: [PATCH 11/35] [Backport] Add a couple of A?CST1:CST2 match and
simplify optimizations
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b6bdd7a4cb41ee057f2d064fffcb00f23ce6b497
Instead of some of the more manual optimizations inside phi-opt,
it would be a good idea to do a lot of the heavy lifting inside match
and simplify instead. In the process, this moves the three simple
A?CST1:CST2 (where CST1 or CST2 is zero) simplifications.
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
Differences from V1:
* Use bit_xor 1 instead of bit_not to fix the problem with boolean types
which are not 1 bit precision.
Thanks,
Andrew Pinski
gcc:
* match.pd (A?CST1:CST2): Add simplifcations for A?0:+-1, A?+-1:0,
A?POW2:0 and A?0:POW2.
---
gcc/match.pd | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)
diff --git a/gcc/match.pd b/gcc/match.pd
index 660d5c268..032830b0d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3334,6 +3334,54 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (cst1 && cst2)
(vec_cond @0 { cst1; } { cst2; })))))
+/* A few simplifications of "a ? CST1 : CST2". */
+/* NOTE: Only do this on gimple as the if-chain-to-switch
+ optimization depends on the gimple to have if statements in it. */
+#if GIMPLE
+(simplify
+ (cond @0 INTEGER_CST@1 INTEGER_CST@2)
+ (switch
+ (if (integer_zerop (@2))
+ (switch
+ /* a ? 1 : 0 -> a if 0 and 1 are integral types. */
+ (if (integer_onep (@1))
+ (convert (convert:boolean_type_node @0)))
+ /* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */
+ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1))
+ (with {
+ tree shift = build_int_cst (integer_type_node, tree_log2 (@1));
+ }
+ (lshift (convert (convert:boolean_type_node @0)) { shift; })))
+ /* a ? -1 : 0 -> -a. No need to check the TYPE_PRECISION not being 1
+ here as the powerof2cst case above will handle that case correctly. */
+ (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@1))
+ (negate (convert (convert:boolean_type_node @0))))))
+ (if (integer_zerop (@1))
+ (with {
+ tree booltrue = constant_boolean_node (true, boolean_type_node);
+ }
+ (switch
+ /* a ? 0 : 1 -> !a. */
+ (if (integer_onep (@2))
+ (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } )))
+ /* a ? powerof2cst : 0 -> (!a) << (log2(powerof2cst)) */
+ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2))
+ (with {
+ tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
+ }
+ (lshift (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } ))
+ { shift; })))
+ /* a ? -1 : 0 -> -(!a). No need to check the TYPE_PRECISION not being 1
+ here as the powerof2cst case above will handle that case correctly. */
+ (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@2))
+ (negate (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } ))))
+ )
+ )
+ )
+ )
+)
+#endif
+
/* Simplification moved from fold_cond_expr_with_comparison. It may also
be extended. */
/* This pattern implements two kinds simplification:
--
2.27.0.windows.1

View File

@ -0,0 +1,155 @@
From 4352b952ba24c413697fcfc191d06165a8a31ced Mon Sep 17 00:00:00 2001
From: Andrew Pinski <apinski@marvell.com>
Date: Sat, 22 May 2021 19:49:50 +0000
Subject: [PATCH 12/35] [Backport] Optimize x < 0 ? ~y : y to (x >> 31) ^ y in
match.pd
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=1fd76b24306ed4df4cf9e797d900699ed59ce7f7
This copies the optimization that is done in phiopt for
"x < 0 ? ~y : y to (x >> 31) ^ y" into match.pd. The code
for phiopt is kept around until phiopt uses match.pd (which
I am working towards).
Note the original testcase is now optimized early on and I added a
new testcase to optimize during phiopt.
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
Thanks,
Andrew Pinski
Differences from v1:
V2: Add check for integeral type to make sure vector types are not done.
gcc:
* match.pd (x < 0 ? ~y : y): New patterns.
gcc/testsuite:
* gcc.dg/tree-ssa/pr96928.c: Update test for slightly different IR.
* gcc.dg/tree-ssa/pr96928-1.c: New testcase.
---
gcc/match.pd | 32 +++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c | 48 +++++++++++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/pr96928.c | 7 +++-
3 files changed, 85 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
diff --git a/gcc/match.pd b/gcc/match.pd
index 032830b0d..5899eea95 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4390,6 +4390,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(cmp (bit_and@2 @0 integer_pow2p@1) @1)
(icmp @2 { build_zero_cst (TREE_TYPE (@0)); })))
+(for cmp (ge lt)
+/* x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */
+/* x >= 0 ? ~y : y into ~((x >> (prec-1)) ^ y). */
+ (simplify
+ (cond (cmp @0 integer_zerop) (bit_not @1) @1)
+ (if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && !TYPE_UNSIGNED (TREE_TYPE (@0))
+ && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (type))
+ (with
+ {
+ tree shifter = build_int_cst (integer_type_node, TYPE_PRECISION (type) - 1);
+ }
+ (if (cmp == LT_EXPR)
+ (bit_xor (convert (rshift @0 {shifter;})) @1)
+ (bit_not (bit_xor (convert (rshift @0 {shifter;})) @1))))))
+/* x < 0 ? y : ~y into ~((x >> (prec-1)) ^ y). */
+/* x >= 0 ? y : ~y into (x >> (prec-1)) ^ y. */
+ (simplify
+ (cond (cmp @0 integer_zerop) @1 (bit_not @1))
+ (if (INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && !TYPE_UNSIGNED (TREE_TYPE (@0))
+ && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (type))
+ (with
+ {
+ tree shifter = build_int_cst (integer_type_node, TYPE_PRECISION (type) - 1);
+ }
+ (if (cmp == GE_EXPR)
+ (bit_xor (convert (rshift @0 {shifter;})) @1)
+ (bit_not (bit_xor (convert (rshift @0 {shifter;})) @1)))))))
+
/* If we have (A & C) != 0 ? D : 0 where C and D are powers of 2,
convert this into a shift followed by ANDing with D. */
(simplify
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
new file mode 100644
index 000000000..a2770e5e8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
@@ -0,0 +1,48 @@
+/* PR tree-optimization/96928 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */
+/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
+
+int
+foo (int a)
+{
+ if (a < 0)
+ return ~a;
+ return a;
+}
+
+int
+bar (int a, int b)
+{
+ if (a < 0)
+ return ~b;
+ return b;
+}
+
+unsigned
+baz (int a, unsigned int b)
+{
+ if (a < 0)
+ return ~b;
+ return b;
+}
+
+unsigned
+qux (int a, unsigned int c)
+{
+ if (a >= 0)
+ return ~c;
+ return c;
+}
+
+int
+corge (int a, int b)
+{
+ if (a >= 0)
+ return b;
+ return ~b;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
index 209135726..e8fd82fc2 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
@@ -1,8 +1,11 @@
/* PR tree-optimization/96928 */
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-phiopt2" } */
+/* { dg-options "-O2 -fdump-tree-phiopt2 -fdump-tree-optimized" } */
/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */
-/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */
+/* The following check is done at optimized because a ^ (~b) is rewritten as ~(a^b)
+ and in the case of match.pd optimizing these ?:, the ~ is moved out already
+ by the time we get to phiopt2. */
+/* { dg-final { scan-tree-dump-times "\\\^ c_\[0-9]*\\\(D\\\);" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */
/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
--
2.27.0.windows.1

View File

@ -0,0 +1,249 @@
From 406071e8c1838c824f06c35ef3cf9419aa543e6e Mon Sep 17 00:00:00 2001
From: Andrew Pinski <apinski@marvell.com>
Date: Tue, 1 Jun 2021 01:05:09 +0000
Subject: [PATCH 13/35] [Backport] Replace conditional_replacement with match
and simplify
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=9f55df63154a39d67ef5b24def7044bf87300831
This is the first of a series of patches to simplify phi-opt
to use match and simplify in many cases. This simplification
will allow more things to be optimized.
This is what Richard requested in
https://gcc.gnu.org/pipermail/gcc-patches/2021-May/571197.html
and I think it is the right thing to do too.
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
gcc/ChangeLog:
PR tree-optimization/25290
* tree-ssa-phiopt.c (match_simplify_replacement):
New function.
(tree_ssa_phiopt_worker): Use match_simplify_replacement.
(two_value_replacement): Change the comment about
conditional_replacement.
(conditional_replacement): Delete.
---
gcc/tree-ssa-phiopt.c | 144 ++++++++++++------------------------------
1 file changed, 39 insertions(+), 105 deletions(-)
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 269eda21c..9fa6363b6 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -52,8 +52,8 @@ along with GCC; see the file COPYING3. If not see
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
tree, tree);
-static bool conditional_replacement (basic_block, basic_block,
- edge, edge, gphi *, tree, tree);
+static bool match_simplify_replacement (basic_block, basic_block,
+ edge, edge, gphi *, tree, tree);
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree,
gimple *);
static int value_replacement (basic_block, basic_block,
@@ -349,8 +349,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
cfgchanged = true;
else if (!early_p
- && conditional_replacement (bb, bb1, e1, e2, phi,
- arg0, arg1))
+ && match_simplify_replacement (bb, bb1, e1, e2, phi,
+ arg0, arg1))
cfgchanged = true;
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
@@ -662,7 +662,7 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
}
/* Defer boolean x ? 0 : {1,-1} or x ? {1,-1} : 0 to
- conditional_replacement. */
+ match_simplify_replacement. */
if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
&& (integer_zerop (arg0)
|| integer_zerop (arg1)
@@ -763,137 +763,71 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
return true;
}
-/* The function conditional_replacement does the main work of doing the
- conditional replacement. Return true if the replacement is done.
+/* The function match_simplify_replacement does the main work of doing the
+ replacement using match and simplify. Return true if the replacement is done.
Otherwise return false.
BB is the basic block where the replacement is going to be done on. ARG0
is argument 0 from PHI. Likewise for ARG1. */
static bool
-conditional_replacement (basic_block cond_bb, basic_block middle_bb,
- edge e0, edge e1, gphi *phi,
- tree arg0, tree arg1)
+match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
+ edge e0, edge e1, gphi *phi,
+ tree arg0, tree arg1)
{
- tree result;
gimple *stmt;
- gassign *new_stmt;
tree cond;
gimple_stmt_iterator gsi;
edge true_edge, false_edge;
- tree new_var, new_var2;
- bool neg = false;
- int shift = 0;
- tree nonzero_arg;
-
- /* FIXME: Gimplification of complex type is too hard for now. */
- /* We aren't prepared to handle vectors either (and it is a question
- if it would be worthwhile anyway). */
- if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0))
- || POINTER_TYPE_P (TREE_TYPE (arg0)))
- || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1))
- || POINTER_TYPE_P (TREE_TYPE (arg1))))
- return false;
+ gimple_seq seq = NULL;
+ tree result;
- /* The PHI arguments have the constants 0 and 1, or 0 and -1 or
- 0 and (1 << cst), then convert it to the conditional. */
- if (integer_zerop (arg0))
- nonzero_arg = arg1;
- else if (integer_zerop (arg1))
- nonzero_arg = arg0;
- else
- return false;
- if (integer_pow2p (nonzero_arg))
- {
- shift = tree_log2 (nonzero_arg);
- if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg)))
- return false;
- }
- else if (integer_all_onesp (nonzero_arg))
- neg = true;
- else
+ if (!empty_block_p (middle_bb))
return false;
- if (!empty_block_p (middle_bb))
+ /* Special case A ? B : B as this will always simplify to B. */
+ if (operand_equal_for_phi_arg_p (arg0, arg1))
return false;
- /* At this point we know we have a GIMPLE_COND with two successors.
+ /* At this point we know we have a GIMPLE_COND with two successors.
One successor is BB, the other successor is an empty block which
falls through into BB.
- There is a single PHI node at the join point (BB) and its arguments
- are constants (0, 1) or (0, -1) or (0, (1 << shift)).
-
- So, given the condition COND, and the two PHI arguments, we can
- rewrite this PHI into non-branching code:
+ There is a single PHI node at the join point (BB).
- dest = (COND) or dest = COND' or dest = (COND) << shift
-
- We use the condition as-is if the argument associated with the
- true edge has the value one or the argument associated with the
- false edge as the value zero. Note that those conditions are not
- the same since only one of the outgoing edges from the GIMPLE_COND
- will directly reach BB and thus be associated with an argument. */
+ So, given the condition COND, and the two PHI arguments, match and simplify
+ can happen on (COND) ? arg0 : arg1. */
stmt = last_stmt (cond_bb);
- result = PHI_RESULT (phi);
/* To handle special cases like floating point comparison, it is easier and
less error-prone to build a tree and gimplify it on the fly though it is
- less efficient. */
- cond = fold_build2_loc (gimple_location (stmt),
- gimple_cond_code (stmt), boolean_type_node,
- gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));
+ less efficient.
+ Don't use fold_build2 here as that might create (bool)a instead of just
+ "a != 0". */
+ cond = build2_loc (gimple_location (stmt),
+ gimple_cond_code (stmt), boolean_type_node,
+ gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));
/* We need to know which is the true edge and which is the false
edge so that we know when to invert the condition below. */
extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
- if ((e0 == true_edge && integer_zerop (arg0))
- || (e0 == false_edge && !integer_zerop (arg0))
- || (e1 == true_edge && integer_zerop (arg1))
- || (e1 == false_edge && !integer_zerop (arg1)))
- cond = fold_build1_loc (gimple_location (stmt),
- TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);
-
- if (neg)
- {
- cond = fold_convert_loc (gimple_location (stmt),
- TREE_TYPE (result), cond);
- cond = fold_build1_loc (gimple_location (stmt),
- NEGATE_EXPR, TREE_TYPE (cond), cond);
- }
- else if (shift)
- {
- cond = fold_convert_loc (gimple_location (stmt),
- TREE_TYPE (result), cond);
- cond = fold_build2_loc (gimple_location (stmt),
- LSHIFT_EXPR, TREE_TYPE (cond), cond,
- build_int_cst (integer_type_node, shift));
- }
+ if (e1 == true_edge || e0 == false_edge)
+ std::swap (arg0, arg1);
- /* Insert our new statements at the end of conditional block before the
- COND_STMT. */
- gsi = gsi_for_stmt (stmt);
- new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true,
- GSI_SAME_STMT);
+ tree type = TREE_TYPE (gimple_phi_result (phi));
+ result = gimple_simplify (COND_EXPR, type,
+ cond,
+ arg0, arg1,
+ &seq, NULL);
+ if (!result)
+ return false;
- if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var)))
- {
- location_t locus_0, locus_1;
+ gsi = gsi_last_bb (cond_bb);
- new_var2 = make_ssa_name (TREE_TYPE (result));
- new_stmt = gimple_build_assign (new_var2, CONVERT_EXPR, new_var);
- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
- new_var = new_var2;
-
- /* Set the locus to the first argument, unless is doesn't have one. */
- locus_0 = gimple_phi_arg_location (phi, 0);
- locus_1 = gimple_phi_arg_location (phi, 1);
- if (locus_0 == UNKNOWN_LOCATION)
- locus_0 = locus_1;
- gimple_set_location (new_stmt, locus_0);
- }
+ if (seq)
+ gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
- replace_phi_edge_with_variable (cond_bb, e1, phi, new_var);
+ replace_phi_edge_with_variable (cond_bb, e1, phi, result);
/* Note that we optimized this PHI. */
return true;
@@ -3905,7 +3839,7 @@ gate_hoist_loads (void)
Conditional Replacement
-----------------------
- This transformation, implemented in conditional_replacement,
+ This transformation, implemented in match_simplify_replacement,
replaces
bb0:
--
2.27.0.windows.1

View File

@ -0,0 +1,174 @@
From fabbe6ccc798d3cb097c6371b4d53cd6dfde6c7c Mon Sep 17 00:00:00 2001
From: Andrew Pinski <apinski@marvell.com>
Date: Fri, 11 Jun 2021 13:21:34 -0700
Subject: [PATCH 14/35] [Backport] Allow match-and-simplified phiopt to run in
early phiopt
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=cd48e550d1dc58307ab1c0ab490745673f748ccc
To move a few things more to match-and-simplify from phiopt,
we need to allow match_simplify_replacement to run in early
phiopt. To do this we add a replacement for gimple_simplify
that is explicitly for phiopt.
OK? Bootstrapped and tested on x86_64-linux-gnu with no
regressions.
gcc/ChangeLog:
* tree-ssa-phiopt.c (match_simplify_replacement):
Add early_p argument. Call gimple_simplify_phiopt
instead of gimple_simplify.
(tree_ssa_phiopt_worker): Update call to
match_simplify_replacement and allow unconditionally.
(phiopt_early_allow): New function.
(gimple_simplify_phiopt): New function.
---
gcc/tree-ssa-phiopt.c | 89 ++++++++++++++++++++++++++++++++++---------
1 file changed, 70 insertions(+), 19 deletions(-)
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 9fa6363b6..92aeb8415 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -48,12 +48,13 @@ along with GCC; see the file COPYING3. If not see
#include "tree-eh.h"
#include "gimple-fold.h"
#include "internal-fn.h"
+#include "gimple-match.h"
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
tree, tree);
static bool match_simplify_replacement (basic_block, basic_block,
- edge, edge, gphi *, tree, tree);
+ edge, edge, gphi *, tree, tree, bool);
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree,
gimple *);
static int value_replacement (basic_block, basic_block,
@@ -348,9 +349,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
/* Do the replacement of conditional if it can be done. */
if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
cfgchanged = true;
- else if (!early_p
- && match_simplify_replacement (bb, bb1, e1, e2, phi,
- arg0, arg1))
+ else if (match_simplify_replacement (bb, bb1, e1, e2, phi,
+ arg0, arg1,
+ early_p))
cfgchanged = true;
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
@@ -763,6 +764,67 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
return true;
}
+/* Return TRUE if CODE should be allowed during early phiopt.
+ Currently this is to allow MIN/MAX and ABS/NEGATE. */
+static bool
+phiopt_early_allow (enum tree_code code)
+{
+ switch (code)
+ {
+ case MIN_EXPR:
+ case MAX_EXPR:
+ case ABS_EXPR:
+ case ABSU_EXPR:
+ case NEGATE_EXPR:
+ case SSA_NAME:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/* gimple_simplify_phiopt is like gimple_simplify but designed for PHIOPT.
+ Return NULL if nothing can be simplified or the resulting simplified value
+ with parts pushed if EARLY_P was true. Also rejects non allowed tree code
+ if EARLY_P is set.
+ Takes the comparison from COMP_STMT and two args, ARG0 and ARG1 and tries
+ to simplify CMP ? ARG0 : ARG1. */
+static tree
+gimple_simplify_phiopt (bool early_p, tree type, gimple *comp_stmt,
+ tree arg0, tree arg1,
+ gimple_seq *seq)
+{
+ tree result;
+ enum tree_code comp_code = gimple_cond_code (comp_stmt);
+ location_t loc = gimple_location (comp_stmt);
+ tree cmp0 = gimple_cond_lhs (comp_stmt);
+ tree cmp1 = gimple_cond_rhs (comp_stmt);
+ /* To handle special cases like floating point comparison, it is easier and
+ less error-prone to build a tree and gimplify it on the fly though it is
+ less efficient.
+ Don't use fold_build2 here as that might create (bool)a instead of just
+ "a != 0". */
+ tree cond = build2_loc (loc, comp_code, boolean_type_node,
+ cmp0, cmp1);
+ gimple_match_op op (gimple_match_cond::UNCOND,
+ COND_EXPR, type, cond, arg0, arg1);
+
+ if (op.resimplify (early_p ? NULL : seq, follow_all_ssa_edges))
+ {
+ /* Early we want only to allow some generated tree codes. */
+ if (!early_p
+ || op.code.is_tree_code ()
+ || phiopt_early_allow ((tree_code)op.code))
+ {
+ result = maybe_push_res_to_seq (&op, seq);
+ if (result)
+ return result;
+ }
+ }
+
+ return NULL;
+}
+
/* The function match_simplify_replacement does the main work of doing the
replacement using match and simplify. Return true if the replacement is done.
Otherwise return false.
@@ -772,10 +834,9 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
static bool
match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
edge e0, edge e1, gphi *phi,
- tree arg0, tree arg1)
+ tree arg0, tree arg1, bool early_p)
{
gimple *stmt;
- tree cond;
gimple_stmt_iterator gsi;
edge true_edge, false_edge;
gimple_seq seq = NULL;
@@ -799,15 +860,6 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
stmt = last_stmt (cond_bb);
- /* To handle special cases like floating point comparison, it is easier and
- less error-prone to build a tree and gimplify it on the fly though it is
- less efficient.
- Don't use fold_build2 here as that might create (bool)a instead of just
- "a != 0". */
- cond = build2_loc (gimple_location (stmt),
- gimple_cond_code (stmt), boolean_type_node,
- gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));
-
/* We need to know which is the true edge and which is the false
edge so that we know when to invert the condition below. */
extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
@@ -815,10 +867,9 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
std::swap (arg0, arg1);
tree type = TREE_TYPE (gimple_phi_result (phi));
- result = gimple_simplify (COND_EXPR, type,
- cond,
- arg0, arg1,
- &seq, NULL);
+ result = gimple_simplify_phiopt (early_p, type, stmt,
+ arg0, arg1,
+ &seq);
if (!result)
return false;
--
2.27.0.windows.1

View File

@ -0,0 +1,259 @@
From d212d216be0752370dbe7bc63bd75b3a9249e0b5 Mon Sep 17 00:00:00 2001
From: Andrew Pinski <apinski@marvell.com>
Date: Tue, 1 Jun 2021 06:48:05 +0000
Subject: [PATCH 15/35] [Backport] Improve match_simplify_replacement in
phi-opt
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=c4574d23cb07340918793a5a98ae7bb2988b3791
This improves match_simplify_replace in phi-opt to handle the
case where there is one cheap (non-call) preparation statement in the
middle basic block similar to xor_replacement and others.
This allows to remove xor_replacement which it does too.
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
Thanks,
Andrew Pinski
Changes since v1:
v3 - Just minor changes to using gimple_assign_lhs
instead of gimple_lhs and fixing a comment.
v2 - change the check on the preparation statement to
allow only assignments and no calls and only assignments
that feed into the phi.
gcc/ChangeLog:
PR tree-optimization/25290
* tree-ssa-phiopt.c (xor_replacement): Delete.
(tree_ssa_phiopt_worker): Delete use of xor_replacement.
(match_simplify_replacement): Allow one cheap preparation
statement that can be moved to before the if.
gcc/testsuite/ChangeLog:
* gcc.dg/tree-ssa/pr96928-1.c: Fix testcase for now that ~
happens on the outside of the bit_xor.
---
gcc/tree-ssa-phiopt.c | 164 ++++++++++++++----------------------------
1 file changed, 52 insertions(+), 112 deletions(-)
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 92aeb8415..51a2d3684 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
+#include "tree-ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "gimple-pretty-print.h"
@@ -63,8 +64,6 @@ static bool minmax_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
static bool abs_replacement (basic_block, basic_block,
edge, edge, gimple *, tree, tree);
-static bool xor_replacement (basic_block, basic_block,
- edge, edge, gimple *, tree, tree);
static bool spaceship_replacement (basic_block, basic_block,
edge, edge, gphi *, tree, tree);
static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
@@ -355,9 +354,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
cfgchanged = true;
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
- else if (!early_p
- && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
- cfgchanged = true;
else if (!early_p
&& cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
e2, phi, arg0,
@@ -841,14 +837,51 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
edge true_edge, false_edge;
gimple_seq seq = NULL;
tree result;
-
- if (!empty_block_p (middle_bb))
- return false;
+ gimple *stmt_to_move = NULL;
/* Special case A ? B : B as this will always simplify to B. */
if (operand_equal_for_phi_arg_p (arg0, arg1))
return false;
+ /* If the basic block only has a cheap preparation statement,
+ allow it and move it once the transformation is done. */
+ if (!empty_block_p (middle_bb))
+ {
+ stmt_to_move = last_and_only_stmt (middle_bb);
+ if (!stmt_to_move)
+ return false;
+
+ if (gimple_vuse (stmt_to_move))
+ return false;
+
+ if (gimple_could_trap_p (stmt_to_move)
+ || gimple_has_side_effects (stmt_to_move))
+ return false;
+
+ if (gimple_uses_undefined_value_p (stmt_to_move))
+ return false;
+
+ /* Allow assignments and not no calls.
+ As const calls don't match any of the above, yet they could
+ still have some side-effects - they could contain
+ gimple_could_trap_p statements, like floating point
+ exceptions or integer division by zero. See PR70586.
+ FIXME: perhaps gimple_has_side_effects or gimple_could_trap_p
+ should handle this. */
+ if (!is_gimple_assign (stmt_to_move))
+ return false;
+
+ tree lhs = gimple_assign_lhs (stmt_to_move);
+ gimple *use_stmt;
+ use_operand_p use_p;
+
+ /* Allow only a statement which feeds into the phi. */
+ if (!lhs || TREE_CODE (lhs) != SSA_NAME
+ || !single_imm_use (lhs, &use_p, &use_stmt)
+ || use_stmt != phi)
+ return false;
+ }
+
/* At this point we know we have a GIMPLE_COND with two successors.
One successor is BB, the other successor is an empty block which
falls through into BB.
@@ -874,7 +907,17 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
return false;
gsi = gsi_last_bb (cond_bb);
-
+ if (stmt_to_move)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "statement un-sinked:\n");
+ print_gimple_stmt (dump_file, stmt_to_move, 0,
+ TDF_VOPS|TDF_MEMSYMS);
+ }
+ gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt_to_move);
+ gsi_move_before (&gsi1, &gsi);
+ }
if (seq)
gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
@@ -2474,109 +2517,6 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
return true;
}
-/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */
-
-static bool
-xor_replacement (basic_block cond_bb, basic_block middle_bb,
- edge e0 ATTRIBUTE_UNUSED, edge e1,
- gimple *phi, tree arg0, tree arg1)
-{
- if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
- return false;
-
- /* OTHER_BLOCK must have only one executable statement which must have the
- form arg0 = ~arg1 or arg1 = ~arg0. */
-
- gimple *assign = last_and_only_stmt (middle_bb);
- /* If we did not find the proper one's complement assignment, then we cannot
- optimize. */
- if (assign == NULL)
- return false;
-
- /* If we got here, then we have found the only executable statement
- in OTHER_BLOCK. If it is anything other than arg = ~arg1 or
- arg1 = ~arg0, then we cannot optimize. */
- if (!is_gimple_assign (assign))
- return false;
-
- if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR)
- return false;
-
- tree lhs = gimple_assign_lhs (assign);
- tree rhs = gimple_assign_rhs1 (assign);
-
- /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */
- if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0))
- return false;
-
- gimple *cond = last_stmt (cond_bb);
- tree result = PHI_RESULT (phi);
-
- /* Only relationals comparing arg[01] against zero are interesting. */
- enum tree_code cond_code = gimple_cond_code (cond);
- if (cond_code != LT_EXPR && cond_code != GE_EXPR)
- return false;
-
- /* Make sure the conditional is x OP 0. */
- tree clhs = gimple_cond_lhs (cond);
- if (TREE_CODE (clhs) != SSA_NAME
- || !INTEGRAL_TYPE_P (TREE_TYPE (clhs))
- || TYPE_UNSIGNED (TREE_TYPE (clhs))
- || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1))
- || !integer_zerop (gimple_cond_rhs (cond)))
- return false;
-
- /* We need to know which is the true edge and which is the false
- edge so that we know if have xor or inverted xor. */
- edge true_edge, false_edge;
- extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
-
- /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
- will need to invert the result. Similarly for LT_EXPR if
- the false edge goes to OTHER_BLOCK. */
- edge e;
- if (cond_code == GE_EXPR)
- e = true_edge;
- else
- e = false_edge;
-
- bool invert = e->dest == middle_bb;
-
- result = duplicate_ssa_name (result, NULL);
-
- gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
-
- int prec = TYPE_PRECISION (TREE_TYPE (clhs));
- gimple *new_stmt
- = gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs,
- build_int_cst (integer_type_node, prec - 1));
- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
-
- if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (clhs)))
- {
- new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
- NOP_EXPR, gimple_assign_lhs (new_stmt));
- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
- }
- lhs = gimple_assign_lhs (new_stmt);
-
- if (invert)
- {
- new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
- BIT_NOT_EXPR, rhs);
- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
- rhs = gimple_assign_lhs (new_stmt);
- }
-
- new_stmt = gimple_build_assign (result, BIT_XOR_EXPR, lhs, rhs);
- gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
-
- replace_phi_edge_with_variable (cond_bb, e1, phi, result);
-
- /* Note that we optimized this PHI. */
- return true;
-}
-
/* Auxiliary functions to determine the set of memory accesses which
can't trap because they are preceded by accesses to the same memory
portion. We do that for MEM_REFs, so we only need to track
--
2.27.0.windows.1

View File

@ -0,0 +1,103 @@
From 0d55d24aa4e47c40f74e0281d023089cfaafcf74 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Thu, 6 May 2021 14:05:06 +0200
Subject: [PATCH 16/35] [Backport] phiopt: Use gphi *phi instead of gimple *phi
some more
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=cfd65e8d5299a7cf7d2ecd92b0e24ea4cfb697d9
Various functions in phiopt are also called with a gphi * but use
gimple * argument for it.
2021-05-06 Jakub Jelinek <jakub@redhat.com>
* tree-ssa-phiopt.c (value_replacement, minmax_replacement,
abs_replacement, xor_replacement,
cond_removal_in_popcount_clz_ctz_pattern,
replace_phi_edge_with_variable): Change type of phi argument from
gimple * to gphi *.
---
gcc/tree-ssa-phiopt.c | 22 ++++++++++------------
1 file changed, 10 insertions(+), 12 deletions(-)
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 51a2d3684..045a7b1b8 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -59,21 +59,21 @@ static bool match_simplify_replacement (basic_block, basic_block,
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree,
gimple *);
static int value_replacement (basic_block, basic_block,
- edge, edge, gimple *, tree, tree);
+ edge, edge, gphi *, tree, tree);
static bool minmax_replacement (basic_block, basic_block,
- edge, edge, gimple *, tree, tree);
+ edge, edge, gphi *, tree, tree);
static bool abs_replacement (basic_block, basic_block,
- edge, edge, gimple *, tree, tree);
+ edge, edge, gphi *, tree, tree);
static bool spaceship_replacement (basic_block, basic_block,
edge, edge, gphi *, tree, tree);
static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
- edge, edge, gimple *,
+ edge, edge, gphi *,
tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
hash_set<tree> *);
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
static hash_set<tree> * get_non_trapping ();
-static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree);
+static void replace_phi_edge_with_variable (basic_block, edge, gphi *, tree);
static void hoist_adjacent_loads (basic_block, basic_block,
basic_block, basic_block);
static bool do_phiopt_pattern (basic_block, basic_block, basic_block);
@@ -389,7 +389,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
static void
replace_phi_edge_with_variable (basic_block cond_block,
- edge e, gimple *phi, tree new_tree)
+ edge e, gphi *phi, tree new_tree)
{
basic_block bb = gimple_bb (phi);
basic_block block_to_remove;
@@ -1129,8 +1129,7 @@ absorbing_element_p (tree_code code, tree arg, bool right, tree rval)
static int
value_replacement (basic_block cond_bb, basic_block middle_bb,
- edge e0, edge e1, gimple *phi,
- tree arg0, tree arg1)
+ edge e0, edge e1, gphi *phi, tree arg0, tree arg1)
{
gimple_stmt_iterator gsi;
gimple *cond;
@@ -1438,8 +1437,7 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
static bool
minmax_replacement (basic_block cond_bb, basic_block middle_bb,
- edge e0, edge e1, gimple *phi,
- tree arg0, tree arg1)
+ edge e0, edge e1, gphi *phi, tree arg0, tree arg1)
{
tree result, type, rhs;
gcond *cond;
@@ -2240,7 +2238,7 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb,
static bool
cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
basic_block middle_bb,
- edge e1, edge e2, gimple *phi,
+ edge e1, edge e2, gphi *phi,
tree arg0, tree arg1)
{
gimple *cond;
@@ -2398,7 +2396,7 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
static bool
abs_replacement (basic_block cond_bb, basic_block middle_bb,
edge e0 ATTRIBUTE_UNUSED, edge e1,
- gimple *phi, tree arg0, tree arg1)
+ gphi *phi, tree arg0, tree arg1)
{
tree result;
gassign *new_stmt;
--
2.27.0.windows.1

View File

@ -0,0 +1,212 @@
From 33dc778a34d7b93978efe922bb1b4583d8e6c4bb Mon Sep 17 00:00:00 2001
From: Roger Sayle <roger@nextmovesoftware.com>
Date: Mon, 2 Aug 2021 13:27:53 +0100
Subject: [PATCH 17/35] [Backport] Optimize x ? bswap(x) : 0 in tree-ssa-phiopt
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=f9fcf754825a1e01033336f84c18690aaa971a6f
Many thanks again to Jakub Jelinek for a speedy fix for PR 101642.
Interestingly, that test case "bswap16(x) ? : x" also reveals a
missed optimization opportunity. The resulting "x ? bswap(x) : 0"
can be further simplified to just bswap(x).
Conveniently, tree-ssa-phiopt.c already recognizes/optimizes the
related "x ? popcount(x) : 0", so this patch simply makes that
transformation make general, additionally handling bswap, parity,
ffs and clrsb. All of the required infrastructure is already
present thanks to Jakub previously adding support for clz/ctz.
To reflect this generalization, the name of the function is changed
from cond_removal_in_popcount_clz_ctz_pattern to the hopefully
equally descriptive cond_removal_in_builtin_zero_pattern.
2021-08-02 Roger Sayle <roger@nextmovesoftware.com>
gcc/ChangeLog
* tree-ssa-phiopt.c (cond_removal_in_builtin_zero_pattern):
Renamed from cond_removal_in_popcount_clz_ctz_pattern.
Add support for BSWAP, FFS, PARITY and CLRSB builtins.
(tree_ssa_phiop_worker): Update call to function above.
gcc/testsuite/ChangeLog
* gcc.dg/tree-ssa/phi-opt-25.c: New test case.
---
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c | 83 ++++++++++++++++++++++
gcc/tree-ssa-phiopt.c | 37 +++++++---
2 files changed, 109 insertions(+), 11 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
new file mode 100644
index 000000000..c52c92e1d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
@@ -0,0 +1,83 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned short test_bswap16(unsigned short x)
+{
+ return x ? __builtin_bswap16(x) : 0;
+}
+
+unsigned int test_bswap32(unsigned int x)
+{
+ return x ? __builtin_bswap32(x) : 0;
+}
+
+unsigned long long test_bswap64(unsigned long long x)
+{
+ return x ? __builtin_bswap64(x) : 0;
+}
+
+int test_clrsb(int x)
+{
+ return x ? __builtin_clrsb(x) : (__SIZEOF_INT__*8-1);
+}
+
+int test_clrsbl(long x)
+{
+ return x ? __builtin_clrsbl(x) : (__SIZEOF_LONG__*8-1);
+}
+
+int test_clrsbll(long long x)
+{
+ return x ? __builtin_clrsbll(x) : (__SIZEOF_LONG_LONG__*8-1);
+}
+
+#if 0
+/* BUILT_IN_FFS is transformed by match.pd */
+int test_ffs(unsigned int x)
+{
+ return x ? __builtin_ffs(x) : 0;
+}
+
+int test_ffsl(unsigned long x)
+{
+ return x ? __builtin_ffsl(x) : 0;
+}
+
+int test_ffsll(unsigned long long x)
+{
+ return x ? __builtin_ffsll(x) : 0;
+}
+#endif
+
+int test_parity(int x)
+{
+ return x ? __builtin_parity(x) : 0;
+}
+
+int test_parityl(long x)
+{
+ return x ? __builtin_parityl(x) : 0;
+}
+
+int test_parityll(long long x)
+{
+ return x ? __builtin_parityll(x) : 0;
+}
+
+int test_popcount(int x)
+{
+ return x ? __builtin_popcount(x) : 0;
+}
+
+int test_popcountl(long x)
+{
+ return x ? __builtin_popcountl(x) : 0;
+}
+
+int test_popcountll(long long x)
+{
+ return x ? __builtin_popcountll(x) : 0;
+}
+
+/* { dg-final { scan-tree-dump-not "goto" "optimized" } } */
+
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 045a7b1b8..21ac08145 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -66,9 +66,9 @@ static bool abs_replacement (basic_block, basic_block,
edge, edge, gphi *, tree, tree);
static bool spaceship_replacement (basic_block, basic_block,
edge, edge, gphi *, tree, tree);
-static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
- edge, edge, gphi *,
- tree, tree);
+static bool cond_removal_in_builtin_zero_pattern (basic_block, basic_block,
+ edge, edge, gphi *,
+ tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
hash_set<tree> *);
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
@@ -355,9 +355,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
else if (!early_p
- && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
- e2, phi, arg0,
- arg1))
+ && cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2,
+ phi, arg0, arg1))
cfgchanged = true;
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
@@ -2204,7 +2203,8 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb,
return true;
}
-/* Convert
+/* Optimize x ? __builtin_fun (x) : C, where C is __builtin_fun (0).
+ Convert
<bb 2>
if (b_4(D) != 0)
@@ -2236,10 +2236,10 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb,
instead of 0 above it uses the value from that macro. */
static bool
-cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
- basic_block middle_bb,
- edge e1, edge e2, gphi *phi,
- tree arg0, tree arg1)
+cond_removal_in_builtin_zero_pattern (basic_block cond_bb,
+ basic_block middle_bb,
+ edge e1, edge e2, gphi *phi,
+ tree arg0, tree arg1)
{
gimple *cond;
gimple_stmt_iterator gsi, gsi_from;
@@ -2287,6 +2287,12 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
int val = 0;
switch (cfn)
{
+ case CFN_BUILT_IN_BSWAP16:
+ case CFN_BUILT_IN_BSWAP32:
+ case CFN_BUILT_IN_BSWAP64:
+ case CFN_BUILT_IN_BSWAP128:
+ CASE_CFN_FFS:
+ CASE_CFN_PARITY:
CASE_CFN_POPCOUNT:
break;
CASE_CFN_CLZ:
@@ -2315,6 +2321,15 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
}
}
return false;
+ case BUILT_IN_CLRSB:
+ val = TYPE_PRECISION (integer_type_node) - 1;
+ break;
+ case BUILT_IN_CLRSBL:
+ val = TYPE_PRECISION (long_integer_type_node) - 1;
+ break;
+ case BUILT_IN_CLRSBLL:
+ val = TYPE_PRECISION (long_long_integer_type_node) - 1;
+ break;
default:
return false;
}
--
2.27.0.windows.1

View File

@ -0,0 +1,251 @@
From 77398954ce517aa011b7a254c7aa2858521b2093 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Mon, 15 Nov 2021 15:19:36 +0100
Subject: [PATCH 18/35] [Backport] tree-optimization/102880 - make PHI-OPT
recognize more CFGs
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=f98f373dd822b35c52356b753d528924e9f89678
This allows extra edges into the middle BB for the PHI-OPT
transforms using replace_phi_edge_with_variable that do not
end up moving stmts from that middle BB. This avoids regressing
gcc.dg/tree-ssa/ssa-hoist-4.c with the actual fix for PR102880
where CFG cleanup has the choice to remove two forwarders and
picks "the wrong" leading to
if (a > b) /
/\ /
/ <BB>
/ |
# PHI <a, b>
rather than
if (a > b) |
/\ |
<BB> \ |
/ \ |
# PHI <a, b, b>
but it's relatively straight-forward to support extra edges
into the middle-BB in paths ending in replace_phi_edge_with_variable
and that do not require moving stmts. That's because we really
only want to remove the edge from the condition to the middle BB.
Of course actually doing that means updating dominators in non-trival
ways which is why I kept the original code for the single edge
case and simply defer to CFG cleanup by adjusting the condition for
the complicated case.
The testcase needs to be a GIMPLE one since it's quite unreliable
to produce the desired CFG.
2021-11-15 Richard Biener <rguenther@suse.de>
PR tree-optimization/102880
* tree-ssa-phiopt.c (tree_ssa_phiopt_worker): Push
single_pred (bb1) condition to places that really need it.
(match_simplify_replacement): Likewise.
(value_replacement): Likewise.
(replace_phi_edge_with_variable): Deal with extra edges
into the middle BB.
* gcc.dg/tree-ssa/phi-opt-26.c: New testcase.
---
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c | 31 +++++++++
gcc/tree-ssa-phiopt.c | 73 +++++++++++++---------
2 files changed, 75 insertions(+), 29 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
new file mode 100644
index 000000000..21aa66e38
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fgimple -fdump-tree-phiopt1" } */
+
+int __GIMPLE (ssa,startwith("phiopt"))
+foo (int a, int b, int flag)
+{
+ int res;
+
+ __BB(2):
+ if (flag_2(D) != 0)
+ goto __BB6;
+ else
+ goto __BB4;
+
+ __BB(4):
+ if (a_3(D) > b_4(D))
+ goto __BB7;
+ else
+ goto __BB6;
+
+ __BB(6):
+ goto __BB7;
+
+ __BB(7):
+ res_1 = __PHI (__BB4: a_3(D), __BB6: b_4(D));
+ return res_1;
+}
+
+/* We should be able to detect MAX despite the extra edge into
+ the middle BB. */
+/* { dg-final { scan-tree-dump "MAX" "phiopt1" } } */
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 21ac08145..079d29e74 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -219,7 +219,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
/* If either bb1's succ or bb2 or bb2's succ is non NULL. */
if (EDGE_COUNT (bb1->succs) == 0
- || bb2 == NULL
|| EDGE_COUNT (bb2->succs) == 0)
continue;
@@ -279,14 +278,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|| (e1->flags & EDGE_FALLTHRU) == 0)
continue;
- /* Also make sure that bb1 only have one predecessor and that it
- is bb. */
- if (!single_pred_p (bb1)
- || single_pred (bb1) != bb)
- continue;
-
if (do_store_elim)
{
+ /* Also make sure that bb1 only have one predecessor and that it
+ is bb. */
+ if (!single_pred_p (bb1)
+ || single_pred (bb1) != bb)
+ continue;
+
/* bb1 is the middle block, bb2 the join block, bb the split block,
e1 the fallthrough edge from bb1 to bb2. We can't do the
optimization if the join block has more than two predecessors. */
@@ -331,10 +330,11 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
node. */
gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE);
- gphi *newphi = factor_out_conditional_conversion (e1, e2, phi,
- arg0, arg1,
- cond_stmt);
- if (newphi != NULL)
+ gphi *newphi;
+ if (single_pred_p (bb1)
+ && (newphi = factor_out_conditional_conversion (e1, e2, phi,
+ arg0, arg1,
+ cond_stmt)))
{
phi = newphi;
/* factor_out_conditional_conversion may create a new PHI in
@@ -355,12 +355,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
else if (!early_p
+ && single_pred_p (bb1)
&& cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2,
phi, arg0, arg1))
cfgchanged = true;
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
- else if (spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+ else if (single_pred_p (bb1)
+ && spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
}
}
@@ -391,35 +393,41 @@ replace_phi_edge_with_variable (basic_block cond_block,
edge e, gphi *phi, tree new_tree)
{
basic_block bb = gimple_bb (phi);
- basic_block block_to_remove;
gimple_stmt_iterator gsi;
/* Change the PHI argument to new. */
SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);
/* Remove the empty basic block. */
+ edge edge_to_remove;
if (EDGE_SUCC (cond_block, 0)->dest == bb)
+ edge_to_remove = EDGE_SUCC (cond_block, 1);
+ else
+ edge_to_remove = EDGE_SUCC (cond_block, 0);
+ if (EDGE_COUNT (edge_to_remove->dest->preds) == 1)
{
- EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
- EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
- EDGE_SUCC (cond_block, 0)->probability = profile_probability::always ();
-
- block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
+ e->flags |= EDGE_FALLTHRU;
+ e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
+ e->probability = profile_probability::always ();
+ delete_basic_block (edge_to_remove->dest);
+
+ /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
+ gsi = gsi_last_bb (cond_block);
+ gsi_remove (&gsi, true);
}
else
{
- EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
- EDGE_SUCC (cond_block, 1)->flags
- &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
- EDGE_SUCC (cond_block, 1)->probability = profile_probability::always ();
-
- block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
+ /* If there are other edges into the middle block make
+ CFG cleanup deal with the edge removal to avoid
+ updating dominators here in a non-trivial way. */
+ gcond *cond = as_a <gcond *> (last_stmt (cond_block));
+ if (edge_to_remove->flags & EDGE_TRUE_VALUE)
+ gimple_cond_make_false (cond);
+ else
+ gimple_cond_make_true (cond);
}
- delete_basic_block (block_to_remove);
- /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
- gsi = gsi_last_bb (cond_block);
- gsi_remove (&gsi, true);
+ statistics_counter_event (cfun, "Replace PHI with variable", 1);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
@@ -846,6 +854,9 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
allow it and move it once the transformation is done. */
if (!empty_block_p (middle_bb))
{
+ if (!single_pred_p (middle_bb))
+ return false;
+
stmt_to_move = last_and_only_stmt (middle_bb);
if (!stmt_to_move)
return false;
@@ -1225,6 +1236,11 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
}
else
{
+ if (!single_pred_p (middle_bb))
+ return 0;
+ statistics_counter_event (cfun, "Replace PHI with "
+ "variable/value_replacement", 1);
+
/* Replace the PHI arguments with arg. */
SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
@@ -1239,7 +1255,6 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
}
return 1;
}
-
}
/* Now optimize (x != 0) ? x + y : y to just x + y. */
--
2.27.0.windows.1

View File

@ -0,0 +1,250 @@
From a2f5e6f38fe7b5b32a252643b00dd2d7ab0e3fac Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 May 2020 09:01:10 +0100
Subject: [PATCH 19/35] [Backport] tree: Add vector_element_bits(_tree)
[PR94980 1/3]
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=d17a896da1e898928d337596d029f0ece0039d55
A lot of code that wants to know the number of bits in a vector
element gets that information from the element's TYPE_SIZE,
which is always equal to TYPE_SIZE_UNIT * BITS_PER_UNIT.
This doesn't work for SVE and AVX512-style packed boolean vectors,
where several elements can occupy a single byte.
This patch introduces a new pair of helpers for getting the true
(possibly sub-byte) size. I made a token attempt to convert obvious
element size calculations, but I'm sure I missed some.
2020-05-12 Richard Sandiford <richard.sandiford@arm.com>
gcc/
PR tree-optimization/94980
* tree.h (vector_element_bits, vector_element_bits_tree): Declare.
* tree.c (vector_element_bits, vector_element_bits_tree): New.
* match.pd: Use the new functions instead of determining the
vector element size directly from TYPE_SIZE(_UNIT).
* tree-vect-data-refs.c (vect_gather_scatter_fn_p): Likewise.
* tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Likewise.
* tree-vect-stmts.c (vect_is_simple_cond): Likewise.
* tree-vect-generic.c (expand_vector_piecewise): Likewise.
(expand_vector_conversion): Likewise.
(expand_vector_addition): Likewise for a TYPE_SIZE_UNIT used as
a divisor. Convert the dividend to bits to compensate.
* tree-vect-loop.c (vectorizable_live_operation): Call
vector_element_bits instead of open-coding it.
---
gcc/ChangeLog | 17 +++++++++++++++++
gcc/match.pd | 2 +-
gcc/tree-vect-data-refs.c | 2 +-
gcc/tree-vect-generic.c | 19 +++++++------------
gcc/tree-vect-loop.c | 4 +---
gcc/tree-vect-patterns.c | 3 +--
gcc/tree-vect-stmts.c | 3 +--
gcc/tree.c | 24 ++++++++++++++++++++++++
gcc/tree.h | 2 ++
9 files changed, 55 insertions(+), 21 deletions(-)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 3b1384e70..07aea9b86 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,20 @@
+2020-05-12 Richard Sandiford <richard.sandiford@arm.com>
+
+ PR tree-optimization/94980
+ * tree.h (vector_element_bits, vector_element_bits_tree): Declare.
+ * tree.c (vector_element_bits, vector_element_bits_tree): New.
+ * match.pd: Use the new functions instead of determining the
+ vector element size directly from TYPE_SIZE(_UNIT).
+ * tree-vect-data-refs.c (vect_gather_scatter_fn_p): Likewise.
+ * tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Likewise.
+ * tree-vect-stmts.c (vect_is_simple_cond): Likewise.
+ * tree-vect-generic.c (expand_vector_piecewise): Likewise.
+ (expand_vector_conversion): Likewise.
+ (expand_vector_addition): Likewise for a TYPE_SIZE_UNIT used as
+ a divisor. Convert the dividend to bits to compensate.
+ * tree-vect-loop.c (vectorizable_live_operation): Call
+ vector_element_bits instead of open-coding it.
+
2021-04-08 Release Manager
* GCC 10.3.0 released.
diff --git a/gcc/match.pd b/gcc/match.pd
index 5899eea95..79a0228d2 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6236,7 +6236,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
}
(if (ins)
(bit_insert { op0; } { ins; }
- { bitsize_int (at * tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)))); })
+ { bitsize_int (at * vector_element_bits (type)); })
(if (changed)
(vec_perm { op0; } { op1; } { op2; }))))))))))
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index d78b06455..e4466a4f3 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -3709,7 +3709,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
tree *offset_vectype_out)
{
unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
- unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
+ unsigned int element_bits = vector_element_bits (vectype);
if (element_bits != memory_bits)
/* For now the vector elements must be the same width as the
memory elements. */
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index c10492034..37c3956a4 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -276,8 +276,7 @@ expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
tree part_width = TYPE_SIZE (inner_type);
tree index = bitsize_int (0);
int nunits = nunits_for_known_piecewise_op (type);
- int delta = tree_to_uhwi (part_width)
- / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
+ int delta = tree_to_uhwi (part_width) / vector_element_bits (type);
int i;
location_t loc = gimple_location (gsi_stmt (*gsi));
@@ -357,8 +356,7 @@ expand_vector_addition (gimple_stmt_iterator *gsi,
elem_op_func f, elem_op_func f_parallel,
tree type, tree a, tree b, enum tree_code code)
{
- int parts_per_word = UNITS_PER_WORD
- / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
+ int parts_per_word = BITS_PER_WORD / vector_element_bits (type);
if (INTEGRAL_TYPE_P (TREE_TYPE (type))
&& parts_per_word >= 4
@@ -1733,19 +1731,17 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
optab optab1 = unknown_optab;
gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
- gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (ret_type))));
- gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (arg_type))));
if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
code = FIX_TRUNC_EXPR;
else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
code = FLOAT_EXPR;
- if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type)))
- < tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))))
+ unsigned int ret_elt_bits = vector_element_bits (ret_type);
+ unsigned int arg_elt_bits = vector_element_bits (arg_type);
+ if (ret_elt_bits < arg_elt_bits)
modifier = NARROW;
- else if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type)))
- > tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))))
+ else if (ret_elt_bits > arg_elt_bits)
modifier = WIDEN;
if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
@@ -1908,8 +1904,7 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
tree part_width = TYPE_SIZE (compute_type);
tree index = bitsize_int (0);
int nunits = nunits_for_known_piecewise_op (arg_type);
- int delta = tree_to_uhwi (part_width)
- / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type)));
+ int delta = tree_to_uhwi (part_width) / arg_elt_bits;
int i;
location_t loc = gimple_location (gsi_stmt (*gsi));
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 899b56087..7990e31de 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -8059,9 +8059,7 @@ vectorizable_live_operation (stmt_vec_info stmt_info,
: gimple_get_lhs (stmt);
lhs_type = TREE_TYPE (lhs);
- bitsize = (VECTOR_BOOLEAN_TYPE_P (vectype)
- ? bitsize_int (TYPE_PRECISION (TREE_TYPE (vectype)))
- : TYPE_SIZE (TREE_TYPE (vectype)));
+ bitsize = vector_element_bits_tree (vectype);
vec_bitsize = TYPE_SIZE (vectype);
/* Get the vectorized lhs of STMT and the lane to use (counted in bits). */
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 84d7ddb17..b076740ef 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -4406,8 +4406,7 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|| dt == vect_constant_def))
{
tree wide_scalar_type = build_nonstandard_integer_type
- (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
- TYPE_UNSIGNED (rhs1_type));
+ (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
tree vectype3 = get_vectype_for_scalar_type (vinfo,
wide_scalar_type);
if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 4636b7ba2..0bdf9a547 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -10717,8 +10717,7 @@ vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
&& tree_int_cst_lt (TYPE_SIZE (scalar_type),
TYPE_SIZE (TREE_TYPE (vectype))))
scalar_type = build_nonstandard_integer_type
- (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
- TYPE_UNSIGNED (scalar_type));
+ (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
*comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
slp_node);
}
diff --git a/gcc/tree.c b/gcc/tree.c
index 3e6647ae0..9a0cedf10 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -13892,6 +13892,30 @@ vector_type_mode (const_tree t)
return mode;
}
+/* Return the size in bits of each element of vector type TYPE. */
+
+unsigned int
+vector_element_bits (const_tree type)
+{
+ gcc_checking_assert (VECTOR_TYPE_P (type));
+ if (VECTOR_BOOLEAN_TYPE_P (type))
+ return vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (type)),
+ TYPE_VECTOR_SUBPARTS (type));
+ return tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
+}
+
+/* Calculate the size in bits of each element of vector type TYPE
+ and return the result as a tree of type bitsizetype. */
+
+tree
+vector_element_bits_tree (const_tree type)
+{
+ gcc_checking_assert (VECTOR_TYPE_P (type));
+ if (VECTOR_BOOLEAN_TYPE_P (type))
+ return bitsize_int (vector_element_bits (type));
+ return TYPE_SIZE (TREE_TYPE (type));
+}
+
/* Verify that basic properties of T match TV and thus T can be a variant of
TV. TV should be the more specified variant (i.e. the main variant). */
diff --git a/gcc/tree.h b/gcc/tree.h
index bddc6e528..c66207fa0 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -1996,6 +1996,8 @@ class auto_suppress_location_wrappers
extern machine_mode element_mode (const_tree);
extern machine_mode vector_type_mode (const_tree);
+extern unsigned int vector_element_bits (const_tree);
+extern tree vector_element_bits_tree (const_tree);
/* The "canonical" type for this type node, which is used by frontends to
compare the type for equality with another type. If two types are
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,379 @@
From 21d265af074726b166e08301a2f847c474fcb680 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 30 Nov 2021 09:52:24 +0000
Subject: [PATCH 21/35] [Backport] gimple-match: Add a gimple_extract_op
function
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=33973fa754de1f95d459bfca66c0d80deec36537
code_helper and gimple_match_op seem like generally useful ways
of summing up a gimple_assign or gimple_call (or gimple_cond).
This patch adds a gimple_extract_op function that can be used
for that.
gcc/
* gimple-match.h (code_helper): Add functions for querying whether
the code represents an internal_fn or a built_in_function.
Provide explicit conversion operators for both cases.
(gimple_extract_op): Declare.
* gimple-match-head.c (gimple_extract): New function, extracted from...
(gimple_simplify): ...here.
(gimple_extract_op): New function.
---
gcc/gimple-match-head.c | 219 ++++++++++++++++++++--------------------
gcc/gimple-match.h | 27 +++++
2 files changed, 135 insertions(+), 111 deletions(-)
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index 9b3e7298d..c1dea1734 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -884,12 +884,20 @@ try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op,
return true;
}
-/* The main STMT based simplification entry. It is used by the fold_stmt
- and the fold_stmt_to_constant APIs. */
+/* Common subroutine of gimple_extract_op and gimple_simplify. Try to
+ describe STMT in RES_OP, returning true on success. Before recording
+ an operand, call:
-bool
-gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
- tree (*valueize)(tree), tree (*top_valueize)(tree))
+ - VALUEIZE_CONDITION for a COND_EXPR condition
+ - VALUEIZE_OP for every other top-level operand
+
+ Both routines take a tree argument and returns a tree. */
+
+template<typename ValueizeOp, typename ValueizeCondition>
+inline bool
+gimple_extract (gimple *stmt, gimple_match_op *res_op,
+ ValueizeOp valueize_op,
+ ValueizeCondition valueize_condition)
{
switch (gimple_code (stmt))
{
@@ -905,101 +913,50 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|| code == VIEW_CONVERT_EXPR)
{
tree op0 = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
- bool valueized = false;
- op0 = do_valueize (op0, top_valueize, valueized);
- res_op->set_op (code, type, op0);
- return (gimple_resimplify1 (seq, res_op, valueize)
- || valueized);
+ res_op->set_op (code, type, valueize_op (op0));
+ return true;
}
else if (code == BIT_FIELD_REF)
{
tree rhs1 = gimple_assign_rhs1 (stmt);
- tree op0 = TREE_OPERAND (rhs1, 0);
- bool valueized = false;
- op0 = do_valueize (op0, top_valueize, valueized);
+ tree op0 = valueize_op (TREE_OPERAND (rhs1, 0));
res_op->set_op (code, type, op0,
TREE_OPERAND (rhs1, 1),
TREE_OPERAND (rhs1, 2),
REF_REVERSE_STORAGE_ORDER (rhs1));
- if (res_op->reverse)
- return valueized;
- return (gimple_resimplify3 (seq, res_op, valueize)
- || valueized);
+ return true;
}
- else if (code == SSA_NAME
- && top_valueize)
+ else if (code == SSA_NAME)
{
tree op0 = gimple_assign_rhs1 (stmt);
- tree valueized = top_valueize (op0);
- if (!valueized || op0 == valueized)
- return false;
- res_op->set_op (TREE_CODE (op0), type, valueized);
+ res_op->set_op (TREE_CODE (op0), type, valueize_op (op0));
return true;
}
break;
case GIMPLE_UNARY_RHS:
{
tree rhs1 = gimple_assign_rhs1 (stmt);
- bool valueized = false;
- rhs1 = do_valueize (rhs1, top_valueize, valueized);
- res_op->set_op (code, type, rhs1);
- return (gimple_resimplify1 (seq, res_op, valueize)
- || valueized);
+ res_op->set_op (code, type, valueize_op (rhs1));
+ return true;
}
case GIMPLE_BINARY_RHS:
{
- tree rhs1 = gimple_assign_rhs1 (stmt);
- tree rhs2 = gimple_assign_rhs2 (stmt);
- bool valueized = false;
- rhs1 = do_valueize (rhs1, top_valueize, valueized);
- rhs2 = do_valueize (rhs2, top_valueize, valueized);
+ tree rhs1 = valueize_op (gimple_assign_rhs1 (stmt));
+ tree rhs2 = valueize_op (gimple_assign_rhs2 (stmt));
res_op->set_op (code, type, rhs1, rhs2);
- return (gimple_resimplify2 (seq, res_op, valueize)
- || valueized);
+ return true;
}
case GIMPLE_TERNARY_RHS:
{
- bool valueized = false;
tree rhs1 = gimple_assign_rhs1 (stmt);
- /* If this is a [VEC_]COND_EXPR first try to simplify an
- embedded GENERIC condition. */
- if (code == COND_EXPR
- || code == VEC_COND_EXPR)
- {
- if (COMPARISON_CLASS_P (rhs1))
- {
- tree lhs = TREE_OPERAND (rhs1, 0);
- tree rhs = TREE_OPERAND (rhs1, 1);
- lhs = do_valueize (lhs, top_valueize, valueized);
- rhs = do_valueize (rhs, top_valueize, valueized);
- gimple_match_op res_op2 (res_op->cond, TREE_CODE (rhs1),
- TREE_TYPE (rhs1), lhs, rhs);
- if ((gimple_resimplify2 (seq, &res_op2, valueize)
- || valueized)
- && res_op2.code.is_tree_code ())
- {
- valueized = true;
- if (TREE_CODE_CLASS ((enum tree_code) res_op2.code)
- == tcc_comparison)
- rhs1 = build2 (res_op2.code, TREE_TYPE (rhs1),
- res_op2.ops[0], res_op2.ops[1]);
- else if (res_op2.code == SSA_NAME
- || res_op2.code == INTEGER_CST
- || res_op2.code == VECTOR_CST)
- rhs1 = res_op2.ops[0];
- else
- valueized = false;
- }
- }
- }
- tree rhs2 = gimple_assign_rhs2 (stmt);
- tree rhs3 = gimple_assign_rhs3 (stmt);
- rhs1 = do_valueize (rhs1, top_valueize, valueized);
- rhs2 = do_valueize (rhs2, top_valueize, valueized);
- rhs3 = do_valueize (rhs3, top_valueize, valueized);
+ if (code == COND_EXPR && COMPARISON_CLASS_P (rhs1))
+ rhs1 = valueize_condition (rhs1);
+ else
+ rhs1 = valueize_op (rhs1);
+ tree rhs2 = valueize_op (gimple_assign_rhs2 (stmt));
+ tree rhs3 = valueize_op (gimple_assign_rhs3 (stmt));
res_op->set_op (code, type, rhs1, rhs2, rhs3);
- return (gimple_resimplify3 (seq, res_op, valueize)
- || valueized);
+ return true;
}
default:
gcc_unreachable ();
@@ -1013,7 +970,6 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
&& gimple_call_num_args (stmt) >= 1
&& gimple_call_num_args (stmt) <= 5)
{
- bool valueized = false;
combined_fn cfn;
if (gimple_call_internal_p (stmt))
cfn = as_combined_fn (gimple_call_internal_fn (stmt));
@@ -1023,7 +979,7 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
if (!fn)
return false;
- fn = do_valueize (fn, top_valueize, valueized);
+ fn = valueize_op (fn);
if (TREE_CODE (fn) != ADDR_EXPR
|| TREE_CODE (TREE_OPERAND (fn, 0)) != FUNCTION_DECL)
return false;
@@ -1039,47 +995,17 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
unsigned int num_args = gimple_call_num_args (stmt);
res_op->set_op (cfn, TREE_TYPE (gimple_call_lhs (stmt)), num_args);
for (unsigned i = 0; i < num_args; ++i)
- {
- tree arg = gimple_call_arg (stmt, i);
- res_op->ops[i] = do_valueize (arg, top_valueize, valueized);
- }
- if (internal_fn_p (cfn)
- && try_conditional_simplification (as_internal_fn (cfn),
- res_op, seq, valueize))
- return true;
- switch (num_args)
- {
- case 1:
- return (gimple_resimplify1 (seq, res_op, valueize)
- || valueized);
- case 2:
- return (gimple_resimplify2 (seq, res_op, valueize)
- || valueized);
- case 3:
- return (gimple_resimplify3 (seq, res_op, valueize)
- || valueized);
- case 4:
- return (gimple_resimplify4 (seq, res_op, valueize)
- || valueized);
- case 5:
- return (gimple_resimplify5 (seq, res_op, valueize)
- || valueized);
- default:
- gcc_unreachable ();
- }
+ res_op->ops[i] = valueize_op (gimple_call_arg (stmt, i));
+ return true;
}
break;
case GIMPLE_COND:
{
- tree lhs = gimple_cond_lhs (stmt);
- tree rhs = gimple_cond_rhs (stmt);
- bool valueized = false;
- lhs = do_valueize (lhs, top_valueize, valueized);
- rhs = do_valueize (rhs, top_valueize, valueized);
+ tree lhs = valueize_op (gimple_cond_lhs (stmt));
+ tree rhs = valueize_op (gimple_cond_rhs (stmt));
res_op->set_op (gimple_cond_code (stmt), boolean_type_node, lhs, rhs);
- return (gimple_resimplify2 (seq, res_op, valueize)
- || valueized);
+ return true;
}
default:
@@ -1089,6 +1015,77 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
return false;
}
+/* Try to describe STMT in RES_OP, returning true on success.
+ For GIMPLE_CONDs, describe the condition that is being tested.
+ For GIMPLE_ASSIGNs, describe the rhs of the assignment.
+ For GIMPLE_CALLs, describe the call. */
+
+bool
+gimple_extract_op (gimple *stmt, gimple_match_op *res_op)
+{
+ auto nop = [](tree op) { return op; };
+ return gimple_extract (stmt, res_op, nop, nop);
+}
+
+/* The main STMT based simplification entry. It is used by the fold_stmt
+ and the fold_stmt_to_constant APIs. */
+
+bool
+gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
+ tree (*valueize)(tree), tree (*top_valueize)(tree))
+{
+ bool valueized = false;
+ auto valueize_op = [&](tree op)
+ {
+ return do_valueize (op, top_valueize, valueized);
+ };
+ auto valueize_condition = [&](tree op) -> tree
+ {
+ bool cond_valueized = false;
+ tree lhs = do_valueize (TREE_OPERAND (op, 0), top_valueize,
+ cond_valueized);
+ tree rhs = do_valueize (TREE_OPERAND (op, 1), top_valueize,
+ cond_valueized);
+ gimple_match_op res_op2 (res_op->cond, TREE_CODE (op),
+ TREE_TYPE (op), lhs, rhs);
+ if ((gimple_resimplify2 (seq, &res_op2, valueize)
+ || cond_valueized)
+ && res_op2.code.is_tree_code ())
+ {
+ if (TREE_CODE_CLASS ((tree_code) res_op2.code) == tcc_comparison)
+ {
+ valueized = true;
+ return build2 (res_op2.code, TREE_TYPE (op),
+ res_op2.ops[0], res_op2.ops[1]);
+ }
+ else if (res_op2.code == SSA_NAME
+ || res_op2.code == INTEGER_CST
+ || res_op2.code == VECTOR_CST)
+ {
+ valueized = true;
+ return res_op2.ops[0];
+ }
+ }
+ return valueize_op (op);
+ };
+
+ if (!gimple_extract (stmt, res_op, valueize_op, valueize_condition))
+ return false;
+
+ if (res_op->code.is_internal_fn ())
+ {
+ internal_fn ifn = internal_fn (res_op->code);
+ if (try_conditional_simplification (ifn, res_op, seq, valueize))
+ return true;
+ }
+
+ if (!res_op->reverse
+ && res_op->num_ops
+ && res_op->resimplify (seq, valueize))
+ return true;
+
+ return valueized;
+}
/* Helper for the autogenerated code, valueize OP. */
diff --git a/gcc/gimple-match.h b/gcc/gimple-match.h
index 097898aed..39858c45f 100644
--- a/gcc/gimple-match.h
+++ b/gcc/gimple-match.h
@@ -33,13 +33,39 @@ public:
code_helper (combined_fn fn) : rep (-(int) fn) {}
operator tree_code () const { return (tree_code) rep; }
operator combined_fn () const { return (combined_fn) -rep; }
+ explicit operator internal_fn () const;
+ explicit operator built_in_function () const;
bool is_tree_code () const { return rep > 0; }
bool is_fn_code () const { return rep < 0; }
+ bool is_internal_fn () const;
+ bool is_builtin_fn () const;
int get_rep () const { return rep; }
private:
int rep;
};
+inline code_helper::operator internal_fn () const
+{
+ return as_internal_fn (combined_fn (*this));
+}
+
+inline code_helper::operator built_in_function () const
+{
+ return as_builtin_fn (combined_fn (*this));
+}
+
+inline bool
+code_helper::is_internal_fn () const
+{
+ return is_fn_code () && internal_fn_p (combined_fn (*this));
+}
+
+inline bool
+code_helper::is_builtin_fn () const
+{
+ return is_fn_code () && builtin_fn_p (combined_fn (*this));
+}
+
/* Represents the condition under which an operation should happen,
and the value to use otherwise. The condition applies elementwise
(as for VEC_COND_EXPR) if the values are vectors. */
@@ -333,6 +359,7 @@ gimple_simplified_result_is_gimple_val (const gimple_match_op *op)
extern tree (*mprts_hook) (gimple_match_op *);
+bool gimple_extract_op (gimple *, gimple_match_op *);
bool gimple_simplify (gimple *, gimple_match_op *, gimple_seq *,
tree (*)(tree), tree (*)(tree));
tree maybe_push_res_to_seq (gimple_match_op *, gimple_seq *,
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,31 @@
From b57c55b282e7a9a7b2cc0d3843e58fd7998685e6 Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Fri, 4 Nov 2022 23:19:44 +0800
Subject: [PATCH 23/35] [PHIOPT] Disable the match A?CST1:0 when the CST1 is
 negative value
Fix the regression of gcc.target/aarch64/sve/vcond_3.c
gcc:
* match.pd (A?CST1:CST2): Disable the simplifications A? (-CST1):0
---
gcc/match.pd | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/gcc/match.pd b/gcc/match.pd
index 79a0228d2..fc1a34dd3 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3347,7 +3347,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (integer_onep (@1))
(convert (convert:boolean_type_node @0)))
/* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */
- (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1))
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (TREE_TYPE (@1))
+ && integer_pow2p (@1))
(with {
tree shift = build_int_cst (integer_type_node, tree_log2 (@1));
}
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,89 @@
From 9af03694082c462bee86c167c78717089a93a188 Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Sat, 5 Nov 2022 13:22:33 +0800
Subject: [PATCH 25/35] [PHIOPT] Add A ? B op CST : B match and simplify
optimizations
Refer to commit b6bdd7a4; use pattern matching to apply the simple
A ? B op CST : B (where CST is a power of 2) simplifications.
Fixes the 1st issue of https://gitee.com/openeuler/gcc/issues/I5TSG0?from=project-issue.
gcc/
* match.pd (A ? B op CST : B): Add simplifications for A ? B op POW2 : B
gcc/testsuite/
* gcc.dg/pr107190.c: New test.
---
gcc/match.pd | 21 +++++++++++++++++++++
gcc/testsuite/gcc.dg/pr107190.c | 27 +++++++++++++++++++++++++++
2 files changed, 48 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/pr107190.c
diff --git a/gcc/match.pd b/gcc/match.pd
index fc1a34dd3..5c5b5f89e 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3383,6 +3383,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
)
#endif
+#if GIMPLE
+(if (canonicalize_math_p ())
+/* These patterns are mostly used by PHIOPT to move some operations outside of
+ the if statements. They should be done late because it gives jump threading
+ and few other passes to reduce what is going on. */
+/* a ? x op C : x -> x op (a << log2(C)) when C is power of 2. */
+ (for op (plus minus bit_ior bit_xor lshift rshift lrotate rrotate)
+ (simplify
+ (cond @0 (op:s @1 integer_pow2p@2) @1)
+ /* powerof2cst */
+ (if (INTEGRAL_TYPE_P (type))
+ (with {
+ tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
+ }
+ (op @1 (lshift (convert (convert:boolean_type_node @0)) { shift; })))
+ )
+ )
+ )
+)
+#endif
+
/* Simplification moved from fold_cond_expr_with_comparison. It may also
be extended. */
/* This pattern implements two kinds simplification:
diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c
new file mode 100644
index 000000000..235b2761a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr107190.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
+
+# define BN_BITS4 32
+# define BN_MASK2 (0xffffffffffffffffL)
+# define BN_MASK2l (0xffffffffL)
+# define BN_MASK2h (0xffffffff00000000L)
+# define BN_MASK2h1 (0xffffffff80000000L)
+# define LBITS(a) ((a)&BN_MASK2l)
+# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
+# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
+
+unsigned int test_m(unsigned long in0, unsigned long in1) {
+ unsigned long m, m1, lt, ht, bl, bh;
+ lt = LBITS(in0);
+ ht = HBITS(in0);
+ bl = LBITS(in1);
+ bh = HBITS(in1);
+ m = bh * lt;
+ m1 = bl * ht;
+ ht = bh * ht;
+ m = (m + m1) & BN_MASK2;
+ if (m < m1) ht += L2HBITS((unsigned long)1);
+ return ht + m;
+}
+
+/* { dg-final { scan-tree-dump "COND_EXPR in block 2 and PHI in block 4 converted to straightline code" "phiopt2" } } */
--
2.27.0.windows.1

View File

@ -0,0 +1,130 @@
From 2a2d0ba6a26d64f4c1f9352bb2c69dea8b67d6a6 Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Wed, 9 Nov 2022 17:04:13 +0800
Subject: [PATCH 26/35] [FORWPROP] Fold series of instructions into mul
Merge the low part of a series of instructions into mul
gcc/
* match.pd: Add simplifications for the low part of mul
* common.opt: Add new option fmerge-mull, enabled with -O2
* opts.c: default_options_table
gcc/testsuite/
* g++.dg/tree-ssa/mull64.C: New test.
---
gcc/common.opt | 4 +++
gcc/match.pd | 27 ++++++++++++++++++++
gcc/opts.c | 1 +
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 34 ++++++++++++++++++++++++++
4 files changed, 66 insertions(+)
create mode 100644 gcc/testsuite/g++.dg/tree-ssa/mull64.C
diff --git a/gcc/common.opt b/gcc/common.opt
index ad147f7a9..6a7f66624 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2069,6 +2069,10 @@ fmerge-debug-strings
Common Report Var(flag_merge_debug_strings) Init(1)
Attempt to merge identical debug strings across compilation units.
+fmerge-mull
+Common Report Var(flag_merge_mull) Init(0) Optimization
+Attempt to merge series instructions into mul.
+
fmessage-length=
Common RejectNegative Joined UInteger
-fmessage-length=<number> Limit diagnostics to <number> characters per line. 0 suppresses line-wrapping.
diff --git a/gcc/match.pd b/gcc/match.pd
index 5c5b5f89e..f6c5befd7 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3404,6 +3404,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
)
#endif
+#if GIMPLE
+/* These patterns are mostly used by FORWPROP1 to fold some operations into more
+ simple IR. The following scenario should be matched:
+ In0Lo = In0(D) & 4294967295;
+ In0Hi = In0(D) >> 32;
+ In1Lo = In1(D) & 4294967295;
+ In1Hi = In1(D) >> 32;
+ Addc = In0Lo * In1Hi + In0Hi * In1Lo;
+ addc32 = Addc << 32;
+ ResLo = In0Lo * In1Lo + addc32 */
+(simplify
+ (plus:c (mult @4 @5)
+ (lshift
+ (plus:c
+ (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3))
+ (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2)))
+ INTEGER_CST@3
+ )
+ )
+ (if (flag_merge_mull && INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
+ && TYPE_PRECISION (type) == 64)
+ (mult (convert:type @0) (convert:type @1))
+ )
+)
+#endif
+
/* Simplification moved from fold_cond_expr_with_comparison. It may also
be extended. */
/* This pattern implements two kinds simplification:
diff --git a/gcc/opts.c b/gcc/opts.c
index f12b13599..751965e46 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -511,6 +511,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP },
{ OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 },
/* -O2 and above optimizations, but not -Os or -Og. */
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 },
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
new file mode 100644
index 000000000..2a3b74604
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
+
+# define BN_BITS4 32
+# define BN_MASK2 (0xffffffffffffffffL)
+# define BN_MASK2l (0xffffffffL)
+# define BN_MASK2h (0xffffffff00000000L)
+# define BN_MASK2h1 (0xffffffff80000000L)
+# define LBITS(a) ((a)&BN_MASK2l)
+# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
+# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
+
+void mul64(unsigned long in0, unsigned long in1,
+ unsigned long &retLo, unsigned long &retHi) {
+ unsigned long m00, m01, m10, m11, al, ah, bl, bh;
+ unsigned long Addc, addc32, low;
+ al = LBITS(in0);
+ ah = HBITS(in0);
+ bl = LBITS(in1);
+ bh = HBITS(in1);
+ m10 = bh * al;
+ m00 = bl * al;
+ m01 = bl * ah;
+ m11 = bh * ah;
+ Addc = (m10 + m01) & BN_MASK2;
+ if (Addc < m01) m11 += L2HBITS((unsigned long)1);
+ m11 += HBITS(Addc);
+ addc32 = L2HBITS(Addc);
+ low = (m00 + addc32) & BN_MASK2; if (low < addc32) m11++;
+ retLo = low;
+ retHi = m11;
+}
+
+/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
--
2.27.0.windows.1

View File

@ -0,0 +1,105 @@
From 315911bd3ae6f42366779e262ab76d9ed79359a0 Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Fri, 11 Nov 2022 11:30:37 +0800
Subject: [PATCH 27/35] [FORWPROP] Fold series of instructions into umulh
Merge the high part of a series of instructions into umulh
gcc/
* match.pd: Add simplifications for the high part of umulh
gcc/testsuite/
* g++.dg/tree-ssa/mull64.C: Add checking of tree pass forwprop4
---
gcc/match.pd | 56 ++++++++++++++++++++++++++
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 5 ++-
2 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/gcc/match.pd b/gcc/match.pd
index f6c5befd7..433682afb 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3404,6 +3404,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
)
#endif
+#if GIMPLE
+/* These patterns are mostly used by FORWPROP4 to move some operations outside of
+ the if statements. They should be done late because it gives jump threading
+ and few other passes to reduce what is going on. */
+/* Mul64 is defined as a multiplication algorithm which compute two 64-bit
+ integers to one 128-bit integer. Try to match the high part of mul pattern
+ after the low part of mul pattern is simplified. The following scenario
+ should be matched:
+ (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) {
+ In0Lo = In0(D) & 4294967295; -- bit_and@4 SSA_NAME@0 @2
+ In0Hi = In0(D) >> 32; -- rshift@5 SSA_NAME@0 @3
+ In1Lo = In1(D) & 4294967295; -- bit_and@6 SSA_NAME@1 INTEGER_CST@2
+ In1Hi = In1(D) >> 32; -- rshift@7 SSA_NAME@1 INTEGER_CST@3
+ Mull_01 = In0Hi * In1Lo; -- mult@8 @5 @6
+ Addc = In0Lo * In1Hi + Mull_01; -- plus@9 (mult (@4 @7) @8
+ AddH = (Addc >> 32) + In0Hi * In1Hi -- (plus@11 (rshift @9 @3) (mult @5 @7))
+ addc32 = Addc << 32; -- lshift@10 @9 @3
+ ResLo = In0(D) * In1(D); -- mult @0 @1
+ ResHi = ((long unsigned int) (addc32 > ResLo)) +
+ (((long unsigned int) (Mull_01 > Addc)) << 32) + AddH;
+ } */
+(simplify
+ (plus:c
+ (plus:c
+ (convert
+ (gt (lshift@10 @9 @3)
+ (mult:c @0 @1)))
+ (lshift
+ (convert
+ (gt @8 @9))
+ @3))
+ (plus:c@11
+ (rshift
+ (plus:c@9
+ (mult:c (bit_and@4 SSA_NAME@0 @2) @7)
+ (mult:c@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2)))
+ @3)
+ (mult:c (rshift@5 SSA_NAME@0 @3)
+ (rshift@7 SSA_NAME@1 INTEGER_CST@3))
+ )
+ )
+ (if (flag_merge_mull && INTEGRAL_TYPE_P (type)
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
+ && TYPE_PRECISION (type) == 64)
+ (with {
+ tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type));
+ tree shift = build_int_cst (integer_type_node, 64);
+ }
+ (convert:type (rshift
+ (mult (convert:i128_type @0)
+ (convert:i128_type @1))
+ { shift; })))
+ )
+)
+#endif
+
#if GIMPLE
/* These patterns are mostly used by FORWPROP1 to fold some operations into more
simple IR. The following scenario should be matched:
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
index 2a3b74604..f61cf5e6f 100644
--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
+/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
# define BN_BITS4 32
# define BN_MASK2 (0xffffffffffffffffL)
@@ -31,4 +31,5 @@ void mul64(unsigned long in0, unsigned long in1,
retHi = m11;
}
-/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
+/* { dg-final { scan-tree-dump "gimple_simplified to" "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified to" 1 "forwprop4" } } */
--
2.27.0.windows.1

View File

@ -0,0 +1,38 @@
From b669b4512e8425f4d752ef76bf61097cf40d9b35 Mon Sep 17 00:00:00 2001
From: zgat <1071107108@qq.com>
Date: Thu, 17 Nov 2022 02:55:48 +0000
Subject: [PATCH 28/35] [Struct Reorg] Fix speccpu2006 462 double free #I60YUV
modify gcc/tree.c. Normal operation speccpu 462 after modifed
Signed-off-by: zgat <1071107108@qq.com>
---
gcc/tree.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/gcc/tree.c b/gcc/tree.c
index 2a532d15a..a61788651 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -5224,8 +5224,7 @@ fld_simplified_type_name (tree type)
optimizations. */
if (flag_ipa_struct_reorg
&& lang_c_p ()
- && flag_lto_partition == LTO_PARTITION_ONE
- && (in_lto_p || flag_whole_program))
+ && flag_lto_partition == LTO_PARTITION_ONE)
return TYPE_NAME (type);
if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL)
@@ -5471,8 +5470,7 @@ fld_simplified_type (tree t, class free_lang_data_d *fld)
optimizations. */
if (flag_ipa_struct_reorg
&& lang_c_p ()
- && flag_lto_partition == LTO_PARTITION_ONE
- && (in_lto_p || flag_whole_program))
+ && flag_lto_partition == LTO_PARTITION_ONE)
return t;
if (POINTER_TYPE_P (t))
return fld_incomplete_type_of (t, fld);
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,267 @@
From 013544d0b477647c8835a8806c75e7b09155b8ed Mon Sep 17 00:00:00 2001
From: benniaobufeijiushiji <linda7@huawei.com>
Date: Mon, 8 Aug 2022 09:13:53 +0800
Subject: [PATCH 31/35] [loop-vect] Transfer arrays using registers between
 loops For vectorized stores in loop, if all succeeding loops immediately use the
data, transfer data using registers instead of load store to prevent overhead
from memory access.
---
gcc/testsuite/gcc.dg/vect/vect-perm-1.c | 45 ++++++
gcc/tree-vect-stmts.c | 181 ++++++++++++++++++++++++
2 files changed, 226 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-perm-1.c
diff --git a/gcc/testsuite/gcc.dg/vect/vect-perm-1.c b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c
new file mode 100644
index 000000000..d8b29fbd5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c
@@ -0,0 +1,45 @@
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
+/* { dg-options "-O3 -fdump-tree-vect-all-details -save-temps" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+static unsigned inline abs2 (unsigned a)
+{
+ unsigned s = ((a>>15)&0x10001)*0xffff;
+ return (a+s)^s;
+}
+
+int foo (unsigned *a00, unsigned *a11, unsigned *a22, unsigned *a33)
+{
+ unsigned tmp[4][4];
+ unsigned a0, a1, a2, a3;
+ int sum = 0;
+ for (int i = 0; i < 4; i++)
+ {
+ int t0 = a00[i] + a11[i];
+ int t1 = a00[i] - a11[i];
+ int t2 = a22[i] + a33[i];
+ int t3 = a22[i] - a33[i];
+ tmp[i][0] = t0 + t2;
+ tmp[i][2] = t0 - t2;
+ tmp[i][1] = t1 + t3;
+ tmp[i][3] = t1 - t3;
+ }
+ for (int i = 0; i < 4; i++)
+ {
+ int t0 = tmp[0][i] + tmp[1][i];
+ int t1 = tmp[0][i] - tmp[1][i];
+ int t2 = tmp[2][i] + tmp[3][i];
+ int t3 = tmp[2][i] - tmp[3][i];
+ a0 = t0 + t2;
+ a2 = t0 - t2;
+ a1 = t1 + t3;
+ a3 = t1 - t3;
+ sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3);
+ }
+ return (((unsigned short) sum) + ((unsigned) sum >>16)) >> 1;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 16 "vect" } } */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 2c2197022..98b233718 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2276,6 +2276,173 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
return NULL_TREE;
}
+/* Check successor BB, BB without load is regarded as empty BB.  Ignore empty
+ BB in DFS. */
+
+static unsigned
+mem_refs_in_bb (basic_block bb, vec<gimple *> &stmts)
+{
+ unsigned num = 0;
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+ if (is_gimple_debug (stmt))
+ continue;
+ if (is_gimple_assign (stmt) && gimple_has_mem_ops (stmt)
+ && !gimple_has_volatile_ops (stmt))
+ {
+ if (gimple_assign_rhs_code (stmt) == MEM_REF
+ || gimple_assign_rhs_code (stmt) == ARRAY_REF)
+ {
+ stmts.safe_push (stmt);
+ num++;
+ }
+ else if (TREE_CODE (gimple_get_lhs (stmt)) == MEM_REF
+ || TREE_CODE (gimple_get_lhs (stmt)) == ARRAY_REF)
+ num++;
+ }
+ }
+ return num;
+}
+
+static bool
+check_same_base (vec<data_reference_p> *datarefs, data_reference_p dr)
+{
+ for (unsigned ui = 0; ui < datarefs->length (); ui++)
+ {
+ tree op1 = TREE_OPERAND (DR_BASE_OBJECT (dr), 0);
+ tree op2 = TREE_OPERAND (DR_BASE_OBJECT ((*datarefs)[ui]), 0);
+ if (TREE_CODE (op1) != TREE_CODE (op2))
+ continue;
+ if (TREE_CODE (op1) == ADDR_EXPR)
+ {
+ op1 = TREE_OPERAND (op1, 0);
+ op2 = TREE_OPERAND (op2, 0);
+ }
+ enum tree_code code = TREE_CODE (op1);
+ switch (code)
+ {
+ case VAR_DECL:
+ if (DECL_NAME (op1) == DECL_NAME (op2)
+ && DR_IS_READ ((*datarefs)[ui]))
+ return true;
+ break;
+ case SSA_NAME:
+ if (SSA_NAME_VERSION (op1) == SSA_NAME_VERSION (op2)
+ && DR_IS_READ ((*datarefs)[ui]))
+ return true;
+ break;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
+/* Iterate all load STMTS, if satisfying same base vectorized stmt, then return.
+   Otherwise, set SUCCESS to false.  */
+
+static void
+check_vec_use (loop_vec_info loop_vinfo, vec<gimple *> &stmts,
+ stmt_vec_info stmt_info, bool &success)
+{
+ if (stmt_info == NULL)
+ {
+ success = false;
+ return;
+ }
+ if (DR_IS_READ (stmt_info->dr_aux.dr))
+ {
+ success = false;
+ return;
+ }
+ unsigned ui = 0;
+ gimple *candidate = NULL;
+ FOR_EACH_VEC_ELT (stmts, ui, candidate)
+ {
+ if (TREE_CODE (TREE_TYPE (gimple_get_lhs (candidate))) != VECTOR_TYPE)
+ continue;
+
+ if (candidate->bb != candidate->bb->loop_father->header)
+ {
+ success = false;
+ return;
+ }
+ auto_vec<data_reference_p> datarefs;
+ tree res = find_data_references_in_bb (candidate->bb->loop_father,
+ candidate->bb, &datarefs);
+ if (res == chrec_dont_know)
+ {
+ success = false;
+ return;
+ }
+ if (check_same_base (&datarefs, stmt_info->dr_aux.dr))
+ return;
+ }
+ success = false;
+}
+
+/* Depth-first search from present BB.  If a successor has load STMTS,
+ stop further searching. */
+
+static void
+dfs_check_bb (loop_vec_info loop_vinfo, basic_block bb, stmt_vec_info stmt_info,
+ bool &success, vec<basic_block> &visited_bbs)
+{
+ if (bb == cfun->cfg->x_exit_block_ptr)
+ {
+ success = false;
+ return;
+ }
+ if (!success || visited_bbs.contains (bb) || bb == loop_vinfo->loop->latch)
+ return;
+
+ visited_bbs.safe_push (bb);
+ auto_vec<gimple *> stmts;
+ unsigned num = mem_refs_in_bb (bb, stmts);
+ /* Empty BB. */
+ if (num == 0)
+ {
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ dfs_check_bb (loop_vinfo, e->dest, stmt_info, success, visited_bbs);
+ if (!success)
+ return;
+ }
+ return;
+ }
+ /* Non-empty BB. */
+ check_vec_use (loop_vinfo, stmts, stmt_info, success);
+}
+
+/* For grouped store, check whether all successors of present BB have vectorized load
+ from same base of store. If so, set memory_access_type using
+ VMAT_CONTIGUOUS_PERMUTE instead of VMAT_LOAD_STORE_LANES. */
+
+static bool
+conti_perm (stmt_vec_info stmt_vinfo, loop_vec_info loop_vinfo)
+{
+ gimple *stmt = stmt_vinfo->stmt;
+ if (gimple_code (stmt) != GIMPLE_ASSIGN)
+ return false;
+
+ if (DR_IS_READ (stmt_vinfo->dr_aux.dr))
+ return false;
+
+ basic_block bb = stmt->bb;
+ bool success = true;
+ auto_vec<basic_block> visited_bbs;
+ visited_bbs.safe_push (bb);
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ dfs_check_bb (loop_vinfo, e->dest, stmt_vinfo, success, visited_bbs);
+ return success;
+}
+
/* A subroutine of get_load_store_type, with a subset of the same
arguments. Handle the case where STMT_INFO is part of a grouped load
or store.
@@ -2434,6 +2601,20 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
*memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
overrun_p = would_overrun_p;
}
+
+ if (*memory_access_type == VMAT_LOAD_STORE_LANES
+ && TREE_CODE (loop_vinfo->num_iters) == INTEGER_CST
+ && maybe_eq (tree_to_shwi (loop_vinfo->num_iters),
+ loop_vinfo->vectorization_factor)
+ && conti_perm (stmt_info, loop_vinfo)
+ && (vls_type == VLS_LOAD
+ ? vect_grouped_load_supported (vectype, single_element_p,
+ group_size)
+ : vect_grouped_store_supported (vectype, group_size)))
+ {
+ *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
+ overrun_p = would_overrun_p;
+ }
}
/* As a last resort, trying using a gather load or scatter store.
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,826 @@
From ca2a541ed3425bec64f97fe277c6c02bf4f20049 Mon Sep 17 00:00:00 2001
From: benniaobufeijiushiji <linda7@huawei.com>
Date: Thu, 27 Oct 2022 10:26:34 +0800
Subject: [PATCH 33/35] [Loop-distribution] Insert temp arrays built from
isomorphic stmts Use option -ftree-slp-transpose-vectorize Build temp arrays
for isomorphic stmt and regard them as new seed_stmts for loop distribution.
---
gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c | 67 +++
gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c | 17 +
gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c | 19 +
gcc/tree-loop-distribution.c | 577 +++++++++++++++++++-
4 files changed, 663 insertions(+), 17 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c
new file mode 100644
index 000000000..649463647
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c
@@ -0,0 +1,67 @@
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
+/* { dg-do run { target { aarch64*-*-linux* } } } */
+/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details -save-temps" } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+static unsigned inline abs2 (unsigned a)
+{
+ unsigned s = ((a>>15)&0x10001)*0xffff;
+ return (a+s)^s;
+}
+
+int foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib)
+{
+ unsigned tmp[4][4];
+ unsigned a0, a1, a2, a3;
+ int sum = 0;
+ for (int i = 0; i < 4; i++, oxa += ia, oxb += ib)
+ {
+ a0 = (oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16);
+ a1 = (oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16);
+ a2 = (oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16);
+ a3 = (oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16);
+ int t0 = a0 + a1;
+ int t1 = a0 - a1;
+ int t2 = a2 + a3;
+ int t3 = a2 - a3;
+ tmp[i][0] = t0 + t2;
+ tmp[i][2] = t0 - t2;
+ tmp[i][1] = t1 + t3;
+ tmp[i][3] = t1 - t3;
+ }
+ for (int i = 0; i < 4; i++)
+ {
+ int t0 = tmp[0][i] + tmp[1][i];
+ int t1 = tmp[0][i] - tmp[1][i];
+ int t2 = tmp[2][i] + tmp[3][i];
+ int t3 = tmp[2][i] - tmp[3][i];
+ a0 = t0 + t2;
+ a2 = t0 - t2;
+ a1 = t1 + t3;
+ a3 = t1 - t3;
+ sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3);
+ }
+ return (((unsigned short) sum) + ((unsigned) sum >>16)) >> 1;
+}
+
+int main ()
+{
+ unsigned char oxa[128] = {0};
+ unsigned char oxb[128] = {0};
+ for (int i = 0; i < 128; i++)
+ {
+ oxa[i] += i * 3;
+ oxb[i] = i * 2;
+ }
+ int sum = foo (oxa, 16, oxb, 32);
+ if (sum != 736)
+ {
+ abort ();
+ }
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "Insertion done: 4 temp arrays inserted" 1 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c
new file mode 100644
index 000000000..1b50fd27d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
+/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details" } */
+
+unsigned a0[4], a1[4], a2[4], a3[4];
+
+void foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib)
+{
+ for (int i = 0; i < 4; i++, oxa += ia, oxb += ib)
+ {
+ a0[i] = (oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16);
+ a1[i] = (oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16);
+ a2[i] = (oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16);
+ a3[i] = (oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16);
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "Loop 1 not distributed." 1 "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c
new file mode 100644
index 000000000..94b992b05
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c
@@ -0,0 +1,19 @@
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
+/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details" } */
+
+unsigned a0[4], a1[4], a2[4], a3[4];
+
+void foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib)
+{
+ for (int i = 0; i < 4; i++, oxa += ia, oxb += ib)
+ {
+ a0[i] = ((oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16)) + 1;
+ a1[i] = ((oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16)) - 2;
+ a2[i] = ((oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16)) * 3;
+ a3[i] = ((oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16)) / 4;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "Insertion done: 4 temp arrays inserted" 1 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "Insertion removed" 1 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "Loop 1 not distributed." 1 "ldist" } } */
\ No newline at end of file
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index c08af6562..88b56379c 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -36,6 +36,47 @@ along with GCC; see the file COPYING3. If not see
| D(I) = A(I-1)*E
|ENDDO
+ If an unvectorizable loop has grouped loads, and calculations from grouped
+ loads are isomorphic, build temp arrays using stmts where isomorphic
+   calculations end.  After distribution, the partition built from temp
+ arrays can be vectorized in pass SLP after loop unrolling. For example,
+
+ |DO I = 1, N
+ | A = FOO (ARG_1);
+ | B = FOO (ARG_2);
+ | C = BAR_0 (A);
+ | D = BAR_1 (B);
+ |ENDDO
+
+ is transformed to
+
+ |DO I = 1, N
+ | J = FOO (ARG_1);
+ | K = FOO (ARG_2);
+ | X[I] = J;
+ | Y[I] = K;
+ | A = X[I];
+ | B = Y[I];
+ | C = BAR_0 (A);
+ | D = BAR_1 (B);
+ |ENDDO
+
+ and is then distributed to
+
+ |DO I = 1, N
+ | J = FOO (ARG_1);
+ | K = FOO (ARG_2);
+ | X[I] = J;
+ | Y[I] = K;
+ |ENDDO
+
+ |DO I = 1, N
+ | A = X[I];
+ | B = Y[I];
+ | C = BAR_0 (A);
+ | D = BAR_1 (B);
+ |ENDDO
+
Loop distribution is the dual of loop fusion. It separates statements
of a loop (or loop nest) into multiple loops (or loop nests) with the
same loop header. The major goal is to separate statements which may
@@ -44,7 +85,9 @@ along with GCC; see the file COPYING3. If not see
1) Seed partitions with specific type statements. For now we support
two types seed statements: statement defining variable used outside
- of loop; statement storing to memory.
+ of loop; statement storing to memory. Moreover, for unvectorizable
+ loops, we try to find isomorphic stmts from grouped load and build
+ temp arrays as new seed statements.
2) Build reduced dependence graph (RDG) for loop to be distributed.
The vertices (RDG:V) model all statements in the loop and the edges
(RDG:E) model flow and control dependencies between statements.
@@ -643,7 +686,8 @@ class loop_distribution
/* Returns true when PARTITION1 and PARTITION2 access the same memory
object in RDG. */
bool share_memory_accesses (struct graph *rdg,
- partition *partition1, partition *partition2);
+ partition *partition1, partition *partition2,
+ hash_set<tree> *excluded_arrays);
/* For each seed statement in STARTING_STMTS, this function builds
partition for it by adding depended statements according to RDG.
@@ -686,8 +730,9 @@ class loop_distribution
/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
ALIAS_DDRS contains ddrs which need runtime alias check. */
- void finalize_partitions (class loop *loop, vec<struct partition *>
- *partitions, vec<ddr_p> *alias_ddrs);
+ void finalize_partitions (class loop *loop,
+ vec<struct partition *> *partitions,
+ vec<ddr_p> *alias_ddrs, bitmap producers);
/* Analyze loop form and if it's vectorizable to decide if we need to
insert temp arrays to distribute it. */
@@ -701,6 +746,28 @@ class loop_distribution
inline void rebuild_rdg (loop_p loop, struct graph *&rdg,
control_dependences *cd);
+
+ /* If loop is not distributed, remove inserted temp arrays. */
+ void remove_insertion (loop_p loop, struct graph *flow_only_rdg,
+ bitmap producers, struct partition *partition);
+
+ /* Insert temp arrays if isomorphic computation exists. Temp arrays will be
+ regarded as SEED_STMTS for building partitions in succeeding processes. */
+ bool insert_temp_arrays (loop_p loop, vec<gimple *> seed_stmts,
+ hash_set<tree> *tmp_array_vars, bitmap producers);
+
+ void build_producers (loop_p loop, bitmap producers,
+ vec<gimple *> &transformed);
+
+ void do_insertion (loop_p loop, struct graph *flow_only_rdg, tree iv,
+ bitmap cut_points, hash_set <tree> *tmp_array_vars,
+ bitmap producers);
+
+ /* Fuse PARTITIONS built from inserted temp arrays into one partition,
+ fuse the rest into another. */
+ void merge_remaining_partitions (vec<struct partition *> *partitions,
+ bitmap producers);
+
/* Distributes the code from LOOP in such a way that producer statements
are placed before consumer statements. Tries to separate only the
statements from STMTS into separate loops. Returns the number of
@@ -1913,7 +1980,8 @@ loop_distribution::classify_partition (loop_p loop,
bool
loop_distribution::share_memory_accesses (struct graph *rdg,
- partition *partition1, partition *partition2)
+ partition *partition1, partition *partition2,
+ hash_set <tree> *excluded_arrays)
{
unsigned i, j;
bitmap_iterator bi, bj;
@@ -1947,7 +2015,10 @@ loop_distribution::share_memory_accesses (struct graph *rdg,
if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
&& operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
&& operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
- && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
+ && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0)
+ /* An exception, if PARTITION1 and PARTITION2 contain the
+ temp array we inserted, do not merge them. */
+ && !excluded_arrays->contains (DR_REF (dr1)))
return true;
}
}
@@ -2909,13 +2980,47 @@ fuse_memset_builtins (vec<struct partition *> *partitions)
}
}
+void
+loop_distribution::merge_remaining_partitions
+ (vec<struct partition *> *partitions,
+ bitmap producers)
+{
+ struct partition *partition = NULL;
+ struct partition *p1 = NULL, *p2 = NULL;
+ for (unsigned i = 0; partitions->iterate (i, &partition); i++)
+ {
+ if (bitmap_intersect_p (producers, partition->stmts))
+ {
+ if (p1 == NULL)
+ {
+ p1 = partition;
+ continue;
+ }
+ partition_merge_into (NULL, p1, partition, FUSE_FINALIZE);
+ }
+ else
+ {
+ if (p2 == NULL)
+ {
+ p2 = partition;
+ continue;
+ }
+ partition_merge_into (NULL, p2, partition, FUSE_FINALIZE);
+ }
+ partitions->unordered_remove (i);
+ partition_free (partition);
+ i--;
+ }
+}
+
void
loop_distribution::finalize_partitions (class loop *loop,
vec<struct partition *> *partitions,
- vec<ddr_p> *alias_ddrs)
+ vec<ddr_p> *alias_ddrs,
+ bitmap producers)
{
unsigned i;
- struct partition *partition, *a;
+ struct partition *partition;
if (partitions->length () == 1
|| alias_ddrs->length () > 0)
@@ -2947,13 +3052,7 @@ loop_distribution::finalize_partitions (class loop *loop,
|| (loop->inner == NULL
&& i >= NUM_PARTITION_THRESHOLD && num_normal > num_builtin))
{
- a = (*partitions)[0];
- for (i = 1; partitions->iterate (i, &partition); ++i)
- {
- partition_merge_into (NULL, a, partition, FUSE_FINALIZE);
- partition_free (partition);
- }
- partitions->truncate (1);
+ merge_remaining_partitions (partitions, producers);
}
/* Fuse memset builtins if possible. */
@@ -3758,6 +3857,404 @@ find_isomorphic_stmts (loop_vec_info vinfo, vec<gimple *> &stmts)
return decide_stmts_by_profit (candi_stmts, stmts);
}
+/* Get iv from SEED_STMTS and make sure each seed_stmt has only one iv as index
+ and all indices are the same. */
+
+static tree
+find_index (vec<gimple *> seed_stmts)
+{
+ if (seed_stmts.length () == 0)
+ return NULL;
+ bool found_index = false;
+ tree index = NULL;
+ unsigned ui = 0;
+ for (ui = 0; ui < seed_stmts.length (); ui++)
+ {
+ if (!gimple_vdef (seed_stmts[ui]))
+ return NULL;
+ tree lhs = gimple_assign_lhs (seed_stmts[ui]);
+ unsigned num_index = 0;
+ while (TREE_CODE (lhs) == ARRAY_REF)
+ {
+ if (TREE_CODE (TREE_OPERAND (lhs, 1)) == SSA_NAME)
+ {
+ num_index++;
+ if (num_index > 1)
+ return NULL;
+ if (index == NULL)
+ {
+ index = TREE_OPERAND (lhs, 1);
+ found_index = true;
+ }
+ else if (index != TREE_OPERAND (lhs, 1))
+ return NULL;
+ }
+ lhs = TREE_OPERAND (lhs, 0);
+ }
+ if (!found_index)
+ return NULL;
+ }
+ return index;
+}
+
+/* Check if expression of phi is an increment of a const.  */
+
+static void
+check_phi_inc (struct vertex *v_phi, struct graph *rdg, bool &found_inc)
+{
+ struct graph_edge *e_phi;
+ for (e_phi = v_phi->succ; e_phi; e_phi = e_phi->succ_next)
+ {
+ struct vertex *v_inc = &(rdg->vertices[e_phi->dest]);
+ if (!is_gimple_assign (RDGV_STMT (v_inc))
+ || gimple_expr_code (RDGV_STMT (v_inc)) != PLUS_EXPR)
+ continue;
+ tree rhs1 = gimple_assign_rhs1 (RDGV_STMT (v_inc));
+ tree rhs2 = gimple_assign_rhs2 (RDGV_STMT (v_inc));
+ if (!(integer_onep (rhs1) || integer_onep (rhs2)))
+ continue;
+ struct graph_edge *e_inc;
+      /* Find cycle with only two vertices inc and phi: inc <--> phi.  */
+ bool found_cycle = false;
+ for (e_inc = v_inc->succ; e_inc; e_inc = e_inc->succ_next)
+ {
+ if (e_inc->dest == e_phi->src)
+ {
+ found_cycle = true;
+ break;
+ }
+ }
+ if (!found_cycle)
+ continue;
+ found_inc = true;
+ }
+}
+
+/* Check if phi satisfies form like PHI <0, i>. */
+
+static inline bool
+iv_check_phi_stmt (gimple *phi_stmt)
+{
+ return gimple_phi_num_args (phi_stmt) == 2
+ && (integer_zerop (gimple_phi_arg_def (phi_stmt, 0))
+ || integer_zerop (gimple_phi_arg_def (phi_stmt, 1)));
+}
+
+/* Make sure the iteration variable is a phi.  */
+
+static tree
+get_iv_from_seed (struct graph *flow_only_rdg, vec<gimple *> seed_stmts)
+{
+ tree index = find_index (seed_stmts);
+ if (index == NULL)
+ return NULL;
+ for (int i = 0; i < flow_only_rdg->n_vertices; i++)
+ {
+ struct vertex *v = &(flow_only_rdg->vertices[i]);
+ if (RDGV_STMT (v) != seed_stmts[0])
+ continue;
+ struct graph_edge *e;
+ bool found_phi = false;
+ for (e = v->pred; e; e = e->pred_next)
+ {
+ struct vertex *v_phi = &(flow_only_rdg->vertices[e->src]);
+ gimple *phi_stmt = RDGV_STMT (v_phi);
+ if (gimple_code (phi_stmt) != GIMPLE_PHI
+ || gimple_phi_result (phi_stmt) != index)
+ continue;
+ if (!iv_check_phi_stmt (phi_stmt))
+ return NULL;
+ /* find inc expr in succ of phi. */
+ bool found_inc = false;
+ check_phi_inc (v_phi, flow_only_rdg, found_inc);
+ if (!found_inc)
+ return NULL;
+ found_phi = true;
+ break;
+ }
+ if (!found_phi)
+ return NULL;
+ break;
+ }
+ return index;
+}
+
+/* Do not distribute loop if vertices in ROOT_MAP have antidependence within
+   FLOW_ONLY_RDG.  */
+
+static bool
+check_no_dependency (struct graph *flow_only_rdg, bitmap root_map)
+{
+ bitmap_iterator bi;
+ unsigned ui;
+ auto_vec<unsigned, 16> visited_nodes;
+ auto_bitmap visited_map;
+ EXECUTE_IF_SET_IN_BITMAP (root_map, 0, ui, bi)
+ visited_nodes.safe_push (ui);
+ for (ui = 0; ui < visited_nodes.length (); ui++)
+ {
+ struct vertex *v = &(flow_only_rdg->vertices[visited_nodes[ui]]);
+ struct graph_edge *e;
+ for (e = v->succ; e; e = e->succ_next)
+ {
+ if (bitmap_bit_p (root_map, e->dest))
+ return false;
+ if (bitmap_bit_p (visited_map, e->dest))
+ continue;
+ visited_nodes.safe_push (e->dest);
+ bitmap_set_bit (visited_map, e->dest);
+ }
+ }
+ return true;
+}
+
+/* Find isomorphic stmts from GROUPED_LOADS in VINFO and make sure
+ there is no dependency among those STMT we found. */
+
+static unsigned
+get_cut_points (struct graph *flow_only_rdg, bitmap cut_points,
+ loop_vec_info vinfo)
+{
+ unsigned n_stmts = 0;
+
+ /* STMTS that may be CUT_POINTS. */
+ auto_vec<gimple *> stmts;
+ if (!find_isomorphic_stmts (vinfo, stmts))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "No temp array insertion: no isomorphic stmts"
+ " were found.\n");
+ return 0;
+ }
+
+ for (int i = 0; i < flow_only_rdg->n_vertices; i++)
+ {
+ if (stmts.contains (RDG_STMT (flow_only_rdg, i)))
+ bitmap_set_bit (cut_points, i);
+ }
+ n_stmts = bitmap_count_bits (cut_points);
+
+ bool succ = check_no_dependency (flow_only_rdg, cut_points);
+ if (!succ)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "No temp array inserted: data dependency"
+ " among isomorphic stmts.\n");
+ return 0;
+ }
+ return n_stmts;
+}
+
+static void
+build_temp_array (struct vertex *v, gimple_stmt_iterator &gsi,
+ poly_uint64 array_extent, tree iv,
+ hash_set<tree> *tmp_array_vars, vec<gimple *> *transformed)
+{
+ gimple *stmt = RDGV_STMT (v);
+ tree lhs = gimple_assign_lhs (stmt);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "original stmt:\t");
+ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS);
+ }
+ tree var_ssa = duplicate_ssa_name (lhs, stmt);
+ gimple_assign_set_lhs (stmt, var_ssa);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "changed to:\t");
+ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS | TDF_MEMSYMS);
+ }
+ gimple_set_uid (gsi_stmt (gsi), -1);
+ tree vect_elt_type = TREE_TYPE (lhs);
+ tree array_type = build_array_type_nelts (vect_elt_type, array_extent);
+ tree array = create_tmp_var (array_type);
+ tree array_ssa = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL);
+ tmp_array_vars->add (array_ssa);
+ gimple *store = gimple_build_assign (array_ssa, var_ssa);
+ tree new_vdef = make_ssa_name (gimple_vop (cfun), store);
+ gsi_insert_after (&gsi, store, GSI_NEW_STMT);
+ gimple_set_vdef (store, new_vdef);
+ transformed->safe_push (store);
+ gimple_set_uid (gsi_stmt (gsi), -1);
+ tree array_ssa2 = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL);
+ tmp_array_vars->add (array_ssa2);
+ gimple *load = gimple_build_assign (lhs, array_ssa2);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "insert stmt:\t");
+ print_gimple_stmt (dump_file, store, 0, TDF_VOPS|TDF_MEMSYMS);
+ fprintf (dump_file, " and stmt:\t");
+ print_gimple_stmt (dump_file, load, 0, TDF_VOPS|TDF_MEMSYMS);
+ }
+ gimple_set_vuse (load, new_vdef);
+ gsi_insert_after (&gsi, load, GSI_NEW_STMT);
+ gimple_set_uid (gsi_stmt (gsi), -1);
+}
+
+/* Set bitmap PRODUCERS based on vec TRANSFORMED. */
+
+void
+loop_distribution::build_producers (loop_p loop, bitmap producers,
+ vec<gimple *> &transformed)
+{
+ auto_vec<gimple *, 10> stmts;
+ stmts_from_loop (loop, &stmts);
+ int i = 0;
+ gimple *stmt = NULL;
+
+ FOR_EACH_VEC_ELT (stmts, i, stmt)
+ gimple_set_uid (stmt, i);
+ i = 0;
+ FOR_EACH_VEC_ELT (transformed, i, stmt)
+ bitmap_set_bit (producers, stmt->uid);
+}
+
+/* Transform stmt
+
+ A = FOO (ARG_1);
+
+ to
+
+ STMT_1: A1 = FOO (ARG_1);
+ STMT_2: X[I] = A1;
+ STMT_3: A = X[I];
+
+ Producer is STMT_2 who defines the temp array and consumer is
+ STMT_3 who uses the temp array. */
+
+void
+loop_distribution::do_insertion (loop_p loop, struct graph *flow_only_rdg,
+ tree iv, bitmap cut_points,
+ hash_set<tree> *tmp_array_vars,
+ bitmap producers)
+{
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "=== do insertion ===\n");
+
+ auto_vec<gimple *> transformed;
+
+ /* Execution times of loop. */
+ poly_uint64 array_extent
+ = tree_to_poly_uint64 (number_of_latch_executions (loop)) + 1;
+
+ basic_block *bbs = get_loop_body_in_custom_order (loop, this,
+ bb_top_order_cmp_r);
+
+ for (int i = 0; i < int (loop->num_nodes); i++)
+ {
+ basic_block bb = bbs[i];
+
+ /* Find all cut points in bb and transform them. */
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ {
+ unsigned j = gimple_uid (gsi_stmt (gsi));
+ if (bitmap_bit_p (cut_points, j))
+ {
+ struct vertex *v = &(flow_only_rdg->vertices[j]);
+ build_temp_array (v, gsi, array_extent, iv, tmp_array_vars,
+ &transformed);
+ }
+ }
+ }
+ build_producers (loop, producers, transformed);
+ update_ssa (TODO_update_ssa);
+ free (bbs);
+}
+
+/* After temp array insertion, given stmts
+ STMT_1: M = FOO (ARG_1);
+ STMT_2: X[I] = M;
+ STMT_3: A = X[I];
+ STMT_2 is the producer, STMT_1 is its prev and STMT_3 is its next.
+ Replace M with A, and remove STMT_2 and STMT_3. */
+
+static void
+reset_gimple_assign (struct graph *flow_only_rdg, struct partition *partition,
+ gimple_stmt_iterator &gsi, int j)
+{
+ struct vertex *v = &(flow_only_rdg->vertices[j]);
+ gimple *stmt = RDGV_STMT (v);
+ gimple *prev = stmt->prev;
+ gimple *next = stmt->next;
+ tree n_lhs = gimple_assign_lhs (next);
+ gimple_assign_set_lhs (prev, n_lhs);
+ unlink_stmt_vdef (stmt);
+ if (partition)
+ bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi)));
+ gsi_remove (&gsi, true);
+ release_defs (stmt);
+ if (partition)
+ bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi)));
+ gsi_remove (&gsi, true);
+}
+
+void
+loop_distribution::remove_insertion (loop_p loop, struct graph *flow_only_rdg,
+ bitmap producers, struct partition *partition)
+{
+ basic_block *bbs = get_loop_body_in_custom_order (loop, this,
+ bb_top_order_cmp_r);
+ for (int i = 0; i < int (loop->num_nodes); i++)
+ {
+ basic_block bb = bbs[i];
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
+ gsi_next (&gsi))
+ {
+ unsigned j = gimple_uid (gsi_stmt (gsi));
+ if (bitmap_bit_p (producers, j))
+ reset_gimple_assign (flow_only_rdg, partition, gsi, j);
+ }
+ }
+ update_ssa (TODO_update_ssa);
+ free (bbs);
+}
+
+/* Insert temp arrays if isomorphic computation exists. Temp arrays will be
+ regarded as SEED_STMTS for building partitions in succeeding processes. */
+
+bool
+loop_distribution::insert_temp_arrays (loop_p loop, vec<gimple *> seed_stmts,
+ hash_set<tree> *tmp_array_vars, bitmap producers)
+{
+ struct graph *flow_only_rdg = build_rdg (loop, NULL);
+ gcc_checking_assert (flow_only_rdg != NULL);
+ tree iv = get_iv_from_seed (flow_only_rdg, seed_stmts);
+ if (iv == NULL)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Loop %d no temp array insertion: failed to get"
+ " iteration variable.\n", loop->num);
+ free_rdg (flow_only_rdg);
+ return false;
+ }
+ auto_bitmap cut_points;
+ loop_vec_info vinfo = loop_vec_info_for_loop (loop);
+ unsigned n_cut_points = get_cut_points (flow_only_rdg, cut_points, vinfo);
+ delete vinfo;
+ loop->aux = NULL;
+ if (n_cut_points == 0)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Loop %d no temp array insertion: no cut points"
+ " found.\n", loop->num);
+ free_rdg (flow_only_rdg);
+ return false;
+ }
+ do_insertion (loop, flow_only_rdg, iv, cut_points, tmp_array_vars, producers);
+ if (dump_enabled_p ())
+ {
+ dump_user_location_t loc = find_loop_location (loop);
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion done:"
+ " %d temp arrays inserted in Loop %d.\n",
+ n_cut_points, loop->num);
+ }
+ free_rdg (flow_only_rdg);
+ return true;
+}
+
+static bool find_seed_stmts_for_distribution (class loop *, vec<gimple *> *);
+
/* Distributes the code from LOOP in such a way that producer statements
are placed before consumer statements. Tries to separate only the
statements from STMTS into separate loops. Returns the number of
@@ -3814,6 +4311,34 @@ loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
return 0;
}
+ /* Try to distribute LOOP if LOOP is simple enough and unable to vectorize.
+ If LOOP has grouped loads, recursively find isomorphic stmts and insert
+ temp arrays, rebuild RDG and call find_seed_stmts_for_distribution
+ to replace STMTS. */
+
+ hash_set<tree> tmp_array_vars;
+
+ /* STMTs that define those inserted TMP_ARRAYs. */
+ auto_bitmap producers;
+
+ /* New SEED_STMTS after insertion. */
+ auto_vec<gimple *> work_list;
+ bool insert_success = false;
+ if (may_insert_temp_arrays (loop, rdg, cd))
+ {
+ if (insert_temp_arrays (loop, stmts, &tmp_array_vars, producers))
+ {
+ if (find_seed_stmts_for_distribution (loop, &work_list))
+ {
+ insert_success = true;
+ stmts = work_list;
+ }
+ else
+ remove_insertion (loop, rdg, producers, NULL);
+ rebuild_rdg (loop, rdg, cd);
+ }
+ }
+
data_reference_p dref;
for (i = 0; datarefs_vec.iterate (i, &dref); ++i)
dref->aux = (void *) (uintptr_t) i;
@@ -3894,7 +4419,7 @@ loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
for (int j = i + 1;
partitions.iterate (j, &partition); ++j)
{
- if (share_memory_accesses (rdg, into, partition))
+ if (share_memory_accesses (rdg, into, partition, &tmp_array_vars))
{
partition_merge_into (rdg, into, partition, FUSE_SHARE_REF);
partitions.unordered_remove (j);
@@ -3944,7 +4469,7 @@ loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
}
}
- finalize_partitions (loop, &partitions, &alias_ddrs);
+ finalize_partitions (loop, &partitions, &alias_ddrs, producers);
/* If there is a reduction in all partitions make sure the last one
is not classified for builtin code generation. */
@@ -3962,6 +4487,24 @@ loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
}
nbp = partitions.length ();
+
+ /* If we have inserted TMP_ARRAYs but there is only one partition left in
+ the succeeding processes, remove those inserted TMP_ARRAYs back to the
+ original version. */
+
+ if (nbp == 1 && insert_success)
+ {
+ struct partition *partition = NULL;
+ partitions.iterate (0, &partition);
+ remove_insertion (loop, rdg, producers, partition);
+ if (dump_enabled_p ())
+ {
+ dump_user_location_t loc = find_loop_location (loop);
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion removed:"
+ " unable to distribute loop %d.\n", loop->num);
+ }
+ }
+
if (nbp == 0
|| (nbp == 1 && !partition_builtin_p (partitions[0]))
|| (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
--
2.27.0.windows.1

View File

@ -0,0 +1,206 @@
From 717782ec36469eb81650b07e8b5536281a59993d Mon Sep 17 00:00:00 2001
From: zhongyunde <zhongyunde@huawei.com>
Date: Tue, 29 Nov 2022 22:12:29 +0800
Subject: [PATCH 34/35] Revert "[Backport] tree-optimization/102880 - make
PHI-OPT recognize more CFGs"
This reverts commit 77398954ce517aa011b7a254c7aa2858521b2093.
---
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c | 31 ---------
gcc/tree-ssa-phiopt.c | 73 +++++++++-------------
2 files changed, 29 insertions(+), 75 deletions(-)
delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
deleted file mode 100644
index 21aa66e38..000000000
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
+++ /dev/null
@@ -1,31 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O -fgimple -fdump-tree-phiopt1" } */
-
-int __GIMPLE (ssa,startwith("phiopt"))
-foo (int a, int b, int flag)
-{
- int res;
-
- __BB(2):
- if (flag_2(D) != 0)
- goto __BB6;
- else
- goto __BB4;
-
- __BB(4):
- if (a_3(D) > b_4(D))
- goto __BB7;
- else
- goto __BB6;
-
- __BB(6):
- goto __BB7;
-
- __BB(7):
- res_1 = __PHI (__BB4: a_3(D), __BB6: b_4(D));
- return res_1;
-}
-
-/* We should be able to detect MAX despite the extra edge into
- the middle BB. */
-/* { dg-final { scan-tree-dump "MAX" "phiopt1" } } */
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 079d29e74..21ac08145 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -219,6 +219,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
/* If either bb1's succ or bb2 or bb2's succ is non NULL. */
if (EDGE_COUNT (bb1->succs) == 0
+ || bb2 == NULL
|| EDGE_COUNT (bb2->succs) == 0)
continue;
@@ -278,14 +279,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|| (e1->flags & EDGE_FALLTHRU) == 0)
continue;
+ /* Also make sure that bb1 only have one predecessor and that it
+ is bb. */
+ if (!single_pred_p (bb1)
+ || single_pred (bb1) != bb)
+ continue;
+
if (do_store_elim)
{
- /* Also make sure that bb1 only have one predecessor and that it
- is bb. */
- if (!single_pred_p (bb1)
- || single_pred (bb1) != bb)
- continue;
-
/* bb1 is the middle block, bb2 the join block, bb the split block,
e1 the fallthrough edge from bb1 to bb2. We can't do the
optimization if the join block has more than two predecessors. */
@@ -330,11 +331,10 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
node. */
gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE);
- gphi *newphi;
- if (single_pred_p (bb1)
- && (newphi = factor_out_conditional_conversion (e1, e2, phi,
- arg0, arg1,
- cond_stmt)))
+ gphi *newphi = factor_out_conditional_conversion (e1, e2, phi,
+ arg0, arg1,
+ cond_stmt);
+ if (newphi != NULL)
{
phi = newphi;
/* factor_out_conditional_conversion may create a new PHI in
@@ -355,14 +355,12 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
else if (!early_p
- && single_pred_p (bb1)
&& cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2,
phi, arg0, arg1))
cfgchanged = true;
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
- else if (single_pred_p (bb1)
- && spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
+ else if (spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
}
}
@@ -393,41 +391,35 @@ replace_phi_edge_with_variable (basic_block cond_block,
edge e, gphi *phi, tree new_tree)
{
basic_block bb = gimple_bb (phi);
+ basic_block block_to_remove;
gimple_stmt_iterator gsi;
/* Change the PHI argument to new. */
SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);
/* Remove the empty basic block. */
- edge edge_to_remove;
if (EDGE_SUCC (cond_block, 0)->dest == bb)
- edge_to_remove = EDGE_SUCC (cond_block, 1);
- else
- edge_to_remove = EDGE_SUCC (cond_block, 0);
- if (EDGE_COUNT (edge_to_remove->dest->preds) == 1)
{
- e->flags |= EDGE_FALLTHRU;
- e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
- e->probability = profile_probability::always ();
- delete_basic_block (edge_to_remove->dest);
-
- /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
- gsi = gsi_last_bb (cond_block);
- gsi_remove (&gsi, true);
+ EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
+ EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
+ EDGE_SUCC (cond_block, 0)->probability = profile_probability::always ();
+
+ block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
}
else
{
- /* If there are other edges into the middle block make
- CFG cleanup deal with the edge removal to avoid
- updating dominators here in a non-trivial way. */
- gcond *cond = as_a <gcond *> (last_stmt (cond_block));
- if (edge_to_remove->flags & EDGE_TRUE_VALUE)
- gimple_cond_make_false (cond);
- else
- gimple_cond_make_true (cond);
+ EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
+ EDGE_SUCC (cond_block, 1)->flags
+ &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
+ EDGE_SUCC (cond_block, 1)->probability = profile_probability::always ();
+
+ block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
}
+ delete_basic_block (block_to_remove);
- statistics_counter_event (cfun, "Replace PHI with variable", 1);
+ /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
+ gsi = gsi_last_bb (cond_block);
+ gsi_remove (&gsi, true);
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
@@ -854,9 +846,6 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
allow it and move it once the transformation is done. */
if (!empty_block_p (middle_bb))
{
- if (!single_pred_p (middle_bb))
- return false;
-
stmt_to_move = last_and_only_stmt (middle_bb);
if (!stmt_to_move)
return false;
@@ -1236,11 +1225,6 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
}
else
{
- if (!single_pred_p (middle_bb))
- return 0;
- statistics_counter_event (cfun, "Replace PHI with "
- "variable/value_replacement", 1);
-
/* Replace the PHI arguments with arg. */
SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
@@ -1255,6 +1239,7 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
}
return 1;
}
+
}
/* Now optimize (x != 0) ? x + y : y to just x + y. */
--
2.27.0.windows.1

File diff suppressed because it is too large Load Diff

View File

@ -61,7 +61,7 @@
Summary: Various compilers (C, C++, Objective-C, ...)
Name: gcc
Version: %{gcc_version}
Release: 16
Release: 17
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
URL: https://gcc.gnu.org
@ -162,6 +162,41 @@ Patch45: 0045-Transposed-SLP-Enable-Transposed-SLP.patch
Patch46: 0046-ArrayWidenCompare-Add-a-new-optimization-for-array-c.patch
Patch47: 0047-DFE-Fix-the-bug-caused-by-inconsistent-types.patch
Patch48: 0048-Struct-Reorg-Type-simplify-limitation-when-in-struct.patch
Patch49: 0049-build-Add-some-file-right-to-executable.patch
Patch50: 0050-Backport-phiopt-Optimize-x-1024-0-to-int-x-10-PR9769.patch
Patch51: 0051-Backport-phiopt-Fix-up-conditional_replacement-PR993.patch
Patch52: 0052-Backport-phiopt-Handle-bool-in-two_value_replacement.patch
Patch53: 0053-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch
Patch54: 0054-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch
Patch55: 0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch
Patch56: 0056-Backport-phiopt-Optimize-x-y-cmp-z-PR94589.patch
Patch57: 0057-Backport-Add-support-for-__builtin_bswap128.patch
Patch58: 0058-Backport-tree-optimization-95393-fold-MIN-MAX_EXPR-g.patch
Patch59: 0059-Backport-Add-a-couple-of-A-CST1-CST2-match-and-simpl.patch
Patch60: 0060-Backport-Optimize-x-0-y-y-to-x-31-y-in-match.pd.patch
Patch61: 0061-Backport-Replace-conditional_replacement-with-match-.patch
Patch62: 0062-Backport-Allow-match-and-simplified-phiopt-to-run-in.patch
Patch63: 0063-Backport-Improve-match_simplify_replacement-in-phi-o.patch
Patch64: 0064-Backport-phiopt-Use-gphi-phi-instead-of-gimple-phi-s.patch
Patch65: 0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch
Patch66: 0066-Backport-tree-optimization-102880-make-PHI-OPT-recog.patch
Patch67: 0067-Backport-tree-Add-vector_element_bits-_tree-PR94980-.patch
Patch68: 0068-Backport-Lower-VEC_COND_EXPR-into-internal-functions.patch
Patch69: 0069-Backport-gimple-match-Add-a-gimple_extract_op-functi.patch
Patch70: 0070-Backport-aarch64-Fix-subs_compare_2.c-regression-PR1.patch
Patch71: 0071-PHIOPT-Disable-the-match-A-CST1-0-when-the-CST1-is-n.patch
Patch72: 0072-Struct-Reorg-Merge-struct_layout-pass-into-struct_re.patch
Patch73: 0073-PHIOPT-Add-A-B-op-CST-B-match-and-simplify-optimizat.patch
Patch74: 0074-FORWPROP-Fold-series-of-instructions-into-mul.patch
Patch75: 0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch
Patch76: 0076-Struct-Reorg-Fix-speccpu2006-462-double-free-I60YUV.patch
Patch77: 0077-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch
Patch78: 0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch
Patch79: 0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch
Patch80: 0080-Struct-Reorg-Add-Unsafe-Structure-Pointer-Compressio.patch
Patch81: 0081-Loop-distribution-Insert-temp-arrays-built-from-isom.patch
Patch82: 0082-Revert-Backport-tree-optimization-102880-make-PHI-OP.patch
Patch83: 0083-Struct-reorg-Add-struct-semi-relayout-optimize.patch
%global gcc_target_platform %{_arch}-linux-gnu
@ -664,7 +699,41 @@ not stable, so plugins must be rebuilt any time GCC is updated.
%patch46 -p1
%patch47 -p1
%patch48 -p1
%patch49 -p1
%patch50 -p1
%patch51 -p1
%patch52 -p1
%patch53 -p1
%patch54 -p1
%patch55 -p1
%patch56 -p1
%patch57 -p1
%patch58 -p1
%patch59 -p1
%patch60 -p1
%patch61 -p1
%patch62 -p1
%patch63 -p1
%patch64 -p1
%patch65 -p1
%patch66 -p1
%patch67 -p1
%patch68 -p1
%patch69 -p1
%patch70 -p1
%patch71 -p1
%patch72 -p1
%patch73 -p1
%patch74 -p1
%patch75 -p1
%patch76 -p1
%patch77 -p1
%patch78 -p1
%patch79 -p1
%patch80 -p1
%patch81 -p1
%patch82 -p1
%patch83 -p1
%build
@ -2684,6 +2753,12 @@ end
%doc rpm.doc/changelogs/libcc1/ChangeLog*
%changelog
* Thu Dec 1 2022 benniaobufeijiushiji <linda7@huawei.com> - 10.3.1-17
- Type:Sync
- ID:NA
- SUG:NA
- DESC:Sync patch from openeuler/gcc
* Fri Sep 16 2022 eastb233 <xiezhiheng@huawei.com> - 10.3.1-16
- Type:Sync
- ID:NA