[Sync] Sync patch from openeuler/gcc
Sync patch from openeuler/gcc - 20221201 (cherry picked from commit 5487e8942c694fd317f2cbf1662e9eaf33f2f612)
This commit is contained in:
parent
404b6b59b8
commit
a41360f2fb
21
0049-build-Add-some-file-right-to-executable.patch
Normal file
21
0049-build-Add-some-file-right-to-executable.patch
Normal file
@@ -0,0 +1,21 @@
|
||||
From 7dffda64fcbbd522616d7dc9c70530d146f4fed6 Mon Sep 17 00:00:00 2001
|
||||
From: zhongyunde <zhongyunde@huawei.com>
|
||||
Date: Tue, 1 Nov 2022 16:38:38 +0800
|
||||
Subject: [PATCH 01/35] [build] Add executable permission to some files
|
||||
|
||||
---
|
||||
libgcc/mkheader.sh | 0
|
||||
move-if-change | 0
|
||||
2 files changed, 0 insertions(+), 0 deletions(-)
|
||||
mode change 100644 => 100755 libgcc/mkheader.sh
|
||||
mode change 100644 => 100755 move-if-change
|
||||
|
||||
diff --git a/libgcc/mkheader.sh b/libgcc/mkheader.sh
|
||||
old mode 100644
|
||||
new mode 100755
|
||||
diff --git a/move-if-change b/move-if-change
|
||||
old mode 100644
|
||||
new mode 100755
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
186
0050-Backport-phiopt-Optimize-x-1024-0-to-int-x-10-PR9769.patch
Normal file
186
0050-Backport-phiopt-Optimize-x-1024-0-to-int-x-10-PR9769.patch
Normal file
@@ -0,0 +1,186 @@
|
||||
From c690da762e873d0f5c66ea084e420ba4842354a6 Mon Sep 17 00:00:00 2001
|
||||
From: Jakub Jelinek <jakub@redhat.com>
|
||||
Date: Wed, 4 Nov 2020 11:55:29 +0100
|
||||
Subject: [PATCH 02/35] [Backport] phiopt: Optimize x ? 1024 : 0 to (int) x <<
|
||||
10 [PR97690]
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=3e190757fa332d327bee27495f37beb01155cfab
|
||||
|
||||
The following patch generalizes the x ? 1 : 0 -> (int) x optimization
|
||||
to handle also left shifts by constant.
|
||||
|
||||
During x86_64-linux and i686-linux bootstraps + regtests it triggered
|
||||
in 1514 unique non-LTO -m64 cases (sort -u on log mentioning
|
||||
filename, function name and shift count) and 1866 -m32 cases.
|
||||
|
||||
Unfortunately, the patch regresses (before the tests have been adjusted):
|
||||
+FAIL: gcc.dg/tree-ssa/ssa-ccp-11.c scan-tree-dump-times optimized "if " 0
|
||||
+FAIL: gcc.dg/vect/bb-slp-pattern-2.c -flto -ffat-lto-objects scan-tree-dump-times slp1 "optimized: basic block" 1
|
||||
+FAIL: gcc.dg/vect/bb-slp-pattern-2.c scan-tree-dump-times slp1 "optimized: basic block" 1
|
||||
and in both cases it actually results in worse code.
|
||||
|
||||
> > We'd need some optimization that would go through all PHI edges and
|
||||
> > compute if some use of the phi results don't actually compute a constant
|
||||
> > across all the PHI edges - 1 & 0 and 0 & 1 is always 0.
|
||||
|
||||
> PRE should do this, IMHO only optimizing it at -O2 is fine.
|
||||
|
||||
> > Similarly, in the slp vectorization test there is:
|
||||
> > a[0] = b[0] ? 1 : 7;
|
||||
|
||||
> note this, carefully avoiding the already "optimized" b[0] ? 1 : 0 ...
|
||||
|
||||
> So the option is to put : 7 in the 2, 4 and 8 case as well. The testcase
|
||||
> wasn't added for any real-world case but is artificial I guess for
|
||||
> COND_EXPR handling of invariants.
|
||||
|
||||
> But yeah, for things like SLP it means we eventually have to
|
||||
> implement reverse transforms for all of this to make the lanes
|
||||
> matching. But that's true anyway for things like x + 1 vs. x + 0
|
||||
> or x / 3 vs. x / 2 or other simplifications we do.
|
||||
|
||||
2020-11-04 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/97690
|
||||
* tree-ssa-phiopt.c (conditional_replacement): Also optimize
|
||||
cond ? pow2p_cst : 0 as ((type) cond) << cst.
|
||||
|
||||
* gcc.dg/tree-ssa/phi-opt-22.c: New test.
|
||||
* gcc.dg/tree-ssa/ssa-ccp-11.c: Use -O2 instead of -O1.
|
||||
* gcc.dg/vect/bb-slp-pattern-2.c (foo): Use ? 2 : 7, ? 4 : 7 and
|
||||
? 8 : 7 instead of ? 2 : 0, ? 4 : 0, ? 8 : 0.
|
||||
---
|
||||
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c | 11 ++++++
|
||||
gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c | 2 +-
|
||||
gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c | 6 ++--
|
||||
gcc/tree-ssa-phiopt.c | 38 ++++++++++++++------
|
||||
4 files changed, 43 insertions(+), 14 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
|
||||
new file mode 100644
|
||||
index 000000000..fd3706666
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-22.c
|
||||
@@ -0,0 +1,11 @@
|
||||
+/* PR tree-optimization/97690 */
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
|
||||
+
|
||||
+int foo (_Bool d) { return d ? 2 : 0; }
|
||||
+int bar (_Bool d) { return d ? 1 : 0; }
|
||||
+int baz (_Bool d) { return d ? -__INT_MAX__ - 1 : 0; }
|
||||
+int qux (_Bool d) { return d ? 1024 : 0; }
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-not "if" "phiopt2" } } */
|
||||
+/* { dg-final { scan-tree-dump-times " << " 3 "phiopt2" } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
|
||||
index 36b8e7fc8..d70ea5a01 100644
|
||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-ccp-11.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O1 -fdump-tree-optimized" } */
|
||||
+/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
/* Test for CPROP across a DAG. */
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
|
||||
index d32cb7585..e64f0115a 100644
|
||||
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
|
||||
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-2.c
|
||||
@@ -13,13 +13,13 @@ foo (short * __restrict__ a, int * __restrict__ b, int stride)
|
||||
for (i = 0; i < N/stride; i++, a += stride, b += stride)
|
||||
{
|
||||
a[0] = b[0] ? 1 : 7;
|
||||
- a[1] = b[1] ? 2 : 0;
|
||||
+ a[1] = b[1] ? 2 : 7;
|
||||
a[2] = b[2] ? 3 : 0;
|
||||
- a[3] = b[3] ? 4 : 0;
|
||||
+ a[3] = b[3] ? 4 : 7;
|
||||
a[4] = b[4] ? 5 : 0;
|
||||
a[5] = b[5] ? 6 : 0;
|
||||
a[6] = b[6] ? 7 : 0;
|
||||
- a[7] = b[7] ? 8 : 0;
|
||||
+ a[7] = b[7] ? 8 : 7;
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||
index 591b6435f..85587e8d1 100644
|
||||
--- a/gcc/tree-ssa-phiopt.c
|
||||
+++ b/gcc/tree-ssa-phiopt.c
|
||||
@@ -753,7 +753,9 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
gimple_stmt_iterator gsi;
|
||||
edge true_edge, false_edge;
|
||||
tree new_var, new_var2;
|
||||
- bool neg;
|
||||
+ bool neg = false;
|
||||
+ int shift = 0;
|
||||
+ tree nonzero_arg;
|
||||
|
||||
/* FIXME: Gimplification of complex type is too hard for now. */
|
||||
/* We aren't prepared to handle vectors either (and it is a question
|
||||
@@ -764,14 +766,22 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
|| POINTER_TYPE_P (TREE_TYPE (arg1))))
|
||||
return false;
|
||||
|
||||
- /* The PHI arguments have the constants 0 and 1, or 0 and -1, then
|
||||
- convert it to the conditional. */
|
||||
- if ((integer_zerop (arg0) && integer_onep (arg1))
|
||||
- || (integer_zerop (arg1) && integer_onep (arg0)))
|
||||
- neg = false;
|
||||
- else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
|
||||
- || (integer_zerop (arg1) && integer_all_onesp (arg0)))
|
||||
+ /* The PHI arguments have the constants 0 and 1, or 0 and -1 or
|
||||
+ 0 and (1 << cst), then convert it to the conditional. */
|
||||
+ if (integer_zerop (arg0))
|
||||
+ nonzero_arg = arg1;
|
||||
+ else if (integer_zerop (arg1))
|
||||
+ nonzero_arg = arg0;
|
||||
+ else
|
||||
+ return false;
|
||||
+ if (integer_all_onesp (nonzero_arg))
|
||||
neg = true;
|
||||
+ else if (integer_pow2p (nonzero_arg))
|
||||
+ {
|
||||
+ shift = tree_log2 (nonzero_arg);
|
||||
+ if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg)))
|
||||
+ return false;
|
||||
+ }
|
||||
else
|
||||
return false;
|
||||
|
||||
@@ -783,12 +793,12 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
falls through into BB.
|
||||
|
||||
There is a single PHI node at the join point (BB) and its arguments
|
||||
- are constants (0, 1) or (0, -1).
|
||||
+ are constants (0, 1) or (0, -1) or (0, (1 << shift)).
|
||||
|
||||
So, given the condition COND, and the two PHI arguments, we can
|
||||
rewrite this PHI into non-branching code:
|
||||
|
||||
- dest = (COND) or dest = COND'
|
||||
+ dest = (COND) or dest = COND' or dest = (COND) << shift
|
||||
|
||||
We use the condition as-is if the argument associated with the
|
||||
true edge has the value one or the argument associated with the
|
||||
@@ -823,6 +833,14 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
cond = fold_build1_loc (gimple_location (stmt),
|
||||
NEGATE_EXPR, TREE_TYPE (cond), cond);
|
||||
}
|
||||
+ else if (shift)
|
||||
+ {
|
||||
+ cond = fold_convert_loc (gimple_location (stmt),
|
||||
+ TREE_TYPE (result), cond);
|
||||
+ cond = fold_build2_loc (gimple_location (stmt),
|
||||
+ LSHIFT_EXPR, TREE_TYPE (cond), cond,
|
||||
+ build_int_cst (integer_type_node, shift));
|
||||
+ }
|
||||
|
||||
/* Insert our new statements at the end of conditional block before the
|
||||
COND_STMT. */
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
@@ -0,0 +1,92 @@
|
||||
From 79a974bc7bb67cf425a7839f3c1f5689e41c7ee8 Mon Sep 17 00:00:00 2001
|
||||
From: Jakub Jelinek <jakub@redhat.com>
|
||||
Date: Tue, 9 Mar 2021 19:13:11 +0100
|
||||
Subject: [PATCH 03/35] [Backport] phiopt: Fix up conditional_replacement
|
||||
[PR99305]
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b610c30453d8e4cc88693d85a5a100d089640be5
|
||||
|
||||
Before my PR97690 changes, conditional_replacement would not set neg
|
||||
when the nonzero arg was boolean true.
|
||||
I've simplified the testing, so that it first finds the zero argument
|
||||
and then checks the other argument for all the handled cases
|
||||
(1, -1 and 1 << X, where the last case is what the patch added support for).
|
||||
But, unfortunately I've placed the integer_all_onesp test first.
|
||||
For unsigned precision 1 types such as bool integer_all_onesp, integer_onep
|
||||
and integer_pow2p can all be true and the code set neg to true in that case,
|
||||
which is undesirable.
|
||||
|
||||
The following patch tests integer_pow2p first (which is trivially true
|
||||
for integer_onep too and tree_log2 in that case gives shift == 0)
|
||||
and only if that isn't the case, integer_all_onesp.
|
||||
|
||||
2021-03-09 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/99305
|
||||
* tree-ssa-phiopt.c (conditional_replacement): Test integer_pow2p
|
||||
before integer_all_onesp instead of vice versa.
|
||||
|
||||
* g++.dg/opt/pr99305.C: New test.
|
||||
---
|
||||
gcc/testsuite/g++.dg/opt/pr99305.C | 26 ++++++++++++++++++++++++++
|
||||
gcc/tree-ssa-phiopt.c | 6 +++---
|
||||
2 files changed, 29 insertions(+), 3 deletions(-)
|
||||
create mode 100644 gcc/testsuite/g++.dg/opt/pr99305.C
|
||||
|
||||
diff --git a/gcc/testsuite/g++.dg/opt/pr99305.C b/gcc/testsuite/g++.dg/opt/pr99305.C
|
||||
new file mode 100644
|
||||
index 000000000..8a91277e7
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/g++.dg/opt/pr99305.C
|
||||
@@ -0,0 +1,26 @@
|
||||
+// PR tree-optimization/99305
|
||||
+// { dg-do compile }
|
||||
+// { dg-options "-O3 -fno-ipa-icf -fdump-tree-optimized" }
|
||||
+// { dg-final { scan-tree-dump-times " = \\\(unsigned char\\\) c_\[0-9]*\\\(D\\\);" 3 "optimized" } }
|
||||
+// { dg-final { scan-tree-dump-times " = \[^\n\r]* \\+ \[0-9]*;" 3 "optimized" } }
|
||||
+// { dg-final { scan-tree-dump-times " = \[^\n\r]* <= 9;" 3 "optimized" } }
|
||||
+// { dg-final { scan-tree-dump-not "if \\\(c_\[0-9]*\\\(D\\\) \[!=]= 0\\\)" "optimized" } }
|
||||
+// { dg-final { scan-tree-dump-not " = PHI <" "optimized" } }
|
||||
+
|
||||
+bool
|
||||
+foo (char c)
|
||||
+{
|
||||
+ return c >= 48 && c <= 57;
|
||||
+}
|
||||
+
|
||||
+bool
|
||||
+bar (char c)
|
||||
+{
|
||||
+ return c != 0 && foo (c);
|
||||
+}
|
||||
+
|
||||
+bool
|
||||
+baz (char c)
|
||||
+{
|
||||
+ return c != 0 && c >= 48 && c <= 57;
|
||||
+}
|
||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||
index 85587e8d1..b9be28474 100644
|
||||
--- a/gcc/tree-ssa-phiopt.c
|
||||
+++ b/gcc/tree-ssa-phiopt.c
|
||||
@@ -774,14 +774,14 @@ conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
nonzero_arg = arg0;
|
||||
else
|
||||
return false;
|
||||
- if (integer_all_onesp (nonzero_arg))
|
||||
- neg = true;
|
||||
- else if (integer_pow2p (nonzero_arg))
|
||||
+ if (integer_pow2p (nonzero_arg))
|
||||
{
|
||||
shift = tree_log2 (nonzero_arg);
|
||||
if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg)))
|
||||
return false;
|
||||
}
|
||||
+ else if (integer_all_onesp (nonzero_arg))
|
||||
+ neg = true;
|
||||
else
|
||||
return false;
|
||||
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
122
0052-Backport-phiopt-Handle-bool-in-two_value_replacement.patch
Normal file
122
0052-Backport-phiopt-Handle-bool-in-two_value_replacement.patch
Normal file
@@ -0,0 +1,122 @@
|
||||
From 09263d5ed4d81a008ca8ffcc2883dc766e7874d5 Mon Sep 17 00:00:00 2001
|
||||
From: Jakub Jelinek <jakub@redhat.com>
|
||||
Date: Sun, 6 Dec 2020 10:58:10 +0100
|
||||
Subject: [PATCH 04/35] [Backport] phiopt: Handle bool in two_value_replacement
|
||||
[PR96232]
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=8c23434fdadcf4caa1f0e966294c5f67ccf4bcf9
|
||||
|
||||
The following patch improves code generation on the included testcase by
|
||||
enabling two_value_replacement on booleans. It does that only for arg0/arg1
|
||||
values that conditional_replacement doesn't handle. Additionally
|
||||
it limits two_value_replacement optimization to the late phiopt like
|
||||
conditional_replacement.
|
||||
|
||||
2020-12-06 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/96232
|
||||
* tree-ssa-phiopt.c (two_value_replacement): Optimize even boolean lhs
|
||||
cases as long as arg0 has wider precision and conditional_replacement
|
||||
doesn't handle that case.
|
||||
(tree_ssa_phiopt_worker): Don't call two_value_replacement during
|
||||
early phiopt.
|
||||
|
||||
* gcc.dg/tree-ssa/pr96232-2.c: New test.
|
||||
* gcc.dg/tree-ssa/pr88676-2.c: Check phiopt2 dump rather than phiopt1.
|
||||
---
|
||||
gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c | 4 ++--
|
||||
gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c | 18 ++++++++++++++++++
|
||||
gcc/tree-ssa-phiopt.c | 23 +++++++++++++++++++----
|
||||
3 files changed, 39 insertions(+), 6 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c
|
||||
index 0e616365b..ea88407b6 100644
|
||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr88676-2.c
|
||||
@@ -1,7 +1,7 @@
|
||||
/* PR tree-optimization/88676 */
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -fdump-tree-phiopt1" } */
|
||||
-/* { dg-final { scan-tree-dump-not " = PHI <" "phiopt1" { target le } } } */
|
||||
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
|
||||
+/* { dg-final { scan-tree-dump-not " = PHI <" "phiopt2" { target le } } } */
|
||||
|
||||
struct foo1 {
|
||||
int i:1;
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c
|
||||
new file mode 100644
|
||||
index 000000000..9f51820ed
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96232-2.c
|
||||
@@ -0,0 +1,18 @@
|
||||
+/* PR tree-optimization/96232 */
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
+/* { dg-final { scan-tree-dump " 38 - " "optimized" } } */
|
||||
+/* { dg-final { scan-tree-dump " \\+ 97;" "optimized" } } */
|
||||
+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" } } */
|
||||
+
|
||||
+int
|
||||
+foo (_Bool x)
|
||||
+{
|
||||
+ return x ? 37 : 38;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+bar (_Bool x)
|
||||
+{
|
||||
+ return x ? 98 : 97;
|
||||
+}
|
||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||
index b9be28474..0623d740d 100644
|
||||
--- a/gcc/tree-ssa-phiopt.c
|
||||
+++ b/gcc/tree-ssa-phiopt.c
|
||||
@@ -339,7 +339,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||||
}
|
||||
|
||||
/* Do the replacement of conditional if it can be done. */
|
||||
- if (two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
|
||||
+ if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
else if (!early_p
|
||||
&& conditional_replacement (bb, bb1, e1, e2, phi,
|
||||
@@ -636,7 +636,6 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
|
||||
if (TREE_CODE (lhs) != SSA_NAME
|
||||
|| !INTEGRAL_TYPE_P (TREE_TYPE (lhs))
|
||||
- || TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
|
||||
|| TREE_CODE (rhs) != INTEGER_CST)
|
||||
return false;
|
||||
|
||||
@@ -649,9 +648,25 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
return false;
|
||||
}
|
||||
|
||||
+ /* Defer boolean x ? 0 : {1,-1} or x ? {1,-1} : 0 to
|
||||
+ conditional_replacement. */
|
||||
+ if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
|
||||
+ && (integer_zerop (arg0)
|
||||
+ || integer_zerop (arg1)
|
||||
+ || TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE
|
||||
+ || (TYPE_PRECISION (TREE_TYPE (arg0))
|
||||
+ <= TYPE_PRECISION (TREE_TYPE (lhs)))))
|
||||
+ return false;
|
||||
+
|
||||
wide_int min, max;
|
||||
- if (get_range_info (lhs, &min, &max) != VR_RANGE
|
||||
- || min + 1 != max
|
||||
+ if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE)
|
||||
+ {
|
||||
+ min = wi::to_wide (boolean_false_node);
|
||||
+ max = wi::to_wide (boolean_true_node);
|
||||
+ }
|
||||
+ else if (get_range_info (lhs, &min, &max) != VR_RANGE)
|
||||
+ return false;
|
||||
+ if (min + 1 != max
|
||||
|| (wi::to_wide (rhs) != min
|
||||
&& wi::to_wide (rhs) != max))
|
||||
return false;
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
256
0053-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch
Normal file
256
0053-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch
Normal file
@@ -0,0 +1,256 @@
|
||||
From a92cf465f10585350f7cd5739457c3f2852cfc86 Mon Sep 17 00:00:00 2001
|
||||
From: Jakub Jelinek <jakub@redhat.com>
|
||||
Date: Wed, 21 Oct 2020 10:51:33 +0200
|
||||
Subject: [PATCH 05/35] [Backport] phiopt: Optimize x ? __builtin_clz (x) : 32
|
||||
in GIMPLE [PR97503]
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=5244b4af5e47bc98a2a9cf36f048981583a1b163
|
||||
|
||||
While we have at the RTL level noce_try_ifelse_collapse combined with
|
||||
simplify_cond_clz_ctz, that optimization doesn't always trigger because
|
||||
e.g. on powerpc there is a define_insn to compare a reg against zero and
|
||||
copy that register to another one and so we end up with a different pseudo
|
||||
in the simplify_cond_clz_ctz test and punt.
|
||||
|
||||
For targets that define C?Z_DEFINED_VALUE_AT_ZERO to 2 for certain modes,
|
||||
we can optimize it already in phiopt though, just need to ensure that
|
||||
we transform the __builtin_c?z* calls into .C?Z ifns because my recent
|
||||
VRP changes codified that the builtin calls are always undefined at zero,
|
||||
while ifns honor C?Z_DEFINED_VALUE_AT_ZERO equal to 2.
|
||||
And, in phiopt we already have popcount handling that does pretty much the
|
||||
same thing, except for always using a zero value rather than the one set
|
||||
by C?Z_DEFINED_VALUE_AT_ZERO.
|
||||
|
||||
So, this patch extends that function to handle not just popcount, but also
|
||||
clz and ctz.
|
||||
|
||||
2020-10-21 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/97503
|
||||
* tree-ssa-phiopt.c: Include internal-fn.h.
|
||||
(cond_removal_in_popcount_pattern): Rename to ...
|
||||
(cond_removal_in_popcount_clz_ctz_pattern): ... this. Handle not just
|
||||
popcount, but also clz and ctz if it has C?Z_DEFINED_VALUE_AT_ZERO 2.
|
||||
|
||||
* gcc.dg/tree-ssa/pr97503.c: New test.
|
||||
---
|
||||
gcc/testsuite/gcc.dg/tree-ssa/pr97503.c | 19 +++++
|
||||
gcc/tree-ssa-phiopt.c | 100 ++++++++++++++++++------
|
||||
2 files changed, 95 insertions(+), 24 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr97503.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c b/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c
|
||||
new file mode 100644
|
||||
index 000000000..3a3dae6c7
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr97503.c
|
||||
@@ -0,0 +1,19 @@
|
||||
+/* PR tree-optimization/97503 */
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
+/* { dg-additional-options "-mbmi -mlzcnt" { target i?86-*-* x86_64-*-* } } */
|
||||
+/* { dg-final { scan-tree-dump-times "\.CLZ" 2 "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-* } && lp64 } } } } */
|
||||
+/* { dg-final { scan-tree-dump-not "__builtin_clz" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
|
||||
+/* { dg-final { scan-tree-dump-not "PHI <" "optimized" { target { { i?86-*-* x86_64-*-* aarch64-*-* powerpc*-*-*} && lp64 } } } } */
|
||||
+
|
||||
+int
|
||||
+foo (int x)
|
||||
+{
|
||||
+ return x ? __builtin_clz (x) : 32;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+bar (unsigned long long x)
|
||||
+{
|
||||
+ return x ? __builtin_clzll (x) : 64;
|
||||
+}
|
||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||
index 0623d740d..c1e11916e 100644
|
||||
--- a/gcc/tree-ssa-phiopt.c
|
||||
+++ b/gcc/tree-ssa-phiopt.c
|
||||
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "tree-inline.h"
|
||||
#include "case-cfn-macros.h"
|
||||
#include "tree-eh.h"
|
||||
+#include "internal-fn.h"
|
||||
|
||||
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
|
||||
static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
|
||||
@@ -60,8 +61,9 @@ static bool minmax_replacement (basic_block, basic_block,
|
||||
edge, edge, gimple *, tree, tree);
|
||||
static bool abs_replacement (basic_block, basic_block,
|
||||
edge, edge, gimple *, tree, tree);
|
||||
-static bool cond_removal_in_popcount_pattern (basic_block, basic_block,
|
||||
- edge, edge, gimple *, tree, tree);
|
||||
+static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
|
||||
+ edge, edge, gimple *,
|
||||
+ tree, tree);
|
||||
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
|
||||
hash_set<tree> *);
|
||||
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
|
||||
@@ -348,8 +350,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
else if (!early_p
|
||||
- && cond_removal_in_popcount_pattern (bb, bb1, e1, e2,
|
||||
- phi, arg0, arg1))
|
||||
+ && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
|
||||
+ e2, phi, arg0,
|
||||
+ arg1))
|
||||
cfgchanged = true;
|
||||
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
@@ -1771,16 +1774,20 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
|
||||
<bb 4>
|
||||
c_12 = PHI <_9(2)>
|
||||
-*/
|
||||
+
|
||||
+ Similarly for __builtin_clz or __builtin_ctz if
|
||||
+ C?Z_DEFINED_VALUE_AT_ZERO is 2, optab is present and
|
||||
+ instead of 0 above it uses the value from that macro. */
|
||||
|
||||
static bool
|
||||
-cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
|
||||
- edge e1, edge e2,
|
||||
- gimple *phi, tree arg0, tree arg1)
|
||||
+cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
||||
+ basic_block middle_bb,
|
||||
+ edge e1, edge e2, gimple *phi,
|
||||
+ tree arg0, tree arg1)
|
||||
{
|
||||
gimple *cond;
|
||||
gimple_stmt_iterator gsi, gsi_from;
|
||||
- gimple *popcount;
|
||||
+ gimple *call;
|
||||
gimple *cast = NULL;
|
||||
tree lhs, arg;
|
||||
|
||||
@@ -1798,35 +1805,67 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
|
||||
gsi_next_nondebug (&gsi);
|
||||
if (!gsi_end_p (gsi))
|
||||
{
|
||||
- popcount = gsi_stmt (gsi);
|
||||
+ call = gsi_stmt (gsi);
|
||||
gsi_next_nondebug (&gsi);
|
||||
if (!gsi_end_p (gsi))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
{
|
||||
- popcount = cast;
|
||||
+ call = cast;
|
||||
cast = NULL;
|
||||
}
|
||||
|
||||
- /* Check that we have a popcount builtin. */
|
||||
- if (!is_gimple_call (popcount))
|
||||
+ /* Check that we have a popcount/clz/ctz builtin. */
|
||||
+ if (!is_gimple_call (call) || gimple_call_num_args (call) != 1)
|
||||
+ return false;
|
||||
+
|
||||
+ arg = gimple_call_arg (call, 0);
|
||||
+ lhs = gimple_get_lhs (call);
|
||||
+
|
||||
+ if (lhs == NULL_TREE)
|
||||
return false;
|
||||
- combined_fn cfn = gimple_call_combined_fn (popcount);
|
||||
+
|
||||
+ combined_fn cfn = gimple_call_combined_fn (call);
|
||||
+ internal_fn ifn = IFN_LAST;
|
||||
+ int val = 0;
|
||||
switch (cfn)
|
||||
{
|
||||
CASE_CFN_POPCOUNT:
|
||||
break;
|
||||
+ CASE_CFN_CLZ:
|
||||
+ if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
|
||||
+ {
|
||||
+ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
|
||||
+ if (direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (arg),
|
||||
+ OPTIMIZE_FOR_BOTH)
|
||||
+ && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
|
||||
+ {
|
||||
+ ifn = IFN_CLZ;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ return false;
|
||||
+ CASE_CFN_CTZ:
|
||||
+ if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
|
||||
+ {
|
||||
+ scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
|
||||
+ if (direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (arg),
|
||||
+ OPTIMIZE_FOR_BOTH)
|
||||
+ && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
|
||||
+ {
|
||||
+ ifn = IFN_CTZ;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ return false;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
- arg = gimple_call_arg (popcount, 0);
|
||||
- lhs = gimple_get_lhs (popcount);
|
||||
-
|
||||
if (cast)
|
||||
{
|
||||
- /* We have a cast stmt feeding popcount builtin. */
|
||||
+ /* We have a cast stmt feeding popcount/clz/ctz builtin. */
|
||||
/* Check that we have a cast prior to that. */
|
||||
if (gimple_code (cast) != GIMPLE_ASSIGN
|
||||
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (cast)))
|
||||
@@ -1839,7 +1878,7 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
|
||||
|
||||
cond = last_stmt (cond_bb);
|
||||
|
||||
- /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount
|
||||
+ /* Cond_bb has a check for b_4 [!=|==] 0 before calling the popcount/clz/ctz
|
||||
builtin. */
|
||||
if (gimple_code (cond) != GIMPLE_COND
|
||||
|| (gimple_cond_code (cond) != NE_EXPR
|
||||
@@ -1859,10 +1898,13 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
|
||||
}
|
||||
|
||||
/* Check PHI arguments. */
|
||||
- if (lhs != arg0 || !integer_zerop (arg1))
|
||||
+ if (lhs != arg0
|
||||
+ || TREE_CODE (arg1) != INTEGER_CST
|
||||
+ || wi::to_wide (arg1) != val)
|
||||
return false;
|
||||
|
||||
- /* And insert the popcount builtin and cast stmt before the cond_bb. */
|
||||
+ /* And insert the popcount/clz/ctz builtin and cast stmt before the
|
||||
+ cond_bb. */
|
||||
gsi = gsi_last_bb (cond_bb);
|
||||
if (cast)
|
||||
{
|
||||
@@ -1870,9 +1912,19 @@ cond_removal_in_popcount_pattern (basic_block cond_bb, basic_block middle_bb,
|
||||
gsi_move_before (&gsi_from, &gsi);
|
||||
reset_flow_sensitive_info (gimple_get_lhs (cast));
|
||||
}
|
||||
- gsi_from = gsi_for_stmt (popcount);
|
||||
- gsi_move_before (&gsi_from, &gsi);
|
||||
- reset_flow_sensitive_info (gimple_get_lhs (popcount));
|
||||
+ gsi_from = gsi_for_stmt (call);
|
||||
+ if (ifn == IFN_LAST || gimple_call_internal_p (call))
|
||||
+ gsi_move_before (&gsi_from, &gsi);
|
||||
+ else
|
||||
+ {
|
||||
+ /* For __builtin_c[lt]z* force .C[LT]Z ifn, because only
|
||||
+ the latter is well defined at zero. */
|
||||
+ call = gimple_build_call_internal (ifn, 1, gimple_call_arg (call, 0));
|
||||
+ gimple_call_set_lhs (call, lhs);
|
||||
+ gsi_insert_before (&gsi, call, GSI_SAME_STMT);
|
||||
+ gsi_remove (&gsi_from, true);
|
||||
+ }
|
||||
+ reset_flow_sensitive_info (lhs);
|
||||
|
||||
/* Now update the PHI and remove unneeded bbs. */
|
||||
replace_phi_edge_with_variable (cond_bb, e2, phi, lhs);
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
From 7d5d2ab082ce9986db4f3313013b44faa46bc412 Mon Sep 17 00:00:00 2001
|
||||
From: Jakub Jelinek <jakub@redhat.com>
|
||||
Date: Thu, 22 Oct 2020 09:34:28 +0200
|
||||
Subject: [PATCH 06/35] [Backport] phiopt: Optimize x ? __builtin_clz (x) : 32
|
||||
in GIMPLE fallout [PR97503]
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=ef2d3ec325b1b720df5da20784eba46249af2294
|
||||
|
||||
> this broke sparc-sun-solaris2.11 bootstrap
|
||||
>
|
||||
> /vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c: In function 'bool cond_removal_in_popcount_clz_ctz_pattern(basic_block, basic_block, edge, edge, gimple*, tree, tree)':
|
||||
> /vol/gcc/src/hg/master/local/gcc/tree-ssa-phiopt.c:1858:27: error: variable 'mode' set but not used [-Werror=unused-but-set-variable]
|
||||
> 1858 | scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
|
||||
> | ^~~~
|
||||
>
|
||||
>
|
||||
> and doubtlessly several other targets that use the defaults.h definition of
|
||||
>
|
||||
> #define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) 0
|
||||
|
||||
Ugh, seems many of those macros do not evaluate the first argument.
|
||||
This got broken by the change to direct_internal_fn_supported_p, previously
|
||||
it used mode also in the optab test.
|
||||
|
||||
2020-10-22 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* tree-ssa-phiopt.c (cond_removal_in_popcount_clz_ctz_pattern):
|
||||
For CLZ and CTZ tests, use type temporary instead of mode.
|
||||
---
|
||||
gcc/tree-ssa-phiopt.c | 16 ++++++++--------
|
||||
1 file changed, 8 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||
index c1e11916e..707a5882e 100644
|
||||
--- a/gcc/tree-ssa-phiopt.c
|
||||
+++ b/gcc/tree-ssa-phiopt.c
|
||||
@@ -1836,10 +1836,10 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
||||
CASE_CFN_CLZ:
|
||||
if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
|
||||
{
|
||||
- scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
|
||||
- if (direct_internal_fn_supported_p (IFN_CLZ, TREE_TYPE (arg),
|
||||
- OPTIMIZE_FOR_BOTH)
|
||||
- && CLZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
|
||||
+ tree type = TREE_TYPE (arg);
|
||||
+ if (direct_internal_fn_supported_p (IFN_CLZ, type, OPTIMIZE_FOR_BOTH)
|
||||
+ && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type),
|
||||
+ val) == 2)
|
||||
{
|
||||
ifn = IFN_CLZ;
|
||||
break;
|
||||
@@ -1849,10 +1849,10 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
||||
CASE_CFN_CTZ:
|
||||
if (INTEGRAL_TYPE_P (TREE_TYPE (arg)))
|
||||
{
|
||||
- scalar_int_mode mode = SCALAR_INT_TYPE_MODE (TREE_TYPE (arg));
|
||||
- if (direct_internal_fn_supported_p (IFN_CTZ, TREE_TYPE (arg),
|
||||
- OPTIMIZE_FOR_BOTH)
|
||||
- && CTZ_DEFINED_VALUE_AT_ZERO (mode, val) == 2)
|
||||
+ tree type = TREE_TYPE (arg);
|
||||
+ if (direct_internal_fn_supported_p (IFN_CTZ, type, OPTIMIZE_FOR_BOTH)
|
||||
+ && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (type),
|
||||
+ val) == 2)
|
||||
{
|
||||
ifn = IFN_CTZ;
|
||||
break;
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
218
0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch
Normal file
218
0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch
Normal file
@ -0,0 +1,218 @@
|
||||
From 018523df11698dd0e2d42326c57bdf724a7a1aa5 Mon Sep 17 00:00:00 2001
|
||||
From: Jakub Jelinek <jakub@redhat.com>
|
||||
Date: Tue, 5 Jan 2021 16:35:22 +0100
|
||||
Subject: [PATCH 07/35] [Backport] phiopt: Optimize x < 0 ? ~y : y to (x >> 31)
|
||||
^ y [PR96928]
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=576714b309b330df0e80e34114bcdf0bba35e146
|
||||
|
||||
As requested in the PR, the one's complement abs can be done more
|
||||
efficiently without cmov or branching.
|
||||
|
||||
Had to change the ifcvt-onecmpl-abs-1.c testcase, we no longer optimize
|
||||
it in ifcvt, on x86_64 with -m32 we generate in the end the exact same
|
||||
code, but with -m64:
|
||||
movl %edi, %eax
|
||||
- notl %eax
|
||||
- cmpl %edi, %eax
|
||||
- cmovl %edi, %eax
|
||||
+ sarl $31, %eax
|
||||
+ xorl %edi, %eax
|
||||
ret
|
||||
|
||||
2021-01-05 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR tree-optimization/96928
|
||||
* tree-ssa-phiopt.c (xor_replacement): New function.
|
||||
(tree_ssa_phiopt_worker): Call it.
|
||||
|
||||
* gcc.dg/tree-ssa/pr96928.c: New test.
|
||||
* gcc.target/i386/ifcvt-onecmpl-abs-1.c: Remove -fdump-rtl-ce1,
|
||||
instead of scanning rtl dump for ifcvt message check assembly
|
||||
for xor instruction.
|
||||
---
|
||||
gcc/testsuite/gcc.dg/tree-ssa/pr96928.c | 38 +++++++++
|
||||
gcc/tree-ssa-phiopt.c | 108 ++++++++++++++++++++++++
|
||||
2 files changed, 146 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
|
||||
new file mode 100644
|
||||
index 000000000..209135726
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
|
||||
@@ -0,0 +1,38 @@
|
||||
+/* PR tree-optimization/96928 */
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
|
||||
+/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */
|
||||
+/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */
|
||||
+/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
|
||||
+/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */
|
||||
+/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
|
||||
+
|
||||
+int
|
||||
+foo (int a)
|
||||
+{
|
||||
+ return a < 0 ? ~a : a;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+bar (int a, int b)
|
||||
+{
|
||||
+ return a < 0 ? ~b : b;
|
||||
+}
|
||||
+
|
||||
+unsigned
|
||||
+baz (int a, unsigned int b)
|
||||
+{
|
||||
+ return a < 0 ? ~b : b;
|
||||
+}
|
||||
+
|
||||
+unsigned
|
||||
+qux (int a, unsigned int c)
|
||||
+{
|
||||
+ return a >= 0 ? ~c : c;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+corge (int a, int b)
|
||||
+{
|
||||
+ return a >= 0 ? b : ~b;
|
||||
+}
|
||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||
index 707a5882e..b9cd07a60 100644
|
||||
--- a/gcc/tree-ssa-phiopt.c
|
||||
+++ b/gcc/tree-ssa-phiopt.c
|
||||
@@ -61,6 +61,8 @@ static bool minmax_replacement (basic_block, basic_block,
|
||||
edge, edge, gimple *, tree, tree);
|
||||
static bool abs_replacement (basic_block, basic_block,
|
||||
edge, edge, gimple *, tree, tree);
|
||||
+static bool xor_replacement (basic_block, basic_block,
|
||||
+ edge, edge, gimple *, tree, tree);
|
||||
static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
|
||||
edge, edge, gimple *,
|
||||
tree, tree);
|
||||
@@ -349,6 +351,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||||
cfgchanged = true;
|
||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
+ else if (!early_p
|
||||
+ && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
+ cfgchanged = true;
|
||||
else if (!early_p
|
||||
&& cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
|
||||
e2, phi, arg0,
|
||||
@@ -2059,6 +2064,109 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
return true;
|
||||
}
|
||||
|
||||
+/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */
|
||||
+
|
||||
+static bool
|
||||
+xor_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
+ edge e0 ATTRIBUTE_UNUSED, edge e1,
|
||||
+ gimple *phi, tree arg0, tree arg1)
|
||||
+{
|
||||
+ if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
|
||||
+ return false;
|
||||
+
|
||||
+ /* OTHER_BLOCK must have only one executable statement which must have the
|
||||
+ form arg0 = ~arg1 or arg1 = ~arg0. */
|
||||
+
|
||||
+ gimple *assign = last_and_only_stmt (middle_bb);
|
||||
+ /* If we did not find the proper one's complement assignment, then we cannot
|
||||
+ optimize. */
|
||||
+ if (assign == NULL)
|
||||
+ return false;
|
||||
+
|
||||
+ /* If we got here, then we have found the only executable statement
|
||||
+ in OTHER_BLOCK. If it is anything other than arg0 = ~arg1 or
|
||||
+ arg1 = ~arg0, then we cannot optimize. */
|
||||
+ if (!is_gimple_assign (assign))
|
||||
+ return false;
|
||||
+
|
||||
+ if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR)
|
||||
+ return false;
|
||||
+
|
||||
+ tree lhs = gimple_assign_lhs (assign);
|
||||
+ tree rhs = gimple_assign_rhs1 (assign);
|
||||
+
|
||||
+ /* The assignment has to be arg0 = ~arg1 or arg1 = ~arg0. */
|
||||
+ if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0))
|
||||
+ return false;
|
||||
+
|
||||
+ gimple *cond = last_stmt (cond_bb);
|
||||
+ tree result = PHI_RESULT (phi);
|
||||
+
|
||||
+ /* Only relationals comparing arg[01] against zero are interesting. */
|
||||
+ enum tree_code cond_code = gimple_cond_code (cond);
|
||||
+ if (cond_code != LT_EXPR && cond_code != GE_EXPR)
|
||||
+ return false;
|
||||
+
|
||||
+ /* Make sure the conditional is x OP 0. */
|
||||
+ tree clhs = gimple_cond_lhs (cond);
|
||||
+ if (TREE_CODE (clhs) != SSA_NAME
|
||||
+ || !INTEGRAL_TYPE_P (TREE_TYPE (clhs))
|
||||
+ || TYPE_UNSIGNED (TREE_TYPE (clhs))
|
||||
+ || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1))
|
||||
+ || !integer_zerop (gimple_cond_rhs (cond)))
|
||||
+ return false;
|
||||
+
|
||||
+ /* We need to know which is the true edge and which is the false
|
||||
+ edge so that we know if we have xor or inverted xor. */
|
||||
+ edge true_edge, false_edge;
|
||||
+ extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
|
||||
+
|
||||
+ /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
|
||||
+ will need to invert the result. Similarly for LT_EXPR if
|
||||
+ the false edge goes to OTHER_BLOCK. */
|
||||
+ edge e;
|
||||
+ if (cond_code == GE_EXPR)
|
||||
+ e = true_edge;
|
||||
+ else
|
||||
+ e = false_edge;
|
||||
+
|
||||
+ bool invert = e->dest == middle_bb;
|
||||
+
|
||||
+ result = duplicate_ssa_name (result, NULL);
|
||||
+
|
||||
+ gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
|
||||
+
|
||||
+ int prec = TYPE_PRECISION (TREE_TYPE (clhs));
|
||||
+ gimple *new_stmt
|
||||
+ = gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs,
|
||||
+ build_int_cst (integer_type_node, prec - 1));
|
||||
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
|
||||
+
|
||||
+ if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (clhs)))
|
||||
+ {
|
||||
+ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
|
||||
+ NOP_EXPR, gimple_assign_lhs (new_stmt));
|
||||
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
|
||||
+ }
|
||||
+ lhs = gimple_assign_lhs (new_stmt);
|
||||
+
|
||||
+ if (invert)
|
||||
+ {
|
||||
+ new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
|
||||
+ BIT_NOT_EXPR, rhs);
|
||||
+ gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
|
||||
+ rhs = gimple_assign_lhs (new_stmt);
|
||||
+ }
|
||||
+
|
||||
+ new_stmt = gimple_build_assign (result, BIT_XOR_EXPR, lhs, rhs);
|
||||
+ gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
|
||||
+
|
||||
+ replace_phi_edge_with_variable (cond_bb, e1, phi, result);
|
||||
+
|
||||
+ /* Note that we optimized this PHI. */
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
/* Auxiliary functions to determine the set of memory accesses which
|
||||
can't trap because they are preceded by accesses to the same memory
|
||||
portion. We do that for MEM_REFs, so we only need to track
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
1067
0056-Backport-phiopt-Optimize-x-y-cmp-z-PR94589.patch
Normal file
1067
0056-Backport-phiopt-Optimize-x-y-cmp-z-PR94589.patch
Normal file
File diff suppressed because it is too large
Load Diff
253
0057-Backport-Add-support-for-__builtin_bswap128.patch
Normal file
253
0057-Backport-Add-support-for-__builtin_bswap128.patch
Normal file
@ -0,0 +1,253 @@
|
||||
From 96afd5b761a74e9eef40a2e843810c503c669de8 Mon Sep 17 00:00:00 2001
|
||||
From: Eric Botcazou <ebotcazou@gcc.gnu.org>
|
||||
Date: Thu, 28 May 2020 00:31:15 +0200
|
||||
Subject: [PATCH 09/35] [Backport] Add support for __builtin_bswap128
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=fe7ebef7fe4f9acb79658ed9db0749b07efc3105
|
||||
|
||||
This patch introduces a new builtin named __builtin_bswap128 on targets
|
||||
where TImode is supported, i.e. 64-bit targets only in practice. The
|
||||
implementation simply reuses the existing double word path in optab, so
|
||||
no routine is added to libgcc (which means that you get two calls to
|
||||
_bswapdi2 in the worst case).
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* builtin-types.def (BT_UINT128): New primitive type.
|
||||
(BT_FN_UINT128_UINT128): New function type.
|
||||
* builtins.def (BUILT_IN_BSWAP128): New GCC builtin.
|
||||
* doc/extend.texi (__builtin_bswap128): Document it.
|
||||
* builtins.c (expand_builtin): Deal with BUILT_IN_BSWAP128.
|
||||
(is_inexpensive_builtin): Likewise.
|
||||
* fold-const-call.c (fold_const_call_ss): Likewise.
|
||||
* fold-const.c (tree_call_nonnegative_warnv_p): Likewise.
|
||||
* tree-ssa-ccp.c (evaluate_stmt): Likewise.
|
||||
* tree-vect-stmts.c (vect_get_data_ptr_increment): Likewise.
|
||||
(vectorizable_call): Likewise.
|
||||
* optabs.c (expand_unop): Always use the double word path for it.
|
||||
* tree-core.h (enum tree_index): Add TI_UINT128_TYPE.
|
||||
* tree.h (uint128_type_node): New global type.
|
||||
* tree.c (build_common_tree_nodes): Build it if TImode is supported.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.dg/builtin-bswap-10.c: New test.
|
||||
* gcc.dg/builtin-bswap-11.c: Likewise.
|
||||
* gcc.dg/builtin-bswap-12.c: Likewise.
|
||||
* gcc.target/i386/builtin-bswap-5.c: Likewise.
|
||||
---
|
||||
gcc/builtin-types.def | 4 ++++
|
||||
gcc/builtins.c | 2 ++
|
||||
gcc/builtins.def | 2 ++
|
||||
gcc/doc/extend.texi | 10 ++++++++--
|
||||
gcc/fold-const-call.c | 1 +
|
||||
gcc/fold-const.c | 2 ++
|
||||
gcc/optabs.c | 5 ++++-
|
||||
gcc/tree-core.h | 1 +
|
||||
gcc/tree-ssa-ccp.c | 1 +
|
||||
gcc/tree-vect-stmts.c | 5 +++--
|
||||
gcc/tree.c | 2 ++
|
||||
gcc/tree.h | 1 +
|
||||
12 files changed, 31 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/gcc/builtin-types.def b/gcc/builtin-types.def
|
||||
index c7aa691b2..c46b1bc5c 100644
|
||||
--- a/gcc/builtin-types.def
|
||||
+++ b/gcc/builtin-types.def
|
||||
@@ -73,6 +73,9 @@ DEF_PRIMITIVE_TYPE (BT_UINT8, unsigned_char_type_node)
|
||||
DEF_PRIMITIVE_TYPE (BT_UINT16, uint16_type_node)
|
||||
DEF_PRIMITIVE_TYPE (BT_UINT32, uint32_type_node)
|
||||
DEF_PRIMITIVE_TYPE (BT_UINT64, uint64_type_node)
|
||||
+DEF_PRIMITIVE_TYPE (BT_UINT128, uint128_type_node
|
||||
+ ? uint128_type_node
|
||||
+ : error_mark_node)
|
||||
DEF_PRIMITIVE_TYPE (BT_WORD, (*lang_hooks.types.type_for_mode) (word_mode, 1))
|
||||
DEF_PRIMITIVE_TYPE (BT_UNWINDWORD, (*lang_hooks.types.type_for_mode)
|
||||
(targetm.unwind_word_mode (), 1))
|
||||
@@ -300,6 +303,7 @@ DEF_FUNCTION_TYPE_1 (BT_FN_UINT8_FLOAT, BT_UINT8, BT_FLOAT)
|
||||
DEF_FUNCTION_TYPE_1 (BT_FN_UINT16_UINT16, BT_UINT16, BT_UINT16)
|
||||
DEF_FUNCTION_TYPE_1 (BT_FN_UINT32_UINT32, BT_UINT32, BT_UINT32)
|
||||
DEF_FUNCTION_TYPE_1 (BT_FN_UINT64_UINT64, BT_UINT64, BT_UINT64)
|
||||
+DEF_FUNCTION_TYPE_1 (BT_FN_UINT128_UINT128, BT_UINT128, BT_UINT128)
|
||||
DEF_FUNCTION_TYPE_1 (BT_FN_UINT64_FLOAT, BT_UINT64, BT_FLOAT)
|
||||
DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_INT, BT_BOOL, BT_INT)
|
||||
DEF_FUNCTION_TYPE_1 (BT_FN_BOOL_PTR, BT_BOOL, BT_PTR)
|
||||
diff --git a/gcc/builtins.c b/gcc/builtins.c
|
||||
index 10b6fd3bb..1b1c75cc1 100644
|
||||
--- a/gcc/builtins.c
|
||||
+++ b/gcc/builtins.c
|
||||
@@ -8015,6 +8015,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
|
||||
case BUILT_IN_BSWAP16:
|
||||
case BUILT_IN_BSWAP32:
|
||||
case BUILT_IN_BSWAP64:
|
||||
+ case BUILT_IN_BSWAP128:
|
||||
target = expand_builtin_bswap (target_mode, exp, target, subtarget);
|
||||
if (target)
|
||||
return target;
|
||||
@@ -11732,6 +11733,7 @@ is_inexpensive_builtin (tree decl)
|
||||
case BUILT_IN_BSWAP16:
|
||||
case BUILT_IN_BSWAP32:
|
||||
case BUILT_IN_BSWAP64:
|
||||
+ case BUILT_IN_BSWAP128:
|
||||
case BUILT_IN_CLZ:
|
||||
case BUILT_IN_CLZIMAX:
|
||||
case BUILT_IN_CLZL:
|
||||
diff --git a/gcc/builtins.def b/gcc/builtins.def
|
||||
index fa8b0641a..ee67ac15d 100644
|
||||
--- a/gcc/builtins.def
|
||||
+++ b/gcc/builtins.def
|
||||
@@ -834,6 +834,8 @@ DEF_GCC_BUILTIN (BUILT_IN_APPLY_ARGS, "apply_args", BT_FN_PTR_VAR, ATTR_L
|
||||
DEF_GCC_BUILTIN (BUILT_IN_BSWAP16, "bswap16", BT_FN_UINT16_UINT16, ATTR_CONST_NOTHROW_LEAF_LIST)
|
||||
DEF_GCC_BUILTIN (BUILT_IN_BSWAP32, "bswap32", BT_FN_UINT32_UINT32, ATTR_CONST_NOTHROW_LEAF_LIST)
|
||||
DEF_GCC_BUILTIN (BUILT_IN_BSWAP64, "bswap64", BT_FN_UINT64_UINT64, ATTR_CONST_NOTHROW_LEAF_LIST)
|
||||
+DEF_GCC_BUILTIN (BUILT_IN_BSWAP128, "bswap128", BT_FN_UINT128_UINT128, ATTR_CONST_NOTHROW_LEAF_LIST)
|
||||
+
|
||||
DEF_EXT_LIB_BUILTIN (BUILT_IN_CLEAR_CACHE, "__clear_cache", BT_FN_VOID_PTR_PTR, ATTR_NOTHROW_LEAF_LIST)
|
||||
/* [trans-mem]: Adjust BUILT_IN_TM_CALLOC if BUILT_IN_CALLOC is changed. */
|
||||
DEF_LIB_BUILTIN (BUILT_IN_CALLOC, "calloc", BT_FN_PTR_SIZE_SIZE, ATTR_MALLOC_WARN_UNUSED_RESULT_SIZE_1_2_NOTHROW_LEAF_LIST)
|
||||
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||||
index 9c7345959..a7bd772de 100644
|
||||
--- a/gcc/doc/extend.texi
|
||||
+++ b/gcc/doc/extend.texi
|
||||
@@ -13727,14 +13727,20 @@ exactly 8 bits.
|
||||
|
||||
@deftypefn {Built-in Function} uint32_t __builtin_bswap32 (uint32_t x)
|
||||
Similar to @code{__builtin_bswap16}, except the argument and return types
|
||||
-are 32 bit.
|
||||
+are 32-bit.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Built-in Function} uint64_t __builtin_bswap64 (uint64_t x)
|
||||
Similar to @code{__builtin_bswap32}, except the argument and return types
|
||||
-are 64 bit.
|
||||
+are 64-bit.
|
||||
@end deftypefn
|
||||
|
||||
+@deftypefn {Built-in Function} uint128_t __builtin_bswap128 (uint128_t x)
|
||||
+Similar to @code{__builtin_bswap64}, except the argument and return types
|
||||
+are 128-bit. Only supported on targets where 128-bit types are supported.
|
||||
+@end deftypefn
|
||||
+
|
||||
+
|
||||
@deftypefn {Built-in Function} Pmode __builtin_extend_pointer (void * x)
|
||||
On targets where the user visible pointer size is smaller than the size
|
||||
of an actual hardware address this function returns the extended user
|
||||
diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c
|
||||
index 6150d7ada..da01759d9 100644
|
||||
--- a/gcc/fold-const-call.c
|
||||
+++ b/gcc/fold-const-call.c
|
||||
@@ -1032,6 +1032,7 @@ fold_const_call_ss (wide_int *result, combined_fn fn, const wide_int_ref &arg,
|
||||
case CFN_BUILT_IN_BSWAP16:
|
||||
case CFN_BUILT_IN_BSWAP32:
|
||||
case CFN_BUILT_IN_BSWAP64:
|
||||
+ case CFN_BUILT_IN_BSWAP128:
|
||||
*result = wide_int::from (arg, precision, TYPE_SIGN (arg_type)).bswap ();
|
||||
return true;
|
||||
|
||||
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
|
||||
index 6e635382f..78227a83d 100644
|
||||
--- a/gcc/fold-const.c
|
||||
+++ b/gcc/fold-const.c
|
||||
@@ -13889,8 +13889,10 @@ tree_call_nonnegative_warnv_p (tree type, combined_fn fn, tree arg0, tree arg1,
|
||||
CASE_CFN_POPCOUNT:
|
||||
CASE_CFN_CLZ:
|
||||
CASE_CFN_CLRSB:
|
||||
+ case CFN_BUILT_IN_BSWAP16:
|
||||
case CFN_BUILT_IN_BSWAP32:
|
||||
case CFN_BUILT_IN_BSWAP64:
|
||||
+ case CFN_BUILT_IN_BSWAP128:
|
||||
/* Always true. */
|
||||
return true;
|
||||
|
||||
diff --git a/gcc/optabs.c b/gcc/optabs.c
|
||||
index 049a18ceb..c3751fdf7 100644
|
||||
--- a/gcc/optabs.c
|
||||
+++ b/gcc/optabs.c
|
||||
@@ -2896,8 +2896,11 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target,
|
||||
if (temp)
|
||||
return temp;
|
||||
|
||||
+ /* We do not provide a 128-bit bswap in libgcc so force the use of
|
||||
+ a double bswap for 64-bit targets. */
|
||||
if (GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
|
||||
- && optab_handler (unoptab, word_mode) != CODE_FOR_nothing)
|
||||
+ && (UNITS_PER_WORD == 64
|
||||
+ || optab_handler (unoptab, word_mode) != CODE_FOR_nothing))
|
||||
{
|
||||
temp = expand_doubleword_bswap (mode, op0, target);
|
||||
if (temp)
|
||||
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
|
||||
index eb01c2434..058e046aa 100644
|
||||
--- a/gcc/tree-core.h
|
||||
+++ b/gcc/tree-core.h
|
||||
@@ -600,6 +600,7 @@ enum tree_index {
|
||||
TI_UINT16_TYPE,
|
||||
TI_UINT32_TYPE,
|
||||
TI_UINT64_TYPE,
|
||||
+ TI_UINT128_TYPE,
|
||||
|
||||
TI_VOID,
|
||||
|
||||
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
|
||||
index 952fd9cd4..dcdf10369 100644
|
||||
--- a/gcc/tree-ssa-ccp.c
|
||||
+++ b/gcc/tree-ssa-ccp.c
|
||||
@@ -2005,6 +2005,7 @@ evaluate_stmt (gimple *stmt)
|
||||
case BUILT_IN_BSWAP16:
|
||||
case BUILT_IN_BSWAP32:
|
||||
case BUILT_IN_BSWAP64:
|
||||
+ case BUILT_IN_BSWAP128:
|
||||
val = get_value_for_expr (gimple_call_arg (stmt, 0), true);
|
||||
if (val.lattice_val == UNDEFINED)
|
||||
break;
|
||||
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
|
||||
index b872cfc8d..4636b7ba2 100644
|
||||
--- a/gcc/tree-vect-stmts.c
|
||||
+++ b/gcc/tree-vect-stmts.c
|
||||
@@ -3085,7 +3085,7 @@ vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
|
||||
return iv_step;
|
||||
}
|
||||
|
||||
-/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
|
||||
+/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
|
||||
|
||||
static bool
|
||||
vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
||||
@@ -3454,7 +3454,8 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
||||
else if (modifier == NONE
|
||||
&& (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
|
||||
|| gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
|
||||
- || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
|
||||
+ || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
|
||||
+ || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
|
||||
return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
|
||||
vectype_in, cost_vec);
|
||||
else
|
||||
diff --git a/gcc/tree.c b/gcc/tree.c
|
||||
index 84a440b35..3e6647ae0 100644
|
||||
--- a/gcc/tree.c
|
||||
+++ b/gcc/tree.c
|
||||
@@ -10394,6 +10394,8 @@ build_common_tree_nodes (bool signed_char)
|
||||
uint16_type_node = make_or_reuse_type (16, 1);
|
||||
uint32_type_node = make_or_reuse_type (32, 1);
|
||||
uint64_type_node = make_or_reuse_type (64, 1);
|
||||
+ if (targetm.scalar_mode_supported_p (TImode))
|
||||
+ uint128_type_node = make_or_reuse_type (128, 1);
|
||||
|
||||
/* Decimal float types. */
|
||||
if (targetm.decimal_float_supported_p ())
|
||||
diff --git a/gcc/tree.h b/gcc/tree.h
|
||||
index 328a2d5d2..bddc6e528 100644
|
||||
--- a/gcc/tree.h
|
||||
+++ b/gcc/tree.h
|
||||
@@ -4035,6 +4035,7 @@ tree_strip_any_location_wrapper (tree exp)
|
||||
#define uint16_type_node global_trees[TI_UINT16_TYPE]
|
||||
#define uint32_type_node global_trees[TI_UINT32_TYPE]
|
||||
#define uint64_type_node global_trees[TI_UINT64_TYPE]
|
||||
+#define uint128_type_node global_trees[TI_UINT128_TYPE]
|
||||
|
||||
#define void_node global_trees[TI_VOID]
|
||||
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
113
0058-Backport-tree-optimization-95393-fold-MIN-MAX_EXPR-g.patch
Normal file
113
0058-Backport-tree-optimization-95393-fold-MIN-MAX_EXPR-g.patch
Normal file
@ -0,0 +1,113 @@
|
||||
From b9ac0cc69aab3c8d662d5b0a9ed43d971c13ac70 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Biener <rguenther@suse.de>
|
||||
Date: Fri, 29 May 2020 09:25:53 +0200
|
||||
Subject: [PATCH 10/35] [Backport] tree-optimization/95393 - fold MIN/MAX_EXPR
|
||||
generated by phiopt
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=07852a81f58532c63a57631d7c3757fc6bcea17d
|
||||
|
||||
This makes sure to fold generated stmts so they do not survive
|
||||
until RTL expansion and cause awkward code generation.
|
||||
|
||||
2020-05-29 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/95393
|
||||
* tree-ssa-phiopt.c (minmax_replacement): Use gimple_build
|
||||
to build the min/max expression so we simplify cases like
|
||||
MAX(0, s) immediately.
|
||||
|
||||
* gcc.dg/tree-ssa/phi-opt-21.c: New testcase.
|
||||
* g++.dg/vect/slp-pr87105.cc: Adjust.
|
||||
---
|
||||
gcc/testsuite/g++.dg/vect/slp-pr87105.cc | 2 +-
|
||||
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c | 15 +++++++++++++
|
||||
gcc/tree-ssa-phiopt.c | 25 +++++++++++-----------
|
||||
3 files changed, 29 insertions(+), 13 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c
|
||||
|
||||
diff --git a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
|
||||
index 5518f319b..d07b1cd46 100644
|
||||
--- a/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
|
||||
+++ b/gcc/testsuite/g++.dg/vect/slp-pr87105.cc
|
||||
@@ -102,4 +102,4 @@ void quadBoundingBoxA(const Point bez[3], Box& bBox) noexcept {
|
||||
// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } }
|
||||
// It's a bit awkward to detect that all stores were vectorized but the
|
||||
// following more or less does the trick
|
||||
-// { dg-final { scan-tree-dump "vect_iftmp\[^\r\m\]* = MIN" "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } }
|
||||
+// { dg-final { scan-tree-dump "vect_\[^\r\m\]* = MIN" "slp2" { xfail { { ! vect_element_align } && { ! vect_hw_misalign } } } } }
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c
|
||||
new file mode 100644
|
||||
index 000000000..9f3d56957
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-21.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-tree-phiopt4-details" } */
|
||||
+
|
||||
+int f(unsigned s)
|
||||
+{
|
||||
+ int i;
|
||||
+ for (i = 0; i < s; ++i)
|
||||
+ ;
|
||||
+
|
||||
+ return i;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump "converted to straightline code" "phiopt4" } } */
|
||||
+/* Make sure we fold the detected MAX<s, 0>. */
|
||||
+/* { dg-final { scan-tree-dump-not "MAX" "phiopt4" } } */
|
||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||
index fca32222f..269eda21c 100644
|
||||
--- a/gcc/tree-ssa-phiopt.c
|
||||
+++ b/gcc/tree-ssa-phiopt.c
|
||||
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "tree-inline.h"
|
||||
#include "case-cfn-macros.h"
|
||||
#include "tree-eh.h"
|
||||
+#include "gimple-fold.h"
|
||||
#include "internal-fn.h"
|
||||
|
||||
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
|
||||
@@ -1414,7 +1415,6 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
{
|
||||
tree result, type, rhs;
|
||||
gcond *cond;
|
||||
- gassign *new_stmt;
|
||||
edge true_edge, false_edge;
|
||||
enum tree_code cmp, minmax, ass_code;
|
||||
tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false;
|
||||
@@ -1738,19 +1738,20 @@ minmax_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
gsi_move_before (&gsi_from, &gsi);
|
||||
}
|
||||
|
||||
- /* Create an SSA var to hold the min/max result. If we're the only
|
||||
- things setting the target PHI, then we can clone the PHI
|
||||
- variable. Otherwise we must create a new one. */
|
||||
- result = PHI_RESULT (phi);
|
||||
- if (EDGE_COUNT (gimple_bb (phi)->preds) == 2)
|
||||
- result = duplicate_ssa_name (result, NULL);
|
||||
- else
|
||||
- result = make_ssa_name (TREE_TYPE (result));
|
||||
-
|
||||
/* Emit the statement to compute min/max. */
|
||||
- new_stmt = gimple_build_assign (result, minmax, arg0, arg1);
|
||||
+ gimple_seq stmts = NULL;
|
||||
+ tree phi_result = PHI_RESULT (phi);
|
||||
+ result = gimple_build (&stmts, minmax, TREE_TYPE (phi_result), arg0, arg1);
|
||||
+ /* Duplicate range info if we're the only things setting the target PHI. */
|
||||
+ if (!gimple_seq_empty_p (stmts)
|
||||
+ && EDGE_COUNT (gimple_bb (phi)->preds) == 2
|
||||
+ && !POINTER_TYPE_P (TREE_TYPE (phi_result))
|
||||
+ && SSA_NAME_RANGE_INFO (phi_result))
|
||||
+ duplicate_ssa_name_range_info (result, SSA_NAME_RANGE_TYPE (phi_result),
|
||||
+ SSA_NAME_RANGE_INFO (phi_result));
|
||||
+
|
||||
gsi = gsi_last_bb (cond_bb);
|
||||
- gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
|
||||
+ gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT);
|
||||
|
||||
replace_phi_edge_with_variable (cond_bb, e1, phi, result);
|
||||
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
@ -0,0 +1,91 @@
|
||||
From 9f3a8c600abe16f172b36d8113862e8f7aea940c Mon Sep 17 00:00:00 2001
|
||||
From: Andrew Pinski <apinski@marvell.com>
|
||||
Date: Sun, 16 May 2021 13:07:06 -0700
|
||||
Subject: [PATCH 11/35] [Backport] Add a couple of A?CST1:CST2 match and
|
||||
simplify optimizations
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=b6bdd7a4cb41ee057f2d064fffcb00f23ce6b497
|
||||
|
||||
Instead of some of the more manual optimizations inside phi-opt,
|
||||
it would be good idea to do a lot of the heavy lifting inside match
|
||||
and simplify instead. In the process, this moves the three simple
|
||||
A?CST1:CST2 (where CST1 or CST2 is zero) simplifications.
|
||||
|
||||
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
|
||||
|
||||
Differences from V1:
|
||||
* Use bit_xor 1 instead of bit_not to fix the problem with boolean types
|
||||
which are not 1 bit precision.
|
||||
|
||||
Thanks,
|
||||
Andrew Pinski
|
||||
|
||||
gcc:
|
||||
* match.pd (A?CST1:CST2): Add simplifications for A?0:+-1, A?+-1:0,
|
||||
A?POW2:0 and A?0:POW2.
|
||||
---
|
||||
gcc/match.pd | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 48 insertions(+)
|
||||
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index 660d5c268..032830b0d 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -3334,6 +3334,54 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
(if (cst1 && cst2)
|
||||
(vec_cond @0 { cst1; } { cst2; })))))
|
||||
|
||||
+/* A few simplifications of "a ? CST1 : CST2". */
|
||||
+/* NOTE: Only do this on gimple as the if-chain-to-switch
|
||||
+ optimization depends on the gimple to have if statements in it. */
|
||||
+#if GIMPLE
|
||||
+(simplify
|
||||
+ (cond @0 INTEGER_CST@1 INTEGER_CST@2)
|
||||
+ (switch
|
||||
+ (if (integer_zerop (@2))
|
||||
+ (switch
|
||||
+ /* a ? 1 : 0 -> a if 0 and 1 are integral types. */
|
||||
+ (if (integer_onep (@1))
|
||||
+ (convert (convert:boolean_type_node @0)))
|
||||
+ /* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */
|
||||
+ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1))
|
||||
+ (with {
|
||||
+ tree shift = build_int_cst (integer_type_node, tree_log2 (@1));
|
||||
+ }
|
||||
+ (lshift (convert (convert:boolean_type_node @0)) { shift; })))
|
||||
+ /* a ? -1 : 0 -> -a. No need to check the TYPE_PRECISION not being 1
|
||||
+ here as the powerof2cst case above will handle that case correctly. */
|
||||
+ (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@1))
|
||||
+ (negate (convert (convert:boolean_type_node @0))))))
|
||||
+ (if (integer_zerop (@1))
|
||||
+ (with {
|
||||
+ tree booltrue = constant_boolean_node (true, boolean_type_node);
|
||||
+ }
|
||||
+ (switch
|
||||
+ /* a ? 0 : 1 -> !a. */
|
||||
+ (if (integer_onep (@2))
|
||||
+ (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } )))
|
||||
+ /* a ? 0 : powerof2cst -> (!a) << (log2(powerof2cst)) */
|
||||
+ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2))
|
||||
+ (with {
|
||||
+ tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
|
||||
+ }
|
||||
+ (lshift (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } ))
|
||||
+ { shift; })))
|
||||
+ /* a ? 0 : -1 -> -(!a). No need to check the TYPE_PRECISION not being 1
|
||||
+ here as the powerof2cst case above will handle that case correctly. */
|
||||
+ (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@2))
|
||||
+ (negate (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } ))))
|
||||
+ )
|
||||
+ )
|
||||
+ )
|
||||
+ )
|
||||
+)
|
||||
+#endif
|
||||
+
|
||||
/* Simplification moved from fold_cond_expr_with_comparison. It may also
|
||||
be extended. */
|
||||
/* This pattern implements two kinds simplification:
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
155
0060-Backport-Optimize-x-0-y-y-to-x-31-y-in-match.pd.patch
Normal file
155
0060-Backport-Optimize-x-0-y-y-to-x-31-y-in-match.pd.patch
Normal file
@ -0,0 +1,155 @@
|
||||
From 4352b952ba24c413697fcfc191d06165a8a31ced Mon Sep 17 00:00:00 2001
|
||||
From: Andrew Pinski <apinski@marvell.com>
|
||||
Date: Sat, 22 May 2021 19:49:50 +0000
|
||||
Subject: [PATCH 12/35] [Backport] Optimize x < 0 ? ~y : y to (x >> 31) ^ y in
|
||||
match.pd
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=1fd76b24306ed4df4cf9e797d900699ed59ce7f7
|
||||
|
||||
This copies the optimization that is done in phiopt for
|
||||
"x < 0 ? ~y : y to (x >> 31) ^ y" into match.pd. The code
|
||||
for phiopt is kept around until phiopt uses match.pd (which
|
||||
I am working towards).
|
||||
|
||||
Note the original testcase is now optimized early on and I added a
|
||||
new testcase to optimize during phiopt.
|
||||
|
||||
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
|
||||
|
||||
Thanks,
|
||||
Andrew Pinski
|
||||
|
||||
Differences from v1:
|
||||
V2: Add check for integral type to make sure vector types are not done.
|
||||
|
||||
gcc:
|
||||
* match.pd (x < 0 ? ~y : y): New patterns.
|
||||
|
||||
gcc/testsuite:
|
||||
* gcc.dg/tree-ssa/pr96928.c: Update test for slightly different IR.
|
||||
* gcc.dg/tree-ssa/pr96928-1.c: New testcase.
|
||||
---
|
||||
gcc/match.pd | 32 +++++++++++++++
|
||||
gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c | 48 +++++++++++++++++++++++
|
||||
gcc/testsuite/gcc.dg/tree-ssa/pr96928.c | 7 +++-
|
||||
3 files changed, 85 insertions(+), 2 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
|
||||
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index 032830b0d..5899eea95 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -4390,6 +4390,38 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
(cmp (bit_and@2 @0 integer_pow2p@1) @1)
|
||||
(icmp @2 { build_zero_cst (TREE_TYPE (@0)); })))
|
||||
|
||||
+(for cmp (ge lt)
|
||||
+/* x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */
|
||||
+/* x >= 0 ? ~y : y into ~((x >> (prec-1)) ^ y). */
|
||||
+ (simplify
|
||||
+ (cond (cmp @0 integer_zerop) (bit_not @1) @1)
|
||||
+ (if (INTEGRAL_TYPE_P (type)
|
||||
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0))
|
||||
+ && !TYPE_UNSIGNED (TREE_TYPE (@0))
|
||||
+ && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (type))
|
||||
+ (with
|
||||
+ {
|
||||
+ tree shifter = build_int_cst (integer_type_node, TYPE_PRECISION (type) - 1);
|
||||
+ }
|
||||
+ (if (cmp == LT_EXPR)
|
||||
+ (bit_xor (convert (rshift @0 {shifter;})) @1)
|
||||
+ (bit_not (bit_xor (convert (rshift @0 {shifter;})) @1))))))
|
||||
+/* x < 0 ? y : ~y into ~((x >> (prec-1)) ^ y). */
|
||||
+/* x >= 0 ? y : ~y into (x >> (prec-1)) ^ y. */
|
||||
+ (simplify
|
||||
+ (cond (cmp @0 integer_zerop) @1 (bit_not @1))
|
||||
+ (if (INTEGRAL_TYPE_P (type)
|
||||
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0))
|
||||
+ && !TYPE_UNSIGNED (TREE_TYPE (@0))
|
||||
+ && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION (type))
|
||||
+ (with
|
||||
+ {
|
||||
+ tree shifter = build_int_cst (integer_type_node, TYPE_PRECISION (type) - 1);
|
||||
+ }
|
||||
+ (if (cmp == GE_EXPR)
|
||||
+ (bit_xor (convert (rshift @0 {shifter;})) @1)
|
||||
+ (bit_not (bit_xor (convert (rshift @0 {shifter;})) @1)))))))
|
||||
+
|
||||
/* If we have (A & C) != 0 ? D : 0 where C and D are powers of 2,
|
||||
convert this into a shift followed by ANDing with D. */
|
||||
(simplify
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
|
||||
new file mode 100644
|
||||
index 000000000..a2770e5e8
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928-1.c
|
||||
@@ -0,0 +1,48 @@
|
||||
+/* PR tree-optimization/96928 */
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-tree-phiopt2" } */
|
||||
+/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */
|
||||
+/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */
|
||||
+/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
|
||||
+/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */
|
||||
+/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
|
||||
+
|
||||
+int
|
||||
+foo (int a)
|
||||
+{
|
||||
+ if (a < 0)
|
||||
+ return ~a;
|
||||
+ return a;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+bar (int a, int b)
|
||||
+{
|
||||
+ if (a < 0)
|
||||
+ return ~b;
|
||||
+ return b;
|
||||
+}
|
||||
+
|
||||
+unsigned
|
||||
+baz (int a, unsigned int b)
|
||||
+{
|
||||
+ if (a < 0)
|
||||
+ return ~b;
|
||||
+ return b;
|
||||
+}
|
||||
+
|
||||
+unsigned
|
||||
+qux (int a, unsigned int c)
|
||||
+{
|
||||
+ if (a >= 0)
|
||||
+ return ~c;
|
||||
+ return c;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+corge (int a, int b)
|
||||
+{
|
||||
+ if (a >= 0)
|
||||
+ return b;
|
||||
+ return ~b;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
|
||||
index 209135726..e8fd82fc2 100644
|
||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96928.c
|
||||
@@ -1,8 +1,11 @@
|
||||
/* PR tree-optimization/96928 */
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -fdump-tree-phiopt2" } */
|
||||
+/* { dg-options "-O2 -fdump-tree-phiopt2 -fdump-tree-optimized" } */
|
||||
/* { dg-final { scan-tree-dump-times " = a_\[0-9]*\\\(D\\\) >> " 5 "phiopt2" } } */
|
||||
-/* { dg-final { scan-tree-dump-times " = ~c_\[0-9]*\\\(D\\\);" 1 "phiopt2" } } */
|
||||
+/* The following check is done at optimized because a ^ (~b) is rewritten as ~(a^b)
|
||||
+ and in the case of match.pd optimizing these ?:, the ~ is moved out already
|
||||
+ by the time we get to phiopt2. */
|
||||
+/* { dg-final { scan-tree-dump-times "\\\^ c_\[0-9]*\\\(D\\\);" 1 "optimized" } } */
|
||||
/* { dg-final { scan-tree-dump-times " = ~" 1 "phiopt2" } } */
|
||||
/* { dg-final { scan-tree-dump-times " = \[abc_0-9\\\(\\\)D]* \\\^ " 5 "phiopt2" } } */
|
||||
/* { dg-final { scan-tree-dump-not "a < 0" "phiopt2" } } */
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
249
0061-Backport-Replace-conditional_replacement-with-match-.patch
Normal file
249
0061-Backport-Replace-conditional_replacement-with-match-.patch
Normal file
@ -0,0 +1,249 @@
|
||||
From 406071e8c1838c824f06c35ef3cf9419aa543e6e Mon Sep 17 00:00:00 2001
|
||||
From: Andrew Pinski <apinski@marvell.com>
|
||||
Date: Tue, 1 Jun 2021 01:05:09 +0000
|
||||
Subject: [PATCH 13/35] [Backport] Replace conditional_replacement with match
|
||||
and simplify
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=9f55df63154a39d67ef5b24def7044bf87300831
|
||||
|
||||
This is the first of series of patches to simplify phi-opt
|
||||
to use match and simplify in many cases. This simplification
|
||||
will allow more things to be optimized.
|
||||
|
||||
This is what Richard requested in
|
||||
https://gcc.gnu.org/pipermail/gcc-patches/2021-May/571197.html
|
||||
and I think it is the right thing to do too.
|
||||
|
||||
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
PR tree-optimization/25290
|
||||
* tree-ssa-phiopt.c (match_simplify_replacement):
|
||||
New function.
|
||||
(tree_ssa_phiopt_worker): Use match_simplify_replacement.
|
||||
(two_value_replacement): Change the comment about
|
||||
conditional_replacement.
|
||||
(conditional_replacement): Delete.
|
||||
---
|
||||
gcc/tree-ssa-phiopt.c | 144 ++++++++++++------------------------------
|
||||
1 file changed, 39 insertions(+), 105 deletions(-)
|
||||
|
||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||
index 269eda21c..9fa6363b6 100644
|
||||
--- a/gcc/tree-ssa-phiopt.c
|
||||
+++ b/gcc/tree-ssa-phiopt.c
|
||||
@@ -52,8 +52,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
|
||||
static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
|
||||
tree, tree);
|
||||
-static bool conditional_replacement (basic_block, basic_block,
|
||||
- edge, edge, gphi *, tree, tree);
|
||||
+static bool match_simplify_replacement (basic_block, basic_block,
|
||||
+ edge, edge, gphi *, tree, tree);
|
||||
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree,
|
||||
gimple *);
|
||||
static int value_replacement (basic_block, basic_block,
|
||||
@@ -349,8 +349,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||||
if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
else if (!early_p
|
||||
- && conditional_replacement (bb, bb1, e1, e2, phi,
|
||||
- arg0, arg1))
|
||||
+ && match_simplify_replacement (bb, bb1, e1, e2, phi,
|
||||
+ arg0, arg1))
|
||||
cfgchanged = true;
|
||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
@@ -662,7 +662,7 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
}
|
||||
|
||||
/* Defer boolean x ? 0 : {1,-1} or x ? {1,-1} : 0 to
|
||||
- conditional_replacement. */
|
||||
+ match_simplify_replacement. */
|
||||
if (TREE_CODE (TREE_TYPE (lhs)) == BOOLEAN_TYPE
|
||||
&& (integer_zerop (arg0)
|
||||
|| integer_zerop (arg1)
|
||||
@@ -763,137 +763,71 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
return true;
|
||||
}
|
||||
|
||||
-/* The function conditional_replacement does the main work of doing the
|
||||
- conditional replacement. Return true if the replacement is done.
|
||||
+/* The function match_simplify_replacement does the main work of doing the
|
||||
+ replacement using match and simplify. Return true if the replacement is done.
|
||||
Otherwise return false.
|
||||
BB is the basic block where the replacement is going to be done on. ARG0
|
||||
is argument 0 from PHI. Likewise for ARG1. */
|
||||
|
||||
static bool
|
||||
-conditional_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
- edge e0, edge e1, gphi *phi,
|
||||
- tree arg0, tree arg1)
|
||||
+match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
+ edge e0, edge e1, gphi *phi,
|
||||
+ tree arg0, tree arg1)
|
||||
{
|
||||
- tree result;
|
||||
gimple *stmt;
|
||||
- gassign *new_stmt;
|
||||
tree cond;
|
||||
gimple_stmt_iterator gsi;
|
||||
edge true_edge, false_edge;
|
||||
- tree new_var, new_var2;
|
||||
- bool neg = false;
|
||||
- int shift = 0;
|
||||
- tree nonzero_arg;
|
||||
-
|
||||
- /* FIXME: Gimplification of complex type is too hard for now. */
|
||||
- /* We aren't prepared to handle vectors either (and it is a question
|
||||
- if it would be worthwhile anyway). */
|
||||
- if (!(INTEGRAL_TYPE_P (TREE_TYPE (arg0))
|
||||
- || POINTER_TYPE_P (TREE_TYPE (arg0)))
|
||||
- || !(INTEGRAL_TYPE_P (TREE_TYPE (arg1))
|
||||
- || POINTER_TYPE_P (TREE_TYPE (arg1))))
|
||||
- return false;
|
||||
+ gimple_seq seq = NULL;
|
||||
+ tree result;
|
||||
|
||||
- /* The PHI arguments have the constants 0 and 1, or 0 and -1 or
|
||||
- 0 and (1 << cst), then convert it to the conditional. */
|
||||
- if (integer_zerop (arg0))
|
||||
- nonzero_arg = arg1;
|
||||
- else if (integer_zerop (arg1))
|
||||
- nonzero_arg = arg0;
|
||||
- else
|
||||
- return false;
|
||||
- if (integer_pow2p (nonzero_arg))
|
||||
- {
|
||||
- shift = tree_log2 (nonzero_arg);
|
||||
- if (shift && POINTER_TYPE_P (TREE_TYPE (nonzero_arg)))
|
||||
- return false;
|
||||
- }
|
||||
- else if (integer_all_onesp (nonzero_arg))
|
||||
- neg = true;
|
||||
- else
|
||||
+ if (!empty_block_p (middle_bb))
|
||||
return false;
|
||||
|
||||
- if (!empty_block_p (middle_bb))
|
||||
+ /* Special case A ? B : B as this will always simplify to B. */
|
||||
+ if (operand_equal_for_phi_arg_p (arg0, arg1))
|
||||
return false;
|
||||
|
||||
- /* At this point we know we have a GIMPLE_COND with two successors.
|
||||
+ /* At this point we know we have a GIMPLE_COND with two successors.
|
||||
One successor is BB, the other successor is an empty block which
|
||||
falls through into BB.
|
||||
|
||||
- There is a single PHI node at the join point (BB) and its arguments
|
||||
- are constants (0, 1) or (0, -1) or (0, (1 << shift)).
|
||||
-
|
||||
- So, given the condition COND, and the two PHI arguments, we can
|
||||
- rewrite this PHI into non-branching code:
|
||||
+ There is a single PHI node at the join point (BB).
|
||||
|
||||
- dest = (COND) or dest = COND' or dest = (COND) << shift
|
||||
-
|
||||
- We use the condition as-is if the argument associated with the
|
||||
- true edge has the value one or the argument associated with the
|
||||
- false edge as the value zero. Note that those conditions are not
|
||||
- the same since only one of the outgoing edges from the GIMPLE_COND
|
||||
- will directly reach BB and thus be associated with an argument. */
|
||||
+ So, given the condition COND, and the two PHI arguments, match and simplify
|
||||
+ can happen on (COND) ? arg0 : arg1. */
|
||||
|
||||
stmt = last_stmt (cond_bb);
|
||||
- result = PHI_RESULT (phi);
|
||||
|
||||
/* To handle special cases like floating point comparison, it is easier and
|
||||
less error-prone to build a tree and gimplify it on the fly though it is
|
||||
- less efficient. */
|
||||
- cond = fold_build2_loc (gimple_location (stmt),
|
||||
- gimple_cond_code (stmt), boolean_type_node,
|
||||
- gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));
|
||||
+ less efficient.
|
||||
+ Don't use fold_build2 here as that might create (bool)a instead of just
|
||||
+ "a != 0". */
|
||||
+ cond = build2_loc (gimple_location (stmt),
|
||||
+ gimple_cond_code (stmt), boolean_type_node,
|
||||
+ gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));
|
||||
|
||||
/* We need to know which is the true edge and which is the false
|
||||
edge so that we know when to invert the condition below. */
|
||||
extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
|
||||
- if ((e0 == true_edge && integer_zerop (arg0))
|
||||
- || (e0 == false_edge && !integer_zerop (arg0))
|
||||
- || (e1 == true_edge && integer_zerop (arg1))
|
||||
- || (e1 == false_edge && !integer_zerop (arg1)))
|
||||
- cond = fold_build1_loc (gimple_location (stmt),
|
||||
- TRUTH_NOT_EXPR, TREE_TYPE (cond), cond);
|
||||
-
|
||||
- if (neg)
|
||||
- {
|
||||
- cond = fold_convert_loc (gimple_location (stmt),
|
||||
- TREE_TYPE (result), cond);
|
||||
- cond = fold_build1_loc (gimple_location (stmt),
|
||||
- NEGATE_EXPR, TREE_TYPE (cond), cond);
|
||||
- }
|
||||
- else if (shift)
|
||||
- {
|
||||
- cond = fold_convert_loc (gimple_location (stmt),
|
||||
- TREE_TYPE (result), cond);
|
||||
- cond = fold_build2_loc (gimple_location (stmt),
|
||||
- LSHIFT_EXPR, TREE_TYPE (cond), cond,
|
||||
- build_int_cst (integer_type_node, shift));
|
||||
- }
|
||||
+ if (e1 == true_edge || e0 == false_edge)
|
||||
+ std::swap (arg0, arg1);
|
||||
|
||||
- /* Insert our new statements at the end of conditional block before the
|
||||
- COND_STMT. */
|
||||
- gsi = gsi_for_stmt (stmt);
|
||||
- new_var = force_gimple_operand_gsi (&gsi, cond, true, NULL, true,
|
||||
- GSI_SAME_STMT);
|
||||
+ tree type = TREE_TYPE (gimple_phi_result (phi));
|
||||
+ result = gimple_simplify (COND_EXPR, type,
|
||||
+ cond,
|
||||
+ arg0, arg1,
|
||||
+ &seq, NULL);
|
||||
+ if (!result)
|
||||
+ return false;
|
||||
|
||||
- if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (new_var)))
|
||||
- {
|
||||
- location_t locus_0, locus_1;
|
||||
+ gsi = gsi_last_bb (cond_bb);
|
||||
|
||||
- new_var2 = make_ssa_name (TREE_TYPE (result));
|
||||
- new_stmt = gimple_build_assign (new_var2, CONVERT_EXPR, new_var);
|
||||
- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
|
||||
- new_var = new_var2;
|
||||
-
|
||||
- /* Set the locus to the first argument, unless is doesn't have one. */
|
||||
- locus_0 = gimple_phi_arg_location (phi, 0);
|
||||
- locus_1 = gimple_phi_arg_location (phi, 1);
|
||||
- if (locus_0 == UNKNOWN_LOCATION)
|
||||
- locus_0 = locus_1;
|
||||
- gimple_set_location (new_stmt, locus_0);
|
||||
- }
|
||||
+ if (seq)
|
||||
+ gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
|
||||
|
||||
- replace_phi_edge_with_variable (cond_bb, e1, phi, new_var);
|
||||
+ replace_phi_edge_with_variable (cond_bb, e1, phi, result);
|
||||
|
||||
/* Note that we optimized this PHI. */
|
||||
return true;
|
||||
@@ -3905,7 +3839,7 @@ gate_hoist_loads (void)
|
||||
Conditional Replacement
|
||||
-----------------------
|
||||
|
||||
- This transformation, implemented in conditional_replacement,
|
||||
+ This transformation, implemented in match_simplify_replacement,
|
||||
replaces
|
||||
|
||||
bb0:
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
174
0062-Backport-Allow-match-and-simplified-phiopt-to-run-in.patch
Normal file
174
0062-Backport-Allow-match-and-simplified-phiopt-to-run-in.patch
Normal file
@ -0,0 +1,174 @@
|
||||
From fabbe6ccc798d3cb097c6371b4d53cd6dfde6c7c Mon Sep 17 00:00:00 2001
|
||||
From: Andrew Pinski <apinski@marvell.com>
|
||||
Date: Fri, 11 Jun 2021 13:21:34 -0700
|
||||
Subject: [PATCH 14/35] [Backport] Allow match-and-simplified phiopt to run in
|
||||
early phiopt
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=cd48e550d1dc58307ab1c0ab490745673f748ccc
|
||||
|
||||
To move a few things more to match-and-simplify from phiopt,
|
||||
we need to allow match_simplify_replacement to run in early
|
||||
phiopt. To do this we add a replacement for gimple_simplify
|
||||
that is explicitly for phiopt.
|
||||
|
||||
OK? Bootstrapped and tested on x86_64-linux-gnu with no
|
||||
regressions.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* tree-ssa-phiopt.c (match_simplify_replacement):
|
||||
Add early_p argument. Call gimple_simplify_phiopt
|
||||
instead of gimple_simplify.
|
||||
(tree_ssa_phiopt_worker): Update call to
|
||||
match_simplify_replacement and allow unconditionally.
|
||||
(phiopt_early_allow): New function.
|
||||
(gimple_simplify_phiopt): New function.
|
||||
---
|
||||
gcc/tree-ssa-phiopt.c | 89 ++++++++++++++++++++++++++++++++++---------
|
||||
1 file changed, 70 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||
index 9fa6363b6..92aeb8415 100644
|
||||
--- a/gcc/tree-ssa-phiopt.c
|
||||
+++ b/gcc/tree-ssa-phiopt.c
|
||||
@@ -48,12 +48,13 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "tree-eh.h"
|
||||
#include "gimple-fold.h"
|
||||
#include "internal-fn.h"
|
||||
+#include "gimple-match.h"
|
||||
|
||||
static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
|
||||
static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
|
||||
tree, tree);
|
||||
static bool match_simplify_replacement (basic_block, basic_block,
|
||||
- edge, edge, gphi *, tree, tree);
|
||||
+ edge, edge, gphi *, tree, tree, bool);
|
||||
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree,
|
||||
gimple *);
|
||||
static int value_replacement (basic_block, basic_block,
|
||||
@@ -348,9 +349,9 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||||
/* Do the replacement of conditional if it can be done. */
|
||||
if (!early_p && two_value_replacement (bb, bb1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
- else if (!early_p
|
||||
- && match_simplify_replacement (bb, bb1, e1, e2, phi,
|
||||
- arg0, arg1))
|
||||
+ else if (match_simplify_replacement (bb, bb1, e1, e2, phi,
|
||||
+ arg0, arg1,
|
||||
+ early_p))
|
||||
cfgchanged = true;
|
||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
@@ -763,6 +764,67 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
return true;
|
||||
}
|
||||
|
||||
+/* Return TRUE if CODE should be allowed during early phiopt.
|
||||
+ Currently this is to allow MIN/MAX and ABS/NEGATE. */
|
||||
+static bool
|
||||
+phiopt_early_allow (enum tree_code code)
|
||||
+{
|
||||
+ switch (code)
|
||||
+ {
|
||||
+ case MIN_EXPR:
|
||||
+ case MAX_EXPR:
|
||||
+ case ABS_EXPR:
|
||||
+ case ABSU_EXPR:
|
||||
+ case NEGATE_EXPR:
|
||||
+ case SSA_NAME:
|
||||
+ return true;
|
||||
+ default:
|
||||
+ return false;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* gimple_simplify_phiopt is like gimple_simplify but designed for PHIOPT.
|
||||
+ Return NULL if nothing can be simplified or the resulting simplified value
|
||||
+ with parts pushed if EARLY_P was true. Also rejects non allowed tree code
|
||||
+ if EARLY_P is set.
|
||||
+ Takes the comparison from COMP_STMT and two args, ARG0 and ARG1 and tries
|
||||
+ to simplify CMP ? ARG0 : ARG1. */
|
||||
+static tree
|
||||
+gimple_simplify_phiopt (bool early_p, tree type, gimple *comp_stmt,
|
||||
+ tree arg0, tree arg1,
|
||||
+ gimple_seq *seq)
|
||||
+{
|
||||
+ tree result;
|
||||
+ enum tree_code comp_code = gimple_cond_code (comp_stmt);
|
||||
+ location_t loc = gimple_location (comp_stmt);
|
||||
+ tree cmp0 = gimple_cond_lhs (comp_stmt);
|
||||
+ tree cmp1 = gimple_cond_rhs (comp_stmt);
|
||||
+ /* To handle special cases like floating point comparison, it is easier and
|
||||
+ less error-prone to build a tree and gimplify it on the fly though it is
|
||||
+ less efficient.
|
||||
+ Don't use fold_build2 here as that might create (bool)a instead of just
|
||||
+ "a != 0". */
|
||||
+ tree cond = build2_loc (loc, comp_code, boolean_type_node,
|
||||
+ cmp0, cmp1);
|
||||
+ gimple_match_op op (gimple_match_cond::UNCOND,
|
||||
+ COND_EXPR, type, cond, arg0, arg1);
|
||||
+
|
||||
+ if (op.resimplify (early_p ? NULL : seq, follow_all_ssa_edges))
|
||||
+ {
|
||||
+ /* Early we want only to allow some generated tree codes. */
|
||||
+ if (!early_p
|
||||
+ || op.code.is_tree_code ()
|
||||
+ || phiopt_early_allow ((tree_code)op.code))
|
||||
+ {
|
||||
+ result = maybe_push_res_to_seq (&op, seq);
|
||||
+ if (result)
|
||||
+ return result;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return NULL;
|
||||
+}
|
||||
+
|
||||
/* The function match_simplify_replacement does the main work of doing the
|
||||
replacement using match and simplify. Return true if the replacement is done.
|
||||
Otherwise return false.
|
||||
@@ -772,10 +834,9 @@ two_value_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
static bool
|
||||
match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
edge e0, edge e1, gphi *phi,
|
||||
- tree arg0, tree arg1)
|
||||
+ tree arg0, tree arg1, bool early_p)
|
||||
{
|
||||
gimple *stmt;
|
||||
- tree cond;
|
||||
gimple_stmt_iterator gsi;
|
||||
edge true_edge, false_edge;
|
||||
gimple_seq seq = NULL;
|
||||
@@ -799,15 +860,6 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
|
||||
stmt = last_stmt (cond_bb);
|
||||
|
||||
- /* To handle special cases like floating point comparison, it is easier and
|
||||
- less error-prone to build a tree and gimplify it on the fly though it is
|
||||
- less efficient.
|
||||
- Don't use fold_build2 here as that might create (bool)a instead of just
|
||||
- "a != 0". */
|
||||
- cond = build2_loc (gimple_location (stmt),
|
||||
- gimple_cond_code (stmt), boolean_type_node,
|
||||
- gimple_cond_lhs (stmt), gimple_cond_rhs (stmt));
|
||||
-
|
||||
/* We need to know which is the true edge and which is the false
|
||||
edge so that we know when to invert the condition below. */
|
||||
extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
|
||||
@@ -815,10 +867,9 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
std::swap (arg0, arg1);
|
||||
|
||||
tree type = TREE_TYPE (gimple_phi_result (phi));
|
||||
- result = gimple_simplify (COND_EXPR, type,
|
||||
- cond,
|
||||
- arg0, arg1,
|
||||
- &seq, NULL);
|
||||
+ result = gimple_simplify_phiopt (early_p, type, stmt,
|
||||
+ arg0, arg1,
|
||||
+ &seq);
|
||||
if (!result)
|
||||
return false;
|
||||
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
259
0063-Backport-Improve-match_simplify_replacement-in-phi-o.patch
Normal file
259
0063-Backport-Improve-match_simplify_replacement-in-phi-o.patch
Normal file
@ -0,0 +1,259 @@
|
||||
From d212d216be0752370dbe7bc63bd75b3a9249e0b5 Mon Sep 17 00:00:00 2001
|
||||
From: Andrew Pinski <apinski@marvell.com>
|
||||
Date: Tue, 1 Jun 2021 06:48:05 +0000
|
||||
Subject: [PATCH 15/35] [Backport] Improve match_simplify_replacement in
|
||||
phi-opt
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=c4574d23cb07340918793a5a98ae7bb2988b3791
|
||||
|
||||
This improves match_simplify_replacement in phi-opt to handle the
|
||||
case where there is one cheap (non-call) preparation statement in the
|
||||
middle basic block similar to xor_replacement and others.
|
||||
This allows xor_replacement to be removed, which this patch also does.
|
||||
|
||||
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
|
||||
|
||||
Thanks,
|
||||
Andrew Pinski
|
||||
|
||||
Changes since v1:
|
||||
v3 - Just minor changes to using gimple_assign_lhs
|
||||
instead of gimple_lhs and fixing a comment.
|
||||
v2 - change the check on the preparation statement to
|
||||
allow only assignments and no calls and only assignments
|
||||
that feed into the phi.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
PR tree-optimization/25290
|
||||
* tree-ssa-phiopt.c (xor_replacement): Delete.
|
||||
(tree_ssa_phiopt_worker): Delete use of xor_replacement.
|
||||
(match_simplify_replacement): Allow one cheap preparation
|
||||
statement that can be moved to before the if.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.dg/tree-ssa/pr96928-1.c: Fix testcase for now that ~
|
||||
happens on the outside of the bit_xor.
|
||||
---
|
||||
gcc/tree-ssa-phiopt.c | 164 ++++++++++++++----------------------------
|
||||
1 file changed, 52 insertions(+), 112 deletions(-)
|
||||
|
||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||
index 92aeb8415..51a2d3684 100644
|
||||
--- a/gcc/tree-ssa-phiopt.c
|
||||
+++ b/gcc/tree-ssa-phiopt.c
|
||||
@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "cfghooks.h"
|
||||
#include "tree-pass.h"
|
||||
#include "ssa.h"
|
||||
+#include "tree-ssa.h"
|
||||
#include "optabs-tree.h"
|
||||
#include "insn-config.h"
|
||||
#include "gimple-pretty-print.h"
|
||||
@@ -63,8 +64,6 @@ static bool minmax_replacement (basic_block, basic_block,
|
||||
edge, edge, gimple *, tree, tree);
|
||||
static bool abs_replacement (basic_block, basic_block,
|
||||
edge, edge, gimple *, tree, tree);
|
||||
-static bool xor_replacement (basic_block, basic_block,
|
||||
- edge, edge, gimple *, tree, tree);
|
||||
static bool spaceship_replacement (basic_block, basic_block,
|
||||
edge, edge, gphi *, tree, tree);
|
||||
static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
|
||||
@@ -355,9 +354,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||||
cfgchanged = true;
|
||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
- else if (!early_p
|
||||
- && xor_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
- cfgchanged = true;
|
||||
else if (!early_p
|
||||
&& cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
|
||||
e2, phi, arg0,
|
||||
@@ -841,14 +837,51 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
edge true_edge, false_edge;
|
||||
gimple_seq seq = NULL;
|
||||
tree result;
|
||||
-
|
||||
- if (!empty_block_p (middle_bb))
|
||||
- return false;
|
||||
+ gimple *stmt_to_move = NULL;
|
||||
|
||||
/* Special case A ? B : B as this will always simplify to B. */
|
||||
if (operand_equal_for_phi_arg_p (arg0, arg1))
|
||||
return false;
|
||||
|
||||
+ /* If the basic block only has a cheap preparation statement,
|
||||
+ allow it and move it once the transformation is done. */
|
||||
+ if (!empty_block_p (middle_bb))
|
||||
+ {
|
||||
+ stmt_to_move = last_and_only_stmt (middle_bb);
|
||||
+ if (!stmt_to_move)
|
||||
+ return false;
|
||||
+
|
||||
+ if (gimple_vuse (stmt_to_move))
|
||||
+ return false;
|
||||
+
|
||||
+ if (gimple_could_trap_p (stmt_to_move)
|
||||
+ || gimple_has_side_effects (stmt_to_move))
|
||||
+ return false;
|
||||
+
|
||||
+ if (gimple_uses_undefined_value_p (stmt_to_move))
|
||||
+ return false;
|
||||
+
|
||||
+ /* Allow assignments and not no calls.
|
||||
+ As const calls don't match any of the above, yet they could
|
||||
+ still have some side-effects - they could contain
|
||||
+ gimple_could_trap_p statements, like floating point
|
||||
+ exceptions or integer division by zero. See PR70586.
|
||||
+ FIXME: perhaps gimple_has_side_effects or gimple_could_trap_p
|
||||
+ should handle this. */
|
||||
+ if (!is_gimple_assign (stmt_to_move))
|
||||
+ return false;
|
||||
+
|
||||
+ tree lhs = gimple_assign_lhs (stmt_to_move);
|
||||
+ gimple *use_stmt;
|
||||
+ use_operand_p use_p;
|
||||
+
|
||||
+ /* Allow only a statement which feeds into the phi. */
|
||||
+ if (!lhs || TREE_CODE (lhs) != SSA_NAME
|
||||
+ || !single_imm_use (lhs, &use_p, &use_stmt)
|
||||
+ || use_stmt != phi)
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
/* At this point we know we have a GIMPLE_COND with two successors.
|
||||
One successor is BB, the other successor is an empty block which
|
||||
falls through into BB.
|
||||
@@ -874,7 +907,17 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
return false;
|
||||
|
||||
gsi = gsi_last_bb (cond_bb);
|
||||
-
|
||||
+ if (stmt_to_move)
|
||||
+ {
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ {
|
||||
+ fprintf (dump_file, "statement un-sinked:\n");
|
||||
+ print_gimple_stmt (dump_file, stmt_to_move, 0,
|
||||
+ TDF_VOPS|TDF_MEMSYMS);
|
||||
+ }
|
||||
+ gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt_to_move);
|
||||
+ gsi_move_before (&gsi1, &gsi);
|
||||
+ }
|
||||
if (seq)
|
||||
gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
|
||||
|
||||
@@ -2474,109 +2517,6 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
return true;
|
||||
}
|
||||
|
||||
-/* Optimize x < 0 ? ~y : y into (x >> (prec-1)) ^ y. */
|
||||
-
|
||||
-static bool
|
||||
-xor_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
- edge e0 ATTRIBUTE_UNUSED, edge e1,
|
||||
- gimple *phi, tree arg0, tree arg1)
|
||||
-{
|
||||
- if (!INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
|
||||
- return false;
|
||||
-
|
||||
- /* OTHER_BLOCK must have only one executable statement which must have the
|
||||
- form arg0 = ~arg1 or arg1 = ~arg0. */
|
||||
-
|
||||
- gimple *assign = last_and_only_stmt (middle_bb);
|
||||
- /* If we did not find the proper one's complement assignment, then we cannot
|
||||
- optimize. */
|
||||
- if (assign == NULL)
|
||||
- return false;
|
||||
-
|
||||
- /* If we got here, then we have found the only executable statement
|
||||
- in OTHER_BLOCK. If it is anything other than arg = ~arg1 or
|
||||
- arg1 = ~arg0, then we cannot optimize. */
|
||||
- if (!is_gimple_assign (assign))
|
||||
- return false;
|
||||
-
|
||||
- if (gimple_assign_rhs_code (assign) != BIT_NOT_EXPR)
|
||||
- return false;
|
||||
-
|
||||
- tree lhs = gimple_assign_lhs (assign);
|
||||
- tree rhs = gimple_assign_rhs1 (assign);
|
||||
-
|
||||
- /* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */
|
||||
- if (!(lhs == arg0 && rhs == arg1) && !(lhs == arg1 && rhs == arg0))
|
||||
- return false;
|
||||
-
|
||||
- gimple *cond = last_stmt (cond_bb);
|
||||
- tree result = PHI_RESULT (phi);
|
||||
-
|
||||
- /* Only relationals comparing arg[01] against zero are interesting. */
|
||||
- enum tree_code cond_code = gimple_cond_code (cond);
|
||||
- if (cond_code != LT_EXPR && cond_code != GE_EXPR)
|
||||
- return false;
|
||||
-
|
||||
- /* Make sure the conditional is x OP 0. */
|
||||
- tree clhs = gimple_cond_lhs (cond);
|
||||
- if (TREE_CODE (clhs) != SSA_NAME
|
||||
- || !INTEGRAL_TYPE_P (TREE_TYPE (clhs))
|
||||
- || TYPE_UNSIGNED (TREE_TYPE (clhs))
|
||||
- || TYPE_PRECISION (TREE_TYPE (clhs)) != TYPE_PRECISION (TREE_TYPE (arg1))
|
||||
- || !integer_zerop (gimple_cond_rhs (cond)))
|
||||
- return false;
|
||||
-
|
||||
- /* We need to know which is the true edge and which is the false
|
||||
- edge so that we know if have xor or inverted xor. */
|
||||
- edge true_edge, false_edge;
|
||||
- extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
|
||||
-
|
||||
- /* For GE_EXPR, if the true edge goes to OTHER_BLOCK, then we
|
||||
- will need to invert the result. Similarly for LT_EXPR if
|
||||
- the false edge goes to OTHER_BLOCK. */
|
||||
- edge e;
|
||||
- if (cond_code == GE_EXPR)
|
||||
- e = true_edge;
|
||||
- else
|
||||
- e = false_edge;
|
||||
-
|
||||
- bool invert = e->dest == middle_bb;
|
||||
-
|
||||
- result = duplicate_ssa_name (result, NULL);
|
||||
-
|
||||
- gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
|
||||
-
|
||||
- int prec = TYPE_PRECISION (TREE_TYPE (clhs));
|
||||
- gimple *new_stmt
|
||||
- = gimple_build_assign (make_ssa_name (TREE_TYPE (clhs)), RSHIFT_EXPR, clhs,
|
||||
- build_int_cst (integer_type_node, prec - 1));
|
||||
- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
|
||||
-
|
||||
- if (!useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (clhs)))
|
||||
- {
|
||||
- new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
|
||||
- NOP_EXPR, gimple_assign_lhs (new_stmt));
|
||||
- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
|
||||
- }
|
||||
- lhs = gimple_assign_lhs (new_stmt);
|
||||
-
|
||||
- if (invert)
|
||||
- {
|
||||
- new_stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (result)),
|
||||
- BIT_NOT_EXPR, rhs);
|
||||
- gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
|
||||
- rhs = gimple_assign_lhs (new_stmt);
|
||||
- }
|
||||
-
|
||||
- new_stmt = gimple_build_assign (result, BIT_XOR_EXPR, lhs, rhs);
|
||||
- gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
|
||||
-
|
||||
- replace_phi_edge_with_variable (cond_bb, e1, phi, result);
|
||||
-
|
||||
- /* Note that we optimized this PHI. */
|
||||
- return true;
|
||||
-}
|
||||
-
|
||||
/* Auxiliary functions to determine the set of memory accesses which
|
||||
can't trap because they are preceded by accesses to the same memory
|
||||
portion. We do that for MEM_REFs, so we only need to track
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
103
0064-Backport-phiopt-Use-gphi-phi-instead-of-gimple-phi-s.patch
Normal file
103
0064-Backport-phiopt-Use-gphi-phi-instead-of-gimple-phi-s.patch
Normal file
@ -0,0 +1,103 @@
|
||||
From 0d55d24aa4e47c40f74e0281d023089cfaafcf74 Mon Sep 17 00:00:00 2001
|
||||
From: Jakub Jelinek <jakub@redhat.com>
|
||||
Date: Thu, 6 May 2021 14:05:06 +0200
|
||||
Subject: [PATCH 16/35] [Backport] phiopt: Use gphi *phi instead of gimple *phi
|
||||
some more
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=cfd65e8d5299a7cf7d2ecd92b0e24ea4cfb697d9
|
||||
|
||||
Various functions in phiopt are also called with a gphi * but use
|
||||
gimple * argument for it.
|
||||
|
||||
2021-05-06 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* tree-ssa-phiopt.c (value_replacement, minmax_replacement,
|
||||
abs_replacement, xor_replacement,
|
||||
cond_removal_in_popcount_clz_ctz_pattern,
|
||||
replace_phi_edge_with_variable): Change type of phi argument from
|
||||
gimple * to gphi *.
|
||||
---
|
||||
gcc/tree-ssa-phiopt.c | 22 ++++++++++------------
|
||||
1 file changed, 10 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||
index 51a2d3684..045a7b1b8 100644
|
||||
--- a/gcc/tree-ssa-phiopt.c
|
||||
+++ b/gcc/tree-ssa-phiopt.c
|
||||
@@ -59,21 +59,21 @@ static bool match_simplify_replacement (basic_block, basic_block,
|
||||
static gphi *factor_out_conditional_conversion (edge, edge, gphi *, tree, tree,
|
||||
gimple *);
|
||||
static int value_replacement (basic_block, basic_block,
|
||||
- edge, edge, gimple *, tree, tree);
|
||||
+ edge, edge, gphi *, tree, tree);
|
||||
static bool minmax_replacement (basic_block, basic_block,
|
||||
- edge, edge, gimple *, tree, tree);
|
||||
+ edge, edge, gphi *, tree, tree);
|
||||
static bool abs_replacement (basic_block, basic_block,
|
||||
- edge, edge, gimple *, tree, tree);
|
||||
+ edge, edge, gphi *, tree, tree);
|
||||
static bool spaceship_replacement (basic_block, basic_block,
|
||||
edge, edge, gphi *, tree, tree);
|
||||
static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
|
||||
- edge, edge, gimple *,
|
||||
+ edge, edge, gphi *,
|
||||
tree, tree);
|
||||
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
|
||||
hash_set<tree> *);
|
||||
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
|
||||
static hash_set<tree> * get_non_trapping ();
|
||||
-static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree);
|
||||
+static void replace_phi_edge_with_variable (basic_block, edge, gphi *, tree);
|
||||
static void hoist_adjacent_loads (basic_block, basic_block,
|
||||
basic_block, basic_block);
|
||||
static bool do_phiopt_pattern (basic_block, basic_block, basic_block);
|
||||
@@ -389,7 +389,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||||
|
||||
static void
|
||||
replace_phi_edge_with_variable (basic_block cond_block,
|
||||
- edge e, gimple *phi, tree new_tree)
|
||||
+ edge e, gphi *phi, tree new_tree)
|
||||
{
|
||||
basic_block bb = gimple_bb (phi);
|
||||
basic_block block_to_remove;
|
||||
@@ -1129,8 +1129,7 @@ absorbing_element_p (tree_code code, tree arg, bool right, tree rval)
|
||||
|
||||
static int
|
||||
value_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
- edge e0, edge e1, gimple *phi,
|
||||
- tree arg0, tree arg1)
|
||||
+ edge e0, edge e1, gphi *phi, tree arg0, tree arg1)
|
||||
{
|
||||
gimple_stmt_iterator gsi;
|
||||
gimple *cond;
|
||||
@@ -1438,8 +1437,7 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
|
||||
static bool
|
||||
minmax_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
- edge e0, edge e1, gimple *phi,
|
||||
- tree arg0, tree arg1)
|
||||
+ edge e0, edge e1, gphi *phi, tree arg0, tree arg1)
|
||||
{
|
||||
tree result, type, rhs;
|
||||
gcond *cond;
|
||||
@@ -2240,7 +2238,7 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
static bool
|
||||
cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
||||
basic_block middle_bb,
|
||||
- edge e1, edge e2, gimple *phi,
|
||||
+ edge e1, edge e2, gphi *phi,
|
||||
tree arg0, tree arg1)
|
||||
{
|
||||
gimple *cond;
|
||||
@@ -2398,7 +2396,7 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
||||
static bool
|
||||
abs_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
edge e0 ATTRIBUTE_UNUSED, edge e1,
|
||||
- gimple *phi, tree arg0, tree arg1)
|
||||
+ gphi *phi, tree arg0, tree arg1)
|
||||
{
|
||||
tree result;
|
||||
gassign *new_stmt;
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
212
0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch
Normal file
212
0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch
Normal file
@ -0,0 +1,212 @@
|
||||
From 33dc778a34d7b93978efe922bb1b4583d8e6c4bb Mon Sep 17 00:00:00 2001
|
||||
From: Roger Sayle <roger@nextmovesoftware.com>
|
||||
Date: Mon, 2 Aug 2021 13:27:53 +0100
|
||||
Subject: [PATCH 17/35] [Backport] Optimize x ? bswap(x) : 0 in tree-ssa-phiopt
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=f9fcf754825a1e01033336f84c18690aaa971a6f
|
||||
|
||||
Many thanks again to Jakub Jelinek for a speedy fix for PR 101642.
|
||||
Interestingly, that test case "bswap16(x) ? : x" also reveals a
|
||||
missed optimization opportunity. The resulting "x ? bswap(x) : 0"
|
||||
can be further simplified to just bswap(x).
|
||||
|
||||
Conveniently, tree-ssa-phiopt.c already recognizes/optimizes the
|
||||
related "x ? popcount(x) : 0", so this patch simply makes that
|
||||
transformation more general, additionally handling bswap, parity,
|
||||
ffs and clrsb. All of the required infrastructure is already
|
||||
present thanks to Jakub previously adding support for clz/ctz.
|
||||
To reflect this generalization, the name of the function is changed
|
||||
from cond_removal_in_popcount_clz_ctz_pattern to the hopefully
|
||||
equally descriptive cond_removal_in_builtin_zero_pattern.
|
||||
|
||||
2021-08-02 Roger Sayle <roger@nextmovesoftware.com>
|
||||
|
||||
gcc/ChangeLog
|
||||
* tree-ssa-phiopt.c (cond_removal_in_builtin_zero_pattern):
|
||||
Renamed from cond_removal_in_popcount_clz_ctz_pattern.
|
||||
Add support for BSWAP, FFS, PARITY and CLRSB builtins.
|
||||
(tree_ssa_phiopt_worker): Update call to function above.
|
||||
|
||||
gcc/testsuite/ChangeLog
|
||||
* gcc.dg/tree-ssa/phi-opt-25.c: New test case.
|
||||
---
|
||||
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c | 83 ++++++++++++++++++++++
|
||||
gcc/tree-ssa-phiopt.c | 37 +++++++---
|
||||
2 files changed, 109 insertions(+), 11 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
|
||||
new file mode 100644
|
||||
index 000000000..c52c92e1d
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
|
||||
@@ -0,0 +1,83 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
+
|
||||
+unsigned short test_bswap16(unsigned short x)
|
||||
+{
|
||||
+ return x ? __builtin_bswap16(x) : 0;
|
||||
+}
|
||||
+
|
||||
+unsigned int test_bswap32(unsigned int x)
|
||||
+{
|
||||
+ return x ? __builtin_bswap32(x) : 0;
|
||||
+}
|
||||
+
|
||||
+unsigned long long test_bswap64(unsigned long long x)
|
||||
+{
|
||||
+ return x ? __builtin_bswap64(x) : 0;
|
||||
+}
|
||||
+
|
||||
+int test_clrsb(int x)
|
||||
+{
|
||||
+ return x ? __builtin_clrsb(x) : (__SIZEOF_INT__*8-1);
|
||||
+}
|
||||
+
|
||||
+int test_clrsbl(long x)
|
||||
+{
|
||||
+ return x ? __builtin_clrsbl(x) : (__SIZEOF_LONG__*8-1);
|
||||
+}
|
||||
+
|
||||
+int test_clrsbll(long long x)
|
||||
+{
|
||||
+ return x ? __builtin_clrsbll(x) : (__SIZEOF_LONG_LONG__*8-1);
|
||||
+}
|
||||
+
|
||||
+#if 0
|
||||
+/* BUILT_IN_FFS is transformed by match.pd */
|
||||
+int test_ffs(unsigned int x)
|
||||
+{
|
||||
+ return x ? __builtin_ffs(x) : 0;
|
||||
+}
|
||||
+
|
||||
+int test_ffsl(unsigned long x)
|
||||
+{
|
||||
+ return x ? __builtin_ffsl(x) : 0;
|
||||
+}
|
||||
+
|
||||
+int test_ffsll(unsigned long long x)
|
||||
+{
|
||||
+ return x ? __builtin_ffsll(x) : 0;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+int test_parity(int x)
|
||||
+{
|
||||
+ return x ? __builtin_parity(x) : 0;
|
||||
+}
|
||||
+
|
||||
+int test_parityl(long x)
|
||||
+{
|
||||
+ return x ? __builtin_parityl(x) : 0;
|
||||
+}
|
||||
+
|
||||
+int test_parityll(long long x)
|
||||
+{
|
||||
+ return x ? __builtin_parityll(x) : 0;
|
||||
+}
|
||||
+
|
||||
+int test_popcount(int x)
|
||||
+{
|
||||
+ return x ? __builtin_popcount(x) : 0;
|
||||
+}
|
||||
+
|
||||
+int test_popcountl(long x)
|
||||
+{
|
||||
+ return x ? __builtin_popcountl(x) : 0;
|
||||
+}
|
||||
+
|
||||
+int test_popcountll(long long x)
|
||||
+{
|
||||
+ return x ? __builtin_popcountll(x) : 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-not "goto" "optimized" } } */
|
||||
+
|
||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||
index 045a7b1b8..21ac08145 100644
|
||||
--- a/gcc/tree-ssa-phiopt.c
|
||||
+++ b/gcc/tree-ssa-phiopt.c
|
||||
@@ -66,9 +66,9 @@ static bool abs_replacement (basic_block, basic_block,
|
||||
edge, edge, gphi *, tree, tree);
|
||||
static bool spaceship_replacement (basic_block, basic_block,
|
||||
edge, edge, gphi *, tree, tree);
|
||||
-static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
|
||||
- edge, edge, gphi *,
|
||||
- tree, tree);
|
||||
+static bool cond_removal_in_builtin_zero_pattern (basic_block, basic_block,
|
||||
+ edge, edge, gphi *,
|
||||
+ tree, tree);
|
||||
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
|
||||
hash_set<tree> *);
|
||||
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
|
||||
@@ -355,9 +355,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
else if (!early_p
|
||||
- && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
|
||||
- e2, phi, arg0,
|
||||
- arg1))
|
||||
+ && cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2,
|
||||
+ phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
@@ -2204,7 +2203,8 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
return true;
|
||||
}
|
||||
|
||||
-/* Convert
|
||||
+/* Optimize x ? __builtin_fun (x) : C, where C is __builtin_fun (0).
|
||||
+ Convert
|
||||
|
||||
<bb 2>
|
||||
if (b_4(D) != 0)
|
||||
@@ -2236,10 +2236,10 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
instead of 0 above it uses the value from that macro. */
|
||||
|
||||
static bool
|
||||
-cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
||||
- basic_block middle_bb,
|
||||
- edge e1, edge e2, gphi *phi,
|
||||
- tree arg0, tree arg1)
|
||||
+cond_removal_in_builtin_zero_pattern (basic_block cond_bb,
|
||||
+ basic_block middle_bb,
|
||||
+ edge e1, edge e2, gphi *phi,
|
||||
+ tree arg0, tree arg1)
|
||||
{
|
||||
gimple *cond;
|
||||
gimple_stmt_iterator gsi, gsi_from;
|
||||
@@ -2287,6 +2287,12 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
||||
int val = 0;
|
||||
switch (cfn)
|
||||
{
|
||||
+ case CFN_BUILT_IN_BSWAP16:
|
||||
+ case CFN_BUILT_IN_BSWAP32:
|
||||
+ case CFN_BUILT_IN_BSWAP64:
|
||||
+ case CFN_BUILT_IN_BSWAP128:
|
||||
+ CASE_CFN_FFS:
|
||||
+ CASE_CFN_PARITY:
|
||||
CASE_CFN_POPCOUNT:
|
||||
break;
|
||||
CASE_CFN_CLZ:
|
||||
@@ -2315,6 +2321,15 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
||||
}
|
||||
}
|
||||
return false;
|
||||
+ case BUILT_IN_CLRSB:
|
||||
+ val = TYPE_PRECISION (integer_type_node) - 1;
|
||||
+ break;
|
||||
+ case BUILT_IN_CLRSBL:
|
||||
+ val = TYPE_PRECISION (long_integer_type_node) - 1;
|
||||
+ break;
|
||||
+ case BUILT_IN_CLRSBLL:
|
||||
+ val = TYPE_PRECISION (long_long_integer_type_node) - 1;
|
||||
+ break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
251
0066-Backport-tree-optimization-102880-make-PHI-OPT-recog.patch
Normal file
251
0066-Backport-tree-optimization-102880-make-PHI-OPT-recog.patch
Normal file
@ -0,0 +1,251 @@
|
||||
From 77398954ce517aa011b7a254c7aa2858521b2093 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Biener <rguenther@suse.de>
|
||||
Date: Mon, 15 Nov 2021 15:19:36 +0100
|
||||
Subject: [PATCH 18/35] [Backport] tree-optimization/102880 - make PHI-OPT
|
||||
recognize more CFGs
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=f98f373dd822b35c52356b753d528924e9f89678
|
||||
|
||||
This allows extra edges into the middle BB for the PHI-OPT
|
||||
transforms using replace_phi_edge_with_variable that do not
|
||||
end up moving stmts from that middle BB. This avoids regressing
|
||||
gcc.dg/tree-ssa/ssa-hoist-4.c with the actual fix for PR102880
|
||||
where CFG cleanup has the choice to remove two forwarders and
|
||||
picks "the wrong" one, leading to
|
||||
|
||||
if (a > b) /
|
||||
/\ /
|
||||
/ <BB>
|
||||
/ |
|
||||
# PHI <a, b>
|
||||
|
||||
rather than
|
||||
|
||||
if (a > b) |
|
||||
/\ |
|
||||
<BB> \ |
|
||||
/ \ |
|
||||
# PHI <a, b, b>
|
||||
|
||||
but it's relatively straight-forward to support extra edges
|
||||
into the middle-BB in paths ending in replace_phi_edge_with_variable
|
||||
and that do not require moving stmts. That's because we really
|
||||
only want to remove the edge from the condition to the middle BB.
|
||||
Of course actually doing that means updating dominators in non-trivial
|
||||
ways which is why I kept the original code for the single edge
|
||||
case and simply defer to CFG cleanup by adjusting the condition for
|
||||
the complicated case.
|
||||
|
||||
The testcase needs to be a GIMPLE one since it's quite unreliable
|
||||
to produce the desired CFG.
|
||||
|
||||
2021-11-15 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/102880
|
||||
* tree-ssa-phiopt.c (tree_ssa_phiopt_worker): Push
|
||||
single_pred (bb1) condition to places that really need it.
|
||||
(match_simplify_replacement): Likewise.
|
||||
(value_replacement): Likewise.
|
||||
(replace_phi_edge_with_variable): Deal with extra edges
|
||||
into the middle BB.
|
||||
|
||||
* gcc.dg/tree-ssa/phi-opt-26.c: New testcase.
|
||||
---
|
||||
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c | 31 +++++++++
|
||||
gcc/tree-ssa-phiopt.c | 73 +++++++++++++---------
|
||||
2 files changed, 75 insertions(+), 29 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
|
||||
new file mode 100644
|
||||
index 000000000..21aa66e38
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
|
||||
@@ -0,0 +1,31 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O -fgimple -fdump-tree-phiopt1" } */
|
||||
+
|
||||
+int __GIMPLE (ssa,startwith("phiopt"))
|
||||
+foo (int a, int b, int flag)
|
||||
+{
|
||||
+ int res;
|
||||
+
|
||||
+ __BB(2):
|
||||
+ if (flag_2(D) != 0)
|
||||
+ goto __BB6;
|
||||
+ else
|
||||
+ goto __BB4;
|
||||
+
|
||||
+ __BB(4):
|
||||
+ if (a_3(D) > b_4(D))
|
||||
+ goto __BB7;
|
||||
+ else
|
||||
+ goto __BB6;
|
||||
+
|
||||
+ __BB(6):
|
||||
+ goto __BB7;
|
||||
+
|
||||
+ __BB(7):
|
||||
+ res_1 = __PHI (__BB4: a_3(D), __BB6: b_4(D));
|
||||
+ return res_1;
|
||||
+}
|
||||
+
|
||||
+/* We should be able to detect MAX despite the extra edge into
|
||||
+ the middle BB. */
|
||||
+/* { dg-final { scan-tree-dump "MAX" "phiopt1" } } */
|
||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||
index 21ac08145..079d29e74 100644
|
||||
--- a/gcc/tree-ssa-phiopt.c
|
||||
+++ b/gcc/tree-ssa-phiopt.c
|
||||
@@ -219,7 +219,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||||
|
||||
/* If either bb1's succ or bb2 or bb2's succ is non NULL. */
|
||||
if (EDGE_COUNT (bb1->succs) == 0
|
||||
- || bb2 == NULL
|
||||
|| EDGE_COUNT (bb2->succs) == 0)
|
||||
continue;
|
||||
|
||||
@@ -279,14 +278,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||||
|| (e1->flags & EDGE_FALLTHRU) == 0)
|
||||
continue;
|
||||
|
||||
- /* Also make sure that bb1 only have one predecessor and that it
|
||||
- is bb. */
|
||||
- if (!single_pred_p (bb1)
|
||||
- || single_pred (bb1) != bb)
|
||||
- continue;
|
||||
-
|
||||
if (do_store_elim)
|
||||
{
|
||||
+ /* Also make sure that bb1 only have one predecessor and that it
|
||||
+ is bb. */
|
||||
+ if (!single_pred_p (bb1)
|
||||
+ || single_pred (bb1) != bb)
|
||||
+ continue;
|
||||
+
|
||||
/* bb1 is the middle block, bb2 the join block, bb the split block,
|
||||
e1 the fallthrough edge from bb1 to bb2. We can't do the
|
||||
optimization if the join block has more than two predecessors. */
|
||||
@@ -331,10 +330,11 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||||
node. */
|
||||
gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE);
|
||||
|
||||
- gphi *newphi = factor_out_conditional_conversion (e1, e2, phi,
|
||||
- arg0, arg1,
|
||||
- cond_stmt);
|
||||
- if (newphi != NULL)
|
||||
+ gphi *newphi;
|
||||
+ if (single_pred_p (bb1)
|
||||
+ && (newphi = factor_out_conditional_conversion (e1, e2, phi,
|
||||
+ arg0, arg1,
|
||||
+ cond_stmt)))
|
||||
{
|
||||
phi = newphi;
|
||||
/* factor_out_conditional_conversion may create a new PHI in
|
||||
@@ -355,12 +355,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
else if (!early_p
|
||||
+ && single_pred_p (bb1)
|
||||
&& cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2,
|
||||
phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
- else if (spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
+ else if (single_pred_p (bb1)
|
||||
+ && spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
}
|
||||
}
|
||||
@@ -391,35 +393,41 @@ replace_phi_edge_with_variable (basic_block cond_block,
|
||||
edge e, gphi *phi, tree new_tree)
|
||||
{
|
||||
basic_block bb = gimple_bb (phi);
|
||||
- basic_block block_to_remove;
|
||||
gimple_stmt_iterator gsi;
|
||||
|
||||
/* Change the PHI argument to new. */
|
||||
SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);
|
||||
|
||||
/* Remove the empty basic block. */
|
||||
+ edge edge_to_remove;
|
||||
if (EDGE_SUCC (cond_block, 0)->dest == bb)
|
||||
+ edge_to_remove = EDGE_SUCC (cond_block, 1);
|
||||
+ else
|
||||
+ edge_to_remove = EDGE_SUCC (cond_block, 0);
|
||||
+ if (EDGE_COUNT (edge_to_remove->dest->preds) == 1)
|
||||
{
|
||||
- EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
|
||||
- EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
|
||||
- EDGE_SUCC (cond_block, 0)->probability = profile_probability::always ();
|
||||
-
|
||||
- block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
|
||||
+ e->flags |= EDGE_FALLTHRU;
|
||||
+ e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
|
||||
+ e->probability = profile_probability::always ();
|
||||
+ delete_basic_block (edge_to_remove->dest);
|
||||
+
|
||||
+ /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
|
||||
+ gsi = gsi_last_bb (cond_block);
|
||||
+ gsi_remove (&gsi, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
- EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
|
||||
- EDGE_SUCC (cond_block, 1)->flags
|
||||
- &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
|
||||
- EDGE_SUCC (cond_block, 1)->probability = profile_probability::always ();
|
||||
-
|
||||
- block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
|
||||
+ /* If there are other edges into the middle block make
|
||||
+ CFG cleanup deal with the edge removal to avoid
|
||||
+ updating dominators here in a non-trivial way. */
|
||||
+ gcond *cond = as_a <gcond *> (last_stmt (cond_block));
|
||||
+ if (edge_to_remove->flags & EDGE_TRUE_VALUE)
|
||||
+ gimple_cond_make_false (cond);
|
||||
+ else
|
||||
+ gimple_cond_make_true (cond);
|
||||
}
|
||||
- delete_basic_block (block_to_remove);
|
||||
|
||||
- /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
|
||||
- gsi = gsi_last_bb (cond_block);
|
||||
- gsi_remove (&gsi, true);
|
||||
+ statistics_counter_event (cfun, "Replace PHI with variable", 1);
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file,
|
||||
@@ -846,6 +854,9 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
allow it and move it once the transformation is done. */
|
||||
if (!empty_block_p (middle_bb))
|
||||
{
|
||||
+ if (!single_pred_p (middle_bb))
|
||||
+ return false;
|
||||
+
|
||||
stmt_to_move = last_and_only_stmt (middle_bb);
|
||||
if (!stmt_to_move)
|
||||
return false;
|
||||
@@ -1225,6 +1236,11 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
}
|
||||
else
|
||||
{
|
||||
+ if (!single_pred_p (middle_bb))
|
||||
+ return 0;
|
||||
+ statistics_counter_event (cfun, "Replace PHI with "
|
||||
+ "variable/value_replacement", 1);
|
||||
+
|
||||
/* Replace the PHI arguments with arg. */
|
||||
SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
|
||||
SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
|
||||
@@ -1239,7 +1255,6 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
-
|
||||
}
|
||||
|
||||
/* Now optimize (x != 0) ? x + y : y to just x + y. */
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
250
0067-Backport-tree-Add-vector_element_bits-_tree-PR94980-.patch
Normal file
250
0067-Backport-tree-Add-vector_element_bits-_tree-PR94980-.patch
Normal file
@ -0,0 +1,250 @@
|
||||
From a2f5e6f38fe7b5b32a252643b00dd2d7ab0e3fac Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 May 2020 09:01:10 +0100
|
||||
Subject: [PATCH 19/35] [Backport] tree: Add vector_element_bits(_tree)
|
||||
[PR94980 1/3]
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=d17a896da1e898928d337596d029f0ece0039d55
|
||||
|
||||
A lot of code that wants to know the number of bits in a vector
|
||||
element gets that information from the element's TYPE_SIZE,
|
||||
which is always equal to TYPE_SIZE_UNIT * BITS_PER_UNIT.
|
||||
This doesn't work for SVE and AVX512-style packed boolean vectors,
|
||||
where several elements can occupy a single byte.
|
||||
|
||||
This patch introduces a new pair of helpers for getting the true
|
||||
(possibly sub-byte) size. I made a token attempt to convert obvious
|
||||
element size calculations, but I'm sure I missed some.
|
||||
|
||||
2020-05-12 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
gcc/
|
||||
PR tree-optimization/94980
|
||||
* tree.h (vector_element_bits, vector_element_bits_tree): Declare.
|
||||
* tree.c (vector_element_bits, vector_element_bits_tree): New.
|
||||
* match.pd: Use the new functions instead of determining the
|
||||
vector element size directly from TYPE_SIZE(_UNIT).
|
||||
* tree-vect-data-refs.c (vect_gather_scatter_fn_p): Likewise.
|
||||
* tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Likewise.
|
||||
* tree-vect-stmts.c (vect_is_simple_cond): Likewise.
|
||||
* tree-vect-generic.c (expand_vector_piecewise): Likewise.
|
||||
(expand_vector_conversion): Likewise.
|
||||
(expand_vector_addition): Likewise for a TYPE_SIZE_UNIT used as
|
||||
a divisor. Convert the dividend to bits to compensate.
|
||||
* tree-vect-loop.c (vectorizable_live_operation): Call
|
||||
vector_element_bits instead of open-coding it.
|
||||
---
|
||||
gcc/ChangeLog | 17 +++++++++++++++++
|
||||
gcc/match.pd | 2 +-
|
||||
gcc/tree-vect-data-refs.c | 2 +-
|
||||
gcc/tree-vect-generic.c | 19 +++++++------------
|
||||
gcc/tree-vect-loop.c | 4 +---
|
||||
gcc/tree-vect-patterns.c | 3 +--
|
||||
gcc/tree-vect-stmts.c | 3 +--
|
||||
gcc/tree.c | 24 ++++++++++++++++++++++++
|
||||
gcc/tree.h | 2 ++
|
||||
9 files changed, 55 insertions(+), 21 deletions(-)
|
||||
|
||||
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
|
||||
index 3b1384e70..07aea9b86 100644
|
||||
--- a/gcc/ChangeLog
|
||||
+++ b/gcc/ChangeLog
|
||||
@@ -1,3 +1,20 @@
|
||||
+2020-05-12 Richard Sandiford <richard.sandiford@arm.com>
|
||||
+
|
||||
+ PR tree-optimization/94980
|
||||
+ * tree.h (vector_element_bits, vector_element_bits_tree): Declare.
|
||||
+ * tree.c (vector_element_bits, vector_element_bits_tree): New.
|
||||
+ * match.pd: Use the new functions instead of determining the
|
||||
+ vector element size directly from TYPE_SIZE(_UNIT).
|
||||
+ * tree-vect-data-refs.c (vect_gather_scatter_fn_p): Likewise.
|
||||
+ * tree-vect-patterns.c (vect_recog_mask_conversion_pattern): Likewise.
|
||||
+ * tree-vect-stmts.c (vect_is_simple_cond): Likewise.
|
||||
+ * tree-vect-generic.c (expand_vector_piecewise): Likewise.
|
||||
+ (expand_vector_conversion): Likewise.
|
||||
+ (expand_vector_addition): Likewise for a TYPE_SIZE_UNIT used as
|
||||
+ a divisor. Convert the dividend to bits to compensate.
|
||||
+ * tree-vect-loop.c (vectorizable_live_operation): Call
|
||||
+ vector_element_bits instead of open-coding it.
|
||||
+
|
||||
2021-04-08 Release Manager
|
||||
|
||||
* GCC 10.3.0 released.
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index 5899eea95..79a0228d2 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -6236,7 +6236,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
}
|
||||
(if (ins)
|
||||
(bit_insert { op0; } { ins; }
|
||||
- { bitsize_int (at * tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)))); })
|
||||
+ { bitsize_int (at * vector_element_bits (type)); })
|
||||
(if (changed)
|
||||
(vec_perm { op0; } { op1; } { op2; }))))))))))
|
||||
|
||||
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
|
||||
index d78b06455..e4466a4f3 100644
|
||||
--- a/gcc/tree-vect-data-refs.c
|
||||
+++ b/gcc/tree-vect-data-refs.c
|
||||
@@ -3709,7 +3709,7 @@ vect_gather_scatter_fn_p (vec_info *vinfo, bool read_p, bool masked_p,
|
||||
tree *offset_vectype_out)
|
||||
{
|
||||
unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
|
||||
- unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
|
||||
+ unsigned int element_bits = vector_element_bits (vectype);
|
||||
if (element_bits != memory_bits)
|
||||
/* For now the vector elements must be the same width as the
|
||||
memory elements. */
|
||||
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
|
||||
index c10492034..37c3956a4 100644
|
||||
--- a/gcc/tree-vect-generic.c
|
||||
+++ b/gcc/tree-vect-generic.c
|
||||
@@ -276,8 +276,7 @@ expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
|
||||
tree part_width = TYPE_SIZE (inner_type);
|
||||
tree index = bitsize_int (0);
|
||||
int nunits = nunits_for_known_piecewise_op (type);
|
||||
- int delta = tree_to_uhwi (part_width)
|
||||
- / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
|
||||
+ int delta = tree_to_uhwi (part_width) / vector_element_bits (type);
|
||||
int i;
|
||||
location_t loc = gimple_location (gsi_stmt (*gsi));
|
||||
|
||||
@@ -357,8 +356,7 @@ expand_vector_addition (gimple_stmt_iterator *gsi,
|
||||
elem_op_func f, elem_op_func f_parallel,
|
||||
tree type, tree a, tree b, enum tree_code code)
|
||||
{
|
||||
- int parts_per_word = UNITS_PER_WORD
|
||||
- / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
|
||||
+ int parts_per_word = BITS_PER_WORD / vector_element_bits (type);
|
||||
|
||||
if (INTEGRAL_TYPE_P (TREE_TYPE (type))
|
||||
&& parts_per_word >= 4
|
||||
@@ -1733,19 +1731,17 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
|
||||
optab optab1 = unknown_optab;
|
||||
|
||||
gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
|
||||
- gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (ret_type))));
|
||||
- gcc_checking_assert (tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (arg_type))));
|
||||
if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
|
||||
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
|
||||
code = FIX_TRUNC_EXPR;
|
||||
else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
|
||||
&& SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
|
||||
code = FLOAT_EXPR;
|
||||
- if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type)))
|
||||
- < tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))))
|
||||
+ unsigned int ret_elt_bits = vector_element_bits (ret_type);
|
||||
+ unsigned int arg_elt_bits = vector_element_bits (arg_type);
|
||||
+ if (ret_elt_bits < arg_elt_bits)
|
||||
modifier = NARROW;
|
||||
- else if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (ret_type)))
|
||||
- > tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type))))
|
||||
+ else if (ret_elt_bits > arg_elt_bits)
|
||||
modifier = WIDEN;
|
||||
|
||||
if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
|
||||
@@ -1908,8 +1904,7 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
|
||||
tree part_width = TYPE_SIZE (compute_type);
|
||||
tree index = bitsize_int (0);
|
||||
int nunits = nunits_for_known_piecewise_op (arg_type);
|
||||
- int delta = tree_to_uhwi (part_width)
|
||||
- / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (arg_type)));
|
||||
+ int delta = tree_to_uhwi (part_width) / arg_elt_bits;
|
||||
int i;
|
||||
location_t loc = gimple_location (gsi_stmt (*gsi));
|
||||
|
||||
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
|
||||
index 899b56087..7990e31de 100644
|
||||
--- a/gcc/tree-vect-loop.c
|
||||
+++ b/gcc/tree-vect-loop.c
|
||||
@@ -8059,9 +8059,7 @@ vectorizable_live_operation (stmt_vec_info stmt_info,
|
||||
: gimple_get_lhs (stmt);
|
||||
lhs_type = TREE_TYPE (lhs);
|
||||
|
||||
- bitsize = (VECTOR_BOOLEAN_TYPE_P (vectype)
|
||||
- ? bitsize_int (TYPE_PRECISION (TREE_TYPE (vectype)))
|
||||
- : TYPE_SIZE (TREE_TYPE (vectype)));
|
||||
+ bitsize = vector_element_bits_tree (vectype);
|
||||
vec_bitsize = TYPE_SIZE (vectype);
|
||||
|
||||
/* Get the vectorized lhs of STMT and the lane to use (counted in bits). */
|
||||
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
|
||||
index 84d7ddb17..b076740ef 100644
|
||||
--- a/gcc/tree-vect-patterns.c
|
||||
+++ b/gcc/tree-vect-patterns.c
|
||||
@@ -4406,8 +4406,7 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
|
||||
|| dt == vect_constant_def))
|
||||
{
|
||||
tree wide_scalar_type = build_nonstandard_integer_type
|
||||
- (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
|
||||
- TYPE_UNSIGNED (rhs1_type));
|
||||
+ (vector_element_bits (vectype1), TYPE_UNSIGNED (rhs1_type));
|
||||
tree vectype3 = get_vectype_for_scalar_type (vinfo,
|
||||
wide_scalar_type);
|
||||
if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
|
||||
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
|
||||
index 4636b7ba2..0bdf9a547 100644
|
||||
--- a/gcc/tree-vect-stmts.c
|
||||
+++ b/gcc/tree-vect-stmts.c
|
||||
@@ -10717,8 +10717,7 @@ vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
|
||||
&& tree_int_cst_lt (TYPE_SIZE (scalar_type),
|
||||
TYPE_SIZE (TREE_TYPE (vectype))))
|
||||
scalar_type = build_nonstandard_integer_type
|
||||
- (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
|
||||
- TYPE_UNSIGNED (scalar_type));
|
||||
+ (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
|
||||
*comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
|
||||
slp_node);
|
||||
}
|
||||
diff --git a/gcc/tree.c b/gcc/tree.c
|
||||
index 3e6647ae0..9a0cedf10 100644
|
||||
--- a/gcc/tree.c
|
||||
+++ b/gcc/tree.c
|
||||
@@ -13892,6 +13892,30 @@ vector_type_mode (const_tree t)
|
||||
return mode;
|
||||
}
|
||||
|
||||
+/* Return the size in bits of each element of vector type TYPE. */
|
||||
+
|
||||
+unsigned int
|
||||
+vector_element_bits (const_tree type)
|
||||
+{
|
||||
+ gcc_checking_assert (VECTOR_TYPE_P (type));
|
||||
+ if (VECTOR_BOOLEAN_TYPE_P (type))
|
||||
+ return vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (type)),
|
||||
+ TYPE_VECTOR_SUBPARTS (type));
|
||||
+ return tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
|
||||
+}
|
||||
+
|
||||
+/* Calculate the size in bits of each element of vector type TYPE
|
||||
+ and return the result as a tree of type bitsizetype. */
|
||||
+
|
||||
+tree
|
||||
+vector_element_bits_tree (const_tree type)
|
||||
+{
|
||||
+ gcc_checking_assert (VECTOR_TYPE_P (type));
|
||||
+ if (VECTOR_BOOLEAN_TYPE_P (type))
|
||||
+ return bitsize_int (vector_element_bits (type));
|
||||
+ return TYPE_SIZE (TREE_TYPE (type));
|
||||
+}
|
||||
+
|
||||
/* Verify that basic properties of T match TV and thus T can be a variant of
|
||||
TV. TV should be the more specified variant (i.e. the main variant). */
|
||||
|
||||
diff --git a/gcc/tree.h b/gcc/tree.h
|
||||
index bddc6e528..c66207fa0 100644
|
||||
--- a/gcc/tree.h
|
||||
+++ b/gcc/tree.h
|
||||
@@ -1996,6 +1996,8 @@ class auto_suppress_location_wrappers
|
||||
|
||||
extern machine_mode element_mode (const_tree);
|
||||
extern machine_mode vector_type_mode (const_tree);
|
||||
+extern unsigned int vector_element_bits (const_tree);
|
||||
+extern tree vector_element_bits_tree (const_tree);
|
||||
|
||||
/* The "canonical" type for this type node, which is used by frontends to
|
||||
compare the type for equality with another type. If two types are
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
1063
0068-Backport-Lower-VEC_COND_EXPR-into-internal-functions.patch
Normal file
1063
0068-Backport-Lower-VEC_COND_EXPR-into-internal-functions.patch
Normal file
File diff suppressed because it is too large
Load Diff
379
0069-Backport-gimple-match-Add-a-gimple_extract_op-functi.patch
Normal file
379
0069-Backport-gimple-match-Add-a-gimple_extract_op-functi.patch
Normal file
@ -0,0 +1,379 @@
|
||||
From 21d265af074726b166e08301a2f847c474fcb680 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 30 Nov 2021 09:52:24 +0000
|
||||
Subject: [PATCH 21/35] [Backport] gimple-match: Add a gimple_extract_op
|
||||
function
|
||||
|
||||
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=33973fa754de1f95d459bfca66c0d80deec36537
|
||||
|
||||
code_helper and gimple_match_op seem like generally useful ways
|
||||
of summing up a gimple_assign or gimple_call (or gimple_cond).
|
||||
This patch adds a gimple_extract_op function that can be used
|
||||
for that.
|
||||
|
||||
gcc/
|
||||
* gimple-match.h (code_helper): Add functions for querying whether
|
||||
the code represents an internal_fn or a built_in_function.
|
||||
Provide explicit conversion operators for both cases.
|
||||
(gimple_extract_op): Declare.
|
||||
* gimple-match-head.c (gimple_extract): New function, extracted from...
|
||||
(gimple_simplify): ...here.
|
||||
(gimple_extract_op): New function.
|
||||
---
|
||||
gcc/gimple-match-head.c | 219 ++++++++++++++++++++--------------------
|
||||
gcc/gimple-match.h | 27 +++++
|
||||
2 files changed, 135 insertions(+), 111 deletions(-)
|
||||
|
||||
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
|
||||
index 9b3e7298d..c1dea1734 100644
|
||||
--- a/gcc/gimple-match-head.c
|
||||
+++ b/gcc/gimple-match-head.c
|
||||
@@ -884,12 +884,20 @@ try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op,
|
||||
return true;
|
||||
}
|
||||
|
||||
-/* The main STMT based simplification entry. It is used by the fold_stmt
|
||||
- and the fold_stmt_to_constant APIs. */
|
||||
+/* Common subroutine of gimple_extract_op and gimple_simplify. Try to
|
||||
+ describe STMT in RES_OP, returning true on success. Before recording
|
||||
+ an operand, call:
|
||||
|
||||
-bool
|
||||
-gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|
||||
- tree (*valueize)(tree), tree (*top_valueize)(tree))
|
||||
+ - VALUEIZE_CONDITION for a COND_EXPR condition
|
||||
+ - VALUEIZE_OP for every other top-level operand
|
||||
+
|
||||
+ Both routines take a tree argument and returns a tree. */
|
||||
+
|
||||
+template<typename ValueizeOp, typename ValueizeCondition>
|
||||
+inline bool
|
||||
+gimple_extract (gimple *stmt, gimple_match_op *res_op,
|
||||
+ ValueizeOp valueize_op,
|
||||
+ ValueizeCondition valueize_condition)
|
||||
{
|
||||
switch (gimple_code (stmt))
|
||||
{
|
||||
@@ -905,101 +913,50 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|
||||
|| code == VIEW_CONVERT_EXPR)
|
||||
{
|
||||
tree op0 = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
|
||||
- bool valueized = false;
|
||||
- op0 = do_valueize (op0, top_valueize, valueized);
|
||||
- res_op->set_op (code, type, op0);
|
||||
- return (gimple_resimplify1 (seq, res_op, valueize)
|
||||
- || valueized);
|
||||
+ res_op->set_op (code, type, valueize_op (op0));
|
||||
+ return true;
|
||||
}
|
||||
else if (code == BIT_FIELD_REF)
|
||||
{
|
||||
tree rhs1 = gimple_assign_rhs1 (stmt);
|
||||
- tree op0 = TREE_OPERAND (rhs1, 0);
|
||||
- bool valueized = false;
|
||||
- op0 = do_valueize (op0, top_valueize, valueized);
|
||||
+ tree op0 = valueize_op (TREE_OPERAND (rhs1, 0));
|
||||
res_op->set_op (code, type, op0,
|
||||
TREE_OPERAND (rhs1, 1),
|
||||
TREE_OPERAND (rhs1, 2),
|
||||
REF_REVERSE_STORAGE_ORDER (rhs1));
|
||||
- if (res_op->reverse)
|
||||
- return valueized;
|
||||
- return (gimple_resimplify3 (seq, res_op, valueize)
|
||||
- || valueized);
|
||||
+ return true;
|
||||
}
|
||||
- else if (code == SSA_NAME
|
||||
- && top_valueize)
|
||||
+ else if (code == SSA_NAME)
|
||||
{
|
||||
tree op0 = gimple_assign_rhs1 (stmt);
|
||||
- tree valueized = top_valueize (op0);
|
||||
- if (!valueized || op0 == valueized)
|
||||
- return false;
|
||||
- res_op->set_op (TREE_CODE (op0), type, valueized);
|
||||
+ res_op->set_op (TREE_CODE (op0), type, valueize_op (op0));
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
case GIMPLE_UNARY_RHS:
|
||||
{
|
||||
tree rhs1 = gimple_assign_rhs1 (stmt);
|
||||
- bool valueized = false;
|
||||
- rhs1 = do_valueize (rhs1, top_valueize, valueized);
|
||||
- res_op->set_op (code, type, rhs1);
|
||||
- return (gimple_resimplify1 (seq, res_op, valueize)
|
||||
- || valueized);
|
||||
+ res_op->set_op (code, type, valueize_op (rhs1));
|
||||
+ return true;
|
||||
}
|
||||
case GIMPLE_BINARY_RHS:
|
||||
{
|
||||
- tree rhs1 = gimple_assign_rhs1 (stmt);
|
||||
- tree rhs2 = gimple_assign_rhs2 (stmt);
|
||||
- bool valueized = false;
|
||||
- rhs1 = do_valueize (rhs1, top_valueize, valueized);
|
||||
- rhs2 = do_valueize (rhs2, top_valueize, valueized);
|
||||
+ tree rhs1 = valueize_op (gimple_assign_rhs1 (stmt));
|
||||
+ tree rhs2 = valueize_op (gimple_assign_rhs2 (stmt));
|
||||
res_op->set_op (code, type, rhs1, rhs2);
|
||||
- return (gimple_resimplify2 (seq, res_op, valueize)
|
||||
- || valueized);
|
||||
+ return true;
|
||||
}
|
||||
case GIMPLE_TERNARY_RHS:
|
||||
{
|
||||
- bool valueized = false;
|
||||
tree rhs1 = gimple_assign_rhs1 (stmt);
|
||||
- /* If this is a [VEC_]COND_EXPR first try to simplify an
|
||||
- embedded GENERIC condition. */
|
||||
- if (code == COND_EXPR
|
||||
- || code == VEC_COND_EXPR)
|
||||
- {
|
||||
- if (COMPARISON_CLASS_P (rhs1))
|
||||
- {
|
||||
- tree lhs = TREE_OPERAND (rhs1, 0);
|
||||
- tree rhs = TREE_OPERAND (rhs1, 1);
|
||||
- lhs = do_valueize (lhs, top_valueize, valueized);
|
||||
- rhs = do_valueize (rhs, top_valueize, valueized);
|
||||
- gimple_match_op res_op2 (res_op->cond, TREE_CODE (rhs1),
|
||||
- TREE_TYPE (rhs1), lhs, rhs);
|
||||
- if ((gimple_resimplify2 (seq, &res_op2, valueize)
|
||||
- || valueized)
|
||||
- && res_op2.code.is_tree_code ())
|
||||
- {
|
||||
- valueized = true;
|
||||
- if (TREE_CODE_CLASS ((enum tree_code) res_op2.code)
|
||||
- == tcc_comparison)
|
||||
- rhs1 = build2 (res_op2.code, TREE_TYPE (rhs1),
|
||||
- res_op2.ops[0], res_op2.ops[1]);
|
||||
- else if (res_op2.code == SSA_NAME
|
||||
- || res_op2.code == INTEGER_CST
|
||||
- || res_op2.code == VECTOR_CST)
|
||||
- rhs1 = res_op2.ops[0];
|
||||
- else
|
||||
- valueized = false;
|
||||
- }
|
||||
- }
|
||||
- }
|
||||
- tree rhs2 = gimple_assign_rhs2 (stmt);
|
||||
- tree rhs3 = gimple_assign_rhs3 (stmt);
|
||||
- rhs1 = do_valueize (rhs1, top_valueize, valueized);
|
||||
- rhs2 = do_valueize (rhs2, top_valueize, valueized);
|
||||
- rhs3 = do_valueize (rhs3, top_valueize, valueized);
|
||||
+ if (code == COND_EXPR && COMPARISON_CLASS_P (rhs1))
|
||||
+ rhs1 = valueize_condition (rhs1);
|
||||
+ else
|
||||
+ rhs1 = valueize_op (rhs1);
|
||||
+ tree rhs2 = valueize_op (gimple_assign_rhs2 (stmt));
|
||||
+ tree rhs3 = valueize_op (gimple_assign_rhs3 (stmt));
|
||||
res_op->set_op (code, type, rhs1, rhs2, rhs3);
|
||||
- return (gimple_resimplify3 (seq, res_op, valueize)
|
||||
- || valueized);
|
||||
+ return true;
|
||||
}
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
@@ -1013,7 +970,6 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|
||||
&& gimple_call_num_args (stmt) >= 1
|
||||
&& gimple_call_num_args (stmt) <= 5)
|
||||
{
|
||||
- bool valueized = false;
|
||||
combined_fn cfn;
|
||||
if (gimple_call_internal_p (stmt))
|
||||
cfn = as_combined_fn (gimple_call_internal_fn (stmt));
|
||||
@@ -1023,7 +979,7 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|
||||
if (!fn)
|
||||
return false;
|
||||
|
||||
- fn = do_valueize (fn, top_valueize, valueized);
|
||||
+ fn = valueize_op (fn);
|
||||
if (TREE_CODE (fn) != ADDR_EXPR
|
||||
|| TREE_CODE (TREE_OPERAND (fn, 0)) != FUNCTION_DECL)
|
||||
return false;
|
||||
@@ -1039,47 +995,17 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|
||||
unsigned int num_args = gimple_call_num_args (stmt);
|
||||
res_op->set_op (cfn, TREE_TYPE (gimple_call_lhs (stmt)), num_args);
|
||||
for (unsigned i = 0; i < num_args; ++i)
|
||||
- {
|
||||
- tree arg = gimple_call_arg (stmt, i);
|
||||
- res_op->ops[i] = do_valueize (arg, top_valueize, valueized);
|
||||
- }
|
||||
- if (internal_fn_p (cfn)
|
||||
- && try_conditional_simplification (as_internal_fn (cfn),
|
||||
- res_op, seq, valueize))
|
||||
- return true;
|
||||
- switch (num_args)
|
||||
- {
|
||||
- case 1:
|
||||
- return (gimple_resimplify1 (seq, res_op, valueize)
|
||||
- || valueized);
|
||||
- case 2:
|
||||
- return (gimple_resimplify2 (seq, res_op, valueize)
|
||||
- || valueized);
|
||||
- case 3:
|
||||
- return (gimple_resimplify3 (seq, res_op, valueize)
|
||||
- || valueized);
|
||||
- case 4:
|
||||
- return (gimple_resimplify4 (seq, res_op, valueize)
|
||||
- || valueized);
|
||||
- case 5:
|
||||
- return (gimple_resimplify5 (seq, res_op, valueize)
|
||||
- || valueized);
|
||||
- default:
|
||||
- gcc_unreachable ();
|
||||
- }
|
||||
+ res_op->ops[i] = valueize_op (gimple_call_arg (stmt, i));
|
||||
+ return true;
|
||||
}
|
||||
break;
|
||||
|
||||
case GIMPLE_COND:
|
||||
{
|
||||
- tree lhs = gimple_cond_lhs (stmt);
|
||||
- tree rhs = gimple_cond_rhs (stmt);
|
||||
- bool valueized = false;
|
||||
- lhs = do_valueize (lhs, top_valueize, valueized);
|
||||
- rhs = do_valueize (rhs, top_valueize, valueized);
|
||||
+ tree lhs = valueize_op (gimple_cond_lhs (stmt));
|
||||
+ tree rhs = valueize_op (gimple_cond_rhs (stmt));
|
||||
res_op->set_op (gimple_cond_code (stmt), boolean_type_node, lhs, rhs);
|
||||
- return (gimple_resimplify2 (seq, res_op, valueize)
|
||||
- || valueized);
|
||||
+ return true;
|
||||
}
|
||||
|
||||
default:
|
||||
@@ -1089,6 +1015,77 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|
||||
return false;
|
||||
}
|
||||
|
||||
+/* Try to describe STMT in RES_OP, returning true on success.
|
||||
+ For GIMPLE_CONDs, describe the condition that is being tested.
|
||||
+ For GIMPLE_ASSIGNs, describe the rhs of the assignment.
|
||||
+ For GIMPLE_CALLs, describe the call. */
|
||||
+
|
||||
+bool
|
||||
+gimple_extract_op (gimple *stmt, gimple_match_op *res_op)
|
||||
+{
|
||||
+ auto nop = [](tree op) { return op; };
|
||||
+ return gimple_extract (stmt, res_op, nop, nop);
|
||||
+}
|
||||
+
|
||||
+/* The main STMT based simplification entry. It is used by the fold_stmt
|
||||
+ and the fold_stmt_to_constant APIs. */
|
||||
+
|
||||
+bool
|
||||
+gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
|
||||
+ tree (*valueize)(tree), tree (*top_valueize)(tree))
|
||||
+{
|
||||
+ bool valueized = false;
|
||||
+ auto valueize_op = [&](tree op)
|
||||
+ {
|
||||
+ return do_valueize (op, top_valueize, valueized);
|
||||
+ };
|
||||
+ auto valueize_condition = [&](tree op) -> tree
|
||||
+ {
|
||||
+ bool cond_valueized = false;
|
||||
+ tree lhs = do_valueize (TREE_OPERAND (op, 0), top_valueize,
|
||||
+ cond_valueized);
|
||||
+ tree rhs = do_valueize (TREE_OPERAND (op, 1), top_valueize,
|
||||
+ cond_valueized);
|
||||
+ gimple_match_op res_op2 (res_op->cond, TREE_CODE (op),
|
||||
+ TREE_TYPE (op), lhs, rhs);
|
||||
+ if ((gimple_resimplify2 (seq, &res_op2, valueize)
|
||||
+ || cond_valueized)
|
||||
+ && res_op2.code.is_tree_code ())
|
||||
+ {
|
||||
+ if (TREE_CODE_CLASS ((tree_code) res_op2.code) == tcc_comparison)
|
||||
+ {
|
||||
+ valueized = true;
|
||||
+ return build2 (res_op2.code, TREE_TYPE (op),
|
||||
+ res_op2.ops[0], res_op2.ops[1]);
|
||||
+ }
|
||||
+ else if (res_op2.code == SSA_NAME
|
||||
+ || res_op2.code == INTEGER_CST
|
||||
+ || res_op2.code == VECTOR_CST)
|
||||
+ {
|
||||
+ valueized = true;
|
||||
+ return res_op2.ops[0];
|
||||
+ }
|
||||
+ }
|
||||
+ return valueize_op (op);
|
||||
+ };
|
||||
+
|
||||
+ if (!gimple_extract (stmt, res_op, valueize_op, valueize_condition))
|
||||
+ return false;
|
||||
+
|
||||
+ if (res_op->code.is_internal_fn ())
|
||||
+ {
|
||||
+ internal_fn ifn = internal_fn (res_op->code);
|
||||
+ if (try_conditional_simplification (ifn, res_op, seq, valueize))
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ if (!res_op->reverse
|
||||
+ && res_op->num_ops
|
||||
+ && res_op->resimplify (seq, valueize))
|
||||
+ return true;
|
||||
+
|
||||
+ return valueized;
|
||||
+}
|
||||
|
||||
/* Helper for the autogenerated code, valueize OP. */
|
||||
|
||||
diff --git a/gcc/gimple-match.h b/gcc/gimple-match.h
|
||||
index 097898aed..39858c45f 100644
|
||||
--- a/gcc/gimple-match.h
|
||||
+++ b/gcc/gimple-match.h
|
||||
@@ -33,13 +33,39 @@ public:
|
||||
code_helper (combined_fn fn) : rep (-(int) fn) {}
|
||||
operator tree_code () const { return (tree_code) rep; }
|
||||
operator combined_fn () const { return (combined_fn) -rep; }
|
||||
+ explicit operator internal_fn () const;
|
||||
+ explicit operator built_in_function () const;
|
||||
bool is_tree_code () const { return rep > 0; }
|
||||
bool is_fn_code () const { return rep < 0; }
|
||||
+ bool is_internal_fn () const;
|
||||
+ bool is_builtin_fn () const;
|
||||
int get_rep () const { return rep; }
|
||||
private:
|
||||
int rep;
|
||||
};
|
||||
|
||||
+inline code_helper::operator internal_fn () const
|
||||
+{
|
||||
+ return as_internal_fn (combined_fn (*this));
|
||||
+}
|
||||
+
|
||||
+inline code_helper::operator built_in_function () const
|
||||
+{
|
||||
+ return as_builtin_fn (combined_fn (*this));
|
||||
+}
|
||||
+
|
||||
+inline bool
|
||||
+code_helper::is_internal_fn () const
|
||||
+{
|
||||
+ return is_fn_code () && internal_fn_p (combined_fn (*this));
|
||||
+}
|
||||
+
|
||||
+inline bool
|
||||
+code_helper::is_builtin_fn () const
|
||||
+{
|
||||
+ return is_fn_code () && builtin_fn_p (combined_fn (*this));
|
||||
+}
|
||||
+
|
||||
/* Represents the condition under which an operation should happen,
|
||||
and the value to use otherwise. The condition applies elementwise
|
||||
(as for VEC_COND_EXPR) if the values are vectors. */
|
||||
@@ -333,6 +359,7 @@ gimple_simplified_result_is_gimple_val (const gimple_match_op *op)
|
||||
|
||||
extern tree (*mprts_hook) (gimple_match_op *);
|
||||
|
||||
+bool gimple_extract_op (gimple *, gimple_match_op *);
|
||||
bool gimple_simplify (gimple *, gimple_match_op *, gimple_seq *,
|
||||
tree (*)(tree), tree (*)(tree));
|
||||
tree maybe_push_res_to_seq (gimple_match_op *, gimple_seq *,
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
1004
0070-Backport-aarch64-Fix-subs_compare_2.c-regression-PR1.patch
Normal file
1004
0070-Backport-aarch64-Fix-subs_compare_2.c-regression-PR1.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,31 @@
|
||||
From b57c55b282e7a9a7b2cc0d3843e58fd7998685e6 Mon Sep 17 00:00:00 2001
|
||||
From: zhongyunde <zhongyunde@huawei.com>
|
||||
Date: Fri, 4 Nov 2022 23:19:44 +0800
|
||||
Subject: [PATCH 23/35] [PHIOPT] Disable the match A?CST1:0 when the CST1 is
|
||||
negative value
|
||||
|
||||
Fix the regression of gcc.target/aarch64/sve/vcond_3.c
|
||||
|
||||
gcc:
|
||||
* match.pd (A?CST1:CST2): Disable the simplifications A? (-CST1):0
|
||||
---
|
||||
gcc/match.pd | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index 79a0228d2..fc1a34dd3 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -3347,7 +3347,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
(if (integer_onep (@1))
|
||||
(convert (convert:boolean_type_node @0)))
|
||||
/* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */
|
||||
- (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1))
|
||||
+ (if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (TREE_TYPE (@1))
|
||||
+ && integer_pow2p (@1))
|
||||
(with {
|
||||
tree shift = build_int_cst (integer_type_node, tree_log2 (@1));
|
||||
}
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
1770
0072-Struct-Reorg-Merge-struct_layout-pass-into-struct_re.patch
Normal file
1770
0072-Struct-Reorg-Merge-struct_layout-pass-into-struct_re.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,89 @@
|
||||
From 9af03694082c462bee86c167c78717089a93a188 Mon Sep 17 00:00:00 2001
|
||||
From: zhongyunde <zhongyunde@huawei.com>
|
||||
Date: Sat, 5 Nov 2022 13:22:33 +0800
|
||||
Subject: [PATCH 25/35] [PHIOPT] Add A ? B op CST : B match and simplify
|
||||
optimizations
|
||||
|
||||
Refer to commit b6bdd7a4, use pattern matching to implement the
|
||||
A ? B op CST : B (where CST is power of 2) simplifications.
|
||||
Fixes the 1st issue of https://gitee.com/openeuler/gcc/issues/I5TSG0?from=project-issue.
|
||||
|
||||
gcc/
|
||||
* match.pd (A ? B op CST : B): Add simplifications for A ? B op POW2 : B
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.dg/pr107190.c: New test.
|
||||
---
|
||||
gcc/match.pd | 21 +++++++++++++++++++++
|
||||
gcc/testsuite/gcc.dg/pr107190.c | 27 +++++++++++++++++++++++++++
|
||||
2 files changed, 48 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/pr107190.c
|
||||
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index fc1a34dd3..5c5b5f89e 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -3383,6 +3383,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
)
|
||||
#endif
|
||||
|
||||
+#if GIMPLE
|
||||
+(if (canonicalize_math_p ())
|
||||
+/* These patterns are mostly used by PHIOPT to move some operations outside of
|
||||
+ the if statements. They should be done late because it gives jump threading
|
||||
+ and few other passes to reduce what is going on. */
|
||||
+/* a ? x op C : x -> x op (a << log2(C)) when C is power of 2. */
|
||||
+ (for op (plus minus bit_ior bit_xor lshift rshift lrotate rrotate)
|
||||
+ (simplify
|
||||
+ (cond @0 (op:s @1 integer_pow2p@2) @1)
|
||||
+ /* powerof2cst */
|
||||
+ (if (INTEGRAL_TYPE_P (type))
|
||||
+ (with {
|
||||
+ tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
|
||||
+ }
|
||||
+ (op @1 (lshift (convert (convert:boolean_type_node @0)) { shift; })))
|
||||
+ )
|
||||
+ )
|
||||
+ )
|
||||
+)
|
||||
+#endif
|
||||
+
|
||||
/* Simplification moved from fold_cond_expr_with_comparison. It may also
|
||||
be extended. */
|
||||
/* This pattern implements two kinds simplification:
|
||||
diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c
|
||||
new file mode 100644
|
||||
index 000000000..235b2761a
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/pr107190.c
|
||||
@@ -0,0 +1,27 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
|
||||
+
|
||||
+# define BN_BITS4 32
|
||||
+# define BN_MASK2 (0xffffffffffffffffL)
|
||||
+# define BN_MASK2l (0xffffffffL)
|
||||
+# define BN_MASK2h (0xffffffff00000000L)
|
||||
+# define BN_MASK2h1 (0xffffffff80000000L)
|
||||
+# define LBITS(a) ((a)&BN_MASK2l)
|
||||
+# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
|
||||
+# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
|
||||
+
|
||||
+unsigned int test_m(unsigned long in0, unsigned long in1) {
|
||||
+ unsigned long m, m1, lt, ht, bl, bh;
|
||||
+ lt = LBITS(in0);
|
||||
+ ht = HBITS(in0);
|
||||
+ bl = LBITS(in1);
|
||||
+ bh = HBITS(in1);
|
||||
+ m = bh * lt;
|
||||
+ m1 = bl * ht;
|
||||
+ ht = bh * ht;
|
||||
+ m = (m + m1) & BN_MASK2;
|
||||
+ if (m < m1) ht += L2HBITS((unsigned long)1);
|
||||
+ return ht + m;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump "COND_EXPR in block 2 and PHI in block 4 converted to straightline code" "phiopt2" } } */
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
130
0074-FORWPROP-Fold-series-of-instructions-into-mul.patch
Normal file
130
0074-FORWPROP-Fold-series-of-instructions-into-mul.patch
Normal file
@ -0,0 +1,130 @@
|
||||
From 2a2d0ba6a26d64f4c1f9352bb2c69dea8b67d6a6 Mon Sep 17 00:00:00 2001
|
||||
From: zhongyunde <zhongyunde@huawei.com>
|
||||
Date: Wed, 9 Nov 2022 17:04:13 +0800
|
||||
Subject: [PATCH 26/35] [FORWPROP] Fold series of instructions into mul
|
||||
|
||||
Merge the low part of series instructions into mul
|
||||
|
||||
gcc/
|
||||
* match.pd: Add simplifications for low part of mul
|
||||
* common.opt: Add new option fmerge-mull enable with -O2
|
||||
* opts.c: default_options_table
|
||||
|
||||
gcc/testsuite/
|
||||
* g++.dg/tree-ssa/mull64.C: New test.
|
||||
---
|
||||
gcc/common.opt | 4 +++
|
||||
gcc/match.pd | 27 ++++++++++++++++++++
|
||||
gcc/opts.c | 1 +
|
||||
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 34 ++++++++++++++++++++++++++
|
||||
4 files changed, 66 insertions(+)
|
||||
create mode 100644 gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index ad147f7a9..6a7f66624 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -2069,6 +2069,10 @@ fmerge-debug-strings
|
||||
Common Report Var(flag_merge_debug_strings) Init(1)
|
||||
Attempt to merge identical debug strings across compilation units.
|
||||
|
||||
+fmerge-mull
|
||||
+Common Report Var(flag_merge_mull) Init(0) Optimization
|
||||
+Attempt to merge series instructions into mul.
|
||||
+
|
||||
fmessage-length=
|
||||
Common RejectNegative Joined UInteger
|
||||
-fmessage-length=<number> Limit diagnostics to <number> characters per line. 0 suppresses line-wrapping.
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index 5c5b5f89e..f6c5befd7 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -3404,6 +3404,33 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
)
|
||||
#endif
|
||||
|
||||
+#if GIMPLE
|
||||
+/* These patterns are mostly used by FORWPROP1 to fold some operations into more
|
||||
+ simple IR. The following scenario should be matched:
|
||||
+ In0Lo = In0(D) & 4294967295;
|
||||
+ In0Hi = In0(D) >> 32;
|
||||
+ In1Lo = In1(D) & 4294967295;
|
||||
+ In1Hi = In1(D) >> 32;
|
||||
+ Addc = In0Lo * In1Hi + In0Hi * In1Lo;
|
||||
+ addc32 = Addc << 32;
|
||||
+ ResLo = In0Lo * In1Lo + addc32 */
|
||||
+(simplify
|
||||
+ (plus:c (mult @4 @5)
|
||||
+ (lshift
|
||||
+ (plus:c
|
||||
+ (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3))
|
||||
+ (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2)))
|
||||
+ INTEGER_CST@3
|
||||
+ )
|
||||
+ )
|
||||
+ (if (flag_merge_mull && INTEGRAL_TYPE_P (type)
|
||||
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
|
||||
+ && TYPE_PRECISION (type) == 64)
|
||||
+ (mult (convert:type @0) (convert:type @1))
|
||||
+ )
|
||||
+)
|
||||
+#endif
|
||||
+
|
||||
/* Simplification moved from fold_cond_expr_with_comparison. It may also
|
||||
be extended. */
|
||||
/* This pattern implements two kinds simplification:
|
||||
diff --git a/gcc/opts.c b/gcc/opts.c
|
||||
index f12b13599..751965e46 100644
|
||||
--- a/gcc/opts.c
|
||||
+++ b/gcc/opts.c
|
||||
@@ -511,6 +511,7 @@ static const struct default_options default_options_table[] =
|
||||
{ OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_finline_functions, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
|
||||
+ { OPT_LEVELS_2_PLUS, OPT_fmerge_mull, NULL, 1 },
|
||||
|
||||
/* -O2 and above optimizations, but not -Os or -Og. */
|
||||
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_falign_functions, NULL, 1 },
|
||||
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
new file mode 100644
|
||||
index 000000000..2a3b74604
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
@@ -0,0 +1,34 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
|
||||
+
|
||||
+# define BN_BITS4 32
|
||||
+# define BN_MASK2 (0xffffffffffffffffL)
|
||||
+# define BN_MASK2l (0xffffffffL)
|
||||
+# define BN_MASK2h (0xffffffff00000000L)
|
||||
+# define BN_MASK2h1 (0xffffffff80000000L)
|
||||
+# define LBITS(a) ((a)&BN_MASK2l)
|
||||
+# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
|
||||
+# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
|
||||
+
|
||||
+void mul64(unsigned long in0, unsigned long in1,
|
||||
+ unsigned long &retLo, unsigned long &retHi) {
|
||||
+ unsigned long m00, m01, m10, m11, al, ah, bl, bh;
|
||||
+ unsigned long Addc, addc32, low;
|
||||
+ al = LBITS(in0);
|
||||
+ ah = HBITS(in0);
|
||||
+ bl = LBITS(in1);
|
||||
+ bh = HBITS(in1);
|
||||
+ m10 = bh * al;
|
||||
+ m00 = bl * al;
|
||||
+ m01 = bl * ah;
|
||||
+ m11 = bh * ah;
|
||||
+ Addc = (m10 + m01) & BN_MASK2;
|
||||
+ if (Addc < m01) m11 += L2HBITS((unsigned long)1);
|
||||
+ m11 += HBITS(Addc);
|
||||
+ addc32 = L2HBITS(Addc);
|
||||
+ low = (m00 + addc32) & BN_MASK2; if (low < addc32) m11++;
|
||||
+ retLo = low;
|
||||
+ retHi = m11;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
105
0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch
Normal file
105
0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch
Normal file
@ -0,0 +1,105 @@
|
||||
From 315911bd3ae6f42366779e262ab76d9ed79359a0 Mon Sep 17 00:00:00 2001
|
||||
From: zhongyunde <zhongyunde@huawei.com>
|
||||
Date: Fri, 11 Nov 2022 11:30:37 +0800
|
||||
Subject: [PATCH 27/35] [FORWPROP] Fold series of instructions into umulh
|
||||
|
||||
Merge the high part of series instructions into umulh
|
||||
|
||||
gcc/
|
||||
* match.pd: Add simplifications for high part of umulh
|
||||
|
||||
gcc/testsuite/
|
||||
* g++.dg/tree-ssa/mull64.C: Add checking of tree pass forwprop4
|
||||
---
|
||||
gcc/match.pd | 56 ++++++++++++++++++++++++++
|
||||
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 5 ++-
|
||||
2 files changed, 59 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index f6c5befd7..433682afb 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -3404,6 +3404,62 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
)
|
||||
#endif
|
||||
|
||||
+#if GIMPLE
|
||||
+/* These patterns are mostly used by FORWPROP4 to move some operations outside of
|
||||
+ the if statements. They should be done late because it gives jump threading
|
||||
+ and few other passes to reduce what is going on. */
|
||||
+/* Mul64 is defined as a multiplication algorithm which compute two 64-bit
|
||||
+ integers to one 128-bit integer. Try to match the high part of mul pattern
|
||||
+ after the low part of mul pattern is simplified. The following scenario
|
||||
+ should be matched:
|
||||
+ (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) {
|
||||
+ In0Lo = In0(D) & 4294967295; -- bit_and@4 SSA_NAME@0 @2
|
||||
+ In0Hi = In0(D) >> 32; -- rshift@5 SSA_NAME@0 @3
|
||||
+ In1Lo = In1(D) & 4294967295; -- bit_and@6 SSA_NAME@1 INTEGER_CST@2
|
||||
+ In1Hi = In1(D) >> 32; -- rshift@7 SSA_NAME@1 INTEGER_CST@3
|
||||
+ Mull_01 = In0Hi * In1Lo; -- mult@8 @5 @6
|
||||
+ Addc = In0Lo * In1Hi + Mull_01; -- plus@9 (mult (@4 @7) @8
|
||||
+ AddH = (Addc >> 32) + In0Hi * In1Hi -- (plus@11 (rshift @9 @3) (mult @5 @7))
|
||||
+ addc32 = Addc << 32; -- lshift@10 @9 @3
|
||||
+ ResLo = In0(D) * In1(D); -- mult @0 @1
|
||||
+ ResHi = ((long unsigned int) (addc32 > ResLo)) +
|
||||
+ (((long unsigned int) (Mull_01 > Addc)) << 32) + AddH;
|
||||
+ } */
|
||||
+(simplify
|
||||
+ (plus:c
|
||||
+ (plus:c
|
||||
+ (convert
|
||||
+ (gt (lshift@10 @9 @3)
|
||||
+ (mult:c @0 @1)))
|
||||
+ (lshift
|
||||
+ (convert
|
||||
+ (gt @8 @9))
|
||||
+ @3))
|
||||
+ (plus:c@11
|
||||
+ (rshift
|
||||
+ (plus:c@9
|
||||
+ (mult:c (bit_and@4 SSA_NAME@0 @2) @7)
|
||||
+ (mult:c@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2)))
|
||||
+ @3)
|
||||
+ (mult:c (rshift@5 SSA_NAME@0 @3)
|
||||
+ (rshift@7 SSA_NAME@1 INTEGER_CST@3))
|
||||
+ )
|
||||
+ )
|
||||
+ (if (flag_merge_mull && INTEGRAL_TYPE_P (type)
|
||||
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1)
|
||||
+ && TYPE_PRECISION (type) == 64)
|
||||
+ (with {
|
||||
+ tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type));
|
||||
+ tree shift = build_int_cst (integer_type_node, 64);
|
||||
+ }
|
||||
+ (convert:type (rshift
|
||||
+ (mult (convert:i128_type @0)
|
||||
+ (convert:i128_type @1))
|
||||
+ { shift; })))
|
||||
+ )
|
||||
+)
|
||||
+#endif
|
||||
+
|
||||
#if GIMPLE
|
||||
/* These patterns are mostly used by FORWPROP1 to fold some operations into more
|
||||
simple IR. The following scenario should be matched:
|
||||
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
index 2a3b74604..f61cf5e6f 100644
|
||||
--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -Wno-psabi -fmerge-mull -fdump-tree-forwprop1-details" } */
|
||||
+/* { dg-options "-O2 -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
|
||||
|
||||
# define BN_BITS4 32
|
||||
# define BN_MASK2 (0xffffffffffffffffL)
|
||||
@@ -31,4 +31,5 @@ void mul64(unsigned long in0, unsigned long in1,
|
||||
retHi = m11;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-tree-dump "gimple_simplified to low_18 = in0_4" "forwprop1" } } */
|
||||
+/* { dg-final { scan-tree-dump "gimple_simplified to" "forwprop1" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "gimple_simplified to" 1 "forwprop4" } } */
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
@ -0,0 +1,38 @@
|
||||
From b669b4512e8425f4d752ef76bf61097cf40d9b35 Mon Sep 17 00:00:00 2001
|
||||
From: zgat <1071107108@qq.com>
|
||||
Date: Thu, 17 Nov 2022 02:55:48 +0000
|
||||
Subject: [PATCH 28/35] [Struct Reorg] Fix speccpu2006 462 double free #I60YUV
|
||||
Modify gcc/tree.c. speccpu 462 runs normally after the modification.
|
||||
|
||||
Signed-off-by: zgat <1071107108@qq.com>
|
||||
---
|
||||
gcc/tree.c | 6 ++----
|
||||
1 file changed, 2 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/gcc/tree.c b/gcc/tree.c
|
||||
index 2a532d15a..a61788651 100644
|
||||
--- a/gcc/tree.c
|
||||
+++ b/gcc/tree.c
|
||||
@@ -5224,8 +5224,7 @@ fld_simplified_type_name (tree type)
|
||||
optimizations. */
|
||||
if (flag_ipa_struct_reorg
|
||||
&& lang_c_p ()
|
||||
- && flag_lto_partition == LTO_PARTITION_ONE
|
||||
- && (in_lto_p || flag_whole_program))
|
||||
+ && flag_lto_partition == LTO_PARTITION_ONE)
|
||||
return TYPE_NAME (type);
|
||||
|
||||
if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL)
|
||||
@@ -5471,8 +5470,7 @@ fld_simplified_type (tree t, class free_lang_data_d *fld)
|
||||
optimizations. */
|
||||
if (flag_ipa_struct_reorg
|
||||
&& lang_c_p ()
|
||||
- && flag_lto_partition == LTO_PARTITION_ONE
|
||||
- && (in_lto_p || flag_whole_program))
|
||||
+ && flag_lto_partition == LTO_PARTITION_ONE)
|
||||
return t;
|
||||
if (POINTER_TYPE_P (t))
|
||||
return fld_incomplete_type_of (t, fld);
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
1193
0077-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch
Normal file
1193
0077-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch
Normal file
File diff suppressed because it is too large
Load Diff
1007
0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch
Normal file
1007
0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch
Normal file
File diff suppressed because it is too large
Load Diff
267
0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch
Normal file
267
0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch
Normal file
@ -0,0 +1,267 @@
|
||||
From 013544d0b477647c8835a8806c75e7b09155b8ed Mon Sep 17 00:00:00 2001
|
||||
From: benniaobufeijiushiji <linda7@huawei.com>
|
||||
Date: Mon, 8 Aug 2022 09:13:53 +0800
|
||||
Subject: [PATCH 31/35] [loop-vect] Transfer arrays using registers between
|
||||
loops For vectorized stores in loop, if all succeed loops immediately use the
|
||||
data, transfer data using registers instead of load store to prevent overhead
|
||||
from memory access.
|
||||
|
||||
---
|
||||
gcc/testsuite/gcc.dg/vect/vect-perm-1.c | 45 ++++++
|
||||
gcc/tree-vect-stmts.c | 181 ++++++++++++++++++++++++
|
||||
2 files changed, 226 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-perm-1.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/vect/vect-perm-1.c b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c
|
||||
new file mode 100644
|
||||
index 000000000..d8b29fbd5
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/vect/vect-perm-1.c
|
||||
@@ -0,0 +1,45 @@
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -fdump-tree-vect-all-details -save-temps" } */
|
||||
+
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+
|
||||
+static unsigned inline abs2 (unsigned a)
|
||||
+{
|
||||
+ unsigned s = ((a>>15)&0x10001)*0xffff;
|
||||
+ return (a+s)^s;
|
||||
+}
|
||||
+
|
||||
+int foo (unsigned *a00, unsigned *a11, unsigned *a22, unsigned *a33)
|
||||
+{
|
||||
+ unsigned tmp[4][4];
|
||||
+ unsigned a0, a1, a2, a3;
|
||||
+ int sum = 0;
|
||||
+ for (int i = 0; i < 4; i++)
|
||||
+ {
|
||||
+ int t0 = a00[i] + a11[i];
|
||||
+ int t1 = a00[i] - a11[i];
|
||||
+ int t2 = a22[i] + a33[i];
|
||||
+ int t3 = a22[i] - a33[i];
|
||||
+ tmp[i][0] = t0 + t2;
|
||||
+ tmp[i][2] = t0 - t2;
|
||||
+ tmp[i][1] = t1 + t3;
|
||||
+ tmp[i][3] = t1 - t3;
|
||||
+ }
|
||||
+ for (int i = 0; i < 4; i++)
|
||||
+ {
|
||||
+ int t0 = tmp[0][i] + tmp[1][i];
|
||||
+ int t1 = tmp[0][i] - tmp[1][i];
|
||||
+ int t2 = tmp[2][i] + tmp[3][i];
|
||||
+ int t3 = tmp[2][i] - tmp[3][i];
|
||||
+ a0 = t0 + t2;
|
||||
+ a2 = t0 - t2;
|
||||
+ a1 = t1 + t3;
|
||||
+ a3 = t1 - t3;
|
||||
+ sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3);
|
||||
+ }
|
||||
+ return (((unsigned short) sum) + ((unsigned) sum >>16)) >> 1;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 16 "vect" } } */
|
||||
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
|
||||
index 2c2197022..98b233718 100644
|
||||
--- a/gcc/tree-vect-stmts.c
|
||||
+++ b/gcc/tree-vect-stmts.c
|
||||
@@ -2276,6 +2276,173 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
+/* Count memory references in BB and collect its load stmts in STMTS. A BB
|
||||
+ without memory references is regarded as empty and is skipped in the DFS. */
|
||||
+
|
||||
+static unsigned
|
||||
+mem_refs_in_bb (basic_block bb, vec<gimple *> &stmts)
|
||||
+{
|
||||
+ unsigned num = 0;
|
||||
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
|
||||
+ !gsi_end_p (gsi); gsi_next (&gsi))
|
||||
+ {
|
||||
+ gimple *stmt = gsi_stmt (gsi);
|
||||
+ if (is_gimple_debug (stmt))
|
||||
+ continue;
|
||||
+ if (is_gimple_assign (stmt) && gimple_has_mem_ops (stmt)
|
||||
+ && !gimple_has_volatile_ops (stmt))
|
||||
+ {
|
||||
+ if (gimple_assign_rhs_code (stmt) == MEM_REF
|
||||
+ || gimple_assign_rhs_code (stmt) == ARRAY_REF)
|
||||
+ {
|
||||
+ stmts.safe_push (stmt);
|
||||
+ num++;
|
||||
+ }
|
||||
+ else if (TREE_CODE (gimple_get_lhs (stmt)) == MEM_REF
|
||||
+ || TREE_CODE (gimple_get_lhs (stmt)) == ARRAY_REF)
|
||||
+ num++;
|
||||
+ }
|
||||
+ }
|
||||
+ return num;
|
||||
+}
|
||||
+
|
||||
+static bool
|
||||
+check_same_base (vec<data_reference_p> *datarefs, data_reference_p dr)
|
||||
+{
|
||||
+ for (unsigned ui = 0; ui < datarefs->length (); ui++)
|
||||
+ {
|
||||
+ tree op1 = TREE_OPERAND (DR_BASE_OBJECT (dr), 0);
|
||||
+ tree op2 = TREE_OPERAND (DR_BASE_OBJECT ((*datarefs)[ui]), 0);
|
||||
+ if (TREE_CODE (op1) != TREE_CODE (op2))
|
||||
+ continue;
|
||||
+ if (TREE_CODE (op1) == ADDR_EXPR)
|
||||
+ {
|
||||
+ op1 = TREE_OPERAND (op1, 0);
|
||||
+ op2 = TREE_OPERAND (op2, 0);
|
||||
+ }
|
||||
+ enum tree_code code = TREE_CODE (op1);
|
||||
+ switch (code)
|
||||
+ {
|
||||
+ case VAR_DECL:
|
||||
+ if (DECL_NAME (op1) == DECL_NAME (op2)
|
||||
+ && DR_IS_READ ((*datarefs)[ui]))
|
||||
+ return true;
|
||||
+ break;
|
||||
+ case SSA_NAME:
|
||||
+ if (SSA_NAME_VERSION (op1) == SSA_NAME_VERSION (op2)
|
||||
+ && DR_IS_READ ((*datarefs)[ui]))
|
||||
+ return true;
|
||||
+ break;
|
||||
+ default:
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+/* Iterate all load STMTS, if satisfying same base vectorized stmt, then return,
|
||||
+ otherwise, set SUCCESS to false. */
|
||||
+
|
||||
+static void
|
||||
+check_vec_use (loop_vec_info loop_vinfo, vec<gimple *> &stmts,
|
||||
+ stmt_vec_info stmt_info, bool &success)
|
||||
+{
|
||||
+ if (stmt_info == NULL)
|
||||
+ {
|
||||
+ success = false;
|
||||
+ return;
|
||||
+ }
|
||||
+ if (DR_IS_READ (stmt_info->dr_aux.dr))
|
||||
+ {
|
||||
+ success = false;
|
||||
+ return;
|
||||
+ }
|
||||
+ unsigned ui = 0;
|
||||
+ gimple *candidate = NULL;
|
||||
+ FOR_EACH_VEC_ELT (stmts, ui, candidate)
|
||||
+ {
|
||||
+ if (TREE_CODE (TREE_TYPE (gimple_get_lhs (candidate))) != VECTOR_TYPE)
|
||||
+ continue;
|
||||
+
|
||||
+ if (candidate->bb != candidate->bb->loop_father->header)
|
||||
+ {
|
||||
+ success = false;
|
||||
+ return;
|
||||
+ }
|
||||
+ auto_vec<data_reference_p> datarefs;
|
||||
+ tree res = find_data_references_in_bb (candidate->bb->loop_father,
|
||||
+ candidate->bb, &datarefs);
|
||||
+ if (res == chrec_dont_know)
|
||||
+ {
|
||||
+ success = false;
|
||||
+ return;
|
||||
+ }
|
||||
+ if (check_same_base (&datarefs, stmt_info->dr_aux.dr))
|
||||
+ return;
|
||||
+ }
|
||||
+ success = false;
|
||||
+}
|
||||
+
|
||||
+/* Depth-first search from present BB. If successor has load STMTS,
|
||||
+ stop further searching. */
|
||||
+
|
||||
+static void
|
||||
+dfs_check_bb (loop_vec_info loop_vinfo, basic_block bb, stmt_vec_info stmt_info,
|
||||
+ bool &success, vec<basic_block> &visited_bbs)
|
||||
+{
|
||||
+ if (bb == cfun->cfg->x_exit_block_ptr)
|
||||
+ {
|
||||
+ success = false;
|
||||
+ return;
|
||||
+ }
|
||||
+ if (!success || visited_bbs.contains (bb) || bb == loop_vinfo->loop->latch)
|
||||
+ return;
|
||||
+
|
||||
+ visited_bbs.safe_push (bb);
|
||||
+ auto_vec<gimple *> stmts;
|
||||
+ unsigned num = mem_refs_in_bb (bb, stmts);
|
||||
+ /* Empty BB. */
|
||||
+ if (num == 0)
|
||||
+ {
|
||||
+ edge e;
|
||||
+ edge_iterator ei;
|
||||
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||||
+ {
|
||||
+ dfs_check_bb (loop_vinfo, e->dest, stmt_info, success, visited_bbs);
|
||||
+ if (!success)
|
||||
+ return;
|
||||
+ }
|
||||
+ return;
|
||||
+ }
|
||||
+ /* Non-empty BB. */
|
||||
+ check_vec_use (loop_vinfo, stmts, stmt_info, success);
|
||||
+}
|
||||
+
|
||||
+/* For grouped store, check whether all successors of present BB have
|
||||
+ vectorized load from same base of store. If so, set memory_access_type
|
||||
+ using VMAT_CONTIGUOUS_PERMUTE instead of VMAT_LOAD_STORE_LANES. */
|
||||
+
|
||||
+static bool
|
||||
+conti_perm (stmt_vec_info stmt_vinfo, loop_vec_info loop_vinfo)
|
||||
+{
|
||||
+ gimple *stmt = stmt_vinfo->stmt;
|
||||
+ if (gimple_code (stmt) != GIMPLE_ASSIGN)
|
||||
+ return false;
|
||||
+
|
||||
+ if (DR_IS_READ (stmt_vinfo->dr_aux.dr))
|
||||
+ return false;
|
||||
+
|
||||
+ basic_block bb = stmt->bb;
|
||||
+ bool success = true;
|
||||
+ auto_vec<basic_block> visited_bbs;
|
||||
+ visited_bbs.safe_push (bb);
|
||||
+ edge e;
|
||||
+ edge_iterator ei;
|
||||
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||||
+ dfs_check_bb (loop_vinfo, e->dest, stmt_vinfo, success, visited_bbs);
|
||||
+ return success;
|
||||
+}
|
||||
+
|
||||
/* A subroutine of get_load_store_type, with a subset of the same
|
||||
arguments. Handle the case where STMT_INFO is part of a grouped load
|
||||
or store.
|
||||
@@ -2434,6 +2601,20 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
|
||||
*memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
|
||||
overrun_p = would_overrun_p;
|
||||
}
|
||||
+
|
||||
+ if (*memory_access_type == VMAT_LOAD_STORE_LANES
|
||||
+ && TREE_CODE (loop_vinfo->num_iters) == INTEGER_CST
|
||||
+ && maybe_eq (tree_to_shwi (loop_vinfo->num_iters),
|
||||
+ loop_vinfo->vectorization_factor)
|
||||
+ && conti_perm (stmt_info, loop_vinfo)
|
||||
+ && (vls_type == VLS_LOAD
|
||||
+ ? vect_grouped_load_supported (vectype, single_element_p,
|
||||
+ group_size)
|
||||
+ : vect_grouped_store_supported (vectype, group_size)))
|
||||
+ {
|
||||
+ *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
|
||||
+ overrun_p = would_overrun_p;
|
||||
+ }
|
||||
}
|
||||
|
||||
/* As a last resort, trying using a gather load or scatter store.
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
1061
0080-Struct-Reorg-Add-Unsafe-Structure-Pointer-Compressio.patch
Normal file
1061
0080-Struct-Reorg-Add-Unsafe-Structure-Pointer-Compressio.patch
Normal file
File diff suppressed because it is too large
Load Diff
826
0081-Loop-distribution-Insert-temp-arrays-built-from-isom.patch
Normal file
826
0081-Loop-distribution-Insert-temp-arrays-built-from-isom.patch
Normal file
@ -0,0 +1,826 @@
|
||||
From ca2a541ed3425bec64f97fe277c6c02bf4f20049 Mon Sep 17 00:00:00 2001
|
||||
From: benniaobufeijiushiji <linda7@huawei.com>
|
||||
Date: Thu, 27 Oct 2022 10:26:34 +0800
|
||||
Subject: [PATCH 33/35] [Loop-distribution] Insert temp arrays built from
|
||||
isomorphic stmts Use option -ftree-slp-transpose-vectorize Build temp arrays
|
||||
for isomorphic stmt and regard them as new seed_stmts for loop distribution.
|
||||
|
||||
---
|
||||
gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c | 67 +++
|
||||
gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c | 17 +
|
||||
gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c | 19 +
|
||||
gcc/tree-loop-distribution.c | 577 +++++++++++++++++++-
|
||||
4 files changed, 663 insertions(+), 17 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c
|
||||
new file mode 100644
|
||||
index 000000000..649463647
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c
|
||||
@@ -0,0 +1,67 @@
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-do run { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details -save-temps" } */
|
||||
+
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+
|
||||
+static unsigned inline abs2 (unsigned a)
|
||||
+{
|
||||
+ unsigned s = ((a>>15)&0x10001)*0xffff;
|
||||
+ return (a+s)^s;
|
||||
+}
|
||||
+
|
||||
+int foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib)
|
||||
+{
|
||||
+ unsigned tmp[4][4];
|
||||
+ unsigned a0, a1, a2, a3;
|
||||
+ int sum = 0;
|
||||
+ for (int i = 0; i < 4; i++, oxa += ia, oxb += ib)
|
||||
+ {
|
||||
+ a0 = (oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16);
|
||||
+ a1 = (oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16);
|
||||
+ a2 = (oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16);
|
||||
+ a3 = (oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16);
|
||||
+ int t0 = a0 + a1;
|
||||
+ int t1 = a0 - a1;
|
||||
+ int t2 = a2 + a3;
|
||||
+ int t3 = a2 - a3;
|
||||
+ tmp[i][0] = t0 + t2;
|
||||
+ tmp[i][2] = t0 - t2;
|
||||
+ tmp[i][1] = t1 + t3;
|
||||
+ tmp[i][3] = t1 - t3;
|
||||
+ }
|
||||
+ for (int i = 0; i < 4; i++)
|
||||
+ {
|
||||
+ int t0 = tmp[0][i] + tmp[1][i];
|
||||
+ int t1 = tmp[0][i] - tmp[1][i];
|
||||
+ int t2 = tmp[2][i] + tmp[3][i];
|
||||
+ int t3 = tmp[2][i] - tmp[3][i];
|
||||
+ a0 = t0 + t2;
|
||||
+ a2 = t0 - t2;
|
||||
+ a1 = t1 + t3;
|
||||
+ a3 = t1 - t3;
|
||||
+ sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3);
|
||||
+ }
|
||||
+ return (((unsigned short) sum) + ((unsigned) sum >>16)) >> 1;
|
||||
+}
|
||||
+
|
||||
+int main ()
|
||||
+{
|
||||
+ unsigned char oxa[128] = {0};
|
||||
+ unsigned char oxb[128] = {0};
|
||||
+ for (int i = 0; i < 128; i++)
|
||||
+ {
|
||||
+ oxa[i] += i * 3;
|
||||
+ oxb[i] = i * 2;
|
||||
+ }
|
||||
+ int sum = foo (oxa, 16, oxb, 32);
|
||||
+ if (sum != 736)
|
||||
+ {
|
||||
+ abort ();
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "Insertion done: 4 temp arrays inserted" 1 "ldist" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c
|
||||
new file mode 100644
|
||||
index 000000000..1b50fd27d
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c
|
||||
@@ -0,0 +1,17 @@
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details" } */
|
||||
+
|
||||
+unsigned a0[4], a1[4], a2[4], a3[4];
|
||||
+
|
||||
+void foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib)
|
||||
+{
|
||||
+ for (int i = 0; i < 4; i++, oxa += ia, oxb += ib)
|
||||
+ {
|
||||
+ a0[i] = (oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16);
|
||||
+ a1[i] = (oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16);
|
||||
+ a2[i] = (oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16);
|
||||
+ a3[i] = (oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "Loop 1 not distributed." 1 "ldist" } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c
|
||||
new file mode 100644
|
||||
index 000000000..94b992b05
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c
|
||||
@@ -0,0 +1,19 @@
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details" } */
|
||||
+
|
||||
+unsigned a0[4], a1[4], a2[4], a3[4];
|
||||
+
|
||||
+void foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib)
|
||||
+{
|
||||
+ for (int i = 0; i < 4; i++, oxa += ia, oxb += ib)
|
||||
+ {
|
||||
+ a0[i] = ((oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16)) + 1;
|
||||
+ a1[i] = ((oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16)) - 2;
|
||||
+ a2[i] = ((oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16)) * 3;
|
||||
+ a3[i] = ((oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16)) / 4;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "Insertion done: 4 temp arrays inserted" 1 "ldist" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "Insertion removed" 1 "ldist" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "Loop 1 not distributed." 1 "ldist" } } */
|
||||
\ No newline at end of file
|
||||
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
|
||||
index c08af6562..88b56379c 100644
|
||||
--- a/gcc/tree-loop-distribution.c
|
||||
+++ b/gcc/tree-loop-distribution.c
|
||||
@@ -36,6 +36,47 @@ along with GCC; see the file COPYING3. If not see
|
||||
| D(I) = A(I-1)*E
|
||||
|ENDDO
|
||||
|
||||
+ If an unvectorizable loop has grouped loads, and calculations from grouped
|
||||
+ loads are isomorphic, build temp arrays using stmts where isomorphic
|
||||
+ calculations end. After distribution, the partition built from temp
|
||||
+ arrays can be vectorized in pass SLP after loop unrolling. For example,
|
||||
+
|
||||
+ |DO I = 1, N
|
||||
+ | A = FOO (ARG_1);
|
||||
+ | B = FOO (ARG_2);
|
||||
+ | C = BAR_0 (A);
|
||||
+ | D = BAR_1 (B);
|
||||
+ |ENDDO
|
||||
+
|
||||
+ is transformed to
|
||||
+
|
||||
+ |DO I = 1, N
|
||||
+ | J = FOO (ARG_1);
|
||||
+ | K = FOO (ARG_2);
|
||||
+ | X[I] = J;
|
||||
+ | Y[I] = K;
|
||||
+ | A = X[I];
|
||||
+ | B = Y[I];
|
||||
+ | C = BAR_0 (A);
|
||||
+ | D = BAR_1 (B);
|
||||
+ |ENDDO
|
||||
+
|
||||
+ and is then distributed to
|
||||
+
|
||||
+ |DO I = 1, N
|
||||
+ | J = FOO (ARG_1);
|
||||
+ | K = FOO (ARG_2);
|
||||
+ | X[I] = J;
|
||||
+ | Y[I] = K;
|
||||
+ |ENDDO
|
||||
+
|
||||
+ |DO I = 1, N
|
||||
+ | A = X[I];
|
||||
+ | B = Y[I];
|
||||
+ | C = BAR_0 (A);
|
||||
+ | D = BAR_1 (B);
|
||||
+ |ENDDO
|
||||
+
|
||||
Loop distribution is the dual of loop fusion. It separates statements
|
||||
of a loop (or loop nest) into multiple loops (or loop nests) with the
|
||||
same loop header. The major goal is to separate statements which may
|
||||
@@ -44,7 +85,9 @@ along with GCC; see the file COPYING3. If not see
|
||||
|
||||
1) Seed partitions with specific type statements. For now we support
|
||||
two types seed statements: statement defining variable used outside
|
||||
- of loop; statement storing to memory.
|
||||
+ of loop; statement storing to memory. Moreover, for unvectorizable
|
||||
+ loops, we try to find isomorphic stmts from grouped load and build
|
||||
+ temp arrays as new seed statements.
|
||||
2) Build reduced dependence graph (RDG) for loop to be distributed.
|
||||
The vertices (RDG:V) model all statements in the loop and the edges
|
||||
(RDG:E) model flow and control dependencies between statements.
|
||||
@@ -643,7 +686,8 @@ class loop_distribution
|
||||
/* Returns true when PARTITION1 and PARTITION2 access the same memory
|
||||
object in RDG. */
|
||||
bool share_memory_accesses (struct graph *rdg,
|
||||
- partition *partition1, partition *partition2);
|
||||
+ partition *partition1, partition *partition2,
|
||||
+ hash_set<tree> *excluded_arrays);
|
||||
|
||||
/* For each seed statement in STARTING_STMTS, this function builds
|
||||
partition for it by adding depended statements according to RDG.
|
||||
@@ -686,8 +730,9 @@ class loop_distribution
|
||||
|
||||
/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
|
||||
ALIAS_DDRS contains ddrs which need runtime alias check. */
|
||||
- void finalize_partitions (class loop *loop, vec<struct partition *>
|
||||
- *partitions, vec<ddr_p> *alias_ddrs);
|
||||
+ void finalize_partitions (class loop *loop,
|
||||
+ vec<struct partition *> *partitions,
|
||||
+ vec<ddr_p> *alias_ddrs, bitmap producers);
|
||||
|
||||
/* Analyze loop form and if it's vectorizable to decide if we need to
|
||||
insert temp arrays to distribute it. */
|
||||
@@ -701,6 +746,28 @@ class loop_distribution
|
||||
|
||||
inline void rebuild_rdg (loop_p loop, struct graph *&rdg,
|
||||
control_dependences *cd);
|
||||
+
|
||||
+ /* If loop is not distributed, remove inserted temp arrays. */
|
||||
+ void remove_insertion (loop_p loop, struct graph *flow_only_rdg,
|
||||
+ bitmap producers, struct partition *partition);
|
||||
+
|
||||
+ /* Insert temp arrays if isomorphic computation exists. Temp arrays will be
|
||||
+ regarded as SEED_STMTS for building partitions in succeeding processes. */
|
||||
+ bool insert_temp_arrays (loop_p loop, vec<gimple *> seed_stmts,
|
||||
+ hash_set<tree> *tmp_array_vars, bitmap producers);
|
||||
+
|
||||
+ void build_producers (loop_p loop, bitmap producers,
|
||||
+ vec<gimple *> &transformed);
|
||||
+
|
||||
+ void do_insertion (loop_p loop, struct graph *flow_only_rdg, tree iv,
|
||||
+ bitmap cut_points, hash_set <tree> *tmp_array_vars,
|
||||
+ bitmap producers);
|
||||
+
|
||||
+ /* Fuse PARTITIONS built from inserted temp arrays into one partition,
|
||||
+ fuse the rest into another. */
|
||||
+ void merge_remaining_partitions (vec<struct partition *> *partitions,
|
||||
+ bitmap producers);
|
||||
+
|
||||
/* Distributes the code from LOOP in such a way that producer statements
|
||||
are placed before consumer statements. Tries to separate only the
|
||||
statements from STMTS into separate loops. Returns the number of
|
||||
@@ -1913,7 +1980,8 @@ loop_distribution::classify_partition (loop_p loop,
|
||||
|
||||
bool
|
||||
loop_distribution::share_memory_accesses (struct graph *rdg,
|
||||
- partition *partition1, partition *partition2)
|
||||
+ partition *partition1, partition *partition2,
|
||||
+ hash_set <tree> *excluded_arrays)
|
||||
{
|
||||
unsigned i, j;
|
||||
bitmap_iterator bi, bj;
|
||||
@@ -1947,7 +2015,10 @@ loop_distribution::share_memory_accesses (struct graph *rdg,
|
||||
if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
|
||||
&& operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
|
||||
&& operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
|
||||
- && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
|
||||
+ && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0)
|
||||
+ /* An exception, if PARTITION1 and PARTITION2 contain the
|
||||
+ temp array we inserted, do not merge them. */
|
||||
+ && !excluded_arrays->contains (DR_REF (dr1)))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -2909,13 +2980,47 @@ fuse_memset_builtins (vec<struct partition *> *partitions)
|
||||
}
|
||||
}
|
||||
|
||||
+void
|
||||
+loop_distribution::merge_remaining_partitions
|
||||
+ (vec<struct partition *> *partitions,
|
||||
+ bitmap producers)
|
||||
+{
|
||||
+ struct partition *partition = NULL;
|
||||
+ struct partition *p1 = NULL, *p2 = NULL;
|
||||
+ for (unsigned i = 0; partitions->iterate (i, &partition); i++)
|
||||
+ {
|
||||
+ if (bitmap_intersect_p (producers, partition->stmts))
|
||||
+ {
|
||||
+ if (p1 == NULL)
|
||||
+ {
|
||||
+ p1 = partition;
|
||||
+ continue;
|
||||
+ }
|
||||
+ partition_merge_into (NULL, p1, partition, FUSE_FINALIZE);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if (p2 == NULL)
|
||||
+ {
|
||||
+ p2 = partition;
|
||||
+ continue;
|
||||
+ }
|
||||
+ partition_merge_into (NULL, p2, partition, FUSE_FINALIZE);
|
||||
+ }
|
||||
+ partitions->unordered_remove (i);
|
||||
+ partition_free (partition);
|
||||
+ i--;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
void
|
||||
loop_distribution::finalize_partitions (class loop *loop,
|
||||
vec<struct partition *> *partitions,
|
||||
- vec<ddr_p> *alias_ddrs)
|
||||
+ vec<ddr_p> *alias_ddrs,
|
||||
+ bitmap producers)
|
||||
{
|
||||
unsigned i;
|
||||
- struct partition *partition, *a;
|
||||
+ struct partition *partition;
|
||||
|
||||
if (partitions->length () == 1
|
||||
|| alias_ddrs->length () > 0)
|
||||
@@ -2947,13 +3052,7 @@ loop_distribution::finalize_partitions (class loop *loop,
|
||||
|| (loop->inner == NULL
|
||||
&& i >= NUM_PARTITION_THRESHOLD && num_normal > num_builtin))
|
||||
{
|
||||
- a = (*partitions)[0];
|
||||
- for (i = 1; partitions->iterate (i, &partition); ++i)
|
||||
- {
|
||||
- partition_merge_into (NULL, a, partition, FUSE_FINALIZE);
|
||||
- partition_free (partition);
|
||||
- }
|
||||
- partitions->truncate (1);
|
||||
+ merge_remaining_partitions (partitions, producers);
|
||||
}
|
||||
|
||||
/* Fuse memset builtins if possible. */
|
||||
@@ -3758,6 +3857,404 @@ find_isomorphic_stmts (loop_vec_info vinfo, vec<gimple *> &stmts)
|
||||
return decide_stmts_by_profit (candi_stmts, stmts);
|
||||
}
|
||||
|
||||
+/* Get iv from SEED_STMTS and make sure each seed_stmt has only one iv as index
|
||||
+ and all indices are the same. */
|
||||
+
|
||||
+static tree
|
||||
+find_index (vec<gimple *> seed_stmts)
|
||||
+{
|
||||
+ if (seed_stmts.length () == 0)
|
||||
+ return NULL;
|
||||
+ bool found_index = false;
|
||||
+ tree index = NULL;
|
||||
+ unsigned ui = 0;
|
||||
+ for (ui = 0; ui < seed_stmts.length (); ui++)
|
||||
+ {
|
||||
+ if (!gimple_vdef (seed_stmts[ui]))
|
||||
+ return NULL;
|
||||
+ tree lhs = gimple_assign_lhs (seed_stmts[ui]);
|
||||
+ unsigned num_index = 0;
|
||||
+ while (TREE_CODE (lhs) == ARRAY_REF)
|
||||
+ {
|
||||
+ if (TREE_CODE (TREE_OPERAND (lhs, 1)) == SSA_NAME)
|
||||
+ {
|
||||
+ num_index++;
|
||||
+ if (num_index > 1)
|
||||
+ return NULL;
|
||||
+ if (index == NULL)
|
||||
+ {
|
||||
+ index = TREE_OPERAND (lhs, 1);
|
||||
+ found_index = true;
|
||||
+ }
|
||||
+ else if (index != TREE_OPERAND (lhs, 1))
|
||||
+ return NULL;
|
||||
+ }
|
||||
+ lhs = TREE_OPERAND (lhs, 0);
|
||||
+ }
|
||||
+ if (!found_index)
|
||||
+ return NULL;
|
||||
+ }
|
||||
+ return index;
|
||||
+}
|
||||
+
|
||||
+/* Check if expression of phi is an increment by constant one. */
|
||||
+
|
||||
+static void
|
||||
+check_phi_inc (struct vertex *v_phi, struct graph *rdg, bool &found_inc)
|
||||
+{
|
||||
+ struct graph_edge *e_phi;
|
||||
+ for (e_phi = v_phi->succ; e_phi; e_phi = e_phi->succ_next)
|
||||
+ {
|
||||
+ struct vertex *v_inc = &(rdg->vertices[e_phi->dest]);
|
||||
+ if (!is_gimple_assign (RDGV_STMT (v_inc))
|
||||
+ || gimple_expr_code (RDGV_STMT (v_inc)) != PLUS_EXPR)
|
||||
+ continue;
|
||||
+ tree rhs1 = gimple_assign_rhs1 (RDGV_STMT (v_inc));
|
||||
+ tree rhs2 = gimple_assign_rhs2 (RDGV_STMT (v_inc));
|
||||
+ if (!(integer_onep (rhs1) || integer_onep (rhs2)))
|
||||
+ continue;
|
||||
+ struct graph_edge *e_inc;
|
||||
+ /* find cycle with only two vertices inc and phi: inc <--> phi. */
|
||||
+ bool found_cycle = false;
|
||||
+ for (e_inc = v_inc->succ; e_inc; e_inc = e_inc->succ_next)
|
||||
+ {
|
||||
+ if (e_inc->dest == e_phi->src)
|
||||
+ {
|
||||
+ found_cycle = true;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ if (!found_cycle)
|
||||
+ continue;
|
||||
+ found_inc = true;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Check if phi satisfies form like PHI <0, i>. */
|
||||
+
|
||||
+static inline bool
|
||||
+iv_check_phi_stmt (gimple *phi_stmt)
|
||||
+{
|
||||
+ return gimple_phi_num_args (phi_stmt) == 2
|
||||
+ && (integer_zerop (gimple_phi_arg_def (phi_stmt, 0))
|
||||
+ || integer_zerop (gimple_phi_arg_def (phi_stmt, 1)));
|
||||
+}
|
||||
+
|
||||
+/* Make sure the iteration variable is a phi. */
|
||||
+
|
||||
+static tree
|
||||
+get_iv_from_seed (struct graph *flow_only_rdg, vec<gimple *> seed_stmts)
|
||||
+{
|
||||
+ tree index = find_index (seed_stmts);
|
||||
+ if (index == NULL)
|
||||
+ return NULL;
|
||||
+ for (int i = 0; i < flow_only_rdg->n_vertices; i++)
|
||||
+ {
|
||||
+ struct vertex *v = &(flow_only_rdg->vertices[i]);
|
||||
+ if (RDGV_STMT (v) != seed_stmts[0])
|
||||
+ continue;
|
||||
+ struct graph_edge *e;
|
||||
+ bool found_phi = false;
|
||||
+ for (e = v->pred; e; e = e->pred_next)
|
||||
+ {
|
||||
+ struct vertex *v_phi = &(flow_only_rdg->vertices[e->src]);
|
||||
+ gimple *phi_stmt = RDGV_STMT (v_phi);
|
||||
+ if (gimple_code (phi_stmt) != GIMPLE_PHI
|
||||
+ || gimple_phi_result (phi_stmt) != index)
|
||||
+ continue;
|
||||
+ if (!iv_check_phi_stmt (phi_stmt))
|
||||
+ return NULL;
|
||||
+ /* find inc expr in succ of phi. */
|
||||
+ bool found_inc = false;
|
||||
+ check_phi_inc (v_phi, flow_only_rdg, found_inc);
|
||||
+ if (!found_inc)
|
||||
+ return NULL;
|
||||
+ found_phi = true;
|
||||
+ break;
|
||||
+ }
|
||||
+ if (!found_phi)
|
||||
+ return NULL;
|
||||
+ break;
|
||||
+ }
|
||||
+ return index;
|
||||
+}
|
||||
+
|
||||
+/* Do not distribute loop if vertexes in ROOT_MAP have antidependence within
|
||||
+ FLOW_ONLY_RDG. */
|
||||
+
|
||||
+static bool
|
||||
+check_no_dependency (struct graph *flow_only_rdg, bitmap root_map)
|
||||
+{
|
||||
+ bitmap_iterator bi;
|
||||
+ unsigned ui;
|
||||
+ auto_vec<unsigned, 16> visited_nodes;
|
||||
+ auto_bitmap visited_map;
|
||||
+ EXECUTE_IF_SET_IN_BITMAP (root_map, 0, ui, bi)
|
||||
+ visited_nodes.safe_push (ui);
|
||||
+ for (ui = 0; ui < visited_nodes.length (); ui++)
|
||||
+ {
|
||||
+ struct vertex *v = &(flow_only_rdg->vertices[visited_nodes[ui]]);
|
||||
+ struct graph_edge *e;
|
||||
+ for (e = v->succ; e; e = e->succ_next)
|
||||
+ {
|
||||
+ if (bitmap_bit_p (root_map, e->dest))
|
||||
+ return false;
|
||||
+ if (bitmap_bit_p (visited_map, e->dest))
|
||||
+ continue;
|
||||
+ visited_nodes.safe_push (e->dest);
|
||||
+ bitmap_set_bit (visited_map, e->dest);
|
||||
+ }
|
||||
+ }
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+/* Find isomorphic stmts from GROUPED_LOADS in VINFO and make sure
|
||||
+ there is no dependency among those STMT we found. */
|
||||
+
|
||||
+static unsigned
|
||||
+get_cut_points (struct graph *flow_only_rdg, bitmap cut_points,
|
||||
+ loop_vec_info vinfo)
|
||||
+{
|
||||
+ unsigned n_stmts = 0;
|
||||
+
|
||||
+ /* STMTS that may be CUT_POINTS. */
|
||||
+ auto_vec<gimple *> stmts;
|
||||
+ if (!find_isomorphic_stmts (vinfo, stmts))
|
||||
+ {
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file, "No temp array insertion: no isomorphic stmts"
|
||||
+ " were found.\n");
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ for (int i = 0; i < flow_only_rdg->n_vertices; i++)
|
||||
+ {
|
||||
+ if (stmts.contains (RDG_STMT (flow_only_rdg, i)))
|
||||
+ bitmap_set_bit (cut_points, i);
|
||||
+ }
|
||||
+ n_stmts = bitmap_count_bits (cut_points);
|
||||
+
|
||||
+ bool succ = check_no_dependency (flow_only_rdg, cut_points);
|
||||
+ if (!succ)
|
||||
+ {
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file, "No temp array inserted: data dependency"
|
||||
+ " among isomorphic stmts.\n");
|
||||
+ return 0;
|
||||
+ }
|
||||
+ return n_stmts;
|
||||
+}
|
||||
+
|
||||
+static void
|
||||
+build_temp_array (struct vertex *v, gimple_stmt_iterator &gsi,
|
||||
+ poly_uint64 array_extent, tree iv,
|
||||
+ hash_set<tree> *tmp_array_vars, vec<gimple *> *transformed)
|
||||
+{
|
||||
+ gimple *stmt = RDGV_STMT (v);
|
||||
+ tree lhs = gimple_assign_lhs (stmt);
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ {
|
||||
+ fprintf (dump_file, "original stmt:\t");
|
||||
+ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS);
|
||||
+ }
|
||||
+ tree var_ssa = duplicate_ssa_name (lhs, stmt);
|
||||
+ gimple_assign_set_lhs (stmt, var_ssa);
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ {
|
||||
+ fprintf (dump_file, "changed to:\t");
|
||||
+ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS | TDF_MEMSYMS);
|
||||
+ }
|
||||
+ gimple_set_uid (gsi_stmt (gsi), -1);
|
||||
+ tree vect_elt_type = TREE_TYPE (lhs);
|
||||
+ tree array_type = build_array_type_nelts (vect_elt_type, array_extent);
|
||||
+ tree array = create_tmp_var (array_type);
|
||||
+ tree array_ssa = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL);
|
||||
+ tmp_array_vars->add (array_ssa);
|
||||
+ gimple *store = gimple_build_assign (array_ssa, var_ssa);
|
||||
+ tree new_vdef = make_ssa_name (gimple_vop (cfun), store);
|
||||
+ gsi_insert_after (&gsi, store, GSI_NEW_STMT);
|
||||
+ gimple_set_vdef (store, new_vdef);
|
||||
+ transformed->safe_push (store);
|
||||
+ gimple_set_uid (gsi_stmt (gsi), -1);
|
||||
+ tree array_ssa2 = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL);
|
||||
+ tmp_array_vars->add (array_ssa2);
|
||||
+ gimple *load = gimple_build_assign (lhs, array_ssa2);
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ {
|
||||
+ fprintf (dump_file, "insert stmt:\t");
|
||||
+ print_gimple_stmt (dump_file, store, 0, TDF_VOPS|TDF_MEMSYMS);
|
||||
+ fprintf (dump_file, " and stmt:\t");
|
||||
+ print_gimple_stmt (dump_file, load, 0, TDF_VOPS|TDF_MEMSYMS);
|
||||
+ }
|
||||
+ gimple_set_vuse (load, new_vdef);
|
||||
+ gsi_insert_after (&gsi, load, GSI_NEW_STMT);
|
||||
+ gimple_set_uid (gsi_stmt (gsi), -1);
|
||||
+}
|
||||
+
|
||||
+/* Set bitmap PRODUCERS based on vec TRANSFORMED. */
|
||||
+
|
||||
+void
|
||||
+loop_distribution::build_producers (loop_p loop, bitmap producers,
|
||||
+ vec<gimple *> &transformed)
|
||||
+{
|
||||
+ auto_vec<gimple *, 10> stmts;
|
||||
+ stmts_from_loop (loop, &stmts);
|
||||
+ int i = 0;
|
||||
+ gimple *stmt = NULL;
|
||||
+
|
||||
+ FOR_EACH_VEC_ELT (stmts, i, stmt)
|
||||
+ gimple_set_uid (stmt, i);
|
||||
+ i = 0;
|
||||
+ FOR_EACH_VEC_ELT (transformed, i, stmt)
|
||||
+ bitmap_set_bit (producers, stmt->uid);
|
||||
+}
|
||||
+
|
||||
+/* Transform stmt
|
||||
+
|
||||
+ A = FOO (ARG_1);
|
||||
+
|
||||
+ to
|
||||
+
|
||||
+ STMT_1: A1 = FOO (ARG_1);
|
||||
+ STMT_2: X[I] = A1;
|
||||
+ STMT_3: A = X[I];
|
||||
+
|
||||
+ Producer is STMT_2 who defines the temp array and consumer is
|
||||
+ STMT_3 who uses the temp array. */
|
||||
+
|
||||
+void
|
||||
+loop_distribution::do_insertion (loop_p loop, struct graph *flow_only_rdg,
|
||||
+ tree iv, bitmap cut_points,
|
||||
+ hash_set<tree> *tmp_array_vars,
|
||||
+ bitmap producers)
|
||||
+{
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file, "=== do insertion ===\n");
|
||||
+
|
||||
+ auto_vec<gimple *> transformed;
|
||||
+
|
||||
+ /* Execution times of loop. */
|
||||
+ poly_uint64 array_extent
|
||||
+ = tree_to_poly_uint64 (number_of_latch_executions (loop)) + 1;
|
||||
+
|
||||
+ basic_block *bbs = get_loop_body_in_custom_order (loop, this,
|
||||
+ bb_top_order_cmp_r);
|
||||
+
|
||||
+ for (int i = 0; i < int (loop->num_nodes); i++)
|
||||
+ {
|
||||
+ basic_block bb = bbs[i];
|
||||
+
|
||||
+ /* Find all cut points in bb and transform them. */
|
||||
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
|
||||
+ gsi_next (&gsi))
|
||||
+ {
|
||||
+ unsigned j = gimple_uid (gsi_stmt (gsi));
|
||||
+ if (bitmap_bit_p (cut_points, j))
|
||||
+ {
|
||||
+ struct vertex *v = &(flow_only_rdg->vertices[j]);
|
||||
+ build_temp_array (v, gsi, array_extent, iv, tmp_array_vars,
|
||||
+ &transformed);
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ build_producers (loop, producers, transformed);
|
||||
+ update_ssa (TODO_update_ssa);
|
||||
+ free (bbs);
|
||||
+}
|
||||
+
|
||||
+/* After temp array insertion, given stmts
|
||||
+ STMT_1: M = FOO (ARG_1);
|
||||
+ STMT_2: X[I] = M;
|
||||
+ STMT_3: A = X[I];
|
||||
+ STMT_2 is the producer, STMT_1 is its prev and STMT_3 is its next.
|
||||
+ Replace M with A, and remove STMT_2 and STMT_3. */
|
||||
+
|
||||
+static void
|
||||
+reset_gimple_assign (struct graph *flow_only_rdg, struct partition *partition,
|
||||
+ gimple_stmt_iterator &gsi, int j)
|
||||
+{
|
||||
+ struct vertex *v = &(flow_only_rdg->vertices[j]);
|
||||
+ gimple *stmt = RDGV_STMT (v);
|
||||
+ gimple *prev = stmt->prev;
|
||||
+ gimple *next = stmt->next;
|
||||
+ tree n_lhs = gimple_assign_lhs (next);
|
||||
+ gimple_assign_set_lhs (prev, n_lhs);
|
||||
+ unlink_stmt_vdef (stmt);
|
||||
+ if (partition)
|
||||
+ bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi)));
|
||||
+ gsi_remove (&gsi, true);
|
||||
+ release_defs (stmt);
|
||||
+ if (partition)
|
||||
+ bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi)));
|
||||
+ gsi_remove (&gsi, true);
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+loop_distribution::remove_insertion (loop_p loop, struct graph *flow_only_rdg,
|
||||
+ bitmap producers, struct partition *partition)
|
||||
+{
|
||||
+ basic_block *bbs = get_loop_body_in_custom_order (loop, this,
|
||||
+ bb_top_order_cmp_r);
|
||||
+ for (int i = 0; i < int (loop->num_nodes); i++)
|
||||
+ {
|
||||
+ basic_block bb = bbs[i];
|
||||
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
|
||||
+ gsi_next (&gsi))
|
||||
+ {
|
||||
+ unsigned j = gimple_uid (gsi_stmt (gsi));
|
||||
+ if (bitmap_bit_p (producers, j))
|
||||
+ reset_gimple_assign (flow_only_rdg, partition, gsi, j);
|
||||
+ }
|
||||
+ }
|
||||
+ update_ssa (TODO_update_ssa);
|
||||
+ free (bbs);
|
||||
+}
|
||||
+
|
||||
+/* Insert temp arrays if isomorphic computation exists. Temp arrays will be
|
||||
+ regarded as SEED_STMTS for building partitions in succeeding processes. */
|
||||
+
|
||||
+bool
|
||||
+loop_distribution::insert_temp_arrays (loop_p loop, vec<gimple *> seed_stmts,
|
||||
+ hash_set<tree> *tmp_array_vars, bitmap producers)
|
||||
+{
|
||||
+ struct graph *flow_only_rdg = build_rdg (loop, NULL);
|
||||
+ gcc_checking_assert (flow_only_rdg != NULL);
|
||||
+ tree iv = get_iv_from_seed (flow_only_rdg, seed_stmts);
|
||||
+ if (iv == NULL)
|
||||
+ {
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file, "Loop %d no temp array insertion: failed to get"
|
||||
+ " iteration variable.\n", loop->num);
|
||||
+ free_rdg (flow_only_rdg);
|
||||
+ return false;
|
||||
+ }
|
||||
+ auto_bitmap cut_points;
|
||||
+ loop_vec_info vinfo = loop_vec_info_for_loop (loop);
|
||||
+ unsigned n_cut_points = get_cut_points (flow_only_rdg, cut_points, vinfo);
|
||||
+ delete vinfo;
|
||||
+ loop->aux = NULL;
|
||||
+ if (n_cut_points == 0)
|
||||
+ {
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file, "Loop %d no temp array insertion: no cut points"
|
||||
+ " found.\n", loop->num);
|
||||
+ free_rdg (flow_only_rdg);
|
||||
+ return false;
|
||||
+ }
|
||||
+ do_insertion (loop, flow_only_rdg, iv, cut_points, tmp_array_vars, producers);
|
||||
+ if (dump_enabled_p ())
|
||||
+ {
|
||||
+ dump_user_location_t loc = find_loop_location (loop);
|
||||
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion done:"
|
||||
+ " %d temp arrays inserted in Loop %d.\n",
|
||||
+ n_cut_points, loop->num);
|
||||
+ }
|
||||
+ free_rdg (flow_only_rdg);
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+static bool find_seed_stmts_for_distribution (class loop *, vec<gimple *> *);
|
||||
+
|
||||
/* Distributes the code from LOOP in such a way that producer statements
|
||||
are placed before consumer statements. Tries to separate only the
|
||||
statements from STMTS into separate loops. Returns the number of
|
||||
@@ -3814,6 +4311,34 @@ loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+ /* Try to distribute LOOP if LOOP is simple enough and unable to vectorize.
|
||||
+ If LOOP has grouped loads, recursively find isomorphic stmts and insert
|
||||
+ temp arrays, rebuild RDG and call find_seed_stmts_for_distribution
|
||||
+ to replace STMTS. */
|
||||
+
|
||||
+ hash_set<tree> tmp_array_vars;
|
||||
+
|
||||
+ /* STMTs that define those inserted TMP_ARRAYs. */
|
||||
+ auto_bitmap producers;
|
||||
+
|
||||
+ /* New SEED_STMTS after insertion. */
|
||||
+ auto_vec<gimple *> work_list;
|
||||
+ bool insert_success = false;
|
||||
+ if (may_insert_temp_arrays (loop, rdg, cd))
|
||||
+ {
|
||||
+ if (insert_temp_arrays (loop, stmts, &tmp_array_vars, producers))
|
||||
+ {
|
||||
+ if (find_seed_stmts_for_distribution (loop, &work_list))
|
||||
+ {
|
||||
+ insert_success = true;
|
||||
+ stmts = work_list;
|
||||
+ }
|
||||
+ else
|
||||
+ remove_insertion (loop, rdg, producers, NULL);
|
||||
+ rebuild_rdg (loop, rdg, cd);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
data_reference_p dref;
|
||||
for (i = 0; datarefs_vec.iterate (i, &dref); ++i)
|
||||
dref->aux = (void *) (uintptr_t) i;
|
||||
@@ -3894,7 +4419,7 @@ loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
|
||||
for (int j = i + 1;
|
||||
partitions.iterate (j, &partition); ++j)
|
||||
{
|
||||
- if (share_memory_accesses (rdg, into, partition))
|
||||
+ if (share_memory_accesses (rdg, into, partition, &tmp_array_vars))
|
||||
{
|
||||
partition_merge_into (rdg, into, partition, FUSE_SHARE_REF);
|
||||
partitions.unordered_remove (j);
|
||||
@@ -3944,7 +4469,7 @@ loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
|
||||
}
|
||||
}
|
||||
|
||||
- finalize_partitions (loop, &partitions, &alias_ddrs);
|
||||
+ finalize_partitions (loop, &partitions, &alias_ddrs, producers);
|
||||
|
||||
/* If there is a reduction in all partitions make sure the last one
|
||||
is not classified for builtin code generation. */
|
||||
@@ -3962,6 +4487,24 @@ loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
|
||||
}
|
||||
|
||||
nbp = partitions.length ();
|
||||
+
|
||||
+ /* If we have inserted TMP_ARRAYs but there is only one partition left in
|
||||
+ the succeeding processes, remove those inserted TMP_ARRAYs back to the
|
||||
+ original version. */
|
||||
+
|
||||
+ if (nbp == 1 && insert_success)
|
||||
+ {
|
||||
+ struct partition *partition = NULL;
|
||||
+ partitions.iterate (0, &partition);
|
||||
+ remove_insertion (loop, rdg, producers, partition);
|
||||
+ if (dump_enabled_p ())
|
||||
+ {
|
||||
+ dump_user_location_t loc = find_loop_location (loop);
|
||||
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion removed:"
|
||||
+ " unable to distribute loop %d.\n", loop->num);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
if (nbp == 0
|
||||
|| (nbp == 1 && !partition_builtin_p (partitions[0]))
|
||||
|| (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
206
0082-Revert-Backport-tree-optimization-102880-make-PHI-OP.patch
Normal file
206
0082-Revert-Backport-tree-optimization-102880-make-PHI-OP.patch
Normal file
@ -0,0 +1,206 @@
|
||||
From 717782ec36469eb81650b07e8b5536281a59993d Mon Sep 17 00:00:00 2001
|
||||
From: zhongyunde <zhongyunde@huawei.com>
|
||||
Date: Tue, 29 Nov 2022 22:12:29 +0800
|
||||
Subject: [PATCH 34/35] Revert "[Backport] tree-optimization/102880 - make
|
||||
PHI-OPT recognize more CFGs"
|
||||
|
||||
This reverts commit 77398954ce517aa011b7a254c7aa2858521b2093.
|
||||
---
|
||||
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c | 31 ---------
|
||||
gcc/tree-ssa-phiopt.c | 73 +++++++++-------------
|
||||
2 files changed, 29 insertions(+), 75 deletions(-)
|
||||
delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
|
||||
deleted file mode 100644
|
||||
index 21aa66e38..000000000
|
||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-26.c
|
||||
+++ /dev/null
|
||||
@@ -1,31 +0,0 @@
|
||||
-/* { dg-do compile } */
|
||||
-/* { dg-options "-O -fgimple -fdump-tree-phiopt1" } */
|
||||
-
|
||||
-int __GIMPLE (ssa,startwith("phiopt"))
|
||||
-foo (int a, int b, int flag)
|
||||
-{
|
||||
- int res;
|
||||
-
|
||||
- __BB(2):
|
||||
- if (flag_2(D) != 0)
|
||||
- goto __BB6;
|
||||
- else
|
||||
- goto __BB4;
|
||||
-
|
||||
- __BB(4):
|
||||
- if (a_3(D) > b_4(D))
|
||||
- goto __BB7;
|
||||
- else
|
||||
- goto __BB6;
|
||||
-
|
||||
- __BB(6):
|
||||
- goto __BB7;
|
||||
-
|
||||
- __BB(7):
|
||||
- res_1 = __PHI (__BB4: a_3(D), __BB6: b_4(D));
|
||||
- return res_1;
|
||||
-}
|
||||
-
|
||||
-/* We should be able to detect MAX despite the extra edge into
|
||||
- the middle BB. */
|
||||
-/* { dg-final { scan-tree-dump "MAX" "phiopt1" } } */
|
||||
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||
index 079d29e74..21ac08145 100644
|
||||
--- a/gcc/tree-ssa-phiopt.c
|
||||
+++ b/gcc/tree-ssa-phiopt.c
|
||||
@@ -219,6 +219,7 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||||
|
||||
/* If either bb1's succ or bb2 or bb2's succ is non NULL. */
|
||||
if (EDGE_COUNT (bb1->succs) == 0
|
||||
+ || bb2 == NULL
|
||||
|| EDGE_COUNT (bb2->succs) == 0)
|
||||
continue;
|
||||
|
||||
@@ -278,14 +279,14 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||||
|| (e1->flags & EDGE_FALLTHRU) == 0)
|
||||
continue;
|
||||
|
||||
+ /* Also make sure that bb1 only have one predecessor and that it
|
||||
+ is bb. */
|
||||
+ if (!single_pred_p (bb1)
|
||||
+ || single_pred (bb1) != bb)
|
||||
+ continue;
|
||||
+
|
||||
if (do_store_elim)
|
||||
{
|
||||
- /* Also make sure that bb1 only have one predecessor and that it
|
||||
- is bb. */
|
||||
- if (!single_pred_p (bb1)
|
||||
- || single_pred (bb1) != bb)
|
||||
- continue;
|
||||
-
|
||||
/* bb1 is the middle block, bb2 the join block, bb the split block,
|
||||
e1 the fallthrough edge from bb1 to bb2. We can't do the
|
||||
optimization if the join block has more than two predecessors. */
|
||||
@@ -330,11 +331,10 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||||
node. */
|
||||
gcc_assert (arg0 != NULL_TREE && arg1 != NULL_TREE);
|
||||
|
||||
- gphi *newphi;
|
||||
- if (single_pred_p (bb1)
|
||||
- && (newphi = factor_out_conditional_conversion (e1, e2, phi,
|
||||
- arg0, arg1,
|
||||
- cond_stmt)))
|
||||
+ gphi *newphi = factor_out_conditional_conversion (e1, e2, phi,
|
||||
+ arg0, arg1,
|
||||
+ cond_stmt);
|
||||
+ if (newphi != NULL)
|
||||
{
|
||||
phi = newphi;
|
||||
/* factor_out_conditional_conversion may create a new PHI in
|
||||
@@ -355,14 +355,12 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||||
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
else if (!early_p
|
||||
- && single_pred_p (bb1)
|
||||
&& cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2,
|
||||
phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
- else if (single_pred_p (bb1)
|
||||
- && spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
+ else if (spaceship_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||||
cfgchanged = true;
|
||||
}
|
||||
}
|
||||
@@ -393,41 +391,35 @@ replace_phi_edge_with_variable (basic_block cond_block,
|
||||
edge e, gphi *phi, tree new_tree)
|
||||
{
|
||||
basic_block bb = gimple_bb (phi);
|
||||
+ basic_block block_to_remove;
|
||||
gimple_stmt_iterator gsi;
|
||||
|
||||
/* Change the PHI argument to new. */
|
||||
SET_USE (PHI_ARG_DEF_PTR (phi, e->dest_idx), new_tree);
|
||||
|
||||
/* Remove the empty basic block. */
|
||||
- edge edge_to_remove;
|
||||
if (EDGE_SUCC (cond_block, 0)->dest == bb)
|
||||
- edge_to_remove = EDGE_SUCC (cond_block, 1);
|
||||
- else
|
||||
- edge_to_remove = EDGE_SUCC (cond_block, 0);
|
||||
- if (EDGE_COUNT (edge_to_remove->dest->preds) == 1)
|
||||
{
|
||||
- e->flags |= EDGE_FALLTHRU;
|
||||
- e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
|
||||
- e->probability = profile_probability::always ();
|
||||
- delete_basic_block (edge_to_remove->dest);
|
||||
-
|
||||
- /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
|
||||
- gsi = gsi_last_bb (cond_block);
|
||||
- gsi_remove (&gsi, true);
|
||||
+ EDGE_SUCC (cond_block, 0)->flags |= EDGE_FALLTHRU;
|
||||
+ EDGE_SUCC (cond_block, 0)->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
|
||||
+ EDGE_SUCC (cond_block, 0)->probability = profile_probability::always ();
|
||||
+
|
||||
+ block_to_remove = EDGE_SUCC (cond_block, 1)->dest;
|
||||
}
|
||||
else
|
||||
{
|
||||
- /* If there are other edges into the middle block make
|
||||
- CFG cleanup deal with the edge removal to avoid
|
||||
- updating dominators here in a non-trivial way. */
|
||||
- gcond *cond = as_a <gcond *> (last_stmt (cond_block));
|
||||
- if (edge_to_remove->flags & EDGE_TRUE_VALUE)
|
||||
- gimple_cond_make_false (cond);
|
||||
- else
|
||||
- gimple_cond_make_true (cond);
|
||||
+ EDGE_SUCC (cond_block, 1)->flags |= EDGE_FALLTHRU;
|
||||
+ EDGE_SUCC (cond_block, 1)->flags
|
||||
+ &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
|
||||
+ EDGE_SUCC (cond_block, 1)->probability = profile_probability::always ();
|
||||
+
|
||||
+ block_to_remove = EDGE_SUCC (cond_block, 0)->dest;
|
||||
}
|
||||
+ delete_basic_block (block_to_remove);
|
||||
|
||||
- statistics_counter_event (cfun, "Replace PHI with variable", 1);
|
||||
+ /* Eliminate the COND_EXPR at the end of COND_BLOCK. */
|
||||
+ gsi = gsi_last_bb (cond_block);
|
||||
+ gsi_remove (&gsi, true);
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file,
|
||||
@@ -854,9 +846,6 @@ match_simplify_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
allow it and move it once the transformation is done. */
|
||||
if (!empty_block_p (middle_bb))
|
||||
{
|
||||
- if (!single_pred_p (middle_bb))
|
||||
- return false;
|
||||
-
|
||||
stmt_to_move = last_and_only_stmt (middle_bb);
|
||||
if (!stmt_to_move)
|
||||
return false;
|
||||
@@ -1236,11 +1225,6 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
}
|
||||
else
|
||||
{
|
||||
- if (!single_pred_p (middle_bb))
|
||||
- return 0;
|
||||
- statistics_counter_event (cfun, "Replace PHI with "
|
||||
- "variable/value_replacement", 1);
|
||||
-
|
||||
/* Replace the PHI arguments with arg. */
|
||||
SET_PHI_ARG_DEF (phi, e0->dest_idx, arg);
|
||||
SET_PHI_ARG_DEF (phi, e1->dest_idx, arg);
|
||||
@@ -1255,6 +1239,7 @@ value_replacement (basic_block cond_bb, basic_block middle_bb,
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
+
|
||||
}
|
||||
|
||||
/* Now optimize (x != 0) ? x + y : y to just x + y. */
|
||||
--
|
||||
2.27.0.windows.1
|
||||
|
||||
1297
0083-Struct-reorg-Add-struct-semi-relayout-optimize.patch
Normal file
1297
0083-Struct-reorg-Add-struct-semi-relayout-optimize.patch
Normal file
File diff suppressed because it is too large
Load Diff
79
gcc.spec
79
gcc.spec
@ -61,7 +61,7 @@
|
||||
Summary: Various compilers (C, C++, Objective-C, ...)
|
||||
Name: gcc
|
||||
Version: %{gcc_version}
|
||||
Release: 16
|
||||
Release: 17
|
||||
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
|
||||
URL: https://gcc.gnu.org
|
||||
|
||||
@ -162,6 +162,41 @@ Patch45: 0045-Transposed-SLP-Enable-Transposed-SLP.patch
|
||||
Patch46: 0046-ArrayWidenCompare-Add-a-new-optimization-for-array-c.patch
|
||||
Patch47: 0047-DFE-Fix-the-bug-caused-by-inconsistent-types.patch
|
||||
Patch48: 0048-Struct-Reorg-Type-simplify-limitation-when-in-struct.patch
|
||||
Patch49: 0049-build-Add-some-file-right-to-executable.patch
|
||||
Patch50: 0050-Backport-phiopt-Optimize-x-1024-0-to-int-x-10-PR9769.patch
|
||||
Patch51: 0051-Backport-phiopt-Fix-up-conditional_replacement-PR993.patch
|
||||
Patch52: 0052-Backport-phiopt-Handle-bool-in-two_value_replacement.patch
|
||||
Patch53: 0053-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch
|
||||
Patch54: 0054-Backport-phiopt-Optimize-x-__builtin_clz-x-32-in-GIM.patch
|
||||
Patch55: 0055-Backport-phiopt-Optimize-x-0-y-y-to-x-31-y-PR96928.patch
|
||||
Patch56: 0056-Backport-phiopt-Optimize-x-y-cmp-z-PR94589.patch
|
||||
Patch57: 0057-Backport-Add-support-for-__builtin_bswap128.patch
|
||||
Patch58: 0058-Backport-tree-optimization-95393-fold-MIN-MAX_EXPR-g.patch
|
||||
Patch59: 0059-Backport-Add-a-couple-of-A-CST1-CST2-match-and-simpl.patch
|
||||
Patch60: 0060-Backport-Optimize-x-0-y-y-to-x-31-y-in-match.pd.patch
|
||||
Patch61: 0061-Backport-Replace-conditional_replacement-with-match-.patch
|
||||
Patch62: 0062-Backport-Allow-match-and-simplified-phiopt-to-run-in.patch
|
||||
Patch63: 0063-Backport-Improve-match_simplify_replacement-in-phi-o.patch
|
||||
Patch64: 0064-Backport-phiopt-Use-gphi-phi-instead-of-gimple-phi-s.patch
|
||||
Patch65: 0065-Backport-Optimize-x-bswap-x-0-in-tree-ssa-phiopt.patch
|
||||
Patch66: 0066-Backport-tree-optimization-102880-make-PHI-OPT-recog.patch
|
||||
Patch67: 0067-Backport-tree-Add-vector_element_bits-_tree-PR94980-.patch
|
||||
Patch68: 0068-Backport-Lower-VEC_COND_EXPR-into-internal-functions.patch
|
||||
Patch69: 0069-Backport-gimple-match-Add-a-gimple_extract_op-functi.patch
|
||||
Patch70: 0070-Backport-aarch64-Fix-subs_compare_2.c-regression-PR1.patch
|
||||
Patch71: 0071-PHIOPT-Disable-the-match-A-CST1-0-when-the-CST1-is-n.patch
|
||||
Patch72: 0072-Struct-Reorg-Merge-struct_layout-pass-into-struct_re.patch
|
||||
Patch73: 0073-PHIOPT-Add-A-B-op-CST-B-match-and-simplify-optimizat.patch
|
||||
Patch74: 0074-FORWPROP-Fold-series-of-instructions-into-mul.patch
|
||||
Patch75: 0075-FORWPROP-Fold-series-of-instructions-into-umulh.patch
|
||||
Patch76: 0076-Struct-Reorg-Fix-speccpu2006-462-double-free-I60YUV.patch
|
||||
Patch77: 0077-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch
|
||||
Patch78: 0078-Loop-distribution-Add-isomorphic-stmts-analysis.patch
|
||||
Patch79: 0079-loop-vect-Transfer-arrays-using-registers-between-lo.patch
|
||||
Patch80: 0080-Struct-Reorg-Add-Unsafe-Structure-Pointer-Compressio.patch
|
||||
Patch81: 0081-Loop-distribution-Insert-temp-arrays-built-from-isom.patch
|
||||
Patch82: 0082-Revert-Backport-tree-optimization-102880-make-PHI-OP.patch
|
||||
Patch83: 0083-Struct-reorg-Add-struct-semi-relayout-optimize.patch
|
||||
|
||||
%global gcc_target_platform %{_arch}-linux-gnu
|
||||
|
||||
@ -664,7 +699,41 @@ not stable, so plugins must be rebuilt any time GCC is updated.
|
||||
%patch46 -p1
|
||||
%patch47 -p1
|
||||
%patch48 -p1
|
||||
|
||||
%patch49 -p1
|
||||
%patch50 -p1
|
||||
%patch51 -p1
|
||||
%patch52 -p1
|
||||
%patch53 -p1
|
||||
%patch54 -p1
|
||||
%patch55 -p1
|
||||
%patch56 -p1
|
||||
%patch57 -p1
|
||||
%patch58 -p1
|
||||
%patch59 -p1
|
||||
%patch60 -p1
|
||||
%patch61 -p1
|
||||
%patch62 -p1
|
||||
%patch63 -p1
|
||||
%patch64 -p1
|
||||
%patch65 -p1
|
||||
%patch66 -p1
|
||||
%patch67 -p1
|
||||
%patch68 -p1
|
||||
%patch69 -p1
|
||||
%patch70 -p1
|
||||
%patch71 -p1
|
||||
%patch72 -p1
|
||||
%patch73 -p1
|
||||
%patch74 -p1
|
||||
%patch75 -p1
|
||||
%patch76 -p1
|
||||
%patch77 -p1
|
||||
%patch78 -p1
|
||||
%patch79 -p1
|
||||
%patch80 -p1
|
||||
%patch81 -p1
|
||||
%patch82 -p1
|
||||
%patch83 -p1
|
||||
|
||||
%build
|
||||
|
||||
@ -2684,6 +2753,12 @@ end
|
||||
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
||||
|
||||
%changelog
|
||||
* Tue Dec 6 2022 benniaobufeijiushiji <linda7@huawei.com> - 10.3.1-17
|
||||
- Type:Sync
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC:Sync patch from openeuler/gcc
|
||||
|
||||
* Fri Sep 16 2022 eastb233 <xiezhiheng@huawei.com> - 10.3.1-16
|
||||
- Type:Sync
|
||||
- ID:NA
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user