213 lines
6.2 KiB
Diff
213 lines
6.2 KiB
Diff
|
|
From 33dc778a34d7b93978efe922bb1b4583d8e6c4bb Mon Sep 17 00:00:00 2001
|
||
|
|
From: Roger Sayle <roger@nextmovesoftware.com>
|
||
|
|
Date: Mon, 2 Aug 2021 13:27:53 +0100
|
||
|
|
Subject: [PATCH 17/35] [Backport] Optimize x ? bswap(x) : 0 in tree-ssa-phiopt
|
||
|
|
|
||
|
|
Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=f9fcf754825a1e01033336f84c18690aaa971a6f
|
||
|
|
|
||
|
|
Many thanks again to Jakub Jelinek for a speedy fix for PR 101642.
|
||
|
|
Interestingly, that test case "bswap16(x) ? : x" also reveals a
|
||
|
|
missed optimization opportunity. The resulting "x ? bswap(x) : 0"
|
||
|
|
can be further simplified to just bswap(x).
|
||
|
|
|
||
|
|
Conveniently, tree-ssa-phiopt.c already recognizes/optimizes the
|
||
|
|
related "x ? popcount(x) : 0", so this patch simply makes that
|
||
|
|
transformation more general, additionally handling bswap, parity,
|
||
|
|
ffs and clrsb. All of the required infrastructure is already
|
||
|
|
present thanks to Jakub previously adding support for clz/ctz.
|
||
|
|
To reflect this generalization, the name of the function is changed
|
||
|
|
from cond_removal_in_popcount_clz_ctz_pattern to the hopefully
|
||
|
|
equally descriptive cond_removal_in_builtin_zero_pattern.
|
||
|
|
|
||
|
|
2021-08-02 Roger Sayle <roger@nextmovesoftware.com>
|
||
|
|
|
||
|
|
gcc/ChangeLog
|
||
|
|
* tree-ssa-phiopt.c (cond_removal_in_builtin_zero_pattern):
|
||
|
|
Renamed from cond_removal_in_popcount_clz_ctz_pattern.
|
||
|
|
Add support for BSWAP, FFS, PARITY and CLRSB builtins.
|
||
|
|
(tree_ssa_phiopt_worker): Update call to function above.
|
||
|
|
|
||
|
|
gcc/testsuite/ChangeLog
|
||
|
|
* gcc.dg/tree-ssa/phi-opt-25.c: New test case.
|
||
|
|
---
|
||
|
|
gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c | 83 ++++++++++++++++++++++
|
||
|
|
gcc/tree-ssa-phiopt.c | 37 +++++++---
|
||
|
|
2 files changed, 109 insertions(+), 11 deletions(-)
|
||
|
|
create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
|
||
|
|
|
||
|
|
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..c52c92e1d
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25.c
|
||
|
|
@@ -0,0 +1,83 @@
|
||
|
|
+/* { dg-do compile } */
|
||
|
|
+/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||
|
|
+
|
||
|
|
+unsigned short test_bswap16(unsigned short x)
|
||
|
|
+{
|
||
|
|
+ return x ? __builtin_bswap16(x) : 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+unsigned int test_bswap32(unsigned int x)
|
||
|
|
+{
|
||
|
|
+ return x ? __builtin_bswap32(x) : 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+unsigned long long test_bswap64(unsigned long long x)
|
||
|
|
+{
|
||
|
|
+ return x ? __builtin_bswap64(x) : 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+int test_clrsb(int x)
|
||
|
|
+{
|
||
|
|
+ return x ? __builtin_clrsb(x) : (__SIZEOF_INT__*8-1);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+int test_clrsbl(long x)
|
||
|
|
+{
|
||
|
|
+ return x ? __builtin_clrsbl(x) : (__SIZEOF_LONG__*8-1);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+int test_clrsbll(long long x)
|
||
|
|
+{
|
||
|
|
+ return x ? __builtin_clrsbll(x) : (__SIZEOF_LONG_LONG__*8-1);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+#if 0
|
||
|
|
+/* BUILT_IN_FFS is transformed by match.pd */
|
||
|
|
+int test_ffs(unsigned int x)
|
||
|
|
+{
|
||
|
|
+ return x ? __builtin_ffs(x) : 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+int test_ffsl(unsigned long x)
|
||
|
|
+{
|
||
|
|
+ return x ? __builtin_ffsl(x) : 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+int test_ffsll(unsigned long long x)
|
||
|
|
+{
|
||
|
|
+ return x ? __builtin_ffsll(x) : 0;
|
||
|
|
+}
|
||
|
|
+#endif
|
||
|
|
+
|
||
|
|
+int test_parity(int x)
|
||
|
|
+{
|
||
|
|
+ return x ? __builtin_parity(x) : 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+int test_parityl(long x)
|
||
|
|
+{
|
||
|
|
+ return x ? __builtin_parityl(x) : 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+int test_parityll(long long x)
|
||
|
|
+{
|
||
|
|
+ return x ? __builtin_parityll(x) : 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+int test_popcount(int x)
|
||
|
|
+{
|
||
|
|
+ return x ? __builtin_popcount(x) : 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+int test_popcountl(long x)
|
||
|
|
+{
|
||
|
|
+ return x ? __builtin_popcountl(x) : 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+int test_popcountll(long long x)
|
||
|
|
+{
|
||
|
|
+ return x ? __builtin_popcountll(x) : 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* { dg-final { scan-tree-dump-not "goto" "optimized" } } */
|
||
|
|
+
|
||
|
|
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||
|
|
index 045a7b1b8..21ac08145 100644
|
||
|
|
--- a/gcc/tree-ssa-phiopt.c
|
||
|
|
+++ b/gcc/tree-ssa-phiopt.c
|
||
|
|
@@ -66,9 +66,9 @@ static bool abs_replacement (basic_block, basic_block,
|
||
|
|
edge, edge, gphi *, tree, tree);
|
||
|
|
static bool spaceship_replacement (basic_block, basic_block,
|
||
|
|
edge, edge, gphi *, tree, tree);
|
||
|
|
-static bool cond_removal_in_popcount_clz_ctz_pattern (basic_block, basic_block,
|
||
|
|
- edge, edge, gphi *,
|
||
|
|
- tree, tree);
|
||
|
|
+static bool cond_removal_in_builtin_zero_pattern (basic_block, basic_block,
|
||
|
|
+ edge, edge, gphi *,
|
||
|
|
+ tree, tree);
|
||
|
|
static bool cond_store_replacement (basic_block, basic_block, edge, edge,
|
||
|
|
hash_set<tree> *);
|
||
|
|
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
|
||
|
|
@@ -355,9 +355,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p)
|
||
|
|
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||
|
|
cfgchanged = true;
|
||
|
|
else if (!early_p
|
||
|
|
- && cond_removal_in_popcount_clz_ctz_pattern (bb, bb1, e1,
|
||
|
|
- e2, phi, arg0,
|
||
|
|
- arg1))
|
||
|
|
+ && cond_removal_in_builtin_zero_pattern (bb, bb1, e1, e2,
|
||
|
|
+ phi, arg0, arg1))
|
||
|
|
cfgchanged = true;
|
||
|
|
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
|
||
|
|
cfgchanged = true;
|
||
|
|
@@ -2204,7 +2203,8 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb,
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
-/* Convert
|
||
|
|
+/* Optimize x ? __builtin_fun (x) : C, where C is __builtin_fun (0).
|
||
|
|
+ Convert
|
||
|
|
|
||
|
|
<bb 2>
|
||
|
|
if (b_4(D) != 0)
|
||
|
|
@@ -2236,10 +2236,10 @@ spaceship_replacement (basic_block cond_bb, basic_block middle_bb,
|
||
|
|
instead of 0 above it uses the value from that macro. */
|
||
|
|
|
||
|
|
static bool
|
||
|
|
-cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
||
|
|
- basic_block middle_bb,
|
||
|
|
- edge e1, edge e2, gphi *phi,
|
||
|
|
- tree arg0, tree arg1)
|
||
|
|
+cond_removal_in_builtin_zero_pattern (basic_block cond_bb,
|
||
|
|
+ basic_block middle_bb,
|
||
|
|
+ edge e1, edge e2, gphi *phi,
|
||
|
|
+ tree arg0, tree arg1)
|
||
|
|
{
|
||
|
|
gimple *cond;
|
||
|
|
gimple_stmt_iterator gsi, gsi_from;
|
||
|
|
@@ -2287,6 +2287,12 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
||
|
|
int val = 0;
|
||
|
|
switch (cfn)
|
||
|
|
{
|
||
|
|
+ case CFN_BUILT_IN_BSWAP16:
|
||
|
|
+ case CFN_BUILT_IN_BSWAP32:
|
||
|
|
+ case CFN_BUILT_IN_BSWAP64:
|
||
|
|
+ case CFN_BUILT_IN_BSWAP128:
|
||
|
|
+ CASE_CFN_FFS:
|
||
|
|
+ CASE_CFN_PARITY:
|
||
|
|
CASE_CFN_POPCOUNT:
|
||
|
|
break;
|
||
|
|
CASE_CFN_CLZ:
|
||
|
|
@@ -2315,6 +2321,15 @@ cond_removal_in_popcount_clz_ctz_pattern (basic_block cond_bb,
|
||
|
|
}
|
||
|
|
}
|
||
|
|
return false;
|
||
|
|
+ case BUILT_IN_CLRSB:
|
||
|
|
+ val = TYPE_PRECISION (integer_type_node) - 1;
|
||
|
|
+ break;
|
||
|
|
+ case BUILT_IN_CLRSBL:
|
||
|
|
+ val = TYPE_PRECISION (long_integer_type_node) - 1;
|
||
|
|
+ break;
|
||
|
|
+ case BUILT_IN_CLRSBLL:
|
||
|
|
+ val = TYPE_PRECISION (long_long_integer_type_node) - 1;
|
||
|
|
+ break;
|
||
|
|
default:
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
--
|
||
|
|
2.27.0.windows.1
|
||
|
|
|