360 lines
14 KiB
Diff
360 lines
14 KiB
Diff
|
|
This backport contains 1 patch from the GCC mainline tree.
|
||
|
|
The commit IDs of these patches are listed below in chronological order.
|
||
|
|
|
||
|
|
0001-store-merging-Consider-also-overlapping-stores-earli.patch
|
||
|
|
bd909071ac04e94f4b6f0baab64d0687ec55681d
|
||
|
|
|
||
|
|
diff -uprN a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c
|
||
|
|
--- a/gcc/gimple-ssa-store-merging.c 2020-12-16 17:03:16.155633230 +0800
|
||
|
|
+++ b/gcc/gimple-ssa-store-merging.c 2020-12-16 11:15:58.575633230 +0800
|
||
|
|
@@ -2021,7 +2021,8 @@ struct imm_store_chain_info
|
||
|
|
}
|
||
|
|
}
|
||
|
|
bool terminate_and_process_chain ();
|
||
|
|
- bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
|
||
|
|
+ bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
|
||
|
|
+ unsigned int);
|
||
|
|
bool coalesce_immediate_stores ();
|
||
|
|
bool output_merged_store (merged_store_group *);
|
||
|
|
bool output_merged_stores ();
|
||
|
|
@@ -2342,14 +2343,39 @@ gather_bswap_load_refs (vec<tree> *refs,
|
||
|
|
into the group. That way it will be its own store group and will
|
||
|
|
not be touched. If ALL_INTEGER_CST_P and there are overlapping
|
||
|
|
INTEGER_CST stores, those are mergeable using merge_overlapping,
|
||
|
|
- so don't return false for those. */
|
||
|
|
+ so don't return false for those.
|
||
|
|
+
|
||
|
|
+ Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
|
||
|
|
+ (exclusive), whether they don't overlap the bitrange START to END
|
||
|
|
+ and have order in between FIRST_ORDER and LAST_ORDER. This is to
|
||
|
|
+ prevent merging in cases like:
|
||
|
|
+ MEM <char[12]> [&b + 8B] = {};
|
||
|
|
+ MEM[(short *) &b] = 5;
|
||
|
|
+ _5 = *x_4(D);
|
||
|
|
+ MEM <long long unsigned int> [&b + 2B] = _5;
|
||
|
|
+ MEM[(char *)&b + 16B] = 88;
|
||
|
|
+ MEM[(int *)&b + 20B] = 1;
|
||
|
|
+ The = {} store comes in sort_by_bitpos before the = 88 store, and can't
|
||
|
|
+ be merged with it, because the = _5 store overlaps these and is in between
|
||
|
|
+ them in sort_by_order ordering. If it was merged, the merged store would
|
||
|
|
+ go after the = _5 store and thus change behavior. */
|
||
|
|
|
||
|
|
static bool
|
||
|
|
check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
|
||
|
|
- bool all_integer_cst_p, unsigned int last_order,
|
||
|
|
- unsigned HOST_WIDE_INT end)
|
||
|
|
+ bool all_integer_cst_p, unsigned int first_order,
|
||
|
|
+ unsigned int last_order, unsigned HOST_WIDE_INT start,
|
||
|
|
+ unsigned HOST_WIDE_INT end, unsigned int first_earlier,
|
||
|
|
+ unsigned end_earlier)
|
||
|
|
{
|
||
|
|
unsigned int len = m_store_info.length ();
|
||
|
|
+ for (unsigned int j = first_earlier; j < end_earlier; j++)
|
||
|
|
+ {
|
||
|
|
+ store_immediate_info *info = m_store_info[j];
|
||
|
|
+ if (info->order > first_order
|
||
|
|
+ && info->order < last_order
|
||
|
|
+ && info->bitpos + info->bitsize > start)
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
for (++i; i < len; ++i)
|
||
|
|
{
|
||
|
|
store_immediate_info *info = m_store_info[i];
|
||
|
|
@@ -2370,7 +2396,8 @@ check_no_overlap (vec<store_immediate_in
|
||
|
|
bool
|
||
|
|
imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
|
||
|
|
unsigned int first,
|
||
|
|
- unsigned int try_size)
|
||
|
|
+ unsigned int try_size,
|
||
|
|
+ unsigned int first_earlier)
|
||
|
|
{
|
||
|
|
unsigned int len = m_store_info.length (), last = first;
|
||
|
|
unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
|
||
|
|
@@ -2509,7 +2536,8 @@ imm_store_chain_info::try_coalesce_bswap
|
||
|
|
if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
- if (!check_no_overlap (m_store_info, last, false, last_order, end))
|
||
|
|
+ if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
|
||
|
|
+ merged_store->start, end, first_earlier, first))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
/* Don't handle memory copy this way if normal non-bswap processing
|
||
|
|
@@ -2601,6 +2629,8 @@ imm_store_chain_info::coalesce_immediate
|
||
|
|
|
||
|
|
store_immediate_info *info;
|
||
|
|
unsigned int i, ignore = 0;
|
||
|
|
+ unsigned int first_earlier = 0;
|
||
|
|
+ unsigned int end_earlier = 0;
|
||
|
|
|
||
|
|
/* Order the stores by the bitposition they write to. */
|
||
|
|
m_store_info.qsort (sort_by_bitpos);
|
||
|
|
@@ -2615,6 +2645,12 @@ imm_store_chain_info::coalesce_immediate
|
||
|
|
if (i <= ignore)
|
||
|
|
goto done;
|
||
|
|
|
||
|
|
+ while (first_earlier < end_earlier
|
||
|
|
+ && (m_store_info[first_earlier]->bitpos
|
||
|
|
+ + m_store_info[first_earlier]->bitsize
|
||
|
|
+ <= merged_store->start))
|
||
|
|
+ first_earlier++;
|
||
|
|
+
|
||
|
|
/* First try to handle group of stores like:
|
||
|
|
p[0] = data >> 24;
|
||
|
|
p[1] = data >> 16;
|
||
|
|
@@ -2628,7 +2664,8 @@ imm_store_chain_info::coalesce_immediate
|
||
|
|
{
|
||
|
|
unsigned int try_size;
|
||
|
|
for (try_size = 64; try_size >= 16; try_size >>= 1)
|
||
|
|
- if (try_coalesce_bswap (merged_store, i - 1, try_size))
|
||
|
|
+ if (try_coalesce_bswap (merged_store, i - 1, try_size,
|
||
|
|
+ first_earlier))
|
||
|
|
break;
|
||
|
|
|
||
|
|
if (try_size >= 16)
|
||
|
|
@@ -2636,7 +2673,10 @@ imm_store_chain_info::coalesce_immediate
|
||
|
|
ignore = i + merged_store->stores.length () - 1;
|
||
|
|
m_merged_store_groups.safe_push (merged_store);
|
||
|
|
if (ignore < m_store_info.length ())
|
||
|
|
- merged_store = new merged_store_group (m_store_info[ignore]);
|
||
|
|
+ {
|
||
|
|
+ merged_store = new merged_store_group (m_store_info[ignore]);
|
||
|
|
+ end_earlier = ignore;
|
||
|
|
+ }
|
||
|
|
else
|
||
|
|
merged_store = NULL;
|
||
|
|
goto done;
|
||
|
|
@@ -2662,12 +2702,16 @@ imm_store_chain_info::coalesce_immediate
|
||
|
|
/* Only allow overlapping stores of constants. */
|
||
|
|
if (info->rhs_code == INTEGER_CST && merged_store->only_constants)
|
||
|
|
{
|
||
|
|
+ unsigned int first_order
|
||
|
|
+ = MIN (merged_store->first_order, info->order);
|
||
|
|
unsigned int last_order
|
||
|
|
= MAX (merged_store->last_order, info->order);
|
||
|
|
unsigned HOST_WIDE_INT end
|
||
|
|
= MAX (merged_store->start + merged_store->width,
|
||
|
|
info->bitpos + info->bitsize);
|
||
|
|
- if (check_no_overlap (m_store_info, i, true, last_order, end))
|
||
|
|
+ if (check_no_overlap (m_store_info, i, true, first_order,
|
||
|
|
+ last_order, merged_store->start, end,
|
||
|
|
+ first_earlier, end_earlier))
|
||
|
|
{
|
||
|
|
/* check_no_overlap call above made sure there are no
|
||
|
|
overlapping stores with non-INTEGER_CST rhs_code
|
||
|
|
@@ -2696,6 +2740,7 @@ imm_store_chain_info::coalesce_immediate
|
||
|
|
do
|
||
|
|
{
|
||
|
|
unsigned int max_order = 0;
|
||
|
|
+ unsigned int min_order = first_order;
|
||
|
|
unsigned first_nonmergeable_int_order = ~0U;
|
||
|
|
unsigned HOST_WIDE_INT this_end = end;
|
||
|
|
k = i;
|
||
|
|
@@ -2721,6 +2766,7 @@ imm_store_chain_info::coalesce_immediate
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
k = j;
|
||
|
|
+ min_order = MIN (min_order, info2->order);
|
||
|
|
this_end = MAX (this_end,
|
||
|
|
info2->bitpos + info2->bitsize);
|
||
|
|
}
|
||
|
|
@@ -2736,6 +2782,12 @@ imm_store_chain_info::coalesce_immediate
|
||
|
|
first_nonmergeable_order
|
||
|
|
= MIN (first_nonmergeable_order, info2->order);
|
||
|
|
}
|
||
|
|
+ if (k > i
|
||
|
|
+ && !check_no_overlap (m_store_info, len - 1, true,
|
||
|
|
+ min_order, try_order,
|
||
|
|
+ merged_store->start, this_end,
|
||
|
|
+ first_earlier, end_earlier))
|
||
|
|
+ k = 0;
|
||
|
|
if (k == 0)
|
||
|
|
{
|
||
|
|
if (last_order == try_order)
|
||
|
|
@@ -2821,9 +2873,12 @@ imm_store_chain_info::coalesce_immediate
|
||
|
|
info->ops_swapped_p = true;
|
||
|
|
}
|
||
|
|
if (check_no_overlap (m_store_info, i, false,
|
||
|
|
+ MIN (merged_store->first_order, info->order),
|
||
|
|
MAX (merged_store->last_order, info->order),
|
||
|
|
+ merged_store->start,
|
||
|
|
MAX (merged_store->start + merged_store->width,
|
||
|
|
- info->bitpos + info->bitsize)))
|
||
|
|
+ info->bitpos + info->bitsize),
|
||
|
|
+ first_earlier, end_earlier))
|
||
|
|
{
|
||
|
|
/* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
|
||
|
|
if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
|
||
|
|
@@ -2868,6 +2923,7 @@ imm_store_chain_info::coalesce_immediate
|
||
|
|
delete merged_store;
|
||
|
|
|
||
|
|
merged_store = new merged_store_group (info);
|
||
|
|
+ end_earlier = i;
|
||
|
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
||
|
|
fputs ("New store group\n", dump_file);
|
||
|
|
|
||
|
|
diff -uprN a/gcc/testsuite/gcc.dg/store_merging_31.c b/gcc/testsuite/gcc.dg/store_merging_31.c
|
||
|
|
--- a/gcc/testsuite/gcc.dg/store_merging_31.c 1970-01-01 08:00:00.000000000 +0800
|
||
|
|
+++ b/gcc/testsuite/gcc.dg/store_merging_31.c 2020-12-16 11:15:58.575633230 +0800
|
||
|
|
@@ -0,0 +1,27 @@
|
||
|
|
+/* PR tree-optimization/97053 */
|
||
|
|
+/* { dg-do run } */
|
||
|
|
+/* { dg-options "-O2" } */
|
||
|
|
+
|
||
|
|
+struct S { short a; char b[9]; int c; char d; int e; };
|
||
|
|
+
|
||
|
|
+__attribute__((noipa)) void
|
||
|
|
+foo (char *x, char *y)
|
||
|
|
+{
|
||
|
|
+ if (__builtin_strcmp (x, "ABCDXXXX") != 0
|
||
|
|
+ || __builtin_strcmp (y, "ABCDXXXX") != 0)
|
||
|
|
+ __builtin_abort ();
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+int
|
||
|
|
+main ()
|
||
|
|
+{
|
||
|
|
+ char a[9] = "XXXXXXXX";
|
||
|
|
+ struct S b = {};
|
||
|
|
+ __builtin_memcpy (a, "ABCD", 4);
|
||
|
|
+ b.a = 5;
|
||
|
|
+ __builtin_memcpy (b.b, a, 8);
|
||
|
|
+ b.d = 'X';
|
||
|
|
+ b.e = 1;
|
||
|
|
+ foo (a, b.b);
|
||
|
|
+ return 0;
|
||
|
|
+}
|
||
|
|
diff -uprN a/gcc/testsuite/gcc.dg/store_merging_32.c b/gcc/testsuite/gcc.dg/store_merging_32.c
|
||
|
|
--- a/gcc/testsuite/gcc.dg/store_merging_32.c 1970-01-01 08:00:00.000000000 +0800
|
||
|
|
+++ b/gcc/testsuite/gcc.dg/store_merging_32.c 2020-12-16 11:15:58.575633230 +0800
|
||
|
|
@@ -0,0 +1,129 @@
|
||
|
|
+/* PR tree-optimization/97053 */
|
||
|
|
+/* { dg-do run } */
|
||
|
|
+/* { dg-options "-O2 -fno-tree-dse" } */
|
||
|
|
+
|
||
|
|
+struct __attribute__((packed, may_alias)) S { long long s; };
|
||
|
|
+struct __attribute__((packed, may_alias)) T { short t; };
|
||
|
|
+
|
||
|
|
+__attribute__((noipa)) void
|
||
|
|
+test (char *p, char *q, int s)
|
||
|
|
+{
|
||
|
|
+ if ((s & 1) == 0)
|
||
|
|
+ {
|
||
|
|
+ if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
|
||
|
|
+ != *(short __attribute__((may_alias)) *) &q[sizeof (short)]
|
||
|
|
+ || (((struct S __attribute__((may_alias)) *) &p[1])->s
|
||
|
|
+ != ((struct S __attribute__((may_alias)) *) &q[1])->s)
|
||
|
|
+ || (*(short __attribute__((may_alias)) *) &p[2 * sizeof (short)]
|
||
|
|
+ != *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)]))
|
||
|
|
+ __builtin_abort ();
|
||
|
|
+ }
|
||
|
|
+ else
|
||
|
|
+ {
|
||
|
|
+ if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
|
||
|
|
+ != *(short __attribute__((may_alias)) *) &q[sizeof (short)]
|
||
|
|
+ || (((struct S __attribute__((may_alias)) *) &p[1])->s
|
||
|
|
+ != ((struct S __attribute__((may_alias)) *) &q[1])->s)
|
||
|
|
+ || (((struct T __attribute__((may_alias)) *) &p[2 * sizeof (short) - 1])->t
|
||
|
|
+ != ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t)
|
||
|
|
+ || p[3 * sizeof (short) - 2] != q[3 * sizeof (short) - 2])
|
||
|
|
+ __builtin_abort ();
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+__attribute__((noipa)) void
|
||
|
|
+foo (long long *p, char *q, char *r, char *s)
|
||
|
|
+{
|
||
|
|
+ char a[64] __attribute__((aligned (__alignof (short))));
|
||
|
|
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
|
||
|
|
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||
|
|
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
|
||
|
|
+ *(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
|
||
|
|
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||
|
|
+ *(short __attribute__((may_alias)) *) &s[2 * sizeof (short)] = 2;
|
||
|
|
+ test (a, q, 0);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+__attribute__((noipa)) void
|
||
|
|
+bar (long long *p, char *q, char *r, char *s, char *t)
|
||
|
|
+{
|
||
|
|
+ char a[64] __attribute__((aligned (__alignof (short))));
|
||
|
|
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
|
||
|
|
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||
|
|
+ ((struct T __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1])->t = 2;
|
||
|
|
+ a[3 * sizeof (short) - 2] = 3;
|
||
|
|
+ *(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
|
||
|
|
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||
|
|
+ ((struct T __attribute__((may_alias)) *) &s[2 * sizeof (short) - 1])->t = 2;
|
||
|
|
+ t[3 * sizeof (short) - 2] = 3;
|
||
|
|
+ test (a, q, 1);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+__attribute__((noipa)) void
|
||
|
|
+baz (long long *p, char *q, char *r, char *s)
|
||
|
|
+{
|
||
|
|
+ char a[64] __attribute__((aligned (__alignof (short))));
|
||
|
|
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
|
||
|
|
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||
|
|
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
|
||
|
|
+ *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = 2;
|
||
|
|
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||
|
|
+ *(short __attribute__((may_alias)) *) &s[sizeof (short)] = 1;
|
||
|
|
+ test (a, q, 2);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+__attribute__((noipa)) void
|
||
|
|
+qux (long long *p, char *q, char *r, char *s, char *t)
|
||
|
|
+{
|
||
|
|
+ char a[64] __attribute__((aligned (__alignof (short))));
|
||
|
|
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1] = 2;
|
||
|
|
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||
|
|
+ a[3 * sizeof (short) - 2] = 3;
|
||
|
|
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
|
||
|
|
+ ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t = 2;
|
||
|
|
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||
|
|
+ s[3 * sizeof (short) - 2] = 3;
|
||
|
|
+ ((struct T __attribute__((may_alias)) *) &t[sizeof (short)])->t = 1;
|
||
|
|
+ test (a, q, 3);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+__attribute__((noipa)) void
|
||
|
|
+corge (long long *p, char *q, char *r, char *s, short u[3])
|
||
|
|
+{
|
||
|
|
+ char a[64] __attribute__((aligned (__alignof (short))));
|
||
|
|
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
|
||
|
|
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||
|
|
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
|
||
|
|
+ *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
|
||
|
|
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||
|
|
+ *(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
|
||
|
|
+ test (a, q, 4);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+__attribute__((noipa)) void
|
||
|
|
+garply (long long *p, char *q, char *r, char *s, short u[3])
|
||
|
|
+{
|
||
|
|
+ char a[64] __attribute__((aligned (__alignof (short))));
|
||
|
|
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
|
||
|
|
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||
|
|
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
|
||
|
|
+ *(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
|
||
|
|
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||
|
|
+ *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
|
||
|
|
+ test (a, q, 6);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+int
|
||
|
|
+main ()
|
||
|
|
+{
|
||
|
|
+ char a[64] __attribute__((aligned (__alignof (short))));
|
||
|
|
+ long long p = -1LL;
|
||
|
|
+ short u[] = { 1, 2, 3 };
|
||
|
|
+ foo (&p, &a[0], &a[0], &a[0]);
|
||
|
|
+ bar (&p, &a[0], &a[0], &a[0], &a[0]);
|
||
|
|
+ baz (&p, &a[0], &a[0], &a[0]);
|
||
|
|
+ qux (&p, &a[0], &a[0], &a[0], &a[0]);
|
||
|
|
+ corge (&p, &a[0], &a[0], &a[0], u);
|
||
|
|
+ garply (&p, &a[0], &a[0], &a[0], u);
|
||
|
|
+ return 0;
|
||
|
|
+}
|