This backport contains 14 patches from the GCC mainline tree. The commit IDs of these patches are listed below in chronological order.

c2851dc2896bfc0d27b32c90cafc873f67cd6727 0001-tree-ssa-sccvn.c-struct-vn_walk_cb_data-Add-orig_ref.patch
69b5279e977593d656906288316ee03a8bf79c6a 0001-gimple-parser.c-c_parser_gimple_postfix_expression-H.patch
8389386c6d55d57afc3ae01f71546ac4468f7926 0001-gimple-parser.c-c_parser_gimple_postfix_expression-S.patch
d1f2e4c1027b826cf3ba353e86c37589f63f8efe 0001-tree-ssa-sccvn.c-vn_walk_cb_data-push_partial_def-Re.patch
62e3e66f130fc280eac0bbb6b69e9adca328c03b 0001-re-PR-tree-optimization-83518-Missing-optimization-u.patch
10f30ac9cda947d117e50f0cbd4cf94ee70a944f 0001-re-PR-tree-optimization-91756-g-.dg-lto-alias-3-FAIL.patch
1284e2b104a81ad93daab5110cd844981e501086 0001-re-PR-tree-optimization-90883-Generated-code-is-wors.patch
fb08a53b2eb01cc06d66f479c865aca55c91fd26 0001-tree-ssa-sccvn.c-vn_walk_cb_data-push_partial_def-Ba.patch
0849cdae714ddf056a4944f31eef53a465f1bcd0 0001-tree-ssa-sccvn.c-vn_walk_cb_data-push_partial_def-Ha.patch
5f0653a8b75a5ad5a5405a27dd92d3a5759eed4c 0001-tree-optimization-91123-restore-redundant-store-remo.patch
8aba425f4ebc5e2c054776d3cdddf13f7c1918f8 0001-sccvn-Handle-bitfields-in-vn_reference_lookup_3-PR93.patch
7f5617b00445dcc861a498a4cecc8aaa59e05b8c 0001-sccvn-Handle-bitfields-in-push_partial_def-PR93582.patch
5f9cd512c4278621435cce486dd00248ea2e821c 0001-sccvn-Handle-non-byte-aligned-offset-or-size-for-mem.patch
b07e4e7c7520ca3e798f514dec0711eea2c027be 0001-sccvn-Improve-handling-of-load-masked-with-integer-c.patch

diff -urpN a/gcc/c/gimple-parser.c b/gcc/c/gimple-parser.c --- a/gcc/c/gimple-parser.c 2020-11-26 22:26:34.848000000 -0500 +++ b/gcc/c/gimple-parser.c 2020-11-26 22:06:08.032000000 -0500 @@ -1320,17 +1320,24 @@ c_parser_gimple_postfix_expression (gimp } else { - bool neg_p; + bool neg_p, addr_p; if ((neg_p = c_parser_next_token_is (parser, CPP_MINUS))) c_parser_consume_token (parser); + if ((addr_p = c_parser_next_token_is (parser, CPP_AND))) + c_parser_consume_token (parser); tree val = c_parser_gimple_postfix_expression (parser).value; if (! val || val == error_mark_node - || ! CONSTANT_CLASS_P (val)) + || (!CONSTANT_CLASS_P (val) + && !(addr_p + && (TREE_CODE (val) == STRING_CST + || DECL_P (val))))) { c_parser_error (parser, "invalid _Literal"); return expr; } + if (addr_p) + val = build1 (ADDR_EXPR, type, val); if (neg_p) { val = const_unop (NEGATE_EXPR, TREE_TYPE (val), val); diff -urpN a/gcc/fold-const.c b/gcc/fold-const.c --- a/gcc/fold-const.c 2020-11-26 22:26:32.816000000 -0500 +++ b/gcc/fold-const.c 2020-11-26 22:06:08.036000000 -0500 @@ -7773,6 +7773,70 @@ native_decode_vector_tree (tree type, ve return builder.build (); } +/* Routines for manipulation of native_encode_expr encoded data if the encoded + or extracted constant positions and/or sizes aren't byte aligned. */ + +/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the + bits between adjacent elements. AMNT should be within + [0, BITS_PER_UNIT). + Example, AMNT = 2: + 00011111|11100000 << 2 = 01111111|10000000 + PTR[1] | PTR[0] PTR[1] | PTR[0]. 
*/ + +void +shift_bytes_in_array_left (unsigned char *ptr, unsigned int sz, + unsigned int amnt) +{ + if (amnt == 0) + return; + + unsigned char carry_over = 0U; + unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt); + unsigned char clear_mask = (~0U) << amnt; + + for (unsigned int i = 0; i < sz; i++) + { + unsigned prev_carry_over = carry_over; + carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt); + + ptr[i] <<= amnt; + if (i != 0) + { + ptr[i] &= clear_mask; + ptr[i] |= prev_carry_over; + } + } +} + +/* Like shift_bytes_in_array_left but for big-endian. + Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the + bits between adjacent elements. AMNT should be within + [0, BITS_PER_UNIT). + Example, AMNT = 2: + 00011111|11100000 >> 2 = 00000111|11111000 + PTR[0] | PTR[1] PTR[0] | PTR[1]. */ + +void +shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz, + unsigned int amnt) +{ + if (amnt == 0) + return; + + unsigned char carry_over = 0U; + unsigned char carry_mask = ~(~0U << amnt); + + for (unsigned int i = 0; i < sz; i++) + { + unsigned prev_carry_over = carry_over; + carry_over = ptr[i] & carry_mask; + + carry_over <<= (unsigned char) BITS_PER_UNIT - amnt; + ptr[i] >>= amnt; + ptr[i] |= prev_carry_over; + } +} + /* Try to view-convert VECTOR_CST EXPR to VECTOR_TYPE TYPE by operating directly on the VECTOR_CST encoding, in a way that works for variable- length vectors. Return the resulting VECTOR_CST on success or null diff -urpN a/gcc/fold-const.h b/gcc/fold-const.h --- a/gcc/fold-const.h 2020-11-26 22:26:32.816000000 -0500 +++ b/gcc/fold-const.h 2020-11-26 22:06:08.036000000 -0500 @@ -27,6 +27,10 @@ extern int folding_initializer; /* Convert between trees and native memory representation. */ extern int native_encode_expr (const_tree, unsigned char *, int, int off = -1); extern tree native_interpret_expr (tree, const unsigned char *, int); +extern void shift_bytes_in_array_left (unsigned char *, unsigned int, + unsigned int); +extern void shift_bytes_in_array_right (unsigned char *, unsigned int, + unsigned int); /* Fold constants as much as possible in an expression. Returns the simplified expression. diff -urpN a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c --- a/gcc/gimple-ssa-store-merging.c 2020-11-26 22:26:32.860000000 -0500 +++ b/gcc/gimple-ssa-store-merging.c 2020-11-26 22:06:08.036000000 -0500 @@ -1464,66 +1464,6 @@ dump_char_array (FILE *fd, unsigned char fprintf (fd, "\n"); } -/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the - bits between adjacent elements. AMNT should be within - [0, BITS_PER_UNIT). - Example, AMNT = 2: - 00011111|11100000 << 2 = 01111111|10000000 - PTR[1] | PTR[0] PTR[1] | PTR[0]. */ - -static void -shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt) -{ - if (amnt == 0) - return; - - unsigned char carry_over = 0U; - unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt); - unsigned char clear_mask = (~0U) << amnt; - - for (unsigned int i = 0; i < sz; i++) - { - unsigned prev_carry_over = carry_over; - carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt); - - ptr[i] <<= amnt; - if (i != 0) - { - ptr[i] &= clear_mask; - ptr[i] |= prev_carry_over; - } - } -} - -/* Like shift_bytes_in_array but for big-endian. - Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the - bits between adjacent elements. AMNT should be within - [0, BITS_PER_UNIT). 
- Example, AMNT = 2: - 00011111|11100000 >> 2 = 00000111|11111000 - PTR[0] | PTR[1] PTR[0] | PTR[1]. */ - -static void -shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz, - unsigned int amnt) -{ - if (amnt == 0) - return; - - unsigned char carry_over = 0U; - unsigned char carry_mask = ~(~0U << amnt); - - for (unsigned int i = 0; i < sz; i++) - { - unsigned prev_carry_over = carry_over; - carry_over = ptr[i] & carry_mask; - - carry_over <<= (unsigned char) BITS_PER_UNIT - amnt; - ptr[i] >>= amnt; - ptr[i] |= prev_carry_over; - } -} - /* Clear out LEN bits starting from bit START in the byte array PTR. This clears the bits to the *right* from START. START must be within [0, BITS_PER_UNIT) and counts starting from @@ -1749,7 +1689,7 @@ encode_tree_to_bitpos (tree expr, unsign /* Create the shifted version of EXPR. */ if (!BYTES_BIG_ENDIAN) { - shift_bytes_in_array (tmpbuf, byte_size, shift_amnt); + shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt); if (shift_amnt == 0) byte_size--; } @@ -4667,11 +4607,11 @@ verify_array_eq (unsigned char *x, unsig } } -/* Test shift_bytes_in_array and that it carries bits across between +/* Test shift_bytes_in_array_left and that it carries bits across between bytes correctly. */ static void -verify_shift_bytes_in_array (void) +verify_shift_bytes_in_array_left (void) { /* byte 1 | byte 0 00011111 | 11100000. */ @@ -4680,13 +4620,13 @@ verify_shift_bytes_in_array (void) memcpy (in, orig, sizeof orig); unsigned char expected[2] = { 0x80, 0x7f }; - shift_bytes_in_array (in, sizeof (in), 2); + shift_bytes_in_array_left (in, sizeof (in), 2); verify_array_eq (in, expected, sizeof (in)); memcpy (in, orig, sizeof orig); memcpy (expected, orig, sizeof orig); /* Check that shifting by zero doesn't change anything. */ - shift_bytes_in_array (in, sizeof (in), 0); + shift_bytes_in_array_left (in, sizeof (in), 0); verify_array_eq (in, expected, sizeof (in)); } @@ -4771,7 +4711,7 @@ verify_clear_bit_region_be (void) void store_merging_c_tests (void) { - verify_shift_bytes_in_array (); + verify_shift_bytes_in_array_left (); verify_shift_bytes_in_array_right (); verify_clear_bit_region (); verify_clear_bit_region_be (); diff -urpN a/gcc/testsuite/gcc.c-torture/execute/pr93582.c b/gcc/testsuite/gcc.c-torture/execute/pr93582.c --- a/gcc/testsuite/gcc.c-torture/execute/pr93582.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.c-torture/execute/pr93582.c 2020-11-26 22:25:43.532000000 -0500 @@ -0,0 +1,22 @@ +/* PR tree-optimization/93582 */ + +short a; +int b, c; + +__attribute__((noipa)) void +foo (void) +{ + b = c; + a &= 7; +} + +int +main () +{ + c = 27; + a = 14; + foo (); + if (b != 27 || a != 6) + __builtin_abort (); + return 0; +} diff -urpN a/gcc/testsuite/gcc.dg/gimplefe-42.c b/gcc/testsuite/gcc.dg/gimplefe-42.c --- a/gcc/testsuite/gcc.dg/gimplefe-42.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/gimplefe-42.c 2020-11-26 22:06:08.036000000 -0500 @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-fgimple" } */ + +typedef char ref_all_char __attribute__((may_alias)); +char a[7]; +__GIMPLE void f() +{ + int _1; + /* string literals inside __MEM need their address taken. */ + __MEM ((ref_all_char *)&a) + = __MEM (_Literal (char *) &"654321"); + /* but plain assignment also works. */ + __MEM ((ref_all_char *)&a) = "654321"; + /* also punning with int. 
*/ + _1 = __MEM (_Literal (char *) &"654321"); + __MEM ((ref_all_char *)&a) = _1; + return; +} diff -urpN a/gcc/testsuite/gcc.dg/pr93582.c b/gcc/testsuite/gcc.dg/pr93582.c --- a/gcc/testsuite/gcc.dg/pr93582.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/pr93582.c 2020-11-26 22:26:15.784000000 -0500 @@ -0,0 +1,57 @@ +/* PR tree-optimization/93582 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -Warray-bounds" } */ + +struct S { + unsigned int s1:1; + unsigned int s2:1; + unsigned int s3:1; + unsigned int s4:1; + unsigned int s5:4; + unsigned char s6; + unsigned short s7; + unsigned short s8; +}; +struct T { + int t1; + int t2; +}; + +static inline int +bar (struct S *x) +{ + if (x->s4) + return ((struct T *)(x + 1))->t1 + ((struct T *)(x + 1))->t2; /* { dg-bogus "array subscript 1 is outside array bounds of" } */ + else + return 0; +} + +int +foo (int x, int y) +{ + struct S s; /* { dg-bogus "while referencing" } */ + s.s6 = x; + s.s7 = y & 0x1FFF; + s.s4 = 0; + return bar (&s); +} + +static inline int +qux (struct S *x) +{ + int s4 = x->s4; + if (s4) + return ((struct T *)(x + 1))->t1 + ((struct T *)(x + 1))->t2; + else + return 0; +} + +int +baz (int x, int y) +{ + struct S s; + s.s6 = x; + s.s7 = y & 0x1FFF; + s.s4 = 0; + return qux (&s); +} diff -urpN a/gcc/testsuite/gcc.dg/torture/ssa-fre-5.c b/gcc/testsuite/gcc.dg/torture/ssa-fre-5.c --- a/gcc/testsuite/gcc.dg/torture/ssa-fre-5.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/torture/ssa-fre-5.c 2020-11-26 22:06:08.036000000 -0500 @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ +/* { dg-additional-options "-fgimple -fdump-tree-fre1" } */ + +typedef int v4si __attribute__((vector_size(16))); + +int __GIMPLE (ssa,startwith("fre")) +foo () +{ + int * p; + int i; + int x[4]; + long unsigned int _1; + long unsigned int _2; + int _7; + + __BB(2): + i_3 = 0; + _1 = (long unsigned int) i_3; + _2 = _1 * 4ul; + p_4 = _Literal (int *) &x + _2; + __MEM ((v4si *)p_4) = _Literal (v4si) { 1, 2, 3, 4 }; + _7 = x[0]; + return _7; +} + +/* { dg-final { scan-tree-dump "return 1;" "fre1" } } */ diff -urpN a/gcc/testsuite/gcc.dg/torture/ssa-fre-6.c b/gcc/testsuite/gcc.dg/torture/ssa-fre-6.c --- a/gcc/testsuite/gcc.dg/torture/ssa-fre-6.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/torture/ssa-fre-6.c 2020-11-26 22:06:08.036000000 -0500 @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ +/* { dg-additional-options "-fgimple -fdump-tree-fre1" } */ + +typedef int v4si __attribute__((vector_size(16))); + +int __GIMPLE (ssa,startwith("fre")) +foo () +{ + int * p; + int i; + int x[4]; + long unsigned int _1; + long unsigned int _2; + int _7; + + __BB(2): + i_3 = 0; + _1 = (long unsigned int) i_3; + _2 = _1 * 4ul; + p_4 = _Literal (int *) &x + _2; + __MEM ((v4si *)p_4) = _Literal (v4si) {}; + _7 = x[0]; + return _7; +} + +/* { dg-final { scan-tree-dump "return 0;" "fre1" } } */ diff -urpN a/gcc/testsuite/gcc.dg/torture/ssa-fre-7.c b/gcc/testsuite/gcc.dg/torture/ssa-fre-7.c --- a/gcc/testsuite/gcc.dg/torture/ssa-fre-7.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/torture/ssa-fre-7.c 2020-11-26 22:06:08.036000000 -0500 @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */ +/* { dg-additional-options "-fgimple -fdump-tree-fre1" } */ + +typedef int v4si __attribute__((vector_size(16))); + +int __GIMPLE (ssa,startwith("fre")) +foo (int c) +{ + int * p; + int i; + int 
x[4]; + long unsigned int _1; + long unsigned int _2; + int _7; + v4si _6; + + __BB(2): + i_3 = 0; + _1 = (long unsigned int) i_3; + _2 = _1 * 4ul; + p_4 = _Literal (int *) &x + _2; + _6 = _Literal (v4si) { c_5(D), c_5(D), c_5(D), c_5(D) }; + __MEM ((v4si *)p_4) = _6; + _7 = x[0]; + return _7; +} + +/* { dg-final { scan-tree-dump "return c_5\\(D\\);" "fre1" } } */ diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c b/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c --- a/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c 2020-11-26 22:26:34.324000000 -0500 +++ b/gcc/testsuite/gcc.dg/tree-ssa/alias-access-path-1.c 2020-11-26 22:06:08.036000000 -0500 @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-tree-fre3" } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ struct foo { int val; @@ -18,4 +18,4 @@ test () return barptr->val2; } -/* { dg-final { scan-tree-dump-times "return 123" 1 "fre3"} } */ +/* { dg-final { scan-tree-dump-times "return 123" 1 "fre1"} } */ diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c --- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-10.c 2020-11-26 22:24:45.812000000 -0500 @@ -0,0 +1,29 @@ +/* PR tree-optimization/93582 */ +/* { dg-do compile { target int32 } } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ +/* { dg-final { scan-tree-dump "return 72876566;" "fre1" { target le } } } */ +/* { dg-final { scan-tree-dump "return 559957376;" "fre1" { target be } } } */ + +union U { + struct S { int a : 12, b : 5, c : 10, d : 5; } s; + unsigned int i; +}; +struct A { char a[12]; union U u; }; +void bar (struct A *); + +unsigned +foo (void) +{ + struct A a; + bar (&a); + a.u.s.a = 1590; + a.u.s.c = -404; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define M 0x67e0a5f +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define M 0xa5f067e0 +#else +#define M 0 +#endif + return a.u.i & M; +} diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c --- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-1.c 2020-11-26 22:18:39.368000000 -0500 @@ -0,0 +1,18 @@ +/* PR tree-optimization/93582 */ +/* { dg-do compile { target int32 } } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ +/* { dg-final { scan-tree-dump "return 1;" "fre1" } } */ + +union U { + struct S { int a : 1, b : 4, c : 27; } s; + struct T { int d : 2; int e : 2; int f : 28; } t; +}; + +int +foo (void) +{ + union U u; + u.s.b = 10; + return u.t.e; +} + diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c --- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-2.c 2020-11-26 22:18:44.832000000 -0500 @@ -0,0 +1,17 @@ +/* PR tree-optimization/93582 */ +/* { dg-do compile { target int32 } } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ +/* { dg-final { scan-tree-dump "return 593;" "fre1" } } */ + +union U { + struct S { int a : 1, b : 14, c : 17; } s; + struct T { int d : 2; int e : 12; int f : 18; } t; +}; + +int +foo (void) +{ + union U u; + u.s.b = -7005; + return u.t.e; +} diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c --- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-3.c 2020-11-26 
22:21:44.936000000 -0500 @@ -0,0 +1,19 @@ +/* PR tree-optimization/93582 */ +/* { dg-do compile { target int32 } } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ +/* { dg-final { scan-tree-dump "return 1;" "fre1" { target be } } } */ +/* { dg-final { scan-tree-dump "return 2;" "fre1" { target le } } } */ + +union U { + struct S { int a : 1, b : 14, c : 17; } s; + struct T { int d : 10; int e : 4; int f : 18; } t; +}; + +int +foo (void) +{ + union U u; + u.s.b = -7005; + return u.t.e; +} + diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-4.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-4.c --- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-4.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-4.c 2020-11-26 22:23:33.236000000 -0500 @@ -0,0 +1,24 @@ +/* PR tree-optimization/93582 */ +/* { dg-do compile { target int32 } } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ +/* { dg-final { scan-tree-dump "return -1991560811;" "fre1" { target le } } } */ +/* { dg-final { scan-tree-dump "return -733324916;" "fre1" { target be } } } */ + +union U { + struct S { int a : 1, b : 4, c : 27; } s; + unsigned int i; +}; +struct A { char a[24]; union U u; }; +void bar (struct A *); + +int +foo (void) +{ + struct A a; + bar (&a); + a.u.s.a = -1; + a.u.s.b = -6; + a.u.s.c = -62236276; + return a.u.i; +} + diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-5.c --- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-5.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-5.c 2020-11-26 22:23:38.324000000 -0500 @@ -0,0 +1,26 @@ +/* PR tree-optimization/93582 */ +/* { dg-do compile { target int32 } } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ +/* { dg-final { scan-tree-dump "return -1462729318;" "fre1" { target le } } } */ +/* { dg-final { scan-tree-dump "return 1300568597;" "fre1" { target be } } } */ + +union U { + struct S { int a : 1, b : 7, c : 8, d : 11, e : 5; } s; + unsigned int i; +}; +struct A { char a[8]; union U u; }; +void bar (struct A *); + +int +foo (void) +{ + struct A a; + bar (&a); + a.u.s.a = 0; + a.u.s.b = -51; + a.u.s.c = -123; + a.u.s.d = 208; + a.u.s.e = -11; + return a.u.i; +} + diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-6.c --- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-6.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-6.c 2020-11-26 22:23:42.348000000 -0500 @@ -0,0 +1,25 @@ +/* PR tree-optimization/93582 */ +/* { dg-do compile { target int32 } } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ +/* { dg-final { scan-tree-dump "return 890118;" "fre1" { target le } } } */ +/* { dg-final { scan-tree-dump "return 447899;" "fre1" { target be } } } */ + +union U { + struct S { int a : 16, b : 5, c : 10, d : 1; } s; + struct T { int a : 8, b : 21, c : 3; } t; +}; +struct A { char a[4]; union U u; }; +void bar (struct A *); + +int +foo (void) +{ + struct A a; + bar (&a); + a.u.s.a = 1590; + a.u.s.b = -11; + a.u.s.c = 620; + a.u.s.d = -1; + return a.u.t.b; +} + diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-7.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-7.c --- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-7.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-7.c 2020-11-26 22:23:45.756000000 -0500 @@ -0,0 +1,25 @@ +/* PR tree-optimization/93582 */ +/* { dg-do compile { target int32 } } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ +/* { dg-final { scan-tree-dump "return -413012;" "fre1" { target le } } } 
*/ +/* { dg-final { scan-tree-dump "return -611112;" "fre1" { target be } } } */ + +union U { + struct S { int a : 12, b : 5, c : 10, d : 5; } s; + struct T { int a : 7, b : 21, c : 4; } t; +}; +struct A { char a[48]; union U u; }; +void bar (struct A *); + +int +foo (void) +{ + struct A a; + bar (&a); + a.u.s.a = 1590; + a.u.s.b = -11; + a.u.s.c = -404; + a.u.s.d = 7; + return a.u.t.b; +} + diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-8.c b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-8.c --- a/gcc/testsuite/gcc.dg/tree-ssa/pr93582-8.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr93582-8.c 2020-11-26 22:23:53.088000000 -0500 @@ -0,0 +1,15 @@ +/* PR tree-optimization/93582 */ +/* { dg-do compile { target int32 } } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ +/* { dg-final { scan-tree-dump "return 0;" "fre1" { target le } } } */ +/* { dg-final { scan-tree-dump "return -8531;" "fre1" { target be } } } */ + +short +foo (void) +{ + union U { char c[32]; short s[16]; int i[8]; } u; + __builtin_memset (u.c + 1, '\0', 5); + u.s[3] = 0xdead; + return u.i[1]; +} + diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-82.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-82.c --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-82.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-82.c 2020-11-26 22:06:08.036000000 -0500 @@ -0,0 +1,25 @@ +/* { dg-do run } */ +/* { dg-options "-O -fdump-tree-fre1-details" } */ + +struct S { _Bool x; }; + +void +foo (struct S *s) +{ + __builtin_memset (s, 1, sizeof (struct S)); + s->x = 1; +} + +int +main () +{ + struct S s; + foo (&s); + char c; + __builtin_memcpy (&c, &s.x, 1); + if (c != 1) + __builtin_abort (); + return 0; +} + +/* { dg-final { scan-tree-dump "Deleted redundant store" "fre1" } } */ diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-83.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-83.c --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-83.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-83.c 2020-11-26 22:06:08.036000000 -0500 @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-fre1-details" } */ + +struct X +{ + int a : 1; + int b : 1; +} x; + +void foo (int v) +{ + x.a = 1; + x.b = v; + x.a = 1; + x.b = v; +} + +struct Y +{ + _Bool a; + _Bool b; +} y; + +void bar (int v) +{ + y.a = 1; + y.b = v; + y.a = 1; + y.b = v; +} + +/* { dg-final { scan-tree-dump-times "Deleted redundant store" 4 "fre1" } } */ diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-84.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-84.c --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-84.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-84.c 2020-11-26 22:06:08.036000000 -0500 @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-fre1" } */ + +typedef int v4si __attribute__((vector_size(16))); + +void foo (v4si *dst, int x) +{ + v4si v[2]; + v[0][0] = 1; + v[0][1] = x; + v[0][2] = 2; + v[0][3] = 3; + v[0][1] = 0; + *dst = v[0]; +} + +/* The shadowed non-constant assign to v[0][1] shouldn't prevent us from + value-numbering the load to a constant. 
*/ +/* { dg-final { scan-tree-dump "\\*dst_\[0-9\]*\\\(D\\) = { 1, 0, 2, 3 };" "fre1" } } */ diff -urpN a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-85.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-85.c --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-85.c 1969-12-31 19:00:00.000000000 -0500 +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-85.c 2020-11-26 22:06:08.036000000 -0500 @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fstrict-aliasing -fdump-tree-fre1-details" } */ + +struct X { int i; int j; }; + +struct X x, y; +void foo () +{ + x.i = 1; + y = x; + y.i = 1; // redundant +} + +/* { dg-final { scan-tree-dump "Deleted redundant store y.i" "fre1" } } */ diff -urpN a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c --- a/gcc/tree-ssa-alias.c 2020-11-26 22:26:32.884000000 -0500 +++ b/gcc/tree-ssa-alias.c 2020-11-26 22:06:08.036000000 -0500 @@ -2628,7 +2628,8 @@ static bool maybe_skip_until (gimple *phi, tree &target, basic_block target_bb, ao_ref *ref, tree vuse, bool tbaa_p, unsigned int &limit, bitmap *visited, bool abort_on_visited, - void *(*translate)(ao_ref *, tree, void *, bool *), + void *(*translate)(ao_ref *, tree, void *, translate_flags *), + translate_flags disambiguate_only, void *data) { basic_block bb = gimple_bb (phi); @@ -2663,7 +2664,7 @@ maybe_skip_until (gimple *phi, tree &tar return !abort_on_visited; vuse = get_continuation_for_phi (def_stmt, ref, tbaa_p, limit, visited, abort_on_visited, - translate, data); + translate, data, disambiguate_only); if (!vuse) return false; continue; @@ -2678,9 +2679,9 @@ maybe_skip_until (gimple *phi, tree &tar --limit; if (stmt_may_clobber_ref_p_1 (def_stmt, ref, tbaa_p)) { - bool disambiguate_only = true; + translate_flags tf = disambiguate_only; if (translate - && (*translate) (ref, vuse, data, &disambiguate_only) == NULL) + && (*translate) (ref, vuse, data, &tf) == NULL) ; else return false; @@ -2711,8 +2712,10 @@ tree get_continuation_for_phi (gimple *phi, ao_ref *ref, bool tbaa_p, unsigned int &limit, bitmap *visited, bool abort_on_visited, - void *(*translate)(ao_ref *, tree, void *, bool *), - void *data) + void *(*translate)(ao_ref *, tree, void *, + translate_flags *), + void *data, + translate_flags disambiguate_only) { unsigned nargs = gimple_phi_num_args (phi); @@ -2754,13 +2757,15 @@ get_continuation_for_phi (gimple *phi, a else if (! maybe_skip_until (phi, arg0, dom, ref, arg1, tbaa_p, limit, visited, abort_on_visited, - /* Do not translate when walking over + translate, + /* Do not valueize when walking over backedges. */ dominated_by_p (CDI_DOMINATORS, gimple_bb (SSA_NAME_DEF_STMT (arg1)), phi_bb) - ? NULL : translate, data)) + ? TR_DISAMBIGUATE + : disambiguate_only, data)) return NULL_TREE; } @@ -2798,7 +2803,8 @@ get_continuation_for_phi (gimple *phi, a void * walk_non_aliased_vuses (ao_ref *ref, tree vuse, bool tbaa_p, void *(*walker)(ao_ref *, tree, void *), - void *(*translate)(ao_ref *, tree, void *, bool *), + void *(*translate)(ao_ref *, tree, void *, + translate_flags *), tree (*valueize)(tree), unsigned &limit, void *data) { @@ -2851,7 +2857,7 @@ walk_non_aliased_vuses (ao_ref *ref, tre { if (!translate) break; - bool disambiguate_only = false; + translate_flags disambiguate_only = TR_TRANSLATE; res = (*translate) (ref, vuse, data, &disambiguate_only); /* Failed lookup and translation. */ if (res == (void *)-1) @@ -2863,7 +2869,7 @@ walk_non_aliased_vuses (ao_ref *ref, tre else if (res != NULL) break; /* Translation succeeded, continue walking. 
*/ - translated = translated || !disambiguate_only; + translated = translated || disambiguate_only == TR_TRANSLATE; } vuse = gimple_vuse (def_stmt); } diff -urpN a/gcc/tree-ssa-alias.h b/gcc/tree-ssa-alias.h --- a/gcc/tree-ssa-alias.h 2020-11-26 22:26:32.868000000 -0500 +++ b/gcc/tree-ssa-alias.h 2020-11-26 22:06:08.040000000 -0500 @@ -131,13 +131,18 @@ extern bool call_may_clobber_ref_p (gcal extern bool call_may_clobber_ref_p_1 (gcall *, ao_ref *); extern bool stmt_kills_ref_p (gimple *, tree); extern bool stmt_kills_ref_p (gimple *, ao_ref *); +enum translate_flags + { TR_TRANSLATE, TR_VALUEIZE_AND_DISAMBIGUATE, TR_DISAMBIGUATE }; extern tree get_continuation_for_phi (gimple *, ao_ref *, bool, unsigned int &, bitmap *, bool, - void *(*)(ao_ref *, tree, void *, bool *), - void *); + void *(*)(ao_ref *, tree, void *, + translate_flags *), + void *, translate_flags + = TR_VALUEIZE_AND_DISAMBIGUATE); extern void *walk_non_aliased_vuses (ao_ref *, tree, bool, void *(*)(ao_ref *, tree, void *), - void *(*)(ao_ref *, tree, void *, bool *), + void *(*)(ao_ref *, tree, void *, + translate_flags *), tree (*)(tree), unsigned &, void *); extern int walk_aliased_vdefs (ao_ref *, tree, bool (*)(ao_ref *, tree, void *), diff -urpN a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c --- a/gcc/tree-ssa-sccvn.c 2020-11-26 22:26:32.836000000 -0500 +++ b/gcc/tree-ssa-sccvn.c 2020-11-27 03:17:41.080000000 -0500 @@ -1684,24 +1684,75 @@ struct pd_data struct vn_walk_cb_data { - vn_walk_cb_data (vn_reference_t vr_, tree *last_vuse_ptr_, - vn_lookup_kind vn_walk_kind_, bool tbaa_p_) - : vr (vr_), last_vuse_ptr (last_vuse_ptr_), vn_walk_kind (vn_walk_kind_), - tbaa_p (tbaa_p_), known_ranges (NULL) - {} + vn_walk_cb_data (vn_reference_t vr_, tree orig_ref_, tree *last_vuse_ptr_, + vn_lookup_kind vn_walk_kind_, bool tbaa_p_, tree mask_) + : vr (vr_), last_vuse_ptr (last_vuse_ptr_), last_vuse (NULL_TREE), + mask (mask_), masked_result (NULL_TREE), vn_walk_kind (vn_walk_kind_), + tbaa_p (tbaa_p_), saved_operands (vNULL), first_set (-2), + known_ranges (NULL) + { + if (!last_vuse_ptr) + last_vuse_ptr = &last_vuse; + ao_ref_init (&orig_ref, orig_ref_); + if (mask) + { + wide_int w = wi::to_wide (mask); + unsigned int pos = 0, prec = w.get_precision (); + pd_data pd; + pd.rhs = build_constructor (NULL_TREE, NULL); + /* When bitwise and with a constant is done on a memory load, + we don't really need all the bits to be defined or defined + to constants, we don't really care what is in the position + corresponding to 0 bits in the mask. + So, push the ranges of those 0 bits in the mask as artificial + zero stores and let the partial def handling code do the + rest. 
*/ + while (pos < prec) + { + int tz = wi::ctz (w); + if (pos + tz > prec) + tz = prec - pos; + if (tz) + { + if (BYTES_BIG_ENDIAN) + pd.offset = prec - pos - tz; + else + pd.offset = pos; + pd.size = tz; + void *r = push_partial_def (pd, 0, prec); + gcc_assert (r == NULL_TREE); + } + pos += tz; + if (pos == prec) + break; + w = wi::lrshift (w, tz); + tz = wi::ctz (wi::bit_not (w)); + if (pos + tz > prec) + tz = prec - pos; + pos += tz; + w = wi::lrshift (w, tz); + } + } + } ~vn_walk_cb_data (); - void *push_partial_def (const pd_data& pd, tree, HOST_WIDE_INT); + void *finish (alias_set_type, tree); + void *push_partial_def (const pd_data& pd, alias_set_type, HOST_WIDE_INT); vn_reference_t vr; + ao_ref orig_ref; tree *last_vuse_ptr; + tree last_vuse; + tree mask; + tree masked_result; vn_lookup_kind vn_walk_kind; bool tbaa_p; + vec saved_operands; /* The VDEFs of partial defs we come along. */ auto_vec partial_defs; /* The first defs range to avoid splay tree setup in most cases. */ pd_range first_range; - tree first_vuse; + alias_set_type first_set; splay_tree known_ranges; obstack ranges_obstack; }; @@ -1713,6 +1764,23 @@ vn_walk_cb_data::~vn_walk_cb_data () splay_tree_delete (known_ranges); obstack_free (&ranges_obstack, NULL); } + saved_operands.release (); +} + +void * +vn_walk_cb_data::finish (alias_set_type set, tree val) +{ + if (first_set != -2) + set = first_set; + if (mask) + { + masked_result = val; + return (void *) -1; + } + vec &operands + = saved_operands.exists () ? saved_operands : vr->operands; + return vn_reference_lookup_or_insert_for_pieces (last_vuse, set, + vr->type, operands, val); } /* pd_range splay-tree helpers. */ @@ -1742,168 +1810,306 @@ pd_tree_dealloc (void *, void *) } /* Push PD to the vector of partial definitions returning a - value when we are ready to combine things with VUSE and MAXSIZEI, + value when we are ready to combine things with VUSE, SET and MAXSIZEI, NULL when we want to continue looking for partial defs or -1 on failure. */ void * -vn_walk_cb_data::push_partial_def (const pd_data &pd, tree vuse, - HOST_WIDE_INT maxsizei) +vn_walk_cb_data::push_partial_def (const pd_data &pd, + alias_set_type set, HOST_WIDE_INT maxsizei) { + const HOST_WIDE_INT bufsize = 64; + /* We're using a fixed buffer for encoding so fail early if the object + we want to interpret is bigger. */ + if (maxsizei > bufsize * BITS_PER_UNIT + || CHAR_BIT != 8 + || BITS_PER_UNIT != 8 + /* Not prepared to handle PDP endian. */ + || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) + return (void *)-1; + + bool pd_constant_p = (TREE_CODE (pd.rhs) == CONSTRUCTOR + || CONSTANT_CLASS_P (pd.rhs)); if (partial_defs.is_empty ()) { + if (!pd_constant_p) + return (void *)-1; partial_defs.safe_push (pd); first_range.offset = pd.offset; first_range.size = pd.size; - first_vuse = vuse; + first_set = set; last_vuse_ptr = NULL; + /* Continue looking for partial defs. */ + return NULL; + } + + if (!known_ranges) + { + /* ??? Optimize the case where the 2nd partial def completes things. */ + gcc_obstack_init (&ranges_obstack); + known_ranges = splay_tree_new_with_allocator (pd_range_compare, 0, 0, + pd_tree_alloc, + pd_tree_dealloc, this); + splay_tree_insert (known_ranges, + (splay_tree_key)&first_range.offset, + (splay_tree_value)&first_range); + } + + pd_range newr = { pd.offset, pd.size }; + splay_tree_node n; + pd_range *r; + /* Lookup the predecessor of offset + 1 and see if we need to merge. 
*/ + HOST_WIDE_INT loffset = newr.offset + 1; + if ((n = splay_tree_predecessor (known_ranges, (splay_tree_key)&loffset)) + && ((r = (pd_range *)n->value), true) + && ranges_known_overlap_p (r->offset, r->size + 1, + newr.offset, newr.size)) + { + /* Ignore partial defs already covered. */ + if (known_subrange_p (newr.offset, newr.size, r->offset, r->size)) + return NULL; + r->size = MAX (r->offset + r->size, newr.offset + newr.size) - r->offset; } else { - if (!known_ranges) - { - /* ??? Optimize the case where the second partial def - completes things. */ - gcc_obstack_init (&ranges_obstack); - known_ranges - = splay_tree_new_with_allocator (pd_range_compare, 0, 0, - pd_tree_alloc, - pd_tree_dealloc, this); - splay_tree_insert (known_ranges, - (splay_tree_key)&first_range.offset, - (splay_tree_value)&first_range); - } - if (known_ranges) - { - pd_range newr = { pd.offset, pd.size }; - splay_tree_node n; - pd_range *r; - /* Lookup the predecessor of offset + 1 and see if - we need to merge with it. */ - HOST_WIDE_INT loffset = newr.offset + 1; - if ((n = splay_tree_predecessor (known_ranges, - (splay_tree_key)&loffset)) - && ((r = (pd_range *)n->value), true) - && ranges_known_overlap_p (r->offset, r->size + 1, - newr.offset, newr.size)) - { - /* Ignore partial defs already covered. */ - if (known_subrange_p (newr.offset, newr.size, - r->offset, r->size)) - return NULL; - r->size = MAX (r->offset + r->size, - newr.offset + newr.size) - r->offset; - } - else - { - /* newr.offset wasn't covered yet, insert the - range. */ - r = XOBNEW (&ranges_obstack, pd_range); - *r = newr; - splay_tree_insert (known_ranges, - (splay_tree_key)&r->offset, - (splay_tree_value)r); - } - /* Merge r which now contains newr and is a member - of the splay tree with adjacent overlapping ranges. */ - pd_range *rafter; - while ((n = splay_tree_successor (known_ranges, - (splay_tree_key)&r->offset)) - && ((rafter = (pd_range *)n->value), true) - && ranges_known_overlap_p (r->offset, r->size + 1, - rafter->offset, rafter->size)) - { - r->size = MAX (r->offset + r->size, - rafter->offset + rafter->size) - r->offset; - splay_tree_remove (known_ranges, - (splay_tree_key)&rafter->offset); - } - partial_defs.safe_push (pd); - - /* Now we have merged newr into the range tree. - When we have covered [offseti, sizei] then the - tree will contain exactly one node which has - the desired properties and it will be 'r'. */ - if (known_subrange_p (0, maxsizei / BITS_PER_UNIT, - r->offset, r->size)) - { - /* Now simply native encode all partial defs - in reverse order. */ - unsigned ndefs = partial_defs.length (); - /* We support up to 512-bit values (for V8DFmode). */ - unsigned char buffer[64]; - int len; + /* newr.offset wasn't covered yet, insert the range. */ + r = XOBNEW (&ranges_obstack, pd_range); + *r = newr; + splay_tree_insert (known_ranges, (splay_tree_key)&r->offset, + (splay_tree_value)r); + } + /* Merge r which now contains newr and is a member of the splay tree with + adjacent overlapping ranges. */ + pd_range *rafter; + while ((n = splay_tree_successor (known_ranges, (splay_tree_key)&r->offset)) + && ((rafter = (pd_range *)n->value), true) + && ranges_known_overlap_p (r->offset, r->size + 1, + rafter->offset, rafter->size)) + { + r->size = MAX (r->offset + r->size, + rafter->offset + rafter->size) - r->offset; + splay_tree_remove (known_ranges, (splay_tree_key)&rafter->offset); + } + /* Non-constants are OK as long as they are shadowed by a constant. 
*/ + if (!pd_constant_p) + return (void *)-1; + partial_defs.safe_push (pd); + + /* Now we have merged newr into the range tree. When we have covered + [offseti, sizei] then the tree will contain exactly one node which has + the desired properties and it will be 'r'. */ + if (!known_subrange_p (0, maxsizei, r->offset, r->size)) + /* Continue looking for partial defs. */ + return NULL; - while (!partial_defs.is_empty ()) + /* Now simply native encode all partial defs in reverse order. */ + unsigned ndefs = partial_defs.length (); + /* We support up to 512-bit values (for V8DFmode). */ + unsigned char buffer[bufsize + 1]; + unsigned char this_buffer[bufsize + 1]; + int len; + + memset (buffer, 0, bufsize + 1); + unsigned needed_len = ROUND_UP (maxsizei, BITS_PER_UNIT) / BITS_PER_UNIT; + while (!partial_defs.is_empty ()) + { + pd_data pd = partial_defs.pop (); + unsigned int amnt; + if (TREE_CODE (pd.rhs) == CONSTRUCTOR) + { + /* Empty CONSTRUCTOR. */ + if (pd.size >= needed_len * BITS_PER_UNIT) + len = needed_len; + else + len = ROUND_UP (pd.size, BITS_PER_UNIT) / BITS_PER_UNIT; + memset (this_buffer, 0, len); + } + else + { + len = native_encode_expr (pd.rhs, this_buffer, bufsize, + MAX (0, -pd.offset) / BITS_PER_UNIT); + if (len <= 0 + || len < (ROUND_UP (pd.size, BITS_PER_UNIT) / BITS_PER_UNIT + - MAX (0, -pd.offset) / BITS_PER_UNIT)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Failed to encode %u " + "partial definitions\n", ndefs); + return (void *)-1; + } + } + + unsigned char *p = buffer; + HOST_WIDE_INT size = pd.size; + if (pd.offset < 0) + size -= ROUND_DOWN (-pd.offset, BITS_PER_UNIT); + this_buffer[len] = 0; + if (BYTES_BIG_ENDIAN) + { + /* LSB of this_buffer[len - 1] byte should be at + pd.offset + pd.size - 1 bits in buffer. */ + amnt = ((unsigned HOST_WIDE_INT) pd.offset + + pd.size) % BITS_PER_UNIT; + if (amnt) + shift_bytes_in_array_right (this_buffer, len + 1, amnt); + unsigned char *q = this_buffer; + unsigned int off = 0; + if (pd.offset >= 0) + { + unsigned int msk; + off = pd.offset / BITS_PER_UNIT; + gcc_assert (off < needed_len); + p = buffer + off; + if (size <= amnt) { - pd_data pd = partial_defs.pop (); - if (TREE_CODE (pd.rhs) == CONSTRUCTOR) - /* Empty CONSTRUCTOR. */ - memset (buffer + MAX (0, pd.offset), - 0, MIN ((HOST_WIDE_INT)sizeof (buffer) - - MAX (0, pd.offset), - pd.size + MIN (0, pd.offset))); - else + msk = ((1 << size) - 1) << (BITS_PER_UNIT - amnt); + *p = (*p & ~msk) | (this_buffer[len] & msk); + size = 0; + } + else + { + if (TREE_CODE (pd.rhs) != CONSTRUCTOR) + q = (this_buffer + len + - (ROUND_UP (size - amnt, BITS_PER_UNIT) + / BITS_PER_UNIT)); + if (pd.offset % BITS_PER_UNIT) { - len = native_encode_expr (pd.rhs, - buffer + MAX (0, pd.offset), - sizeof (buffer) - - MAX (0, pd.offset), - MAX (0, -pd.offset)); - if (len <= 0 - || len < (pd.size - MAX (0, -pd.offset))) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Failed to encode %u " - "partial definitions\n", ndefs); - return (void *)-1; - } + msk = -1U << (BITS_PER_UNIT + - (pd.offset % BITS_PER_UNIT)); + *p = (*p & msk) | (*q & ~msk); + p++; + q++; + off++; + size -= BITS_PER_UNIT - (pd.offset % BITS_PER_UNIT); + gcc_assert (size >= 0); } } - - tree type = vr->type; - /* Make sure to interpret in a type that has a range - covering the whole access size. 
*/ - if (INTEGRAL_TYPE_P (vr->type) - && maxsizei != TYPE_PRECISION (vr->type)) - type = build_nonstandard_integer_type (maxsizei, - TYPE_UNSIGNED (type)); - tree val = native_interpret_expr (type, buffer, - maxsizei / BITS_PER_UNIT); - /* If we chop off bits because the types precision doesn't - match the memory access size this is ok when optimizing - reads but not when called from the DSE code during - elimination. */ - if (val - && type != vr->type) + } + else if (TREE_CODE (pd.rhs) != CONSTRUCTOR) + { + q = (this_buffer + len + - (ROUND_UP (size - amnt, BITS_PER_UNIT) + / BITS_PER_UNIT)); + if (pd.offset % BITS_PER_UNIT) { - if (! int_fits_type_p (val, vr->type)) - val = NULL_TREE; - else - val = fold_convert (vr->type, val); + q++; + size -= BITS_PER_UNIT - ((unsigned HOST_WIDE_INT) pd.offset + % BITS_PER_UNIT); + gcc_assert (size >= 0); } - - if (val) + } + if ((unsigned HOST_WIDE_INT) size / BITS_PER_UNIT + off + > needed_len) + size = (needed_len - off) * BITS_PER_UNIT; + memcpy (p, q, size / BITS_PER_UNIT); + if (size % BITS_PER_UNIT) + { + unsigned int msk + = -1U << (BITS_PER_UNIT - (size % BITS_PER_UNIT)); + p += size / BITS_PER_UNIT; + q += size / BITS_PER_UNIT; + *p = (*q & msk) | (*p & ~msk); + } + } + else + { + size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT); + if (pd.offset >= 0) + { + /* LSB of this_buffer[0] byte should be at pd.offset bits + in buffer. */ + unsigned int msk; + amnt = pd.offset % BITS_PER_UNIT; + if (amnt) + shift_bytes_in_array_left (this_buffer, len + 1, amnt); + unsigned int off = pd.offset / BITS_PER_UNIT; + gcc_assert (off < needed_len); + p = buffer + off; + if (amnt + size < BITS_PER_UNIT) { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Successfully combined %u " - "partial definitions\n", ndefs); - return vn_reference_lookup_or_insert_for_pieces - (first_vuse, - vr->set, vr->type, vr->operands, val); + /* Low amnt bits come from *p, then size bits + from this_buffer[0] and the remaining again from + *p. */ + msk = ((1 << size) - 1) << amnt; + *p = (*p & ~msk) | (this_buffer[0] & msk); + size = 0; } - else + else if (amnt) { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Failed to interpret %u " - "encoded partial definitions\n", ndefs); - return (void *)-1; + msk = -1U << amnt; + *p = (*p & ~msk) | (this_buffer[0] & msk); + p++; + size -= (BITS_PER_UNIT - amnt); } } + else + { + amnt = (unsigned HOST_WIDE_INT) pd.offset % BITS_PER_UNIT; + if (amnt) + shift_bytes_in_array_left (this_buffer, len + 1, amnt); + } + memcpy (p, this_buffer + (amnt != 0), size / BITS_PER_UNIT); + p += size / BITS_PER_UNIT; + if (size % BITS_PER_UNIT) + { + unsigned int msk = -1U << (size % BITS_PER_UNIT); + *p = (this_buffer[(amnt != 0) + size / BITS_PER_UNIT] + & ~msk) | (*p & msk); + } } } - /* Continue looking for partial defs. */ - return NULL; + + tree type = vr->type; + /* Make sure to interpret in a type that has a range covering the whole + access size. 
*/ + if (INTEGRAL_TYPE_P (vr->type) && maxsizei != TYPE_PRECISION (vr->type)) + type = build_nonstandard_integer_type (maxsizei, TYPE_UNSIGNED (type)); + tree val; + if (BYTES_BIG_ENDIAN) + { + unsigned sz = needed_len; + if (maxsizei % BITS_PER_UNIT) + shift_bytes_in_array_right (buffer, needed_len, + BITS_PER_UNIT + - (maxsizei % BITS_PER_UNIT)); + if (INTEGRAL_TYPE_P (type)) + sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type)); + if (sz > needed_len) + { + memcpy (this_buffer + (sz - needed_len), buffer, needed_len); + val = native_interpret_expr (type, this_buffer, sz); + } + else + val = native_interpret_expr (type, buffer, needed_len); + } + else + val = native_interpret_expr (type, buffer, bufsize); + /* If we chop off bits because the types precision doesn't match the memory + access size this is ok when optimizing reads but not when called from + the DSE code during elimination. */ + if (val && type != vr->type) + { + if (! int_fits_type_p (val, vr->type)) + val = NULL_TREE; + else + val = fold_convert (vr->type, val); + } + if (val) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Successfully combined %u partial definitions\n", ndefs); + /* We are using the alias-set of the first store we encounter which + should be appropriate here. */ + return finish (first_set, val); + } + else + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Failed to interpret %u encoded partial definitions\n", ndefs); + return (void *)-1; + } } /* Callback for walk_non_aliased_vuses. Adjusts the vn_reference_t VR_ @@ -1923,7 +2129,10 @@ vn_reference_lookup_2 (ao_ref *op ATTRIB return NULL; if (data->last_vuse_ptr) - *data->last_vuse_ptr = vuse; + { + *data->last_vuse_ptr = vuse; + data->last_vuse = vuse; + } /* Fixup vuse and hash. 
*/ if (vr->vuse) @@ -1935,7 +2144,11 @@ vn_reference_lookup_2 (ao_ref *op ATTRIB hash = vr->hashcode; slot = valid_info->references->find_slot_with_hash (vr, hash, NO_INSERT); if (slot) - return *slot; + { + if ((*slot)->result && data->saved_operands.exists ()) + return data->finish (vr->set, (*slot)->result); + return *slot; + } return NULL; } @@ -2221,13 +2434,13 @@ adjust_offsets_for_equal_base_address (t static void * vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, - bool *disambiguate_only) + translate_flags *disambiguate_only) { vn_walk_cb_data *data = (vn_walk_cb_data *)data_; vn_reference_t vr = data->vr; gimple *def_stmt = SSA_NAME_DEF_STMT (vuse); tree base = ao_ref_base (ref); - HOST_WIDE_INT offseti, maxsizei; + HOST_WIDE_INT offseti = 0, maxsizei, sizei = 0; static vec lhs_ops; ao_ref lhs_ref; bool lhs_ref_ok = false; @@ -2242,8 +2455,11 @@ vn_reference_lookup_3 (ao_ref *ref, tree lhs_ops.truncate (0); basic_block saved_rpo_bb = vn_context_bb; vn_context_bb = gimple_bb (def_stmt); - copy_reference_ops_from_ref (lhs, &lhs_ops); - lhs_ops = valueize_refs_1 (lhs_ops, &valueized_anything, true); + if (*disambiguate_only <= TR_VALUEIZE_AND_DISAMBIGUATE) + { + copy_reference_ops_from_ref (lhs, &lhs_ops); + lhs_ops = valueize_refs_1 (lhs_ops, &valueized_anything, true); + } vn_context_bb = saved_rpo_bb; if (valueized_anything) { @@ -2253,7 +2469,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree if (lhs_ref_ok && !refs_may_alias_p_1 (ref, &lhs_ref, data->tbaa_p)) { - *disambiguate_only = true; + *disambiguate_only = TR_VALUEIZE_AND_DISAMBIGUATE; return NULL; } } @@ -2263,6 +2479,30 @@ vn_reference_lookup_3 (ao_ref *ref, tree lhs_ref_ok = true; } + /* Besides valueizing the LHS we can also use access-path based + disambiguation on the original non-valueized ref. */ + if (!ref->ref + && lhs_ref_ok + && data->orig_ref.ref) + { + /* We want to use the non-valueized LHS for this, but avoid redundant + work. */ + ao_ref *lref = &lhs_ref; + ao_ref lref_alt; + if (valueized_anything) + { + ao_ref_init (&lref_alt, lhs); + lref = &lref_alt; + } + if (!refs_may_alias_p_1 (&data->orig_ref, lref, data->tbaa_p)) + { + *disambiguate_only = (valueized_anything + ? TR_VALUEIZE_AND_DISAMBIGUATE + : TR_DISAMBIGUATE); + return NULL; + } + } + /* If we reach a clobbering statement try to skip it and see if we find a VN result with exactly the same value as the possible clobber. In this case we can ignore the clobber @@ -2299,7 +2539,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree } } } - else if (gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL) + else if (*disambiguate_only <= TR_VALUEIZE_AND_DISAMBIGUATE + && gimple_call_builtin_p (def_stmt, BUILT_IN_NORMAL) && gimple_call_num_args (def_stmt) <= 4) { /* For builtin calls valueize its arguments and call the @@ -2328,15 +2569,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree gimple_call_set_arg (def_stmt, i, oldargs[i]); if (!res) { - *disambiguate_only = true; + *disambiguate_only = TR_VALUEIZE_AND_DISAMBIGUATE; return NULL; } } } - /* If we are looking for redundant stores do not create new hashtable - entries from aliasing defs with made up alias-sets. 
*/ - if (*disambiguate_only || !data->tbaa_p) + if (*disambiguate_only > TR_TRANSLATE) return (void *)-1; /* If we cannot constrain the size of the reference we cannot @@ -2359,10 +2598,14 @@ vn_reference_lookup_3 (ao_ref *ref, tree && (integer_zerop (gimple_call_arg (def_stmt, 1)) || ((TREE_CODE (gimple_call_arg (def_stmt, 1)) == INTEGER_CST || (INTEGRAL_TYPE_P (vr->type) && known_eq (ref->size, 8))) - && CHAR_BIT == 8 && BITS_PER_UNIT == 8 + && CHAR_BIT == 8 + && BITS_PER_UNIT == 8 + && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN && offset.is_constant (&offseti) - && offseti % BITS_PER_UNIT == 0 && multiple_p (ref->size, BITS_PER_UNIT))) + && ref->size.is_constant (&sizei) + && (offseti % BITS_PER_UNIT == 0 + || TREE_CODE (gimple_call_arg (def_stmt, 1)) == INTEGER_CST) && poly_int_tree_p (gimple_call_arg (def_stmt, 2)) && (TREE_CODE (gimple_call_arg (def_stmt, 0)) == ADDR_EXPR || TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME)) @@ -2423,7 +2666,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree else return (void *)-1; tree len = gimple_call_arg (def_stmt, 2); - HOST_WIDE_INT leni, offset2i, offseti; + HOST_WIDE_INT leni, offset2i; + /* Sometimes the above trickery is smarter than alias analysis. Take + advantage of that. */ + if (!ranges_maybe_overlap_p (offset, maxsize, offset2, + (wi::to_poly_offset (len) + << LOG2_BITS_PER_UNIT))) + return NULL; if (data->partial_defs.is_empty () && known_subrange_p (offset, maxsize, offset2, wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT)) @@ -2432,7 +2681,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree if (integer_zerop (gimple_call_arg (def_stmt, 1))) val = build_zero_cst (vr->type); else if (INTEGRAL_TYPE_P (vr->type) - && known_eq (ref->size, 8)) + && known_eq (ref->size, 8) + && offseti % BITS_PER_UNIT == 0) { gimple_match_op res_op (gimple_match_cond::UNCOND, NOP_EXPR, vr->type, gimple_call_arg (def_stmt, 1)); @@ -2444,30 +2694,57 @@ vn_reference_lookup_3 (ao_ref *ref, tree } else { - unsigned len = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (vr->type)); - unsigned char *buf = XALLOCAVEC (unsigned char, len); + unsigned buflen = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (vr->type)) + + 1; + if (INTEGRAL_TYPE_P (vr->type)) + buflen = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (vr->type)) + 1; + unsigned char *buf = XALLOCAVEC (unsigned char, buflen); memset (buf, TREE_INT_CST_LOW (gimple_call_arg (def_stmt, 1)), - len); - val = native_interpret_expr (vr->type, buf, len); + buflen); + if (BYTES_BIG_ENDIAN) + { + unsigned int amnt + = (((unsigned HOST_WIDE_INT) offseti + sizei) + % BITS_PER_UNIT); + if (amnt) + { + shift_bytes_in_array_right (buf, buflen, + BITS_PER_UNIT - amnt); + buf++; + buflen--; + } + } + else if (offseti % BITS_PER_UNIT != 0) + { + unsigned int amnt + = BITS_PER_UNIT - ((unsigned HOST_WIDE_INT) offseti + % BITS_PER_UNIT); + shift_bytes_in_array_left (buf, buflen, amnt); + buf++; + buflen--; + } + val = native_interpret_expr (vr->type, buf, buflen); if (!val) return (void *)-1; } - return vn_reference_lookup_or_insert_for_pieces - (vuse, vr->set, vr->type, vr->operands, val); + return data->finish (0, val); } /* For now handle clearing memory with partial defs. 
*/ else if (known_eq (ref->size, maxsize) && integer_zerop (gimple_call_arg (def_stmt, 1)) && tree_to_poly_int64 (len).is_constant (&leni) + && leni <= INTTYPE_MAXIMUM (HOST_WIDE_INT) / BITS_PER_UNIT && offset.is_constant (&offseti) && offset2.is_constant (&offset2i) - && maxsize.is_constant (&maxsizei)) + && maxsize.is_constant (&maxsizei) + && ranges_known_overlap_p (offseti, maxsizei, offset2i, + leni << LOG2_BITS_PER_UNIT)) { pd_data pd; pd.rhs = build_constructor (NULL_TREE, NULL); - pd.offset = (offset2i - offseti) / BITS_PER_UNIT; - pd.size = leni; - return data->push_partial_def (pd, vuse, maxsizei); + pd.offset = offset2i - offseti; + pd.size = leni << LOG2_BITS_PER_UNIT; + return data->push_partial_def (pd, 0, maxsizei); } } @@ -2477,12 +2754,22 @@ vn_reference_lookup_3 (ao_ref *ref, tree && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (def_stmt)) == 0) { + tree lhs = gimple_assign_lhs (def_stmt); tree base2; poly_int64 offset2, size2, maxsize2; HOST_WIDE_INT offset2i, size2i; bool reverse; - base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt), - &offset2, &size2, &maxsize2, &reverse); + if (lhs_ref_ok) + { + base2 = ao_ref_base (&lhs_ref); + offset2 = lhs_ref.offset; + size2 = lhs_ref.size; + maxsize2 = lhs_ref.max_size; + reverse = reverse_storage_order_for_component_p (lhs); + } + else + base2 = get_ref_base_and_extent (lhs, + &offset2, &size2, &maxsize2, &reverse); if (known_size_p (maxsize2) && known_eq (maxsize2, size2) && adjust_offsets_for_equal_base_address (base, &offset, @@ -2492,24 +2779,21 @@ vn_reference_lookup_3 (ao_ref *ref, tree && known_subrange_p (offset, maxsize, offset2, size2)) { tree val = build_zero_cst (vr->type); - return vn_reference_lookup_or_insert_for_pieces - (vuse, vr->set, vr->type, vr->operands, val); + return data->finish (get_alias_set (lhs), val); } else if (known_eq (ref->size, maxsize) && maxsize.is_constant (&maxsizei) - && maxsizei % BITS_PER_UNIT == 0 && offset.is_constant (&offseti) - && offseti % BITS_PER_UNIT == 0 && offset2.is_constant (&offset2i) - && offset2i % BITS_PER_UNIT == 0 && size2.is_constant (&size2i) - && size2i % BITS_PER_UNIT == 0) + && ranges_known_overlap_p (offseti, maxsizei, + offset2i, size2i)) { pd_data pd; pd.rhs = gimple_assign_rhs1 (def_stmt); - pd.offset = (offset2i - offseti) / BITS_PER_UNIT; - pd.size = size2i / BITS_PER_UNIT; - return data->push_partial_def (pd, vuse, maxsizei); + pd.offset = offset2i - offseti; + pd.size = size2i; + return data->push_partial_def (pd, get_alias_set (lhs), maxsizei); } } } @@ -2520,28 +2804,36 @@ vn_reference_lookup_3 (ao_ref *ref, tree && is_gimple_reg_type (vr->type) && !contains_storage_order_barrier_p (vr->operands) && gimple_assign_single_p (def_stmt) - && CHAR_BIT == 8 && BITS_PER_UNIT == 8 + && CHAR_BIT == 8 + && BITS_PER_UNIT == 8 + && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN /* native_encode and native_decode operate on arrays of bytes and so fundamentally need a compile-time size and offset. 
*/ && maxsize.is_constant (&maxsizei) - && maxsizei % BITS_PER_UNIT == 0 && offset.is_constant (&offseti) - && offseti % BITS_PER_UNIT == 0 && (is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)) || (TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME && is_gimple_min_invariant (SSA_VAL (gimple_assign_rhs1 (def_stmt)))))) { + tree lhs = gimple_assign_lhs (def_stmt); tree base2; poly_int64 offset2, size2, maxsize2; HOST_WIDE_INT offset2i, size2i; bool reverse; - base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt), - &offset2, &size2, &maxsize2, &reverse); + if (lhs_ref_ok) + { + base2 = ao_ref_base (&lhs_ref); + offset2 = lhs_ref.offset; + size2 = lhs_ref.size; + maxsize2 = lhs_ref.max_size; + reverse = reverse_storage_order_for_component_p (lhs); + } + else + base2 = get_ref_base_and_extent (lhs, + &offset2, &size2, &maxsize2, &reverse); if (base2 && !reverse && known_eq (maxsize2, size2) - && multiple_p (size2, BITS_PER_UNIT) - && multiple_p (offset2, BITS_PER_UNIT) && adjust_offsets_for_equal_base_address (base, &offset, base2, &offset2) && offset.is_constant (&offseti) @@ -2552,37 +2844,80 @@ vn_reference_lookup_3 (ao_ref *ref, tree && known_subrange_p (offseti, maxsizei, offset2, size2)) { /* We support up to 512-bit values (for V8DFmode). */ - unsigned char buffer[64]; + unsigned char buffer[65]; int len; tree rhs = gimple_assign_rhs1 (def_stmt); if (TREE_CODE (rhs) == SSA_NAME) rhs = SSA_VAL (rhs); - unsigned pad = 0; - if (BYTES_BIG_ENDIAN - && is_a (TYPE_MODE (TREE_TYPE (rhs)))) - { - /* On big-endian the padding is at the 'front' so - just skip the initial bytes. */ - fixed_size_mode mode - = as_a (TYPE_MODE (TREE_TYPE (rhs))); - pad = GET_MODE_SIZE (mode) - size2i / BITS_PER_UNIT; - } len = native_encode_expr (rhs, - buffer, sizeof (buffer), - ((offseti - offset2i) / BITS_PER_UNIT - + pad)); + buffer, sizeof (buffer) - 1, + (offseti - offset2i) / BITS_PER_UNIT); if (len > 0 && len * BITS_PER_UNIT >= maxsizei) { tree type = vr->type; + unsigned char *buf = buffer; + unsigned int amnt = 0; /* Make sure to interpret in a type that has a range covering the whole access size. */ if (INTEGRAL_TYPE_P (vr->type) && maxsizei != TYPE_PRECISION (vr->type)) type = build_nonstandard_integer_type (maxsizei, TYPE_UNSIGNED (type)); - tree val = native_interpret_expr (type, buffer, - maxsizei / BITS_PER_UNIT); + if (BYTES_BIG_ENDIAN) + { + /* For big-endian native_encode_expr stored the rhs + such that the LSB of it is the LSB of buffer[len - 1]. + That bit is stored into memory at position + offset2 + size2 - 1, i.e. in byte + base + (offset2 + size2 - 1) / BITS_PER_UNIT. + E.g. for offset2 1 and size2 14, rhs -1 and memory + previously cleared that is: + 0 1 + 01111111|11111110 + Now, if we want to extract offset 2 and size 12 from + it using native_interpret_expr (which actually works + for integral bitfield types in terms of byte size of + the mode), the native_encode_expr stored the value + into buffer as + XX111111|11111111 + and returned len 2 (the X bits are outside of + precision). + Let sz be maxsize / BITS_PER_UNIT if not extracting + a bitfield, and GET_MODE_SIZE otherwise. + We need to align the LSB of the value we want to + extract as the LSB of buf[sz - 1]. + The LSB from memory we need to read is at position + offset + maxsize - 1. 
+		  HOST_WIDE_INT sz = maxsizei / BITS_PER_UNIT;
+		  if (INTEGRAL_TYPE_P (type))
+		    sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type));
+		  amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
+			  - offseti - maxsizei) % BITS_PER_UNIT;
+		  if (amnt)
+		    shift_bytes_in_array_right (buffer, len, amnt);
+		  amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
+			  - offseti - maxsizei - amnt) / BITS_PER_UNIT;
+		  if ((unsigned HOST_WIDE_INT) sz + amnt > (unsigned) len)
+		    len = 0;
+		  else
+		    {
+		      buf = buffer + len - sz - amnt;
+		      len -= (buf - buffer);
+		    }
+		}
+	      else
+		{
+		  amnt = ((unsigned HOST_WIDE_INT) offset2i
+			  - offseti) % BITS_PER_UNIT;
+		  if (amnt)
+		    {
+		      buffer[len] = 0;
+		      shift_bytes_in_array_left (buffer, len + 1, amnt);
+		      buf = buffer + 1;
+		    }
+		}
+	      tree val = native_interpret_expr (type, buf, len);
	      /* If we chop off bits because the types precision doesn't
		 match the memory access size this is ok when optimizing
		 reads but not when called from the DSE code during
@@ -2597,73 +2932,95 @@ vn_reference_lookup_3 (ao_ref *ref, tree
		}

	      if (val)
-		return vn_reference_lookup_or_insert_for_pieces
-		    (vuse, vr->set, vr->type, vr->operands, val);
+		return data->finish (get_alias_set (lhs), val);
	    }
	}
-      else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, size2i))
+      else if (ranges_known_overlap_p (offseti, maxsizei, offset2i,
+				       size2i))
	{
	  pd_data pd;
	  tree rhs = gimple_assign_rhs1 (def_stmt);
	  if (TREE_CODE (rhs) == SSA_NAME)
	    rhs = SSA_VAL (rhs);
	  pd.rhs = rhs;
-	  pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
-	  pd.size = size2i / BITS_PER_UNIT;
-	  return data->push_partial_def (pd, vuse, maxsizei);
+	  pd.offset = offset2i - offseti;
+	  pd.size = size2i;
+	  return data->push_partial_def (pd, get_alias_set (lhs), maxsizei);
	}
    }
 }
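The shift and alignment arithmetic added above can be replayed outside the compiler.  The following standalone sketch is hypothetical (8-bit bytes assumed; shift_right is a simplified restatement of shift_bytes_in_array_right) and walks through the numbers used in the comment: a 14-bit -1 stored at bit offset 1 on big-endian, from which bits 2..13 are extracted:

#include <stdio.h>

/* Shift the SZ bytes at PTR right by AMNT bits (0 < AMNT < 8),
   carrying bits between adjacent bytes, as the real helper does.  */
static void
shift_right (unsigned char *ptr, unsigned int sz, unsigned int amnt)
{
  unsigned char carry_over = 0, carry_mask = ~(~0U << amnt);
  if (amnt == 0)
    return;
  for (unsigned int i = 0; i < sz; i++)
    {
      unsigned char prev = carry_over;
      carry_over = ptr[i] & carry_mask;
      ptr[i] = (unsigned char) ((ptr[i] >> amnt) | (prev << (8 - amnt)));
    }
}

int
main (void)
{
  /* native_encode_expr output for the 14-bit -1 on big-endian:
     XX111111|11111111 (the X bits lie outside the precision).  */
  unsigned char buffer[2] = { 0x3f, 0xff };
  int len = 2;
  int offset2 = 1, size2 = 14;   /* position/size of the store, in bits */
  int offset = 2, maxsize = 12;  /* position/size of the load, in bits */

  int sz = 2;                    /* GET_MODE_SIZE of HImode for a 12-bit type */
  int amnt = (offset2 + size2 - offset - maxsize) % 8;        /* = 1 */
  if (amnt)
    shift_right (buffer, len, amnt);
  amnt = (offset2 + size2 - offset - maxsize - amnt) / 8;     /* = 0 */
  /* The real code also guards against sz + amnt > len.  */
  unsigned char *buf = buffer + len - sz - amnt;

  /* The low 12 bits of the two bytes at BUF are the extracted value.  */
  unsigned v = ((unsigned) buf[0] << 8 | buf[1]) & 0xfff;
  printf ("%#x\n", v);           /* prints 0xfff, i.e. the 12-bit -1 */
  return 0;
}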
   /* 4) Assignment from an SSA name which definition we may be able
-      to access pieces from.  */
+      to access pieces from or we can combine to a larger entity.  */
   else if (known_eq (ref->size, maxsize)
	   && is_gimple_reg_type (vr->type)
	   && !contains_storage_order_barrier_p (vr->operands)
	   && gimple_assign_single_p (def_stmt)
-	   && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
-	   /* A subset of partial defs from non-constants can be handled
-	      by for example inserting a CONSTRUCTOR, a COMPLEX_EXPR or
-	      even a (series of) BIT_INSERT_EXPR hoping for simplifications
-	      downstream, not so much for actually doing the insertion.  */
-	   && data->partial_defs.is_empty ())
+	   && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
    {
+      tree lhs = gimple_assign_lhs (def_stmt);
      tree base2;
      poly_int64 offset2, size2, maxsize2;
+      HOST_WIDE_INT offset2i, size2i, offseti;
      bool reverse;
-      base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
-				       &offset2, &size2, &maxsize2,
-				       &reverse);
+      if (lhs_ref_ok)
+	{
+	  base2 = ao_ref_base (&lhs_ref);
+	  offset2 = lhs_ref.offset;
+	  size2 = lhs_ref.size;
+	  maxsize2 = lhs_ref.max_size;
+	  reverse = reverse_storage_order_for_component_p (lhs);
+	}
+      else
+	base2 = get_ref_base_and_extent (lhs,
+					 &offset2, &size2, &maxsize2, &reverse);
      tree def_rhs = gimple_assign_rhs1 (def_stmt);
      if (!reverse
	  && known_size_p (maxsize2)
	  && known_eq (maxsize2, size2)
	  && adjust_offsets_for_equal_base_address (base, &offset,
-						    base2, &offset2)
-	  && known_subrange_p (offset, maxsize, offset2, size2)
-	  /* ??? We can't handle bitfield precision extracts without
-	     either using an alternate type for the BIT_FIELD_REF and
-	     then doing a conversion or possibly adjusting the offset
-	     according to endianness.  */
-	  && (! INTEGRAL_TYPE_P (vr->type)
-	      || known_eq (ref->size, TYPE_PRECISION (vr->type)))
-	  && multiple_p (ref->size, BITS_PER_UNIT)
-	  && (! INTEGRAL_TYPE_P (TREE_TYPE (def_rhs))
-	      || type_has_mode_precision_p (TREE_TYPE (def_rhs))))
-	{
-	  gimple_match_op op (gimple_match_cond::UNCOND,
-			      BIT_FIELD_REF, vr->type,
-			      vn_valueize (def_rhs),
-			      bitsize_int (ref->size),
-			      bitsize_int (offset - offset2));
-	  tree val = vn_nary_build_or_lookup (&op);
-	  if (val
-	      && (TREE_CODE (val) != SSA_NAME
-		  || ! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (val)))
-	    {
-	      vn_reference_t res = vn_reference_lookup_or_insert_for_pieces
-		  (vuse, vr->set, vr->type, vr->operands, val);
-	      return res;
+						    base2, &offset2))
+	{
+	  if (data->partial_defs.is_empty ()
+	      && known_subrange_p (offset, maxsize, offset2, size2)
+	      /* ??? We can't handle bitfield precision extracts without
+		 either using an alternate type for the BIT_FIELD_REF and
+		 then doing a conversion or possibly adjusting the offset
+		 according to endianness.  */
+	      && (! INTEGRAL_TYPE_P (vr->type)
+		  || known_eq (ref->size, TYPE_PRECISION (vr->type)))
+	      && multiple_p (ref->size, BITS_PER_UNIT))
+	    {
+	      if (known_eq (ref->size, size2))
+		return vn_reference_lookup_or_insert_for_pieces
+		    (vuse, get_alias_set (lhs), vr->type, vr->operands,
+		     SSA_VAL (def_rhs));
+	      else if (! INTEGRAL_TYPE_P (TREE_TYPE (def_rhs))
+		       || type_has_mode_precision_p (TREE_TYPE (def_rhs)))
+		{
+		  gimple_match_op op (gimple_match_cond::UNCOND,
+				      BIT_FIELD_REF, vr->type,
+				      SSA_VAL (def_rhs),
+				      bitsize_int (ref->size),
+				      bitsize_int (offset - offset2));
+		  tree val = vn_nary_build_or_lookup (&op);
+		  if (val
+		      && (TREE_CODE (val) != SSA_NAME
+			  || ! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (val)))
+		    return data->finish (get_alias_set (lhs), val);
+		}
+	    }
+	  else if (maxsize.is_constant (&maxsizei)
+		   && offset.is_constant (&offseti)
+		   && offset2.is_constant (&offset2i)
+		   && size2.is_constant (&size2i)
+		   && ranges_known_overlap_p (offset, maxsize, offset2, size2))
+	    {
+	      pd_data pd;
+	      pd.rhs = SSA_VAL (def_rhs);
+	      pd.offset = offset2i - offseti;
+	      pd.size = size2i;
+	      return data->push_partial_def (pd, get_alias_set (lhs), maxsizei);
	    }
	}
    }
@@ -2678,6 +3035,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree
	   /* Handling this is more complicated, give up for now.  */
	   && data->partial_defs.is_empty ())
    {
+      tree lhs = gimple_assign_lhs (def_stmt);
      tree base2;
      int i, j, k;
      auto_vec<vn_reference_op_s> rhs;
@@ -2747,7 +3105,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree
	}

      /* Now re-write REF to be based on the rhs of the assignment.  */
-      copy_reference_ops_from_ref (gimple_assign_rhs1 (def_stmt), &rhs);
+      tree rhs1 = gimple_assign_rhs1 (def_stmt);
+      copy_reference_ops_from_ref (rhs1, &rhs);

      /* Apply an extra offset to the inner MEM_REF of the RHS.  */
      if (maybe_ne (extra_off, 0))
@@ -2764,6 +3123,11 @@ vn_reference_lookup_3 (ao_ref *ref, tree
						extra_off));
	}

+      /* Save the operands since we need to use the original ones for
+	 the hash entry we use.  */
+      if (!data->saved_operands.exists ())
+	data->saved_operands = vr->operands.copy ();
+
      /* We need to pre-pend vr->operands[0..i] to rhs.  */
      vec<vn_reference_op_s> old = vr->operands;
      if (i + 1 + rhs.length () > vr->operands.length ())
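Case 4) above no longer bails out when partial definitions are already pending: a store of an SSA name is pushed with pd.rhs = SSA_VAL (def_rhs), so stores whose right-hand side value-numbers to a constant can take part in the combination.  A hypothetical illustration, assuming a little-endian layout (not taken from the patch series):

unsigned int
foo (void)
{
  union { unsigned short h[2]; unsigned int w; } u;
  unsigned short a = 1;   /* a and b are SSA names whose values are constants */
  unsigned short b = 2;
  u.h[0] = a;             /* stores of SSA names, handled by case 4) above */
  u.h[1] = b;
  return u.w;             /* partial defs are built from SSA_VAL, i.e. 1 and 2;
                             expected to fold to 0x00020001 on little-endian */
}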
@@ -2780,11 +3144,11 @@ vn_reference_lookup_3 (ao_ref *ref, tree
      /* Try folding the new reference to a constant.  */
      tree val = fully_constant_vn_reference_p (vr);
      if (val)
-	return vn_reference_lookup_or_insert_for_pieces
-	    (vuse, vr->set, vr->type, vr->operands, val);
+	return data->finish (get_alias_set (lhs), val);

      /* Adjust *ref from the new operands.  */
-      if (!ao_ref_init_from_vn_reference (&r, vr->set, vr->type, vr->operands))
+      if (!ao_ref_init_from_vn_reference (&r, get_alias_set (rhs1),
+					  vr->type, vr->operands))
	return (void *)-1;
      /* This can happen with bitfields.  */
      if (maybe_ne (ref->size, r.size))
@@ -2793,6 +3157,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree

      /* Do not update last seen VUSE after translating.  */
      data->last_vuse_ptr = NULL;
+      /* Invalidate the original access path since it now contains
+	 the wrong base.  */
+      data->orig_ref.ref = NULL_TREE;
+      /* Use the alias-set of this LHS for recording an eventual result.  */
+      if (data->first_set == -2)
+	data->first_set = get_alias_set (lhs);

      /* Keep looking for the adjusted *REF / VR pair.  */
      return NULL;
@@ -2912,6 +3282,11 @@ vn_reference_lookup_3 (ao_ref *ref, tree
      if (!known_subrange_p (at, byte_maxsize, lhs_offset, copy_size))
	return (void *)-1;

+      /* Save the operands since we need to use the original ones for
+	 the hash entry we use.  */
+      if (!data->saved_operands.exists ())
+	data->saved_operands = vr->operands.copy ();
+
      /* Make room for 2 operands in the new reference.  */
      if (vr->operands.length () < 2)
	{
@@ -2940,11 +3315,10 @@ vn_reference_lookup_3 (ao_ref *ref, tree
      /* Try folding the new reference to a constant.  */
      tree val = fully_constant_vn_reference_p (vr);
      if (val)
-	return vn_reference_lookup_or_insert_for_pieces
-	    (vuse, vr->set, vr->type, vr->operands, val);
+	return data->finish (0, val);

      /* Adjust *ref from the new operands.  */
-      if (!ao_ref_init_from_vn_reference (&r, vr->set, vr->type, vr->operands))
+      if (!ao_ref_init_from_vn_reference (&r, 0, vr->type, vr->operands))
	return (void *)-1;
      /* This can happen with bitfields.  */
      if (maybe_ne (ref->size, r.size))
@@ -2953,6 +3327,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree

      /* Do not update last seen VUSE after translating.  */
      data->last_vuse_ptr = NULL;
+      /* Invalidate the original access path since it now contains
+	 the wrong base.  */
+      data->orig_ref.ref = NULL_TREE;
+      /* Use the alias-set of this stmt for recording an eventual result.  */
+      if (data->first_set == -2)
+	data->first_set = 0;

      /* Keep looking for the adjusted *REF / VR pair.  */
      return NULL;
@@ -3013,13 +3393,13 @@ vn_reference_lookup_pieces (tree vuse, a
 {
  ao_ref r;
  unsigned limit = PARAM_VALUE (PARAM_SCCVN_MAX_ALIAS_QUERIES_PER_ACCESS);
-  vn_walk_cb_data data (&vr1, NULL, kind, true);
+  vn_walk_cb_data data (&vr1, NULL_TREE, NULL, kind, true, NULL_TREE);
  if (ao_ref_init_from_vn_reference (&r, set, type, vr1.operands))
-    *vnresult =
-      (vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse, true,
-					      vn_reference_lookup_2,
-					      vn_reference_lookup_3,
-					      vuse_valueize, limit, &data);
+    *vnresult
+      = ((vn_reference_t)
+	 walk_non_aliased_vuses (&r, vr1.vuse, true, vn_reference_lookup_2,
				 vn_reference_lookup_3, vuse_valueize,
				 limit, &data));
  gcc_checking_assert (vr1.operands == shared_lookup_references);
 }
@@ -3035,15 +3415,19 @@ vn_reference_lookup_pieces (tree vuse, a
   was NULL.. VNRESULT will be filled in with the vn_reference_t
   stored in the hashtable if one exists.  When TBAA_P is false assume
   we are looking up a store and treat it as having alias-set zero.
-   *LAST_VUSE_PTR will be updated with the VUSE the value lookup succeeded.  */
+   *LAST_VUSE_PTR will be updated with the VUSE the value lookup succeeded.
+   MASK is either NULL_TREE, or can be an INTEGER_CST if the result of the
+   load is bitwise anded with MASK and so we are only interested in a subset
+   of the bits and can ignore if the other bits are uninitialized or
+   not initialized with constants.  */

 tree
 vn_reference_lookup (tree op, tree vuse, vn_lookup_kind kind,
-		     vn_reference_t *vnresult, bool tbaa_p, tree *last_vuse_ptr)
+		     vn_reference_t *vnresult, bool tbaa_p,
+		     tree *last_vuse_ptr, tree mask)
 {
  vec<vn_reference_op_s> operands;
  struct vn_reference_s vr1;
-  tree cst;
  bool valuezied_anything;

  if (vnresult)
@@ -3055,11 +3439,11 @@ vn_reference_lookup (tree op, tree vuse,
  vr1.type = TREE_TYPE (op);
  vr1.set = get_alias_set (op);
  vr1.hashcode = vn_reference_compute_hash (&vr1);
-  if ((cst = fully_constant_vn_reference_p (&vr1)))
-    return cst;
+  if (mask == NULL_TREE)
+    if (tree cst = fully_constant_vn_reference_p (&vr1))
+      return cst;

-  if (kind != VN_NOWALK
-      && vr1.vuse)
+  if (kind != VN_NOWALK && vr1.vuse)
    {
      vn_reference_t wvnresult;
      ao_ref r;
@@ -3070,23 +3454,32 @@ vn_reference_lookup (tree op, tree vuse,
	  || !ao_ref_init_from_vn_reference (&r, vr1.set, vr1.type,
					     vr1.operands))
	ao_ref_init (&r, op);
-      vn_walk_cb_data data (&vr1, last_vuse_ptr, kind, tbaa_p);
-      wvnresult =
-	(vn_reference_t)walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p,
-						vn_reference_lookup_2,
-						vn_reference_lookup_3,
-						vuse_valueize, limit, &data);
+      vn_walk_cb_data data (&vr1, r.ref ? NULL_TREE : op,
+			    last_vuse_ptr, kind, tbaa_p, mask);
+
+      wvnresult
+	= ((vn_reference_t)
+	   walk_non_aliased_vuses (&r, vr1.vuse, tbaa_p, vn_reference_lookup_2,
				   vn_reference_lookup_3, vuse_valueize, limit,
				   &data));
      gcc_checking_assert (vr1.operands == shared_lookup_references);

      if (wvnresult)
	{
+	  gcc_assert (mask == NULL_TREE);
	  if (vnresult)
	    *vnresult = wvnresult;
	  return wvnresult->result;
	}
+      else if (mask)
+	return data.masked_result;

      return NULL_TREE;
    }
+  if (last_vuse_ptr)
+    *last_vuse_ptr = vr1.vuse;
+  if (mask)
+    return NULL_TREE;

  return vn_reference_lookup_1 (&vr1, vnresult);
 }
@@ -4333,7 +4726,39 @@ visit_nary_op (tree lhs, gassign *stmt)
	    }
	}
    }
-      default:;
+      break;
+    case BIT_AND_EXPR:
+      if (INTEGRAL_TYPE_P (type)
+	  && TREE_CODE (rhs1) == SSA_NAME
+	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST
+	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1)
+	  && default_vn_walk_kind != VN_NOWALK
+	  && CHAR_BIT == 8
+	  && BITS_PER_UNIT == 8
+	  && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
+	  && !integer_all_onesp (gimple_assign_rhs2 (stmt))
+	  && !integer_zerop (gimple_assign_rhs2 (stmt)))
+	{
+	  gassign *ass = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (rhs1));
+	  if (ass
+	      && !gimple_has_volatile_ops (ass)
+	      && vn_get_stmt_kind (ass) == VN_REFERENCE)
+	    {
+	      tree last_vuse = gimple_vuse (ass);
+	      tree op = gimple_assign_rhs1 (ass);
+	      tree result = vn_reference_lookup (op, gimple_vuse (ass),
+						 default_vn_walk_kind,
+						 NULL, true, &last_vuse,
+						 gimple_assign_rhs2 (stmt));
+	      if (result
+		  && useless_type_conversion_p (TREE_TYPE (result),
						TREE_TYPE (op)))
+		return set_ssa_val_to (lhs, result);
+	    }
+	}
+      break;
+    default:
+      break;
    }

  bool changed = set_ssa_val_to (lhs, lhs);
@@ -4844,14 +5269,14 @@ visit_stmt (gimple *stmt, bool backedges
	  switch (vn_get_stmt_kind (ass))
	    {
	    case VN_NARY:
-	      changed = visit_nary_op (lhs, ass);
-	      break;
+	      changed = visit_nary_op (lhs, ass);
+	      break;
	    case VN_REFERENCE:
-	      changed = visit_reference_op_load (lhs, rhs1, ass);
-	      break;
+	      changed = visit_reference_op_load (lhs, rhs1, ass);
+	      break;
	    default:
-	      changed = defs_to_varying (ass);
-	      break;
+	      changed = defs_to_varying (ass);
+	      break;
	    }
	}
    }
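The new BIT_AND_EXPR case in visit_nary_op re-does the reference lookup for a load whose result is masked with an integer constant, passing the mask through the new last argument of vn_reference_lookup so that only the masked bits have to be covered by known partial definitions.  A hypothetical sketch of the kind of code this targets (little-endian assumed, not part of the backported patches):

union U { unsigned int i; unsigned char c[4]; } u;

int
foo (void)
{
  u.c[0] = 0x11;       /* only the low byte has a known value */
  return u.i & 0xff;   /* the mask makes the remaining, unknown bytes
                          irrelevant; expected to fold to 0x11 */
}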
@@ -5525,8 +5950,48 @@ eliminate_dom_walker::eliminate_stmt (ba
	  tree val;
	  tree rhs = gimple_assign_rhs1 (stmt);
	  vn_reference_t vnresult;
-	  val = vn_reference_lookup (lhs, gimple_vuse (stmt), VN_WALKREWRITE,
-				     &vnresult, false);
+	  /* ??? gcc.dg/torture/pr91445.c shows that we lookup a boolean
+	     typed load of a byte known to be 0x11 as 1 so a store of
+	     a boolean 1 is detected as redundant.  Because of this we
+	     have to make sure to lookup with a ref where its size
+	     matches the precision.  */
+	  tree lookup_lhs = lhs;
+	  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
+	      && (TREE_CODE (lhs) != COMPONENT_REF
+		  || !DECL_BIT_FIELD_TYPE (TREE_OPERAND (lhs, 1)))
+	      && !type_has_mode_precision_p (TREE_TYPE (lhs)))
+	    {
+	      if (TREE_CODE (lhs) == COMPONENT_REF
+		  || TREE_CODE (lhs) == MEM_REF)
+		{
+		  tree ltype = build_nonstandard_integer_type
+				 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (lhs))),
+				  TYPE_UNSIGNED (TREE_TYPE (lhs)));
+		  if (TREE_CODE (lhs) == COMPONENT_REF)
+		    {
+		      tree foff = component_ref_field_offset (lhs);
+		      tree f = TREE_OPERAND (lhs, 1);
+		      if (!poly_int_tree_p (foff))
+			lookup_lhs = NULL_TREE;
+		      else
+			lookup_lhs = build3 (BIT_FIELD_REF, ltype,
+					     TREE_OPERAND (lhs, 0),
+					     TYPE_SIZE (TREE_TYPE (lhs)),
+					     bit_from_pos
+					       (foff, DECL_FIELD_BIT_OFFSET (f)));
+		    }
+		  else
+		    lookup_lhs = build2 (MEM_REF, ltype,
+					 TREE_OPERAND (lhs, 0),
+					 TREE_OPERAND (lhs, 1));
+		}
+	      else
+		lookup_lhs = NULL_TREE;
+	    }
+	  val = NULL_TREE;
+	  if (lookup_lhs)
+	    val = vn_reference_lookup (lookup_lhs, gimple_vuse (stmt),
+				       VN_WALKREWRITE, &vnresult, false);
	  if (TREE_CODE (rhs) == SSA_NAME)
	    rhs = VN_INFO (rhs)->valnum;
	  if (val
diff -urpN a/gcc/tree-ssa-sccvn.h b/gcc/tree-ssa-sccvn.h
--- a/gcc/tree-ssa-sccvn.h	2020-11-26 22:26:32.856000000 -0500
+++ b/gcc/tree-ssa-sccvn.h	2020-11-26 22:06:08.040000000 -0500
@@ -235,7 +235,7 @@ tree vn_reference_lookup_pieces (tree, a
				 vec<vn_reference_op_s> ,
				 vn_reference_t *, vn_lookup_kind);
 tree vn_reference_lookup (tree, tree, vn_lookup_kind, vn_reference_t *, bool,
-			  tree * = NULL);
+			  tree * = NULL, tree = NULL_TREE);
 void vn_reference_lookup_call (gcall *, vn_reference_t *, vn_reference_t);
 vn_reference_t vn_reference_insert_pieces (tree, alias_set_type, tree,
					   vec<vn_reference_op_s> ,
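The lookup_lhs handling above addresses the situation described in its comment (cf. gcc.dg/torture/pr91445.c): a _Bool-typed lookup of a byte holding 0x11 yields 1, which would make a following store of 1 look redundant even though it changes the byte.  A hypothetical reduced illustration (not taken from the testsuite):

union U { unsigned char c; _Bool b; } u;

void
foo (void)
{
  u.c = 0x11;
  u.b = 1;   /* must not be removed as a redundant store: the byte becomes 1,
                not 0x11; looking up the store's lhs as a full-byte integer
                type avoids the false "same value" conclusion */
}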