!49 Upload GCC feature and bugfix patches

From: @eastb233
Reviewed-by: @jdkboy
Signed-off-by: @jdkboy
openeuler-ci-bot 2020-12-30 10:51:55 +08:00 committed by Gitee
commit a390fe7f7a
49 changed files with 479702 additions and 76 deletions


@@ -0,0 +1,67 @@
This backport contains 1 patch from the GCC mainline tree.
The patch and its upstream commit ID are listed below.
0001-AArch64-Fix-constraints-for-CPY-M.patch
3c2707f33af46ac145769872b65e25fd0b870903
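
For context: the Upa constraint accepts any SVE predicate register (p0-p15), while Upl accepts only the low predicates (p0-p7). The merging CPY forms generated by the tied alternatives of this pattern encode the governing predicate in a 3-bit field, so the old Upa constraint could let the register allocator pick a predicate the instruction cannot encode. The sketch below is illustrative only (not part of the patch); it pins the predicate to a high register, so correct code must first copy it into p0-p7 before the predicated move, which is what the new cpy_1.c test checks for.

#include <arm_sve.h>

/* Illustrative sketch, not from the patch: with the governing predicate
   pinned to p10, the merging dup must copy it into one of p0-p7 first,
   since the /M form of CPY cannot encode p8-p15.  */
svint32_t
dup_w0_merge (svint32_t z0, int32_t w0)
{
  register svbool_t pg asm ("p10");
  asm volatile ("" : "=Upa" (pg));
  return svdup_s32_m (z0, pg, w0 + 1);
}
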
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index cbf29a82e28..59bf4a69507 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -6523,7 +6523,7 @@
(define_insn "@aarch64_sel_dup<mode>"
[(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
(unspec:SVE_FULL
- [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upl, Upl, Upl, Upl")
+ [(match_operand:<VPRED> 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
(vec_duplicate:SVE_FULL
(match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
(match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c
new file mode 100644
index 00000000000..1d8f429caeb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** dup_x0_m:
+** add (x[0-9]+), x0, #?1
+** mov (p[0-7])\.b, p15\.b
+** mov z0\.d, \2/m, \1
+** ret
+*/
+svuint64_t
+dup_x0_m (svuint64_t z0, uint64_t x0)
+{
+ register svbool_t pg asm ("p15");
+ asm volatile ("" : "=Upa" (pg));
+ return svdup_u64_m (z0, pg, x0 + 1);
+}
+
+/*
+** dup_d1_z:
+** mov (p[0-7])\.b, p15\.b
+** mov z0\.d, \1/m, d1
+** ret
+*/
+svfloat64_t
+dup_d1_z (svfloat64_t z0, float64_t d1)
+{
+ register svbool_t pg asm ("p15");
+ asm volatile ("" : "=Upa" (pg));
+ return svdup_f64_m (z0, pg, d1);
+}
+
+#ifdef __cplusplus
+}
+#endif


@@ -0,0 +1,694 @@
This backport contains 1 patch from the GCC mainline tree.
The patch and its upstream commit ID are listed below.
0001-Apply-maximum-nunits-for-BB-SLP.patch
9b75f56d4b7951c60a656396dddd4a65787b95bc
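
In short, this change lets basic-block SLP cap the number of vector elements at the SLP group size instead of always using the target's natural vector type, retrying with successively smaller powers of two when the natural choice is too wide. A minimal illustration (mine, not from the patch) of the kind of small store group that benefits:

/* Illustrative only: a two-statement store group.  With the group-size
   cap, BB SLP can choose a 2-element vector (for example a 64-bit V2SI
   on AArch64) rather than giving up because the natural 4-element
   vector type is wider than the group.  */
void
add_pair (int *restrict x, int *restrict y)
{
  x[0] += y[0];
  x[1] += y[1];
}
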
diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-4.c b/gcc/testsuite/gcc.dg/vect/bb-slp-4.c
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-4.c 2020-12-20 18:46:19.539633230 +0800
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-4.c 2020-12-20 18:48:12.799633230 +0800
@@ -38,5 +38,4 @@ int main (void)
return 0;
}
-/* { dg-final { scan-tree-dump-times "basic block vectorized" 0 "slp2" } } */
-
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-bool-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-bool-1.c
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-bool-1.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-bool-1.c 2020-12-20 18:48:12.799633230 +0800
@@ -0,0 +1,44 @@
+#include "tree-vect.h"
+
+void __attribute__ ((noipa))
+f1 (_Bool *x, unsigned short *y)
+{
+ x[0] = (y[0] == 1);
+ x[1] = (y[1] == 1);
+}
+
+void __attribute__ ((noipa))
+f2 (_Bool *x, unsigned short *y)
+{
+ x[0] = (y[0] == 1);
+ x[1] = (y[1] == 1);
+ x[2] = (y[2] == 1);
+ x[3] = (y[3] == 1);
+ x[4] = (y[4] == 1);
+ x[5] = (y[5] == 1);
+ x[6] = (y[6] == 1);
+ x[7] = (y[7] == 1);
+}
+
+_Bool x[8];
+unsigned short y[8] = { 11, 1, 9, 5, 1, 44, 1, 1 };
+
+int
+main (void)
+{
+ check_vect ();
+
+ f1 (x, y);
+
+ if (x[0] || !x[1])
+ __builtin_abort ();
+
+ x[1] = 0;
+
+ f2 (x, y);
+
+ if (x[0] || !x[1] || x[2] | x[3] || !x[4] || x[5] || !x[6] || !x[7])
+ __builtin_abort ();
+
+ return 0;
+}
diff -Nurp a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_14.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_14.c
--- a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_14.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_14.c 2020-12-20 18:48:11.811633230 +0800
@@ -0,0 +1,26 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** foo:
+** (
+** ldr d([0-9]+), \[x1\]
+** ldr q([0-9]+), \[x0\]
+** saddw v([0-9]+)\.4s, v\2\.4s, v\1\.4h
+** str q\3, \[x0\]
+** |
+** ldr q([0-9]+), \[x0\]
+** ldr d([0-9]+), \[x1\]
+** saddw v([0-9]+)\.4s, v\4\.4s, v\5\.4h
+** str q\6, \[x0\]
+** )
+** ret
+*/
+void
+foo (int *x, short *y)
+{
+ x[0] += y[0];
+ x[1] += y[1];
+ x[2] += y[2];
+ x[3] += y[3];
+}
diff -Nurp a/gcc/testsuite/gcc.target/i386/pr84101.c b/gcc/testsuite/gcc.target/i386/pr84101.c
--- a/gcc/testsuite/gcc.target/i386/pr84101.c 2020-12-20 18:46:18.383633230 +0800
+++ b/gcc/testsuite/gcc.target/i386/pr84101.c 2020-12-20 18:48:11.611633230 +0800
@@ -18,4 +18,5 @@ uint64_pair_t pair(int num)
return p ;
}
-/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp2" } } */
+/* See PR92266 for the XFAIL. */
+/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp2" { xfail ilp32 } } } */
diff -Nurp a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
--- a/gcc/tree-vect-data-refs.c 2020-12-20 18:46:19.911633230 +0800
+++ b/gcc/tree-vect-data-refs.c 2020-12-20 18:48:11.047633230 +0800
@@ -4312,9 +4312,8 @@ vect_analyze_data_refs (vec_info *vinfo,
/* Set vectype for STMT. */
scalar_type = TREE_TYPE (DR_REF (dr));
- STMT_VINFO_VECTYPE (stmt_info)
- = get_vectype_for_scalar_type (vinfo, scalar_type);
- if (!STMT_VINFO_VECTYPE (stmt_info))
+ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+ if (!vectype)
{
if (dump_enabled_p ())
{
@@ -4345,14 +4344,19 @@ vect_analyze_data_refs (vec_info *vinfo,
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"got vectype for stmt: %G%T\n",
- stmt_info->stmt, STMT_VINFO_VECTYPE (stmt_info));
+ stmt_info->stmt, vectype);
}
/* Adjust the minimal vectorization factor according to the
vector type. */
- vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
+ vf = TYPE_VECTOR_SUBPARTS (vectype);
*min_vf = upper_bound (*min_vf, vf);
+ /* Leave the BB vectorizer to pick the vector type later, based on
+ the final dataref group size and SLP node size. */
+ if (is_a <loop_vec_info> (vinfo))
+ STMT_VINFO_VECTYPE (stmt_info) = vectype;
+
if (gatherscatter != SG_NONE)
{
gather_scatter_info gs_info;
diff -Nurp a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
--- a/gcc/tree-vect-patterns.c 2020-12-20 18:46:19.979633230 +0800
+++ b/gcc/tree-vect-patterns.c 2020-12-20 18:48:11.227633230 +0800
@@ -4142,9 +4142,10 @@ vect_recog_bool_pattern (stmt_vec_info s
&& STMT_VINFO_DATA_REF (stmt_vinfo))
{
stmt_vec_info pattern_stmt_info;
- vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
- gcc_assert (vectype != NULL_TREE);
- if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
+ tree nunits_vectype;
+ if (!vect_get_vector_types_for_stmt (stmt_vinfo, &vectype,
+ &nunits_vectype)
+ || !VECTOR_MODE_P (TYPE_MODE (vectype)))
return NULL;
if (check_bool_pattern (var, vinfo, bool_stmts))
diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
--- a/gcc/tree-vect-slp.c 2020-12-20 18:46:17.763633230 +0800
+++ b/gcc/tree-vect-slp.c 2020-12-20 18:48:11.227633230 +0800
@@ -606,6 +606,77 @@ again:
return 0;
}
+/* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
+ Return true if we can, meaning that this choice doesn't conflict with
+ existing SLP nodes that use STMT_INFO. */
+
+static bool
+vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
+{
+ tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
+ if (old_vectype && useless_type_conversion_p (vectype, old_vectype))
+ return true;
+
+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
+ && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
+ {
+ /* We maintain the invariant that if any statement in the group is
+ used, all other members of the group have the same vector type. */
+ stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
+ stmt_vec_info member_info = first_info;
+ for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
+ if (STMT_VINFO_NUM_SLP_USES (member_info) > 0
+ || is_pattern_stmt_p (member_info))
+ break;
+
+ if (!member_info)
+ {
+ for (member_info = first_info; member_info;
+ member_info = DR_GROUP_NEXT_ELEMENT (member_info))
+ STMT_VINFO_VECTYPE (member_info) = vectype;
+ return true;
+ }
+ }
+ else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0
+ && !is_pattern_stmt_p (stmt_info))
+ {
+ STMT_VINFO_VECTYPE (stmt_info) = vectype;
+ return true;
+ }
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Build SLP failed: incompatible vector"
+ " types for: %G", stmt_info->stmt);
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " old vector type: %T\n", old_vectype);
+ dump_printf_loc (MSG_NOTE, vect_location,
+ " new vector type: %T\n", vectype);
+ }
+ return false;
+}
+
+/* Try to infer and assign a vector type to all the statements in STMTS.
+ Used only for BB vectorization. */
+
+static bool
+vect_update_all_shared_vectypes (vec<stmt_vec_info> stmts)
+{
+ tree vectype, nunits_vectype;
+ if (!vect_get_vector_types_for_stmt (stmts[0], &vectype,
+ &nunits_vectype, stmts.length ()))
+ return false;
+
+ stmt_vec_info stmt_info;
+ unsigned int i;
+ FOR_EACH_VEC_ELT (stmts, i, stmt_info)
+ if (!vect_update_shared_vectype (stmt_info, vectype))
+ return false;
+
+ return true;
+}
+
/* Return true if call statements CALL1 and CALL2 are similar enough
to be combined into the same SLP group. */
@@ -751,6 +822,7 @@ vect_build_slp_tree_1 (unsigned char *sw
stmt_vec_info stmt_info;
FOR_EACH_VEC_ELT (stmts, i, stmt_info)
{
+ vec_info *vinfo = stmt_info->vinfo;
gimple *stmt = stmt_info->stmt;
swap[i] = 0;
matches[i] = false;
@@ -784,7 +856,7 @@ vect_build_slp_tree_1 (unsigned char *sw
tree nunits_vectype;
if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
- &nunits_vectype)
+ &nunits_vectype, group_size)
|| (nunits_vectype
&& !vect_record_max_nunits (stmt_info, group_size,
nunits_vectype, max_nunits)))
@@ -796,6 +868,10 @@ vect_build_slp_tree_1 (unsigned char *sw
gcc_assert (vectype);
+ if (is_a <bb_vec_info> (vinfo)
+ && !vect_update_shared_vectype (stmt_info, vectype))
+ continue;
+
if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
{
rhs_code = CALL_EXPR;
@@ -1328,7 +1404,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
break;
- if (!grandchild)
+ if (!grandchild
+ && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
{
/* Roll back. */
this_tree_size = old_tree_size;
@@ -1369,7 +1446,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
do extra work to cancel the pattern so the uses see the
scalar version. */
&& !is_pattern_stmt_p (stmt_info)
- && !oprnd_info->any_pattern)
+ && !oprnd_info->any_pattern
+ && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -1488,7 +1566,9 @@ vect_build_slp_tree_2 (vec_info *vinfo,
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
break;
- if (!grandchild)
+ if (!grandchild
+ && (vect_update_all_shared_vectypes
+ (oprnd_info->def_stmts)))
{
/* Roll back. */
this_tree_size = old_tree_size;
@@ -2026,8 +2106,8 @@ vect_analyze_slp_instance (vec_info *vin
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
{
scalar_type = TREE_TYPE (DR_REF (dr));
- vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
group_size = DR_GROUP_SIZE (stmt_info);
+ vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
}
else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
{
@@ -2669,22 +2749,13 @@ vect_slp_analyze_node_operations_1 (vec_
Memory accesses already got their vector type assigned
in vect_analyze_data_refs. */
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
- if (bb_vinfo
- && ! STMT_VINFO_DATA_REF (stmt_info))
+ if (bb_vinfo && STMT_VINFO_VECTYPE (stmt_info) == boolean_type_node)
{
- tree vectype, nunits_vectype;
- if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
- &nunits_vectype))
- /* We checked this when building the node. */
- gcc_unreachable ();
- if (vectype == boolean_type_node)
- {
- vectype = vect_get_mask_type_for_stmt (stmt_info);
- if (!vectype)
- /* vect_get_mask_type_for_stmt has already explained the
- failure. */
- return false;
- }
+ tree vectype = vect_get_mask_type_for_stmt (stmt_info, node);
+ if (!vectype)
+ /* vect_get_mask_type_for_stmt has already explained the
+ failure. */
+ return false;
stmt_vec_info sstmt_info;
unsigned int i;
@@ -3585,7 +3656,7 @@ vect_get_constant_vectors (slp_tree op_n
&& vect_mask_constant_operand_p (stmt_vinfo))
vector_type = truth_type_for (stmt_vectype);
else
- vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
+ vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);
unsigned int number_of_vectors
= vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
--- a/gcc/tree-vect-stmts.c 2020-12-20 18:46:17.707633230 +0800
+++ b/gcc/tree-vect-stmts.c 2020-12-20 18:48:11.227633230 +0800
@@ -798,7 +798,7 @@ vect_prologue_cost_for_slp_op (slp_tree
/* Without looking at the actual initializer a vector of
constants can be implemented as load from the constant pool.
When all elements are the same we can use a splat. */
- tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
+ tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
unsigned num_vects_to_check;
unsigned HOST_WIDE_INT const_nunits;
@@ -3308,7 +3308,7 @@ vectorizable_call (stmt_vec_info stmt_in
/* If all arguments are external or constant defs, infer the vector type
from the scalar type. */
if (!vectype_in)
- vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type);
+ vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
if (vec_stmt)
gcc_assert (vectype_in);
if (!vectype_in)
@@ -4106,7 +4106,8 @@ vectorizable_simd_clone_call (stmt_vec_i
&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
{
tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
- arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type);
+ arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
+ slp_node);
if (arginfo[i].vectype == NULL
|| (simd_clone_subparts (arginfo[i].vectype)
> bestn->simdclone->simdlen))
@@ -4805,7 +4806,7 @@ vectorizable_conversion (stmt_vec_info s
/* If op0 is an external or constant def, infer the vector type
from the scalar type. */
if (!vectype_in)
- vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type);
+ vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
if (vec_stmt)
gcc_assert (vectype_in);
if (!vectype_in)
@@ -5558,7 +5559,7 @@ vectorizable_shift (stmt_vec_info stmt_i
/* If op0 is an external or constant def, infer the vector type
from the scalar type. */
if (!vectype)
- vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0));
+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
if (vec_stmt)
gcc_assert (vectype);
if (!vectype)
@@ -5656,7 +5657,8 @@ vectorizable_shift (stmt_vec_info stmt_i
"vector/vector shift/rotate found.\n");
if (!op1_vectype)
- op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1));
+ op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
+ slp_node);
incompatible_op1_vectype_p
= (op1_vectype == NULL_TREE
|| maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
@@ -6000,7 +6002,8 @@ vectorizable_operation (stmt_vec_info st
vectype = vectype_out;
}
else
- vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0));
+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
+ slp_node);
}
if (vec_stmt)
gcc_assert (vectype);
@@ -8903,7 +8906,7 @@ vectorizable_load (stmt_vec_info stmt_in
condition operands are supportable using vec_is_simple_use. */
static bool
-vect_is_simple_cond (tree cond, vec_info *vinfo,
+vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
tree *comp_vectype, enum vect_def_type *dts,
tree vectype)
{
@@ -8966,7 +8969,8 @@ vect_is_simple_cond (tree cond, vec_info
scalar_type = build_nonstandard_integer_type
(tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
TYPE_UNSIGNED (scalar_type));
- *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+ *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
+ slp_node);
}
return true;
@@ -9073,7 +9077,7 @@ vectorizable_condition (stmt_vec_info st
then_clause = gimple_assign_rhs2 (stmt);
else_clause = gimple_assign_rhs3 (stmt);
- if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
+ if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, slp_node,
&comp_vectype, &dts[0], slp_node ? NULL : vectype)
|| !comp_vectype)
return false;
@@ -9564,7 +9568,8 @@ vectorizable_comparison (stmt_vec_info s
/* Invariant comparison. */
if (!vectype)
{
- vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
+ slp_node);
if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
return false;
}
@@ -10322,31 +10327,93 @@ get_related_vectype_for_scalar_type (mac
/* Function get_vectype_for_scalar_type.
Returns the vector type corresponding to SCALAR_TYPE as supported
- by the target. */
+ by the target. If GROUP_SIZE is nonzero and we're performing BB
+ vectorization, make sure that the number of elements in the vector
+ is no bigger than GROUP_SIZE. */
tree
-get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type)
+get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
+ unsigned int group_size)
{
+ /* For BB vectorization, we should always have a group size once we've
+ constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
+ are tentative requests during things like early data reference
+ analysis and pattern recognition. */
+ if (is_a <bb_vec_info> (vinfo))
+ gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
+ else
+ group_size = 0;
+
tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
scalar_type);
if (vectype && vinfo->vector_mode == VOIDmode)
vinfo->vector_mode = TYPE_MODE (vectype);
+ /* Register the natural choice of vector type, before the group size
+ has been applied. */
if (vectype)
vinfo->used_vector_modes.add (TYPE_MODE (vectype));
+ /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
+ try again with an explicit number of elements. */
+ if (vectype
+ && group_size
+ && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
+ {
+ /* Start with the biggest number of units that fits within
+ GROUP_SIZE and halve it until we find a valid vector type.
+ Usually either the first attempt will succeed or all will
+ fail (in the latter case because GROUP_SIZE is too small
+ for the target), but it's possible that a target could have
+ a hole between supported vector types.
+
+ If GROUP_SIZE is not a power of 2, this has the effect of
+ trying the largest power of 2 that fits within the group,
+ even though the group is not a multiple of that vector size.
+ The BB vectorizer will then try to carve up the group into
+ smaller pieces. */
+ unsigned int nunits = 1 << floor_log2 (group_size);
+ do
+ {
+ vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
+ scalar_type, nunits);
+ nunits /= 2;
+ }
+ while (nunits > 1 && !vectype);
+ }
+
return vectype;
}
+/* Return the vector type corresponding to SCALAR_TYPE as supported
+ by the target. NODE, if nonnull, is the SLP tree node that will
+ use the returned vector type. */
+
+tree
+get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
+{
+ unsigned int group_size = 0;
+ if (node)
+ {
+ group_size = SLP_TREE_SCALAR_OPS (node).length ();
+ if (group_size == 0)
+ group_size = SLP_TREE_SCALAR_STMTS (node).length ();
+ }
+ return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
+}
+
/* Function get_mask_type_for_scalar_type.
Returns the mask type corresponding to a result of comparison
- of vectors of specified SCALAR_TYPE as supported by target. */
+ of vectors of specified SCALAR_TYPE as supported by target.
+ NODE, if nonnull, is the SLP tree node that will use the returned
+ vector type. */
tree
-get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type)
+get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
+ slp_tree node)
{
- tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
if (!vectype)
return NULL;
@@ -11033,6 +11100,9 @@ vect_gen_while_not (gimple_seq *seq, tre
/* Try to compute the vector types required to vectorize STMT_INFO,
returning true on success and false if vectorization isn't possible.
+ If GROUP_SIZE is nonzero and we're performing BB vectorization,
+ make sure that the number of elements in the vectors is no bigger
+ than GROUP_SIZE.
On success:
@@ -11050,11 +11120,21 @@ vect_gen_while_not (gimple_seq *seq, tre
opt_result
vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
tree *stmt_vectype_out,
- tree *nunits_vectype_out)
+ tree *nunits_vectype_out,
+ unsigned int group_size)
{
vec_info *vinfo = stmt_info->vinfo;
gimple *stmt = stmt_info->stmt;
+ /* For BB vectorization, we should always have a group size once we've
+ constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
+ are tentative requests during things like early data reference
+ analysis and pattern recognition. */
+ if (is_a <bb_vec_info> (vinfo))
+ gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
+ else
+ group_size = 0;
+
*stmt_vectype_out = NULL_TREE;
*nunits_vectype_out = NULL_TREE;
@@ -11085,7 +11165,7 @@ vect_get_vector_types_for_stmt (stmt_vec
tree vectype;
tree scalar_type = NULL_TREE;
- if (STMT_VINFO_VECTYPE (stmt_info))
+ if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
{
*stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
if (dump_enabled_p ())
@@ -11094,15 +11174,17 @@ vect_get_vector_types_for_stmt (stmt_vec
}
else
{
- gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
- if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
+ if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
+ scalar_type = TREE_TYPE (DR_REF (dr));
+ else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
else
scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
/* Pure bool ops don't participate in number-of-units computation.
For comparisons use the types being compared. */
- if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
+ if (!STMT_VINFO_DATA_REF (stmt_info)
+ && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
&& is_gimple_assign (stmt)
&& gimple_assign_rhs_code (stmt) != COND_EXPR)
{
@@ -11122,9 +11204,16 @@ vect_get_vector_types_for_stmt (stmt_vec
}
if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "get vectype for scalar type: %T\n", scalar_type);
- vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+ {
+ if (group_size)
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "get vectype for scalar type (group size %d):"
+ " %T\n", group_size, scalar_type);
+ else
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "get vectype for scalar type: %T\n", scalar_type);
+ }
+ vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
if (!vectype)
return opt_result::failure_at (stmt,
"not vectorized:"
@@ -11155,7 +11244,8 @@ vect_get_vector_types_for_stmt (stmt_vec
dump_printf_loc (MSG_NOTE, vect_location,
"get vectype for smallest scalar type: %T\n",
scalar_type);
- nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+ nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
+ group_size);
if (!nunits_vectype)
return opt_result::failure_at
(stmt, "not vectorized: unsupported data-type %T\n",
@@ -11183,10 +11273,11 @@ vect_get_vector_types_for_stmt (stmt_vec
/* Try to determine the correct vector type for STMT_INFO, which is a
statement that produces a scalar boolean result. Return the vector
- type on success, otherwise return NULL_TREE. */
+ type on success, otherwise return NULL_TREE. NODE, if nonnull,
+ is the SLP tree node that will use the returned vector type. */
opt_tree
-vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
+vect_get_mask_type_for_stmt (stmt_vec_info stmt_info, slp_tree node)
{
vec_info *vinfo = stmt_info->vinfo;
gimple *stmt = stmt_info->stmt;
@@ -11198,7 +11289,7 @@ vect_get_mask_type_for_stmt (stmt_vec_in
&& !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
{
scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
- mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type);
+ mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type, node);
if (!mask_type)
return opt_tree::failure_at (stmt,
diff -Nurp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
--- a/gcc/tree-vectorizer.h 2020-12-20 18:46:17.851633230 +0800
+++ b/gcc/tree-vectorizer.h 2020-12-20 18:48:11.227633230 +0800
@@ -1618,8 +1618,9 @@ extern void vect_update_inits_of_drs (lo
/* In tree-vect-stmts.c. */
extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
poly_uint64 = 0);
-extern tree get_vectype_for_scalar_type (vec_info *, tree);
-extern tree get_mask_type_for_scalar_type (vec_info *, tree);
+extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0);
+extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree);
+extern tree get_mask_type_for_scalar_type (vec_info *, tree, slp_tree = 0);
extern tree get_same_sized_vectype (tree, tree);
extern bool vect_chooses_same_modes_p (vec_info *, machine_mode);
extern bool vect_get_loop_mask_type (loop_vec_info);
@@ -1671,8 +1672,8 @@ extern void optimize_mask_stores (struct
extern gcall *vect_gen_while (tree, tree, tree);
extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree);
extern opt_result vect_get_vector_types_for_stmt (stmt_vec_info, tree *,
- tree *);
-extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info);
+ tree *, unsigned int = 0);
+extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, slp_tree = 0);
/* In tree-vect-data-refs.c. */
extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64);


@@ -0,0 +1,82 @@
This backport contains 2 patches from the GCC mainline tree.
The patches and their upstream commit IDs are listed below in chronological order.
0001-Fix-EXTRACT_LAST_REDUCTION-handling-of-pattern-stmts.patch
9ec35478ccf0f3539988a054b7996278706a7710
0001-Fix-EXTRACT_LAST_REDUCTION-segfault.patch
dc176c3ccd6a8cd3f809f3c1549ad00674061eb5
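
Both fixes concern EXTRACT_LAST_REDUCTION, in which a conditional reduction is vectorized with the .FOLD_EXTRACT_LAST internal function: the first makes the generated call replace the original scalar statement even when the condition comes from a pattern statement, and the second avoids clearing the condition expression when its comparison code is not actually replaced. As a reference, a scalar model of what .FOLD_EXTRACT_LAST computes (my sketch, not code from the patch):

/* Sketch: FOLD_EXTRACT_LAST (else_val, mask, vec) yields the last element
   of VEC whose mask bit is set, or ELSE_VAL when no lane is active.  */
static int
fold_extract_last (int else_val, const _Bool *mask, const int *vec, int n)
{
  int res = else_val;
  for (int i = 0; i < n; ++i)
    if (mask[i])
      res = vec[i];
  return res;
}
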
diff -Nurp a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-6.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-6.c
--- a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-6.c 1969-12-31 19:00:00.000000000 -0500
+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-6.c 2020-12-14 21:16:26.492000000 -0500
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+
+int
+f (int *y)
+{
+ int res = 0;
+ for (int i = 0; i < 100; ++i)
+ res = (y[i] & 1) == 0 && (y[i] < 10) ? res : 1;
+ return res;
+}
diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
--- a/gcc/tree-vect-stmts.c 2020-12-14 21:15:27.004000000 -0500
+++ b/gcc/tree-vect-stmts.c 2020-12-14 21:16:26.492000000 -0500
@@ -1777,9 +1777,10 @@ vect_finish_stmt_generation_1 (stmt_vec_
stmt_vec_info
vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
{
- gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
+ gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
+ gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
- gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
+ gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
gsi_replace (&gsi, vec_stmt, true);
return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
@@ -9118,10 +9119,12 @@ vectorizable_condition (stmt_vec_info st
if (new_code == ERROR_MARK)
must_invert_cmp_result = true;
else
- cond_code = new_code;
+ {
+ cond_code = new_code;
+ /* Make sure we don't accidentally use the old condition. */
+ cond_expr = NULL_TREE;
+ }
}
- /* Make sure we don't accidentally use the old condition. */
- cond_expr = NULL_TREE;
std::swap (then_clause, else_clause);
}
@@ -9426,20 +9429,21 @@ vectorizable_condition (stmt_vec_info st
vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
vec_compare = vec_compare_name;
}
+ gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
+ tree lhs = gimple_get_lhs (old_stmt);
gcall *new_stmt = gimple_build_call_internal
(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
vec_then_clause);
- gimple_call_set_lhs (new_stmt, scalar_dest);
- SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
- if (stmt_info->stmt == gsi_stmt (*gsi))
+ gimple_call_set_lhs (new_stmt, lhs);
+ SSA_NAME_DEF_STMT (lhs) = new_stmt;
+ if (old_stmt == gsi_stmt (*gsi))
new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
else
{
/* In this case we're moving the definition to later in the
block. That doesn't matter because the only uses of the
lhs are in phi statements. */
- gimple_stmt_iterator old_gsi
- = gsi_for_stmt (stmt_info->stmt);
+ gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
gsi_remove (&old_gsi, true);
new_stmt_info
= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);


@@ -0,0 +1,51 @@
This backport contains 1 patch from the GCC mainline tree.
The patch and its upstream commit ID are listed below.
c69325a5db450dbac198f76f1162734af05a1061
0001-sccvn-Fix-up-push_partial_def-little-endian-bitfield.patch
diff -urpN a/gcc/testsuite/gcc.c-torture/execute/pr97764.c b/gcc/testsuite/gcc.c-torture/execute/pr97764.c
--- a/gcc/testsuite/gcc.c-torture/execute/pr97764.c 1969-12-31 19:00:00.000000000 -0500
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97764.c 2020-12-07 03:42:13.404000000 -0500
@@ -0,0 +1,14 @@
+/* PR tree-optimization/97764 */
+/* { dg-require-effective-target int32plus } */
+
+struct S { int b : 3; int c : 28; int d : 1; };
+
+int
+main ()
+{
+ struct S e = {};
+ e.c = -1;
+ if (e.d)
+ __builtin_abort ();
+ return 0;
+}
diff -urpN a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
--- a/gcc/tree-ssa-sccvn.c 2020-12-07 03:43:37.792000000 -0500
+++ b/gcc/tree-ssa-sccvn.c 2020-12-07 03:42:13.404000000 -0500
@@ -2013,12 +2013,12 @@ vn_walk_cb_data::push_partial_def (const
}
else
{
- size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT);
if (pd.offset >= 0)
{
/* LSB of this_buffer[0] byte should be at pd.offset bits
in buffer. */
unsigned int msk;
+ size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT);
amnt = pd.offset % BITS_PER_UNIT;
if (amnt)
shift_bytes_in_array_left (this_buffer, len + 1, amnt);
@@ -2046,6 +2046,9 @@ vn_walk_cb_data::push_partial_def (const
{
amnt = (unsigned HOST_WIDE_INT) pd.offset % BITS_PER_UNIT;
if (amnt)
+ size -= BITS_PER_UNIT - amnt;
+ size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT);
+ if (amnt)
shift_bytes_in_array_left (this_buffer, len + 1, amnt);
}
memcpy (p, this_buffer + (amnt != 0), size / BITS_PER_UNIT);


@@ -0,0 +1,139 @@
This backport contains 1 patch from the GCC mainline tree.
The patch and its upstream commit ID are listed below.
0001-Fix-zero-masking-for-vcvtps2ph-when-dest-operand-is-.patch
43088bb4dadd3d14b6b594c5f9363fe879f3d7f7
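
The underlying restriction, as I understand the ISA, is that vcvtps2ph with a memory destination supports merging masking only, not zeroing masking; the patch therefore keeps the masked register forms on register destinations and adds separate merge-mask-only store patterns, so a zero-masked conversion stays in a register and the store becomes a separate instruction. A hedged usage sketch of the two-step shape the compiler is expected to produce:

#include <immintrin.h>

__m256i res;

/* Illustrative only: conceptually the zero-masked convert-and-store is two
   steps, because a zero-masked vcvtps2ph to memory does not exist; the
   compiler is expected to generate this shape itself.  */
void
cvt_store (__m512 a, __mmask16 m)
{
  __m256i tmp = _mm512_maskz_cvtps_ph (m, a, _MM_FROUND_CUR_DIRECTION);
  _mm256_store_si256 (&res, tmp);
}
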
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 87354451c58..7815d77bcbf 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -21775,19 +21775,19 @@
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "V4SF")])
-(define_insn "*vcvtps2ph_store<mask_name>"
+(define_insn "*vcvtps2ph_store<merge_mask_name>"
[(set (match_operand:V4HI 0 "memory_operand" "=m")
(unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
(match_operand:SI 2 "const_0_to_255_operand" "N")]
UNSPEC_VCVTPS2PH))]
"TARGET_F16C || TARGET_AVX512VL"
- "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
+ "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "V4SF")])
(define_insn "vcvtps2ph256<mask_name>"
- [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm")
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
(unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
(match_operand:SI 2 "const_0_to_255_operand" "N")]
UNSPEC_VCVTPS2PH))]
@@ -21798,8 +21798,20 @@
(set_attr "btver2_decode" "vector")
(set_attr "mode" "V8SF")])
+(define_insn "*vcvtps2ph256<merge_mask_name>"
+ [(set (match_operand:V8HI 0 "memory_operand" "=m")
+ (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
+ (match_operand:SI 2 "const_0_to_255_operand" "N")]
+ UNSPEC_VCVTPS2PH))]
+ "TARGET_F16C || TARGET_AVX512VL"
+ "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "maybe_evex")
+ (set_attr "btver2_decode" "vector")
+ (set_attr "mode" "V8SF")])
+
(define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
- [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
+ [(set (match_operand:V16HI 0 "register_operand" "=v")
(unspec:V16HI
[(match_operand:V16SF 1 "register_operand" "v")
(match_operand:SI 2 "const_0_to_255_operand" "N")]
@@ -21810,6 +21822,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "V16SF")])
+(define_insn "*avx512f_vcvtps2ph512<merge_mask_name>"
+ [(set (match_operand:V16HI 0 "memory_operand" "=m")
+ (unspec:V16HI
+ [(match_operand:V16SF 1 "register_operand" "v")
+ (match_operand:SI 2 "const_0_to_255_operand" "N")]
+ UNSPEC_VCVTPS2PH))]
+ "TARGET_AVX512F"
+ "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V16SF")])
+
;; For gather* insn patterns
(define_mode_iterator VEC_GATHER_MODE
[V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index a5ca144c7f7..58ea9dc83e2 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -73,6 +73,18 @@
(match_operand:SUBST_V 2 "nonimm_or_0_operand" "0C")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))])
+(define_subst_attr "merge_mask_name" "merge_mask" "" "_merge_mask")
+(define_subst_attr "merge_mask_operand3" "merge_mask" "" "%{%3%}")
+(define_subst "merge_mask"
+ [(set (match_operand:SUBST_V 0)
+ (match_operand:SUBST_V 1))]
+ "TARGET_AVX512F"
+ [(set (match_dup 0)
+ (vec_merge:SUBST_V
+ (match_dup 1)
+ (match_dup 0)
+ (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))])
+
(define_subst_attr "mask_scalar_merge_name" "mask_scalar_merge" "" "_mask")
(define_subst_attr "mask_scalar_merge_operand3" "mask_scalar_merge" "" "%{%3%}")
(define_subst_attr "mask_scalar_merge_operand4" "mask_scalar_merge" "" "%{%4%}")
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c
new file mode 100644
index 00000000000..9e0da947368
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512f" } */
+
+#include<immintrin.h>
+extern __m256i res;
+void
+foo (__m512 a, __mmask16 m)
+{
+ res = _mm512_maskz_cvtps_ph (m, a, 10);
+}
+
+/* { dg-final { scan-assembler-not "vcvtps2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]\[^\n\]*res\[^\n\]*\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"} } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c
new file mode 100644
index 00000000000..0c685ea66fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vl -mavx512f" } */
+
+#include<immintrin.h>
+extern __m128i res;
+void
+foo (__m256 a, __mmask8 m)
+{
+ res = _mm256_maskz_cvtps_ph (m, a, 10);
+}
+
+void
+foo1 (__m128 a, __mmask8 m)
+{
+ res = _mm_maskz_cvtps_ph (m, a, 10);
+}
+
+/* { dg-final { scan-assembler-not "vcvtps2ph\[ \\t\]+\[^\{\n\]*%\[xy\]mm\[0-9\]\[^\n\]*res\[^\n\]*\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"} } */


@@ -0,0 +1,155 @@
This backport contains 1 patch from the GCC mainline tree.
The patch and its upstream commit ID are listed below.
0001-IRA-Handle-fully-tied-destinations-in-a-similar-way-.patch
9b0365879b3c4917f5a2485a1fca8bb678484bfe
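
The patch makes IRA treat an output that is tied to an input (a fully-tied destination) somewhat like an earlyclobber with respect to the other inputs: because the destination register must already hold the tied input's value on entry to the instruction, any input carrying a different value cannot share that register. A small inline-asm illustration of a tied destination (mine, not from the patch; x86-64 AT&T syntax):

/* Illustrative only: operand 0 is tied to input A through the "0" matching
   constraint, so the output register holds A on entry; an unrelated input
   such as B must therefore not be allocated to the same register, which is
   the conflict this change records.  */
long
tied_add (long a, long b)
{
  long out;
  asm ("addq %2, %0" : "=r" (out) : "0" (a), "r" (b));
  return out;
}
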
diff --git a/gcc/ira-lives.c b/gcc/ira-lives.c
index cce73a1c3d4..098b0e73953 100644
--- a/gcc/ira-lives.c
+++ b/gcc/ira-lives.c
@@ -633,9 +633,28 @@ check_and_make_def_use_conflict (rtx dreg, rtx orig_dreg,
/* Check and make if necessary conflicts for definition DEF of class
DEF_CL of the current insn with input operands. Process only
- constraints of alternative ALT. */
+ constraints of alternative ALT.
+
+ One of three things is true when this function is called:
+
+ (1) DEF is an earlyclobber for alternative ALT. Input operands then
+ conflict with DEF in ALT unless they explicitly match DEF via 0-9
+ constraints.
+
+ (2) DEF matches (via 0-9 constraints) an operand that is an
+ earlyclobber for alternative ALT. Other input operands then
+ conflict with DEF in ALT.
+
+ (3) [FOR_TIE_P] Some input operand X matches DEF for alternative ALT.
+ Input operands with a different value from X then conflict with
+ DEF in ALT.
+
+ However, there's still a judgement call to make when deciding
+ whether a conflict in ALT is important enough to be reflected
+ in the pan-alternative allocno conflict set. */
static void
-check_and_make_def_conflict (int alt, int def, enum reg_class def_cl)
+check_and_make_def_conflict (int alt, int def, enum reg_class def_cl,
+ bool for_tie_p)
{
int use, use_match;
ira_allocno_t a;
@@ -669,14 +688,40 @@ check_and_make_def_conflict (int alt, int def, enum reg_class def_cl)
if (use == def || recog_data.operand_type[use] == OP_OUT)
continue;
+ /* An earlyclobber on DEF doesn't apply to an input operand X if X
+ explicitly matches DEF, but it applies to other input operands
+ even if they happen to be the same value as X.
+
+ In contrast, if an input operand X is tied to a non-earlyclobber
+ DEF, there's no conflict with other input operands that have the
+ same value as X. */
+ if (op_alt[use].matches == def
+ || (for_tie_p
+ && rtx_equal_p (recog_data.operand[use],
+ recog_data.operand[op_alt[def].matched])))
+ continue;
+
if (op_alt[use].anything_ok)
use_cl = ALL_REGS;
else
use_cl = op_alt[use].cl;
+ if (use_cl == NO_REGS)
+ continue;
+
+ /* If DEF is simply a tied operand, ignore cases in which this
+ alternative requires USE to have a likely-spilled class.
+ Adding a conflict would just constrain USE further if DEF
+ happens to be allocated first. */
+ if (for_tie_p && targetm.class_likely_spilled_p (use_cl))
+ continue;
/* If there's any alternative that allows USE to match DEF, do not
record a conflict. If that causes us to create an invalid
- instruction due to the earlyclobber, reload must fix it up. */
+ instruction due to the earlyclobber, reload must fix it up.
+
+ Likewise, if we're treating a tied DEF like a partial earlyclobber,
+ do not record a conflict if there's another alternative in which
+ DEF is neither tied nor earlyclobber. */
for (alt1 = 0; alt1 < recog_data.n_alternatives; alt1++)
{
if (!TEST_BIT (preferred_alternatives, alt1))
@@ -691,6 +736,12 @@ check_and_make_def_conflict (int alt, int def, enum reg_class def_cl)
&& recog_data.constraints[use - 1][0] == '%'
&& op_alt1[use - 1].matches == def))
break;
+ if (for_tie_p
+ && !op_alt1[def].earlyclobber
+ && op_alt1[def].matched < 0
+ && alternative_class (op_alt1, def) != NO_REGS
+ && alternative_class (op_alt1, use) != NO_REGS)
+ break;
}
if (alt1 < recog_data.n_alternatives)
@@ -701,8 +752,7 @@ check_and_make_def_conflict (int alt, int def, enum reg_class def_cl)
if ((use_match = op_alt[use].matches) >= 0)
{
- if (use_match == def)
- continue;
+ gcc_checking_assert (use_match != def);
if (op_alt[use_match].anything_ok)
use_cl = ALL_REGS;
@@ -717,7 +767,11 @@ check_and_make_def_conflict (int alt, int def, enum reg_class def_cl)
/* Make conflicts of early clobber pseudo registers of the current
insn with its inputs. Avoid introducing unnecessary conflicts by
checking classes of the constraints and pseudos because otherwise
- significant code degradation is possible for some targets. */
+ significant code degradation is possible for some targets.
+
+ For these purposes, tying an input to an output makes that output act
+ like an earlyclobber for inputs with a different value, since the output
+ register then has a predetermined purpose on input to the instruction. */
static void
make_early_clobber_and_input_conflicts (void)
{
@@ -732,15 +786,19 @@ make_early_clobber_and_input_conflicts (void)
if (TEST_BIT (preferred_alternatives, alt))
for (def = 0; def < n_operands; def++)
{
- def_cl = NO_REGS;
- if (op_alt[def].earlyclobber)
+ if (op_alt[def].anything_ok)
+ def_cl = ALL_REGS;
+ else
+ def_cl = op_alt[def].cl;
+ if (def_cl != NO_REGS)
{
- if (op_alt[def].anything_ok)
- def_cl = ALL_REGS;
- else
- def_cl = op_alt[def].cl;
- check_and_make_def_conflict (alt, def, def_cl);
+ if (op_alt[def].earlyclobber)
+ check_and_make_def_conflict (alt, def, def_cl, false);
+ else if (op_alt[def].matched >= 0
+ && !targetm.class_likely_spilled_p (def_cl))
+ check_and_make_def_conflict (alt, def, def_cl, true);
}
+
if ((def_match = op_alt[def].matches) >= 0
&& (op_alt[def_match].earlyclobber
|| op_alt[def].earlyclobber))
@@ -749,7 +807,7 @@ make_early_clobber_and_input_conflicts (void)
def_cl = ALL_REGS;
else
def_cl = op_alt[def_match].cl;
- check_and_make_def_conflict (alt, def, def_cl);
+ check_and_make_def_conflict (alt, def, def_cl, false);
}
}
}


@@ -0,0 +1,99 @@
This backport contains 2 patches from the GCC mainline tree.
The patches and their upstream commit IDs are listed below in chronological order.
97b798d80baf945ea28236eef3fa69f36626b579
0001-SLP-VECT-Add-check-to-fix-96837.patch
373b99dc40949efa697326f378e5022a02e0328b
0002-Add-a-testcase-for-PR-target-96827.patch
diff -uprN a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c 2020-11-17 15:58:12.118126065 +0800
@@ -0,0 +1,28 @@
+/* This checks that vectorized constructors have the correct ordering. */
+/* { dg-require-effective-target vect_int } */
+
+typedef int V __attribute__((__vector_size__(16)));
+
+__attribute__((__noipa__)) void
+foo (unsigned int x, V *y)
+{
+ unsigned int a[4] = { x + 0, x + 2, x + 4, x + 6 };
+ for (unsigned int i = 0; i < 3; ++i)
+ if (a[i] == 1234)
+ a[i]--;
+ *y = (V) { a[3], a[2], a[1], a[0] };
+}
+
+int
+main ()
+{
+ V b;
+ foo (0, &b);
+ if (b[0] != 6 || b[1] != 4 || b[2] != 2 || b[3] != 0)
+ __builtin_abort ();
+ return 0;
+}
+
+/* See that we vectorize an SLP instance. */
+/* { dg-final { scan-tree-dump "Analyzing vectorizable constructor" "slp1" } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */
diff -uprN a/gcc/testsuite/gcc.target/i386/pr96827.c b/gcc/testsuite/gcc.target/i386/pr96827.c
--- a/gcc/testsuite/gcc.target/i386/pr96827.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.target/i386/pr96827.c 2020-11-17 15:58:15.182126065 +0800
@@ -0,0 +1,41 @@
+/* { dg-do run { target sse2_runtime } } */
+/* { dg-options "-O3 -msse2 -mfpmath=sse" } */
+
+typedef unsigned short int __uint16_t;
+typedef unsigned int __uint32_t;
+typedef __uint16_t uint16_t;
+typedef __uint32_t uint32_t;
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_si128 (__m128i *__P, __m128i __B)
+{
+ *__P = __B;
+}
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
+{
+ return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
+}
+typedef uint16_t u16;
+typedef uint32_t u32;
+extern int printf (const char *__restrict __format, ...);
+void do_the_thing(u32 idx, __m128i *dude)
+{
+ u32 dude_[4] = { idx+0, idx+2, idx+4, idx+6 };
+ for (u32 i = 0; i < 3; ++i)
+ if (dude_[i] == 1234)
+ dude_[i]--;
+ *dude = _mm_set_epi32(dude_[0], dude_[1], dude_[2], dude_[3]);
+}
+int main()
+{
+ __m128i dude;
+ u32 idx = 0;
+ do_the_thing(idx, &dude);
+ __attribute__((aligned(16))) u32 dude_[4];
+ _mm_store_si128((__m128i*)dude_, dude);
+ if (!(6 == dude_[0] && 4 == dude_[1] && 2 == dude_[2] && 0 == dude_[3]))
+ __builtin_abort ();
+ return 0;
+}
diff -uprN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
--- a/gcc/tree-vect-slp.c 2020-11-17 15:55:57.098126065 +0800
+++ b/gcc/tree-vect-slp.c 2020-11-17 15:59:25.862126065 +0800
@@ -1842,7 +1842,8 @@ vect_supported_load_permutation_p (slp_i
/* Reduction (there are no data-refs in the root).
In reduction chain the order of the loads is not important. */
if (!STMT_VINFO_DATA_REF (stmt_info)
- && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
+ && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+ && !SLP_INSTANCE_ROOT_STMT (slp_instn))
vect_attempt_slp_rearrange_stmts (slp_instn);
/* In basic block vectorization we allow any subchain of an interleaving


@@ -0,0 +1,165 @@
This backport contains 1 patch from the GCC mainline tree.
The patch and its upstream commit ID are listed below.
7a6588fe65432c0f1a8b5fdefba81700ebf88711
0001-aarch64-Fix-ash-lr-lshr-mode-3-expanders-PR94488.patch
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 24a11fb5040..9f0e2bd1e6f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1105,31 +1105,17 @@
tmp));
DONE;
}
- else
- {
- operands[2] = force_reg (SImode, operands[2]);
- }
- }
- else if (MEM_P (operands[2]))
- {
- operands[2] = force_reg (SImode, operands[2]);
}
- if (REG_P (operands[2]))
- {
- rtx tmp = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_aarch64_simd_dup<mode> (tmp,
- convert_to_mode (<VEL>mode,
- operands[2],
- 0)));
- emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
- tmp));
- DONE;
- }
- else
- FAIL;
-}
-)
+ operands[2] = force_reg (SImode, operands[2]);
+
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
+ operands[2],
+ 0)));
+ emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
+ DONE;
+})
(define_expand "lshr<mode>3"
[(match_operand:VDQ_I 0 "register_operand")
@@ -1152,31 +1138,19 @@
tmp));
DONE;
}
- else
- operands[2] = force_reg (SImode, operands[2]);
- }
- else if (MEM_P (operands[2]))
- {
- operands[2] = force_reg (SImode, operands[2]);
}
- if (REG_P (operands[2]))
- {
- rtx tmp = gen_reg_rtx (SImode);
- rtx tmp1 = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_negsi2 (tmp, operands[2]));
- emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
- convert_to_mode (<VEL>mode,
- tmp, 0)));
- emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
- operands[1],
- tmp1));
- DONE;
- }
- else
- FAIL;
-}
-)
+ operands[2] = force_reg (SImode, operands[2]);
+
+ rtx tmp = gen_reg_rtx (SImode);
+ rtx tmp1 = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_negsi2 (tmp, operands[2]));
+ emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
+ convert_to_mode (<VEL>mode, tmp, 0)));
+ emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
+ tmp1));
+ DONE;
+})
(define_expand "ashr<mode>3"
[(match_operand:VDQ_I 0 "register_operand")
@@ -1199,31 +1173,19 @@
tmp));
DONE;
}
- else
- operands[2] = force_reg (SImode, operands[2]);
- }
- else if (MEM_P (operands[2]))
- {
- operands[2] = force_reg (SImode, operands[2]);
}
- if (REG_P (operands[2]))
- {
- rtx tmp = gen_reg_rtx (SImode);
- rtx tmp1 = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_negsi2 (tmp, operands[2]));
- emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
- convert_to_mode (<VEL>mode,
- tmp, 0)));
- emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
- operands[1],
- tmp1));
- DONE;
- }
- else
- FAIL;
-}
-)
+ operands[2] = force_reg (SImode, operands[2]);
+
+ rtx tmp = gen_reg_rtx (SImode);
+ rtx tmp1 = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_negsi2 (tmp, operands[2]));
+ emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
+ tmp, 0)));
+ emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
+ tmp1));
+ DONE;
+})
(define_expand "vashl<mode>3"
[(match_operand:VDQ_I 0 "register_operand")
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr94488.c b/gcc/testsuite/gcc.c-torture/compile/pr94488.c
new file mode 100644
index 00000000000..6e20a4168de
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr94488.c
@@ -0,0 +1,22 @@
+/* PR target/94488 */
+
+typedef unsigned long V __attribute__((__vector_size__(16)));
+typedef long W __attribute__((__vector_size__(16)));
+
+void
+foo (V *x, unsigned long y)
+{
+ *x = *x >> (unsigned int) y;
+}
+
+void
+bar (V *x, unsigned long y)
+{
+ *x = *x << (unsigned int) y;
+}
+
+void
+baz (W *x, unsigned long y)
+{
+ *x = *x >> (unsigned int) y;
+}

File diff suppressed because it is too large.


@@ -0,0 +1,34 @@
This backport contains 1 patch from the GCC mainline tree.
The patch and its upstream commit ID are listed below.
0001-aarch64-Fix-mismatched-SVE-predicate-modes.patch
26bebf576ddcdcfb596f07e8c2896f17c48516e7
diff -urpN a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2020-12-14 00:57:20.128000000 -0500
+++ b/gcc/config/aarch64/aarch64.c 2020-12-14 01:00:15.080000000 -0500
@@ -4328,6 +4328,7 @@ aarch64_expand_sve_const_pred_eor (rtx t
/* EOR the result with an ELT_SIZE PTRUE. */
rtx mask = aarch64_ptrue_all (elt_size);
mask = force_reg (VNx16BImode, mask);
+ inv = gen_lowpart (VNx16BImode, inv);
target = aarch64_target_reg (target, VNx16BImode);
emit_insn (gen_aarch64_pred_z (XOR, VNx16BImode, target, mask, inv, mask));
return target;
diff -urpN a/gcc/testsuite/gcc.dg/vect/pr94606.c b/gcc/testsuite/gcc.dg/vect/pr94606.c
--- a/gcc/testsuite/gcc.dg/vect/pr94606.c 1969-12-31 19:00:00.000000000 -0500
+++ b/gcc/testsuite/gcc.dg/vect/pr94606.c 2020-12-14 01:00:15.080000000 -0500
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=armv8.2-a+sve -msve-vector-bits=256" { target aarch64*-*-* } } */
+
+const short mask[] = { 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 1, 1, 1, 1, 1 };
+
+int
+foo (short *restrict x, short *restrict y)
+{
+ for (int i = 0; i < 16; ++i)
+ if (mask[i])
+ x[i] += y[i];
+}

File diff suppressed because it is too large.


@@ -0,0 +1,88 @@
This backport contains 2 patches from the GCC mainline tree.
The patches and their upstream commit IDs are listed below in chronological order.
4bf29d15f2e01348a45a1f4e1a135962f123fdd6
0001-AArch64-PR79262-Adjust-vector-cost.patch
27071013521b015d17a2666448f27a6ff0c55aca
0001-Move-EXTRACT_LAST_REDUCTION-costing-to-vectorizable_.patch
diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2020-11-20 04:36:33.988000000 +0800
+++ b/gcc/config/aarch64/aarch64.c 2020-11-20 04:32:20.984000000 +0800
@@ -448,7 +448,7 @@ static const struct cpu_vector_cost gene
1, /* vec_int_stmt_cost */
1, /* vec_fp_stmt_cost */
2, /* vec_permute_cost */
- 1, /* vec_to_scalar_cost */
+ 2, /* vec_to_scalar_cost */
1, /* scalar_to_vec_cost */
1, /* vec_align_load_cost */
1, /* vec_unalign_load_cost */
diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
--- a/gcc/tree-vect-loop.c 2020-11-20 04:36:34.016000000 +0800
+++ b/gcc/tree-vect-loop.c 2020-11-20 04:32:20.984000000 +0800
@@ -3926,8 +3926,11 @@ vect_model_reduction_cost (stmt_vec_info
code = gimple_assign_rhs_code (orig_stmt_info->stmt);
- if (reduction_type == EXTRACT_LAST_REDUCTION
- || reduction_type == FOLD_LEFT_REDUCTION)
+ if (reduction_type == EXTRACT_LAST_REDUCTION)
+ /* No extra instructions are needed in the prologue. The loop body
+ operations are costed in vectorizable_condition. */
+ inside_cost = 0;
+ else if (reduction_type == FOLD_LEFT_REDUCTION)
{
/* No extra instructions needed in the prologue. */
prologue_cost = 0;
diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
--- a/gcc/tree-vect-stmts.c 2020-11-20 04:36:33.996000000 +0800
+++ b/gcc/tree-vect-stmts.c 2020-11-20 04:32:20.984000000 +0800
@@ -859,7 +859,8 @@ vect_model_simple_cost (stmt_vec_info st
enum vect_def_type *dt,
int ndts,
slp_tree node,
- stmt_vector_for_cost *cost_vec)
+ stmt_vector_for_cost *cost_vec,
+ vect_cost_for_stmt kind = vector_stmt)
{
int inside_cost = 0, prologue_cost = 0;
@@ -906,7 +907,7 @@ vect_model_simple_cost (stmt_vec_info st
}
/* Pass the inside-of-loop statements to the target-specific cost model. */
- inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
+ inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
stmt_info, 0, vect_body);
if (dump_enabled_p ())
@@ -9194,15 +9195,18 @@ vectorizable_condition (stmt_vec_info st
" EXTRACT_LAST_REDUCTION.\n");
LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
}
- if (expand_vec_cond_expr_p (vectype, comp_vectype,
- cond_code))
- {
- STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
- vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
- cost_vec);
- return true;
- }
- return false;
+
+ vect_cost_for_stmt kind = vector_stmt;
+ if (reduction_type == EXTRACT_LAST_REDUCTION)
+ /* Count one reduction-like operation per vector. */
+ kind = vec_to_scalar;
+ else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
+ return false;
+
+ STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
+ vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
+ cost_vec, kind);
+ return true;
}
/* Transform. */


@@ -1,3 +1,9 @@
This backport contains 1 patch from the GCC mainline tree.
The patch and its upstream commit ID are listed below.
0001-lra-Avoid-cycling-on-certain-subreg-reloads-PR96796.patch
6001db79c477b03eacc7e7049560921fb54b7845
diff -uprN a/gcc/lra-constraints.c b/gcc/lra-constraints.c
--- a/gcc/lra-constraints.c 2020-03-12 19:07:21.000000000 +0800
+++ b/gcc/lra-constraints.c 2020-09-08 10:02:52.308147305 +0800

File diff suppressed because it is too large.


@@ -1,3 +1,9 @@
This backport contains 1 patch from the GCC mainline tree.
The patch and its upstream commit ID are listed below.
0001-Bump-BASE-VER-to-9.3.1.patch
9f26e5863a75744bbee1479792ecae084a3ceb20
diff -Nurp a/gcc/BASE-VER b/gcc/BASE-VER
--- a/gcc/BASE-VER 2020-08-19 10:47:14.100000000 +0800
+++ b/gcc/BASE-VER 2020-08-19 10:32:30.380000000 +0800


@@ -1,3 +1,9 @@
This backport contains 1 patch from the GCC mainline tree.
The patch and its upstream commit ID are listed below.
0001-combine-Don-t-generate-IF_THEN_ELSE.patch
ddbb5da5199fb421dc398911c37fa7f896efc13f
diff --git a/gcc/combine.c b/gcc/combine.c
index 4de759a8e6b..ce7aeecb5c2 100644
--- a/gcc/combine.c


@@ -0,0 +1,51 @@
This backport contains 1 patch from the GCC mainline tree.
The patch and its upstream commit ID are listed below.
0001-IPA-compare-VRP-types.patch
a86623902767122c71c7229150a8b8a79cbb3673
diff -Nurp a/gcc/ipa-prop.c b/gcc/ipa-prop.c
--- a/gcc/ipa-prop.c 2020-11-28 00:19:34.340000000 +0800
+++ b/gcc/ipa-prop.c 2020-11-28 00:21:24.680000000 +0800
@@ -122,7 +122,8 @@ struct ipa_vr_ggc_hash_traits : public g
static bool
equal (const value_range_base *a, const value_range_base *b)
{
- return a->equal_p (*b);
+ return (a->equal_p (*b)
+ && types_compatible_p (a->type (), b->type ()));
}
static void
mark_empty (value_range_base *&p)
diff -Nurp a/gcc/testsuite/gcc.c-torture/execute/pr97404.c b/gcc/testsuite/gcc.c-torture/execute/pr97404.c
--- a/gcc/testsuite/gcc.c-torture/execute/pr97404.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97404.c 2020-11-28 00:21:24.680000000 +0800
@@ -0,0 +1,28 @@
+/* PR ipa/97404 */
+/* { dg-additional-options "-fno-inline" } */
+
+char a, b;
+long c;
+short d, e;
+long *f = &c;
+int g;
+char h(signed char i) { return 0; }
+static short j(short i, int k) { return i < 0 ? 0 : i >> k; }
+void l(void);
+void m(void)
+{
+ e = j(d | 9766, 11);
+ *f = e;
+}
+void l(void)
+{
+ a = 5 | g;
+ b = h(a);
+}
+int main()
+{
+ m();
+ if (c != 4)
+ __builtin_abort();
+ return 0;
+}


@@ -0,0 +1,396 @@
This backport contains 2 patches from the GCC mainline tree.
The patches and their upstream commit IDs are listed below in chronological order.
0001-tree-affine.c-expr_to_aff_combination-New-function-s.patch
5120e0d8d48f4590a275e60565de6c5a4e772fc1
0001-PR-tree-optimization-94574-aarch64-ICE-during-GIMPLE.patch
0447929f11e6a3e1b076841712b90a8b6bc7d33a
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c 2020-12-08 14:54:11.467633230 +0800
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE unsigned int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c 2020-12-08 14:54:11.467633230 +0800
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
+
+#define TYPE int
+
+#include "pr83403.h"
+
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h 2020-12-08 14:54:11.467633230 +0800
@@ -0,0 +1,30 @@
+__attribute__ ((noinline)) void
+calculate (const double *__restrict__ A, const double *__restrict__ B,
+ double *__restrict__ C)
+{
+ TYPE m = 0;
+ TYPE n = 0;
+ TYPE k = 0;
+
+ A = (const double *) __builtin_assume_aligned (A, 16);
+ B = (const double *) __builtin_assume_aligned (B, 16);
+ C = (double *) __builtin_assume_aligned (C, 16);
+
+ for (n = 0; n < 9; n++)
+ {
+ for (m = 0; m < 10; m++)
+ {
+ C[(n * 10) + m] = 0.0;
+ }
+
+ for (k = 0; k < 17; k++)
+ {
+#pragma simd
+ for (m = 0; m < 10; m++)
+ {
+ C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
+ }
+ }
+ }
+}
+
diff -Nurp a/gcc/tree-affine.c b/gcc/tree-affine.c
--- a/gcc/tree-affine.c 2020-12-09 09:01:13.179633230 +0800
+++ b/gcc/tree-affine.c 2020-12-08 14:54:11.467633230 +0800
@@ -259,104 +259,66 @@ aff_combination_convert (aff_tree *comb,
}
}
-/* Splits EXPR into an affine combination of parts. */
+/* Tries to handle OP0 CODE OP1 as affine combination of parts. Returns
+ true when that was successful and returns the combination in COMB. */
-void
-tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
+static bool
+expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
+ tree op0, tree op1 = NULL_TREE)
{
aff_tree tmp;
- enum tree_code code;
- tree cst, core, toffset;
poly_int64 bitpos, bitsize, bytepos;
- machine_mode mode;
- int unsignedp, reversep, volatilep;
-
- STRIP_NOPS (expr);
- code = TREE_CODE (expr);
switch (code)
{
case POINTER_PLUS_EXPR:
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
- tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp);
+ tree_to_aff_combination (op0, type, comb);
+ tree_to_aff_combination (op1, sizetype, &tmp);
aff_combination_add (comb, &tmp);
- return;
+ return true;
case PLUS_EXPR:
case MINUS_EXPR:
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
- tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
+ tree_to_aff_combination (op0, type, comb);
+ tree_to_aff_combination (op1, type, &tmp);
if (code == MINUS_EXPR)
aff_combination_scale (&tmp, -1);
aff_combination_add (comb, &tmp);
- return;
+ return true;
case MULT_EXPR:
- cst = TREE_OPERAND (expr, 1);
- if (TREE_CODE (cst) != INTEGER_CST)
+ if (TREE_CODE (op1) != INTEGER_CST)
break;
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
- aff_combination_scale (comb, wi::to_widest (cst));
- return;
+ tree_to_aff_combination (op0, type, comb);
+ aff_combination_scale (comb, wi::to_widest (op1));
+ return true;
case NEGATE_EXPR:
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
+ tree_to_aff_combination (op0, type, comb);
aff_combination_scale (comb, -1);
- return;
+ return true;
case BIT_NOT_EXPR:
/* ~x = -x - 1 */
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
+ tree_to_aff_combination (op0, type, comb);
aff_combination_scale (comb, -1);
aff_combination_add_cst (comb, -1);
- return;
-
- case ADDR_EXPR:
- /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */
- if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF)
- {
- expr = TREE_OPERAND (expr, 0);
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
- tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp);
- aff_combination_add (comb, &tmp);
- return;
- }
- core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
- &toffset, &mode, &unsignedp, &reversep,
- &volatilep);
- if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
- break;
- aff_combination_const (comb, type, bytepos);
- if (TREE_CODE (core) == MEM_REF)
- {
- tree mem_offset = TREE_OPERAND (core, 1);
- aff_combination_add_cst (comb, wi::to_poly_widest (mem_offset));
- core = TREE_OPERAND (core, 0);
- }
- else
- core = build_fold_addr_expr (core);
-
- if (TREE_CODE (core) == ADDR_EXPR)
- aff_combination_add_elt (comb, core, 1);
- else
- {
- tree_to_aff_combination (core, type, &tmp);
- aff_combination_add (comb, &tmp);
- }
- if (toffset)
- {
- tree_to_aff_combination (toffset, type, &tmp);
- aff_combination_add (comb, &tmp);
- }
- return;
+ return true;
CASE_CONVERT:
{
- tree otype = TREE_TYPE (expr);
- tree inner = TREE_OPERAND (expr, 0);
+ tree otype = type;
+ tree inner = op0;
tree itype = TREE_TYPE (inner);
enum tree_code icode = TREE_CODE (inner);
+ /* STRIP_NOPS */
+ if (tree_nop_conversion_p (otype, itype))
+ {
+ tree_to_aff_combination (op0, type, comb);
+ return true;
+ }
+
/* In principle this is a valid folding, but it isn't necessarily
an optimization, so do it here and not in fold_unary. */
if ((icode == PLUS_EXPR || icode == MINUS_EXPR || icode == MULT_EXPR)
@@ -376,38 +338,127 @@ tree_to_aff_combination (tree expr, tree
{
op0 = fold_convert (otype, op0);
op1 = fold_convert (otype, op1);
- expr = fold_build2 (icode, otype, op0, op1);
- tree_to_aff_combination (expr, type, comb);
- return;
+ return expr_to_aff_combination (comb, icode, otype, op0, op1);
}
wide_int minv, maxv;
/* If inner type has wrapping overflow behavior, fold conversion
for below case:
- (T1)(X - CST) -> (T1)X - (T1)CST
- if X - CST doesn't overflow by range information. Also handle
- (T1)(X + CST) as (T1)(X - (-CST)). */
+ (T1)(X *+- CST) -> (T1)X *+- (T1)CST
+ if X *+- CST doesn't overflow by range information. */
if (TYPE_UNSIGNED (itype)
&& TYPE_OVERFLOW_WRAPS (itype)
- && TREE_CODE (op0) == SSA_NAME
&& TREE_CODE (op1) == INTEGER_CST
- && icode != MULT_EXPR
- && get_range_info (op0, &minv, &maxv) == VR_RANGE)
+ && determine_value_range (op0, &minv, &maxv) == VR_RANGE)
{
+ wi::overflow_type overflow = wi::OVF_NONE;
+ signop sign = UNSIGNED;
if (icode == PLUS_EXPR)
- op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
- if (wi::geu_p (minv, wi::to_wide (op1)))
+ wi::add (maxv, wi::to_wide (op1), sign, &overflow);
+ else if (icode == MULT_EXPR)
+ wi::mul (maxv, wi::to_wide (op1), sign, &overflow);
+ else
+ wi::sub (minv, wi::to_wide (op1), sign, &overflow);
+
+ if (overflow == wi::OVF_NONE)
{
op0 = fold_convert (otype, op0);
op1 = fold_convert (otype, op1);
- expr = fold_build2 (MINUS_EXPR, otype, op0, op1);
- tree_to_aff_combination (expr, type, comb);
- return;
+ return expr_to_aff_combination (comb, icode, otype, op0,
+ op1);
}
}
}
}
break;
+ default:;
+ }
+
+ return false;
+}
+
+/* Splits EXPR into an affine combination of parts. */
+
+void
+tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
+{
+ aff_tree tmp;
+ enum tree_code code;
+ tree core, toffset;
+ poly_int64 bitpos, bitsize, bytepos;
+ machine_mode mode;
+ int unsignedp, reversep, volatilep;
+
+ STRIP_NOPS (expr);
+
+ code = TREE_CODE (expr);
+ switch (code)
+ {
+ case POINTER_PLUS_EXPR:
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ case MULT_EXPR:
+ if (expr_to_aff_combination (comb, code, type, TREE_OPERAND (expr, 0),
+ TREE_OPERAND (expr, 1)))
+ return;
+ break;
+
+ case NEGATE_EXPR:
+ case BIT_NOT_EXPR:
+ if (expr_to_aff_combination (comb, code, type, TREE_OPERAND (expr, 0)))
+ return;
+ break;
+
+ CASE_CONVERT:
+ /* ??? TREE_TYPE (expr) should be equal to type here, but IVOPTS
+ calls this with not showing an outer widening cast. */
+ if (expr_to_aff_combination (comb, code,
+ TREE_TYPE (expr), TREE_OPERAND (expr, 0)))
+ {
+ aff_combination_convert (comb, type);
+ return;
+ }
+ break;
+
+ case ADDR_EXPR:
+ /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */
+ if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF)
+ {
+ expr = TREE_OPERAND (expr, 0);
+ tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
+ tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp);
+ aff_combination_add (comb, &tmp);
+ return;
+ }
+ core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
+ &toffset, &mode, &unsignedp, &reversep,
+ &volatilep);
+ if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
+ break;
+ aff_combination_const (comb, type, bytepos);
+ if (TREE_CODE (core) == MEM_REF)
+ {
+ tree mem_offset = TREE_OPERAND (core, 1);
+ aff_combination_add_cst (comb, wi::to_poly_widest (mem_offset));
+ core = TREE_OPERAND (core, 0);
+ }
+ else
+ core = build_fold_addr_expr (core);
+
+ if (TREE_CODE (core) == ADDR_EXPR)
+ aff_combination_add_elt (comb, core, 1);
+ else
+ {
+ tree_to_aff_combination (core, type, &tmp);
+ aff_combination_add (comb, &tmp);
+ }
+ if (toffset)
+ {
+ tree_to_aff_combination (toffset, type, &tmp);
+ aff_combination_add (comb, &tmp);
+ }
+ return;
+
default:
{
if (poly_int_tree_p (expr))
@@ -665,7 +716,7 @@ aff_combination_expand (aff_tree *comb A
{
unsigned i;
aff_tree to_add, current, curre;
- tree e, rhs;
+ tree e;
gimple *def;
widest_int scale;
struct name_expansion *exp;
@@ -715,20 +766,38 @@ aff_combination_expand (aff_tree *comb A
case PLUS_EXPR:
case MINUS_EXPR:
case MULT_EXPR:
+ if (!expr_to_aff_combination (&current, code, TREE_TYPE (name),
+ gimple_assign_rhs1 (def),
+ gimple_assign_rhs2 (def)))
+ continue;
+ break;
case NEGATE_EXPR:
case BIT_NOT_EXPR:
+ if (!expr_to_aff_combination (&current, code, TREE_TYPE (name),
+ gimple_assign_rhs1 (def)))
+ continue;
+ break;
CASE_CONVERT:
- rhs = gimple_assign_rhs_to_tree (def);
+ if (!expr_to_aff_combination (&current, code, TREE_TYPE (name),
+ gimple_assign_rhs1 (def)))
+ /* This makes us always expand conversions which we did
+ in the past and makes gcc.dg/tree-ssa/ivopts-lt-2.c
+ PASS, eliminating one induction variable in IVOPTs.
+ ??? But it is really excessive and we should try
+ harder to do without it. */
+ aff_combination_elt (&current, TREE_TYPE (name),
+ fold_convert (TREE_TYPE (name),
+ gimple_assign_rhs1 (def)));
break;
case ADDR_EXPR:
case INTEGER_CST:
case POLY_INT_CST:
- rhs = gimple_assign_rhs1 (def);
+ tree_to_aff_combination (gimple_assign_rhs1 (def),
+ TREE_TYPE (name), &current);
break;
default:
continue;
}
- tree_to_aff_combination (rhs, TREE_TYPE (name), &current);
exp = XNEW (struct name_expansion);
exp->in_progress = 1;
if (!*cache)

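As a point of reference, the range-based folding added above, (T1)(X *+- CST) -> (T1)X *+- (T1)CST when the inner operation provably does not wrap, is what lets affine analysis see through widened address computations such as the one in this hedged sketch (an illustrative example, not part of the patch):

/* Hypothetical sketch: the loop bound gives i + 1 a known range, so
   the unsigned addition cannot wrap and the widened offset
   (unsigned long)(i + 1) * sizeof (double) can be decomposed into
   the affine form 8*i + 8 instead of being treated as opaque.  */
double
sum_shifted (const double *p)
{
  double s = 0.0;
  for (unsigned int i = 0; i < 100; i++)
    s += p[i + 1];
  return s;
}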
View File

@ -1,3 +1,9 @@
This backport contains 1 patch from the GCC mainstream tree.
The commit ID of the patch is listed below.
0001-re-PR-tree-optimization-92085-ICE-tree-check-expecte.patch
3c8e341b996546607fa1f39a0fd9a9d7c2c38214
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c 2020-07-09 11:05:23.136000000 +0800

View File

@ -0,0 +1,37 @@
diff -uprN a/gcc/testsuite/gcc.target/aarch64/sve/slp_fix_1.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_fix_1.c
--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_fix_1.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_fix_1.c 2020-11-17 02:38:45.284000000 +0800
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 -funsafe-math-optimizations" } */
+
+long a, b;
+float c, e;
+float *d;
+void f() {
+ float g, h, i, j;
+ b = 0;
+ for (; b < a; b++) {
+ i = d[0];
+ g = g + i * e;
+ j = d[1];
+ h = h - j * e;
+ d = d + 2;
+ }
+ c = g;
+ e = h;
+}
diff -uprN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
--- a/gcc/tree-vect-slp.c 2020-11-16 10:59:36.000000000 +0800
+++ b/gcc/tree-vect-slp.c 2020-11-16 23:30:19.560000000 +0800
@@ -4140,8 +4140,8 @@ vect_schedule_slp_instance (slp_tree nod
gimple *vstmt;
vstmt = gimple_build_assign (make_ssa_name (vectype),
VEC_PERM_EXPR,
- gimple_assign_lhs (v0[j]->stmt),
- gimple_assign_lhs (v1[j]->stmt),
+ gimple_get_lhs (v0[j]->stmt),
+ gimple_get_lhs (v1[j]->stmt),
tmask);
SLP_TREE_VEC_STMTS (node).quick_push
(vect_finish_stmt_generation (stmt_info, vstmt, &si));

View File

@ -1,3 +1,9 @@
This backport contains 1 patch from the GCC mainstream tree.
The commit ID of the patch is listed below.
0001-Don-t-assign-a-cost-to-vectorizable_assignment.patch
e4020b28d02a00d478a3a769855ae6a8d9cc6b26
diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
--- a/gcc/tree-vect-loop.c 2020-07-09 10:42:35.824000000 +0800
+++ b/gcc/tree-vect-loop.c 2020-07-09 10:43:23.920000000 +0800

View File

@ -1,3 +1,9 @@
This backport contains 1 patch from the GCC mainstream tree.
The commit ID of the patch is listed below.
0001-re-PR-tree-optimization-92252-ICE-Segmentation-fault.patch
97c6bea819ec0a773041308e62a7c05c33f093b0
diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr92252.c b/gcc/testsuite/gcc.dg/torture/pr92252.c
--- a/gcc/testsuite/gcc.dg/torture/pr92252.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/torture/pr92252.c 2020-07-03 10:39:44.808000000 +0800

View File

@ -0,0 +1,784 @@
This backport contains 5 patches from the GCC mainstream tree.
The commit IDs of these patches are listed below in chronological order.
8801ca5c28c3a9e9f36fa39a6a4455b48c8221fa
9ac1403ca2c65ba4f28cf051b5326617fa9298d1
7e99af4816cfad578094fcf08e2377f3ed76e201
ef8777c14ce8694f53eab7a88d24513cbf541ba4
dccbf1e2a6e544f71b4a5795f0c79015db019fc3
diff -Nurp a/gcc/testsuite/gcc.dg/vect/pr92677.c b/gcc/testsuite/gcc.dg/vect/pr92677.c
--- a/gcc/testsuite/gcc.dg/vect/pr92677.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/vect/pr92677.c 2020-10-26 18:31:50.980000000 +0800
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3" } */
+
+int a, c;
+int *b;
+long d;
+double *e;
+
+void fn1() {
+ long f;
+ double g, h;
+ while (c) {
+ if (d) {
+ g = *e;
+ *(b + 4) = g;
+ }
+ if (f) {
+ h = *(e + 2);
+ *(b + 6) = h;
+ }
+ e += a;
+ b += 8;
+ c--;
+ d += 2;
+ }
+}
diff -Nurp a/gcc/testsuite/gcc.dg/vect/slp-46.c b/gcc/testsuite/gcc.dg/vect/slp-46.c
--- a/gcc/testsuite/gcc.dg/vect/slp-46.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/vect/slp-46.c 2020-10-26 18:31:56.512000000 +0800
@@ -0,0 +1,96 @@
+/* { dg-require-effective-target vect_double } */
+
+#include "tree-vect.h"
+
+double x[1024], y[1024];
+
+void __attribute__((noipa)) foo()
+{
+ for (int i = 0; i < 512; ++i)
+ {
+ x[2*i] = y[i];
+ x[2*i+1] = y[i];
+ }
+}
+
+void __attribute__((noipa)) bar()
+{
+ for (int i = 0; i < 512; ++i)
+ {
+ x[2*i] = y[2*i];
+ x[2*i+1] = y[2*i];
+ }
+}
+
+void __attribute__((noipa)) baz()
+{
+ for (int i = 0; i < 512; ++i)
+ {
+ x[2*i] = y[511-i];
+ x[2*i+1] = y[511-i];
+ }
+}
+
+void __attribute__((noipa)) boo()
+{
+ for (int i = 0; i < 512; ++i)
+ {
+ x[2*i] = y[2*(511-i)];
+ x[2*i+1] = y[2*(511-i)];
+ }
+}
+
+int
+main ()
+{
+ check_vect ();
+
+ for (int i = 0; i < 1024; ++i)
+ {
+ x[i] = 0;
+ y[i] = i;
+ __asm__ volatile ("");
+ }
+
+ foo ();
+ for (int i = 0; i < 1024; ++i)
+ if (x[i] != y[i/2])
+ abort ();
+
+ for (int i = 0; i < 1024; ++i)
+ {
+ x[i] = 0;
+ __asm__ volatile ("");
+ }
+
+ bar ();
+ for (int i = 0; i < 1024; ++i)
+ if (x[i] != y[2*(i/2)])
+ abort ();
+
+ for (int i = 0; i < 1024; ++i)
+ {
+ x[i] = 0;
+ __asm__ volatile ("");
+ }
+
+ baz ();
+ for (int i = 0; i < 1024; ++i)
+ if (x[i] != y[511 - i/2])
+ abort ();
+
+ for (int i = 0; i < 1024; ++i)
+ {
+ x[i] = 0;
+ __asm__ volatile ("");
+ }
+
+ boo ();
+ for (int i = 0; i < 1024; ++i)
+ if (x[i] != y[2*(511 - i/2)])
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
diff -Nurp a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-5.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-5.c
--- a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-5.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-5.c 2020-10-26 18:31:53.584000000 +0800
@@ -0,0 +1,36 @@
+#include "tree-vect.h"
+
+#define N 512
+
+int a[N], b[N];
+
+int __attribute__((noipa))
+foo (int aval, int bval)
+{
+ int i, res = 0;
+ for (i=0; i<N; i++)
+ {
+ if (a[i] != 0)
+ res = aval;
+ if (b[i] != 0)
+ res = bval;
+ }
+ return res;
+}
+
+int main()
+{
+ check_vect ();
+ if (foo (1, 2) != 0)
+ abort ();
+ a[3] = 1;
+ b[4] = 1;
+ if (foo (1, 2) != 2)
+ abort ();
+ a[7] = 1;
+ if (foo (1, 2) != 1)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_condition } } } */
diff -Nurp a/gcc/testsuite/g++.dg/pr91221.C b/gcc/testsuite/g++.dg/pr91221.C
--- a/gcc/testsuite/g++.dg/pr91221.C 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/g++.dg/pr91221.C 2020-10-26 18:31:45.768000000 +0800
@@ -0,0 +1,13 @@
+// { dg-do compile }
+// { dg-options "-O2 -fno-ipa-pure-const -fpack-struct -Wno-address-of-packed-member" }
+
+void printf(...);
+struct A {
+ A() : bar_(), dbar_() {
+ for (int i;; i++)
+ printf(i, bar_[i]);
+ }
+ int bar_[5];
+ double dbar_[5];
+};
+void fn1() { A a; }
diff -Nurp a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
--- a/gcc/tree-scalar-evolution.c 2020-10-26 18:28:58.720000000 +0800
+++ b/gcc/tree-scalar-evolution.c 2020-10-26 18:31:48.472000000 +0800
@@ -933,8 +933,8 @@ enum t_bool {
};
-static t_bool follow_ssa_edge (struct loop *loop, gimple *, gphi *,
- tree *, int);
+static t_bool follow_ssa_edge_expr (struct loop *loop, gimple *, tree, gphi *,
+ tree *, int);
/* Follow the ssa edge into the binary expression RHS0 CODE RHS1.
Return true if the strongly connected component has been found. */
@@ -969,8 +969,8 @@ follow_ssa_edge_binary (struct loop *loo
(loop->num,
chrec_convert (type, evol, at_stmt),
code, rhs1, at_stmt);
- res = follow_ssa_edge
- (loop, SSA_NAME_DEF_STMT (rhs0), halting_phi, &evol, limit);
+ res = follow_ssa_edge_expr
+ (loop, at_stmt, rhs0, halting_phi, &evol, limit);
if (res == t_true)
*evolution_of_loop = evol;
else if (res == t_false)
@@ -979,8 +979,8 @@ follow_ssa_edge_binary (struct loop *loo
(loop->num,
chrec_convert (type, *evolution_of_loop, at_stmt),
code, rhs0, at_stmt);
- res = follow_ssa_edge
- (loop, SSA_NAME_DEF_STMT (rhs1), halting_phi,
+ res = follow_ssa_edge_expr
+ (loop, at_stmt, rhs1, halting_phi,
evolution_of_loop, limit);
if (res == t_true)
;
@@ -1000,8 +1000,8 @@ follow_ssa_edge_binary (struct loop *loo
(loop->num, chrec_convert (type, *evolution_of_loop,
at_stmt),
code, rhs1, at_stmt);
- res = follow_ssa_edge
- (loop, SSA_NAME_DEF_STMT (rhs0), halting_phi,
+ res = follow_ssa_edge_expr
+ (loop, at_stmt, rhs0, halting_phi,
evolution_of_loop, limit);
if (res == t_true)
;
@@ -1018,8 +1018,8 @@ follow_ssa_edge_binary (struct loop *loo
(loop->num, chrec_convert (type, *evolution_of_loop,
at_stmt),
code, rhs0, at_stmt);
- res = follow_ssa_edge
- (loop, SSA_NAME_DEF_STMT (rhs1), halting_phi,
+ res = follow_ssa_edge_expr
+ (loop, at_stmt, rhs1, halting_phi,
evolution_of_loop, limit);
if (res == t_true)
;
@@ -1050,8 +1050,8 @@ follow_ssa_edge_binary (struct loop *loo
*evolution_of_loop = add_to_evolution
(loop->num, chrec_convert (type, *evolution_of_loop, at_stmt),
MINUS_EXPR, rhs1, at_stmt);
- res = follow_ssa_edge (loop, SSA_NAME_DEF_STMT (rhs0), halting_phi,
- evolution_of_loop, limit);
+ res = follow_ssa_edge_expr (loop, at_stmt, rhs0, halting_phi,
+ evolution_of_loop, limit);
if (res == t_true)
;
else if (res == t_dont_know)
@@ -1071,140 +1071,6 @@ follow_ssa_edge_binary (struct loop *loo
return res;
}
-/* Follow the ssa edge into the expression EXPR.
- Return true if the strongly connected component has been found. */
-
-static t_bool
-follow_ssa_edge_expr (struct loop *loop, gimple *at_stmt, tree expr,
- gphi *halting_phi, tree *evolution_of_loop,
- int limit)
-{
- enum tree_code code = TREE_CODE (expr);
- tree type = TREE_TYPE (expr), rhs0, rhs1;
- t_bool res;
-
- /* The EXPR is one of the following cases:
- - an SSA_NAME,
- - an INTEGER_CST,
- - a PLUS_EXPR,
- - a POINTER_PLUS_EXPR,
- - a MINUS_EXPR,
- - an ASSERT_EXPR,
- - other cases are not yet handled. */
-
- switch (code)
- {
- CASE_CONVERT:
- /* This assignment is under the form "a_1 = (cast) rhs. */
- res = follow_ssa_edge_expr (loop, at_stmt, TREE_OPERAND (expr, 0),
- halting_phi, evolution_of_loop, limit);
- *evolution_of_loop = chrec_convert (type, *evolution_of_loop, at_stmt);
- break;
-
- case INTEGER_CST:
- /* This assignment is under the form "a_1 = 7". */
- res = t_false;
- break;
-
- case SSA_NAME:
- /* This assignment is under the form: "a_1 = b_2". */
- res = follow_ssa_edge
- (loop, SSA_NAME_DEF_STMT (expr), halting_phi, evolution_of_loop, limit);
- break;
-
- case POINTER_PLUS_EXPR:
- case PLUS_EXPR:
- case MINUS_EXPR:
- /* This case is under the form "rhs0 +- rhs1". */
- rhs0 = TREE_OPERAND (expr, 0);
- rhs1 = TREE_OPERAND (expr, 1);
- type = TREE_TYPE (rhs0);
- STRIP_USELESS_TYPE_CONVERSION (rhs0);
- STRIP_USELESS_TYPE_CONVERSION (rhs1);
- res = follow_ssa_edge_binary (loop, at_stmt, type, rhs0, code, rhs1,
- halting_phi, evolution_of_loop, limit);
- break;
-
- case ADDR_EXPR:
- /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */
- if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF)
- {
- expr = TREE_OPERAND (expr, 0);
- rhs0 = TREE_OPERAND (expr, 0);
- rhs1 = TREE_OPERAND (expr, 1);
- type = TREE_TYPE (rhs0);
- STRIP_USELESS_TYPE_CONVERSION (rhs0);
- STRIP_USELESS_TYPE_CONVERSION (rhs1);
- res = follow_ssa_edge_binary (loop, at_stmt, type,
- rhs0, POINTER_PLUS_EXPR, rhs1,
- halting_phi, evolution_of_loop, limit);
- }
- else
- res = t_false;
- break;
-
- case ASSERT_EXPR:
- /* This assignment is of the form: "a_1 = ASSERT_EXPR <a_2, ...>"
- It must be handled as a copy assignment of the form a_1 = a_2. */
- rhs0 = ASSERT_EXPR_VAR (expr);
- if (TREE_CODE (rhs0) == SSA_NAME)
- res = follow_ssa_edge (loop, SSA_NAME_DEF_STMT (rhs0),
- halting_phi, evolution_of_loop, limit);
- else
- res = t_false;
- break;
-
- default:
- res = t_false;
- break;
- }
-
- return res;
-}
-
-/* Follow the ssa edge into the right hand side of an assignment STMT.
- Return true if the strongly connected component has been found. */
-
-static t_bool
-follow_ssa_edge_in_rhs (struct loop *loop, gimple *stmt,
- gphi *halting_phi, tree *evolution_of_loop,
- int limit)
-{
- enum tree_code code = gimple_assign_rhs_code (stmt);
- tree type = gimple_expr_type (stmt), rhs1, rhs2;
- t_bool res;
-
- switch (code)
- {
- CASE_CONVERT:
- /* This assignment is under the form "a_1 = (cast) rhs. */
- res = follow_ssa_edge_expr (loop, stmt, gimple_assign_rhs1 (stmt),
- halting_phi, evolution_of_loop, limit);
- *evolution_of_loop = chrec_convert (type, *evolution_of_loop, stmt);
- break;
-
- case POINTER_PLUS_EXPR:
- case PLUS_EXPR:
- case MINUS_EXPR:
- rhs1 = gimple_assign_rhs1 (stmt);
- rhs2 = gimple_assign_rhs2 (stmt);
- type = TREE_TYPE (rhs1);
- res = follow_ssa_edge_binary (loop, stmt, type, rhs1, code, rhs2,
- halting_phi, evolution_of_loop, limit);
- break;
-
- default:
- if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
- res = follow_ssa_edge_expr (loop, stmt, gimple_assign_rhs1 (stmt),
- halting_phi, evolution_of_loop, limit);
- else
- res = t_false;
- break;
- }
-
- return res;
-}
-
/* Checks whether the I-th argument of a PHI comes from a backedge. */
static bool
@@ -1244,8 +1110,8 @@ follow_ssa_edge_in_condition_phi_branch
if (TREE_CODE (branch) == SSA_NAME)
{
*evolution_of_branch = init_cond;
- return follow_ssa_edge (loop, SSA_NAME_DEF_STMT (branch), halting_phi,
- evolution_of_branch, limit);
+ return follow_ssa_edge_expr (loop, condition_phi, branch, halting_phi,
+ evolution_of_branch, limit);
}
/* This case occurs when one of the condition branches sets
@@ -1352,65 +1218,158 @@ follow_ssa_edge_inner_loop_phi (struct l
evolution_of_loop, limit);
}
-/* Follow an SSA edge from a loop-phi-node to itself, constructing a
- path that is analyzed on the return walk. */
+/* Follow the ssa edge into the expression EXPR.
+ Return true if the strongly connected component has been found. */
static t_bool
-follow_ssa_edge (struct loop *loop, gimple *def, gphi *halting_phi,
- tree *evolution_of_loop, int limit)
+follow_ssa_edge_expr (struct loop *loop, gimple *at_stmt, tree expr,
+ gphi *halting_phi, tree *evolution_of_loop,
+ int limit)
{
- struct loop *def_loop;
+ enum tree_code code;
+ tree type, rhs0, rhs1 = NULL_TREE;
- if (gimple_nop_p (def))
- return t_false;
+ /* The EXPR is one of the following cases:
+ - an SSA_NAME,
+ - an INTEGER_CST,
+ - a PLUS_EXPR,
+ - a POINTER_PLUS_EXPR,
+ - a MINUS_EXPR,
+ - an ASSERT_EXPR,
+ - other cases are not yet handled. */
- /* Give up if the path is longer than the MAX that we allow. */
- if (limit > PARAM_VALUE (PARAM_SCEV_MAX_EXPR_COMPLEXITY))
- return t_dont_know;
-
- def_loop = loop_containing_stmt (def);
-
- switch (gimple_code (def))
- {
- case GIMPLE_PHI:
- if (!loop_phi_node_p (def))
- /* DEF is a condition-phi-node. Follow the branches, and
- record their evolutions. Finally, merge the collected
- information and set the approximation to the main
- variable. */
- return follow_ssa_edge_in_condition_phi
- (loop, as_a <gphi *> (def), halting_phi, evolution_of_loop,
- limit);
-
- /* When the analyzed phi is the halting_phi, the
- depth-first search is over: we have found a path from
- the halting_phi to itself in the loop. */
- if (def == halting_phi)
- return t_true;
+ /* For SSA_NAME look at the definition statement, handling
+ PHI nodes and otherwise expand appropriately for the expression
+ handling below. */
+ if (TREE_CODE (expr) == SSA_NAME)
+ {
+ gimple *def = SSA_NAME_DEF_STMT (expr);
- /* Otherwise, the evolution of the HALTING_PHI depends
- on the evolution of another loop-phi-node, i.e. the
- evolution function is a higher degree polynomial. */
- if (def_loop == loop)
+ if (gimple_nop_p (def))
return t_false;
- /* Inner loop. */
- if (flow_loop_nested_p (loop, def_loop))
- return follow_ssa_edge_inner_loop_phi
- (loop, as_a <gphi *> (def), halting_phi, evolution_of_loop,
- limit + 1);
+ /* Give up if the path is longer than the MAX that we allow. */
+ if (limit > PARAM_VALUE (PARAM_SCEV_MAX_EXPR_COMPLEXITY))
+ return t_dont_know;
- /* Outer loop. */
- return t_false;
+ if (gphi *phi = dyn_cast <gphi *>(def))
+ {
+ if (!loop_phi_node_p (phi))
+ /* DEF is a condition-phi-node. Follow the branches, and
+ record their evolutions. Finally, merge the collected
+ information and set the approximation to the main
+ variable. */
+ return follow_ssa_edge_in_condition_phi
+ (loop, phi, halting_phi, evolution_of_loop, limit);
+
+ /* When the analyzed phi is the halting_phi, the
+ depth-first search is over: we have found a path from
+ the halting_phi to itself in the loop. */
+ if (phi == halting_phi)
+ return t_true;
+
+ /* Otherwise, the evolution of the HALTING_PHI depends
+ on the evolution of another loop-phi-node, i.e. the
+ evolution function is a higher degree polynomial. */
+ class loop *def_loop = loop_containing_stmt (def);
+ if (def_loop == loop)
+ return t_false;
+
+ /* Inner loop. */
+ if (flow_loop_nested_p (loop, def_loop))
+ return follow_ssa_edge_inner_loop_phi
+ (loop, phi, halting_phi, evolution_of_loop,
+ limit + 1);
- case GIMPLE_ASSIGN:
- return follow_ssa_edge_in_rhs (loop, def, halting_phi,
- evolution_of_loop, limit);
+ /* Outer loop. */
+ return t_false;
+ }
- default:
/* At this level of abstraction, the program is just a set
of GIMPLE_ASSIGNs and PHI_NODEs. In principle there is no
- other node to be handled. */
+ other def to be handled. */
+ if (!is_gimple_assign (def))
+ return t_false;
+
+ code = gimple_assign_rhs_code (def);
+ switch (get_gimple_rhs_class (code))
+ {
+ case GIMPLE_BINARY_RHS:
+ rhs0 = gimple_assign_rhs1 (def);
+ rhs1 = gimple_assign_rhs2 (def);
+ break;
+ case GIMPLE_UNARY_RHS:
+ case GIMPLE_SINGLE_RHS:
+ rhs0 = gimple_assign_rhs1 (def);
+ break;
+ default:
+ return t_false;
+ }
+ type = TREE_TYPE (gimple_assign_lhs (def));
+ at_stmt = def;
+ }
+ else
+ {
+ code = TREE_CODE (expr);
+ type = TREE_TYPE (expr);
+ switch (code)
+ {
+ CASE_CONVERT:
+ rhs0 = TREE_OPERAND (expr, 0);
+ break;
+ case POINTER_PLUS_EXPR:
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ rhs0 = TREE_OPERAND (expr, 0);
+ rhs1 = TREE_OPERAND (expr, 1);
+ break;
+ default:
+ rhs0 = expr;
+ }
+ }
+
+ switch (code)
+ {
+ CASE_CONVERT:
+ {
+ /* This assignment is under the form "a_1 = (cast) rhs. */
+ t_bool res = follow_ssa_edge_expr (loop, at_stmt, rhs0, halting_phi,
+ evolution_of_loop, limit);
+ *evolution_of_loop = chrec_convert (type, *evolution_of_loop, at_stmt);
+ return res;
+ }
+
+ case INTEGER_CST:
+ /* This assignment is under the form "a_1 = 7". */
+ return t_false;
+
+ case ADDR_EXPR:
+ {
+ /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */
+ if (TREE_CODE (TREE_OPERAND (rhs0, 0)) != MEM_REF)
+ return t_false;
+ tree mem = TREE_OPERAND (rhs0, 0);
+ rhs0 = TREE_OPERAND (mem, 0);
+ rhs1 = TREE_OPERAND (mem, 1);
+ code = POINTER_PLUS_EXPR;
+ }
+ /* Fallthru. */
+ case POINTER_PLUS_EXPR:
+ case PLUS_EXPR:
+ case MINUS_EXPR:
+ /* This case is under the form "rhs0 +- rhs1". */
+ STRIP_USELESS_TYPE_CONVERSION (rhs0);
+ STRIP_USELESS_TYPE_CONVERSION (rhs1);
+ return follow_ssa_edge_binary (loop, at_stmt, type, rhs0, code, rhs1,
+ halting_phi, evolution_of_loop, limit);
+
+ case ASSERT_EXPR:
+ /* This assignment is of the form: "a_1 = ASSERT_EXPR <a_2, ...>"
+ It must be handled as a copy assignment of the form a_1 = a_2. */
+ return follow_ssa_edge_expr (loop, at_stmt, ASSERT_EXPR_VAR (rhs0),
+ halting_phi, evolution_of_loop, limit);
+
+ default:
return t_false;
}
}
@@ -1504,7 +1463,6 @@ analyze_evolution_in_loop (gphi *loop_ph
for (i = 0; i < n; i++)
{
tree arg = PHI_ARG_DEF (loop_phi_node, i);
- gimple *ssa_chain;
tree ev_fn;
t_bool res;
@@ -1517,11 +1475,10 @@ analyze_evolution_in_loop (gphi *loop_ph
{
bool val = false;
- ssa_chain = SSA_NAME_DEF_STMT (arg);
-
/* Pass in the initial condition to the follow edge function. */
ev_fn = init_cond;
- res = follow_ssa_edge (loop, ssa_chain, loop_phi_node, &ev_fn, 0);
+ res = follow_ssa_edge_expr (loop, loop_phi_node, arg,
+ loop_phi_node, &ev_fn, 0);
/* If ev_fn has no evolution in the inner loop, and the
init_cond is not equal to ev_fn, then we have an
diff -Nurp a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
--- a/gcc/tree-ssa-sccvn.c 2020-10-26 18:28:58.736000000 +0800
+++ b/gcc/tree-ssa-sccvn.c 2020-10-26 18:31:45.768000000 +0800
@@ -2456,7 +2456,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree
(vuse, vr->set, vr->type, vr->operands, val);
}
/* For now handle clearing memory with partial defs. */
- else if (integer_zerop (gimple_call_arg (def_stmt, 1))
+ else if (known_eq (ref->size, maxsize)
+ && integer_zerop (gimple_call_arg (def_stmt, 1))
&& tree_to_poly_int64 (len).is_constant (&leni)
&& offset.is_constant (&offseti)
&& offset2.is_constant (&offset2i)
@@ -2494,7 +2495,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree
return vn_reference_lookup_or_insert_for_pieces
(vuse, vr->set, vr->type, vr->operands, val);
}
- else if (maxsize.is_constant (&maxsizei)
+ else if (known_eq (ref->size, maxsize)
+ && maxsize.is_constant (&maxsizei)
&& maxsizei % BITS_PER_UNIT == 0
&& offset.is_constant (&offseti)
&& offseti % BITS_PER_UNIT == 0
diff -Nurp a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
--- a/gcc/tree-vect-data-refs.c 2020-10-26 18:28:58.792000000 +0800
+++ b/gcc/tree-vect-data-refs.c 2020-10-26 18:31:56.512000000 +0800
@@ -1045,7 +1045,7 @@ vect_compute_data_ref_alignment (dr_vec_
if (tree_int_cst_sgn (drb->step) < 0)
/* PLUS because STEP is negative. */
misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
- * TREE_INT_CST_LOW (drb->step));
+ * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
unsigned int const_misalignment;
if (!known_misalignment (misalignment, vect_align_c, &const_misalignment))
diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
--- a/gcc/tree-vect-loop.c 2020-10-26 18:28:58.728000000 +0800
+++ b/gcc/tree-vect-loop.c 2020-10-26 18:31:53.584000000 +0800
@@ -1850,7 +1850,10 @@ vect_dissolve_slp_only_groups (loop_vec_
DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo;
DR_GROUP_NEXT_ELEMENT (vinfo) = NULL;
DR_GROUP_SIZE (vinfo) = 1;
- DR_GROUP_GAP (vinfo) = group_size - 1;
+ if (STMT_VINFO_STRIDED_P (first_element))
+ DR_GROUP_GAP (vinfo) = 0;
+ else
+ DR_GROUP_GAP (vinfo) = group_size - 1;
vinfo = next;
}
}
@@ -4516,18 +4519,26 @@ vect_create_epilog_for_reduction (stmt_v
zeroes. */
if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION)
{
+ auto_vec<std::pair<tree, bool>, 2> ccompares;
stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info);
cond_info = vect_stmt_to_vectorize (cond_info);
- while (gimple_assign_rhs_code (cond_info->stmt) != COND_EXPR)
+ while (cond_info != reduc_info)
{
+ if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR)
+ {
+ gimple *vec_stmt = STMT_VINFO_VEC_STMT (cond_info)->stmt;
+ gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
+ ccompares.safe_push
+ (std::make_pair (unshare_expr (gimple_assign_rhs1 (vec_stmt)),
+ STMT_VINFO_REDUC_IDX (cond_info) == 2));
+ }
cond_info
= loop_vinfo->lookup_def (gimple_op (cond_info->stmt,
1 + STMT_VINFO_REDUC_IDX
(cond_info)));
cond_info = vect_stmt_to_vectorize (cond_info);
}
- gimple *vec_stmt = STMT_VINFO_VEC_STMT (cond_info)->stmt;
- gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
+ gcc_assert (ccompares.length () != 0);
tree indx_before_incr, indx_after_incr;
poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
@@ -4569,37 +4580,35 @@ vect_create_epilog_for_reduction (stmt_v
add_phi_arg (as_a <gphi *> (new_phi), vec_zero,
loop_preheader_edge (loop), UNKNOWN_LOCATION);
- /* Now take the condition from the loops original cond_expr
- (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for
+ /* Now take the condition from the loops original cond_exprs
+ and produce a new cond_exprs (INDEX_COND_EXPR) which for
every match uses values from the induction variable
(INDEX_BEFORE_INCR) otherwise uses values from the phi node
(NEW_PHI_TREE).
Finally, we update the phi (NEW_PHI_TREE) to take the value of
the new cond_expr (INDEX_COND_EXPR). */
-
- /* Duplicate the condition from vec_stmt. */
- tree ccompare = unshare_expr (gimple_assign_rhs1 (vec_stmt));
-
- /* Create a conditional, where the condition is taken from vec_stmt
- (CCOMPARE). The then and else values mirror the main VEC_COND_EXPR:
- the reduction phi corresponds to NEW_PHI_TREE and the new values
- correspond to INDEX_BEFORE_INCR. */
- gcc_assert (STMT_VINFO_REDUC_IDX (cond_info) >= 1);
- tree index_cond_expr;
- if (STMT_VINFO_REDUC_IDX (cond_info) == 2)
- index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
- ccompare, indx_before_incr, new_phi_tree);
- else
- index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
- ccompare, new_phi_tree, indx_before_incr);
- induction_index = make_ssa_name (cr_index_vector_type);
- gimple *index_condition = gimple_build_assign (induction_index,
- index_cond_expr);
- gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT);
- stmt_vec_info index_vec_info = loop_vinfo->add_stmt (index_condition);
+ gimple_seq stmts = NULL;
+ for (int i = ccompares.length () - 1; i != -1; --i)
+ {
+ tree ccompare = ccompares[i].first;
+ if (ccompares[i].second)
+ new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR,
+ cr_index_vector_type,
+ ccompare,
+ indx_before_incr, new_phi_tree);
+ else
+ new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR,
+ cr_index_vector_type,
+ ccompare,
+ new_phi_tree, indx_before_incr);
+ }
+ gsi_insert_seq_before (&incr_gsi, stmts, GSI_SAME_STMT);
+ stmt_vec_info index_vec_info
+ = loop_vinfo->add_stmt (SSA_NAME_DEF_STMT (new_phi_tree));
STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
/* Update the phi with the vec cond. */
+ induction_index = new_phi_tree;
add_phi_arg (as_a <gphi *> (new_phi), induction_index,
loop_latch_edge (loop), UNKNOWN_LOCATION);
}

View File

@ -1,3 +1,9 @@
This backport contains 1 patch from the GCC mainstream tree.
The commit ID of the patch is listed below.
0001-re-PR-tree-optimization-92461-ICE-verify_ssa-failed-.patch
830d1b18526dd1f085e8a2e1467a6dde18fc6434
diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr92461.c b/gcc/testsuite/gcc.dg/torture/pr92461.c
--- a/gcc/testsuite/gcc.dg/torture/pr92461.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/torture/pr92461.c 2020-07-28 19:48:09.324000000 +0800

View File

@ -1,3 +1,9 @@
This backport contains 1 patch from the GCC mainstream tree.
The commit ID of the patch is listed below.
0001-tree-optimization-96698-fix-ICE-when-vectorizing-nes.patch
2130efe6ac7beba72d289e3dd145daa10aeaed54
diff -uprN a/gcc/testsuite/gcc.dg/vect/pr96698.c b/gcc/testsuite/gcc.dg/vect/pr96698.c
--- a/gcc/testsuite/gcc.dg/vect/pr96698.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/vect/pr96698.c 2020-08-27 17:53:24.396000000 +0800

View File

@ -0,0 +1,152 @@
This backport contains 1 patch from the GCC mainstream tree.
The commit ID of the patch is listed below.
0001-vect-PR-92351-When-peeling-for-alignment-make-alignm.patch
4e9d58d16767b1bc686f0c4b3bd2da25dc71e8f3
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-2-epilogues.c b/gcc/testsuite/gcc.dg/vect/vect-peel-2-epilogues.c
new file mode 100644
index 00000000000..c06fa442faf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-2-epilogues.c
@@ -0,0 +1,3 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "vect-peel-2-src.c"
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-2-src.c b/gcc/testsuite/gcc.dg/vect/vect-peel-2-src.c
new file mode 100644
index 00000000000..f6fc134c870
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-2-src.c
@@ -0,0 +1,48 @@
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 128
+
+/* unaligned store. */
+
+int ib[N+7];
+
+__attribute__ ((noinline))
+int main1 ()
+{
+ int i;
+ int ia[N+1];
+
+ /* The store is aligned and the loads are misaligned with the same
+ misalignment. Cost model is disabled. If misaligned stores are supported,
+ we peel according to the loads to align them. */
+ for (i = 0; i <= N; i++)
+ {
+ ia[i] = ib[i+2] + ib[i+6];
+ }
+
+ /* check results: */
+ for (i = 1; i <= N; i++)
+ {
+ if (ia[i] != ib[i+2] + ib[i+6])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i <= N+6; i++)
+ {
+ asm volatile ("" : "+r" (i));
+ ib[i] = i;
+ }
+
+ return main1 ();
+}
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-2.c b/gcc/testsuite/gcc.dg/vect/vect-peel-2.c
index b6061c3b855..65e70bd4417 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-peel-2.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-2.c
@@ -1,52 +1,8 @@
/* { dg-require-effective-target vect_int } */
+/* Disabling epilogues until we find a better way to deal with scans. */
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
-#include <stdarg.h>
-#include "tree-vect.h"
-
-#define N 128
-
-/* unaligned store. */
-
-int ib[N+7];
-
-__attribute__ ((noinline))
-int main1 ()
-{
- int i;
- int ia[N+1];
-
- /* The store is aligned and the loads are misaligned with the same
- misalignment. Cost model is disabled. If misaligned stores are supported,
- we peel according to the loads to align them. */
- for (i = 0; i <= N; i++)
- {
- ia[i] = ib[i+2] + ib[i+6];
- }
-
- /* check results: */
- for (i = 1; i <= N; i++)
- {
- if (ia[i] != ib[i+2] + ib[i+6])
- abort ();
- }
-
- return 0;
-}
-
-int main (void)
-{
- int i;
-
- check_vect ();
-
- for (i = 0; i <= N+6; i++)
- {
- asm volatile ("" : "+r" (i));
- ib[i] = i;
- }
-
- return main1 ();
-}
+#include "vect-peel-2-src.c"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { { vect_element_align } && { vect_aligned_arrays } } } } } */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 36639b697f1..88f14e73d65 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -938,6 +938,18 @@ vect_compute_data_ref_alignment (dr_vec_info *dr_info)
= exact_div (vect_calculate_target_alignment (dr_info), BITS_PER_UNIT);
DR_TARGET_ALIGNMENT (dr_info) = vector_alignment;
+ /* If the main loop has peeled for alignment we have no way of knowing
+ whether the data accesses in the epilogues are aligned. We can't at
+ compile time answer the question whether we have entered the main loop or
+ not. Fixes PR 92351. */
+ if (loop_vinfo)
+ {
+ loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
+ if (orig_loop_vinfo
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
+ return;
+ }
+
unsigned HOST_WIDE_INT vect_align_c;
if (!vector_alignment.is_constant (&vect_align_c))
return;

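To make the guard above concrete, here is a hedged sketch of the situation it protects against (an illustrative example only, not from the patch): when the main vectorized loop peels a runtime-determined number of scalar iterations to align a data reference, the vectorized epilogue starts at an offset unknown at compile time, so its accesses must not inherit the main loop's alignment assumptions.

/* Hypothetical illustration for PR 92351: the main loop may peel
   0..VF-1 scalar iterations to align `a'.  How many were peeled is
   only known at run time, so the vectorized epilogue loop cannot
   assume its accesses to `a' are aligned.  */
void
scale (float *a, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = a[i] * 2.0f;
}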
View File

@ -0,0 +1,30 @@
diff -uprN a/gcc/lra.c b/gcc/lra.c
--- a/gcc/lra.c 2020-12-14 15:26:36.331633230 +0800
+++ b/gcc/lra.c 2020-12-15 18:56:33.699633230 +0800
@@ -507,6 +507,26 @@ lra_emit_move (rtx x, rtx y)
data. */
if (old != max_reg_num ())
expand_reg_data (old);
+ while (insn != NULL)
+ {
+ if (GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_SRC (PATTERN (insn))) == LO_SUM
+ && GET_CODE (SET_DEST (PATTERN (insn))) == REG
+ && strcmp (insn_data[recog_memoized (insn)].name,
+ "add_losym_di") == 0)
+ {
+ rtx add_losym_dest = SET_DEST (PATTERN (insn));
+ for (int i = (int) max_reg_num () - 1; i >= old; i--)
+ {
+ if (regno_reg_rtx[i] == add_losym_dest)
+ {
+ setup_reg_classes (i, GENERAL_REGS,
+ NO_REGS, GENERAL_REGS);
+ }
+ }
+ }
+ insn = PREV_INSN (insn);
+ }
return;
}
lra_emit_add (x, XEXP (y, 0), XEXP (y, 1));

View File

@ -0,0 +1,115 @@
This backport contains 1 patch from the GCC mainstream tree.
The commit ID of the patch is listed below.
0001-aarch64-Fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch:
91d80cf4bd2827dd9c40fe6a7c719c909d79083d
diff -Nurp a/gcc/testsuite/gcc.target/aarch64/pr96757.c b/gcc/testsuite/gcc.target/aarch64/pr96757.c
--- a/gcc/testsuite/gcc.target/aarch64/pr96757.c 1969-12-31 19:00:00.000000000 -0500
+++ b/gcc/testsuite/gcc.target/aarch64/pr96757.c 2020-10-12 08:32:12.192000000 -0400
@@ -0,0 +1,23 @@
+/* PR target/96757 */
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+
+short
+fun1(short i, short j)
+{
+ return i * j;
+}
+
+int
+fun(int a, int b, int c)
+{
+ int *v, z, k, m;
+ short f, d;
+ for (int i=0; i<c; i++)
+ {
+ f= 4 <= d;
+ k= a > m;
+ z = f > k;
+ *v += fun1(z,b);
+ }
+}
diff -Nurp a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
--- a/gcc/tree-vect-patterns.c 2020-10-12 08:05:18.924000000 -0400
+++ b/gcc/tree-vect-patterns.c 2020-10-12 08:50:56.996000000 -0400
@@ -3917,6 +3917,8 @@ vect_recog_mask_conversion_pattern (stmt
tree vectype1, vectype2;
stmt_vec_info pattern_stmt_info;
vec_info *vinfo = stmt_vinfo->vinfo;
+ tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
+ tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
/* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
if (is_gimple_call (last_stmt)
@@ -4016,9 +4018,37 @@ vect_recog_mask_conversion_pattern (stmt
it is better for b1 and b2 to use the mask type associated
with int elements rather bool (byte) elements. */
- rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
- if (!rhs1_type)
- rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
+ rhs1_op0 = TREE_OPERAND (rhs1, 0);
+ rhs1_op1 = TREE_OPERAND (rhs1, 1);
+ if (!rhs1_op0 || !rhs1_op1)
+ return NULL;
+ rhs1_op0_type = search_type_for_mask (rhs1_op0, vinfo);
+ rhs1_op1_type = search_type_for_mask (rhs1_op1, vinfo);
+
+ if (!rhs1_op0_type)
+ rhs1_type = TREE_TYPE (rhs1_op0);
+ else if (!rhs1_op1_type)
+ rhs1_type = TREE_TYPE (rhs1_op1);
+ else if (TYPE_PRECISION (rhs1_op0_type)
+ != TYPE_PRECISION (rhs1_op1_type))
+ {
+ int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
+ - (int) TYPE_PRECISION (TREE_TYPE (lhs));
+ int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
+ - (int) TYPE_PRECISION (TREE_TYPE (lhs));
+ if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
+ {
+ if (abs (tmp0) > abs (tmp1))
+ rhs1_type = rhs1_op1_type;
+ else
+ rhs1_type = rhs1_op0_type;
+ }
+ else
+ rhs1_type = build_nonstandard_integer_type
+ (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
+ }
+ else
+ rhs1_type = rhs1_op0_type;
}
else
return NULL;
@@ -4036,8 +4066,8 @@ vect_recog_mask_conversion_pattern (stmt
name from the outset. */
if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
TYPE_VECTOR_SUBPARTS (vectype2))
- && (TREE_CODE (rhs1) == SSA_NAME
- || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
+ && !rhs1_op0_type
+ && !rhs1_op1_type)
return NULL;
/* If rhs1 is invariant and we can promote it leave the COND_EXPR
@@ -4069,7 +4099,16 @@ vect_recog_mask_conversion_pattern (stmt
if (TREE_CODE (rhs1) != SSA_NAME)
{
tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
- pattern_stmt = gimple_build_assign (tmp, rhs1);
+ if (rhs1_op0_type
+ && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
+ rhs1_op0 = build_mask_conversion (rhs1_op0,
+ vectype2, stmt_vinfo);
+ if (rhs1_op1_type
+ && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
+ rhs1_op1 = build_mask_conversion (rhs1_op1,
+ vectype2, stmt_vinfo);
+ pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
+ rhs1_op0, rhs1_op1);
rhs1 = tmp;
append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
}

View File

@ -0,0 +1,301 @@
This backport contains 1 patch from the GCC mainstream tree.
The commit ID of the patch is listed below.
0001-re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch
946732df902dbb23dd44abe97fea41e154e6e5f9
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 3ce22395c65..12d6dc0cb7e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5927,16 +5927,16 @@
(set_attr "btver2_decode" "vector")
(set_attr "mode" "OI")])
-(define_insn "sse2_cvtpd2dq<mask_name>"
+(define_insn "sse2_cvtpd2dq"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(vec_concat:V4SI
(unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
UNSPEC_FIX_NOTRUNC)
(const_vector:V2SI [(const_int 0) (const_int 0)])))]
- "TARGET_SSE2 && <mask_avx512vl_condition>"
+ "TARGET_SSE2"
{
if (TARGET_AVX)
- return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
+ return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
else
return "cvtpd2dq\t{%1, %0|%0, %1}";
}
@@ -5949,6 +5949,38 @@
(set_attr "athlon_decode" "vector")
(set_attr "bdver1_decode" "double")])
+(define_insn "sse2_cvtpd2dq_mask"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_concat:V4SI
+ (vec_merge:V2SI
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
+ UNSPEC_FIX_NOTRUNC)
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vcvtpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_cvtpd2dq_mask_1"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_concat:V4SI
+ (vec_merge:V2SI
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
+ UNSPEC_FIX_NOTRUNC)
+ (const_vector:V2SI [(const_int 0) (const_int 0)])
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vcvtpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
;; For ufix_notrunc* insn patterns
(define_mode_attr pd2udqsuff
[(V8DF "") (V4DF "{y}")])
@@ -5964,15 +5996,49 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "ufix_notruncv2dfv2si2<mask_name>"
+(define_insn "ufix_notruncv2dfv2si2"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(vec_concat:V4SI
(unspec:V2SI
[(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
- UNSPEC_UNSIGNED_FIX_NOTRUNC)
+ UNSPEC_UNSIGNED_FIX_NOTRUNC)
(const_vector:V2SI [(const_int 0) (const_int 0)])))]
"TARGET_AVX512VL"
- "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ "vcvtpd2udq{x}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ufix_notruncv2dfv2si2_mask"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_concat:V4SI
+ (vec_merge:V2SI
+ (unspec:V2SI
+ [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
+ UNSPEC_UNSIGNED_FIX_NOTRUNC)
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vcvtpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*ufix_notruncv2dfv2si2_mask_1"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_concat:V4SI
+ (vec_merge:V2SI
+ (unspec:V2SI
+ [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
+ UNSPEC_UNSIGNED_FIX_NOTRUNC)
+ (const_vector:V2SI [(const_int 0) (const_int 0)])
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vcvtpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
@@ -5987,13 +6053,43 @@
(set_attr "prefix" "evex")
(set_attr "mode" "OI")])
-(define_insn "ufix_truncv2dfv2si2<mask_name>"
+(define_insn "ufix_truncv2dfv2si2"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(vec_concat:V4SI
(unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
(const_vector:V2SI [(const_int 0) (const_int 0)])))]
"TARGET_AVX512VL"
- "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "ufix_truncv2dfv2si2_mask"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_concat:V4SI
+ (vec_merge:V2SI
+ (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vcvttpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*ufix_truncv2dfv2si2_mask_1"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_concat:V4SI
+ (vec_merge:V2SI
+ (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vcvttpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
@@ -6138,15 +6234,15 @@
"TARGET_AVX"
"operands[2] = CONST0_RTX (V4SImode);")
-(define_insn "sse2_cvttpd2dq<mask_name>"
+(define_insn "sse2_cvttpd2dq"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(vec_concat:V4SI
(fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
(const_vector:V2SI [(const_int 0) (const_int 0)])))]
- "TARGET_SSE2 && <mask_avx512vl_condition>"
+ "TARGET_SSE2"
{
if (TARGET_AVX)
- return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
+ return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
else
return "cvttpd2dq\t{%1, %0|%0, %1}";
}
@@ -6157,6 +6253,36 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_insn "sse2_cvttpd2dq_mask"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_concat:V4SI
+ (vec_merge:V2SI
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vcvttpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_cvttpd2dq_mask_1"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_concat:V4SI
+ (vec_merge:V2SI
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
+ "TARGET_AVX512VL"
+ "vcvttpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "TI")])
+
(define_insn "sse2_cvtsd2ss<round_name>"
[(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
(vec_merge:V4SF
@@ -6276,26 +6402,28 @@
(define_expand "sse2_cvtpd2ps_mask"
[(set (match_operand:V4SF 0 "register_operand")
- (vec_merge:V4SF
- (vec_concat:V4SF
+ (vec_concat:V4SF
+ (vec_merge:V2SF
(float_truncate:V2SF
(match_operand:V2DF 1 "vector_operand"))
- (match_dup 4))
- (match_operand:V4SF 2 "register_operand")
- (match_operand:QI 3 "register_operand")))]
+ (vec_select:V2SF
+ (match_operand:V4SF 2 "nonimm_or_0_operand")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 3 "register_operand"))
+ (match_dup 4)))]
"TARGET_SSE2"
"operands[4] = CONST0_RTX (V2SFmode);")
-(define_insn "*sse2_cvtpd2ps<mask_name>"
+(define_insn "*sse2_cvtpd2ps"
[(set (match_operand:V4SF 0 "register_operand" "=v")
(vec_concat:V4SF
(float_truncate:V2SF
(match_operand:V2DF 1 "vector_operand" "vBm"))
- (match_operand:V2SF 2 "const0_operand")))]
- "TARGET_SSE2 && <mask_avx512vl_condition>"
+ (match_operand:V2SF 2 "const0_operand" "C")))]
+ "TARGET_SSE2"
{
if (TARGET_AVX)
- return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
+ return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
else
return "cvtpd2ps\t{%1, %0|%0, %1}";
}
@@ -6307,6 +6435,38 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "V4SF")])
+(define_insn "*sse2_cvtpd2ps_mask"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (vec_concat:V4SF
+ (vec_merge:V2SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
+ (vec_select:V2SF
+ (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:QI 3 "register_operand" "Yk"))
+ (match_operand:V2SF 4 "const0_operand" "C")))]
+ "TARGET_AVX512VL"
+ "vcvtpd2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*sse2_cvtpd2ps_mask_1"
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
+ (vec_concat:V4SF
+ (vec_merge:V2SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
+ (match_operand:V2SF 3 "const0_operand" "C")
+ (match_operand:QI 2 "register_operand" "Yk"))
+ (match_operand:V2SF 4 "const0_operand" "C")))]
+ "TARGET_AVX512VL"
+ "vcvtpd2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "V4SF")])
+
;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
(define_mode_attr sf2dfmode
[(V8DF "V8SF") (V4DF "V4SF")])

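For reference, the masked conversion patterns spelled out above correspond to the AVX-512VL intrinsics in this hedged sketch (an illustration, not part of the patch); the point of the rewrite is that the mask applies only to the two converted lanes, while the upper two lanes of the 128-bit result stay zero.

/* Hypothetical usage of the masked 2-element double -> int conversions:
   merge-masking keeps lanes of `src' where the mask bit is clear,
   zero-masking clears them; lanes 2 and 3 of the result are zero
   in both cases.  Compile with -mavx512vl.  */
#include <immintrin.h>

__m128i
cvt_merge (__m128i src, __mmask8 k, __m128d x)
{
  return _mm_mask_cvtpd_epi32 (src, k, x);   /* sse2_cvtpd2dq_mask */
}

__m128i
cvt_zero (__mmask8 k, __m128d x)
{
  return _mm_maskz_cvtpd_epi32 (k, x);       /* *sse2_cvtpd2dq_mask_1 */
}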
View File

@ -1,3 +1,6 @@
This backport contains 1 patch from the GCC mainstream tree.
The commit ID of the patch is listed below.
AArch64-Fix-cost-of-plus-.-const_int-C.patch:
commit 835d50c66aa5bde2f354a6e63a2afa7d2f76a05a

View File

@ -0,0 +1,928 @@
This patch is a combination of the following 8 commits.
commit e944354ec05891474b0d204c6c239c04ee7b527b
Author: Robin Dapp <rdapp@linux.ibm.com>
Date: Mon Aug 26 10:18:24 2019 +0000
[PATCH 1/2] Allow folding all statements.
commit df7d46d925c7baca7bf9961aee900876d8aef225
Author: Robin Dapp <rdapp@linux.ibm.com>
Date: Mon Aug 26 10:24:44 2019 +0000
[PATCH 2/2] Add simplify rule for wrapped addition.
commit 6c14d008122fcee4157be79a60f8d6685869ad19
Author: Robin Dapp <rdapp@linux.ibm.com>
Date: Tue Aug 27 12:08:58 2019 +0000
re PR testsuite/91549 (gcc.dg/wrapped-binop-simplify.c fails starting with r274925)
commit 129bd066049f065e522990e63bb10ff92b3c018d
Author: Jakub Jelinek <jakub@redhat.com>
Date: Tue Dec 3 10:20:43 2019 +0100
re PR tree-optimization/92734 (Missing match.pd simplification done by fold_binary_loc on generic)
commit 526b4c716a340ee9464965e63eee2b9954fe21f1
Author: Jakub Jelinek <jakub@redhat.com>
Date: Wed Dec 4 10:38:48 2019 +0100
re PR tree-optimization/92734 (Missing match.pd simplification done by fold_binary_loc on generic)
commit 28fabd43d9d249134244eb9d7815917c7ae44b64
Author: Richard Biener <rguenther@suse.de>
Date: Fri Dec 6 10:25:08 2019 +0000
genmatch.c (enum tree_code): Remove CONVERT{0,1,2} and VIEW_CONVERT{0,1,2}.
commit e150da383346adc762bc904342f9877f2f071265
Author: Richard Biener <rguenther@suse.de>
Date: Fri Dec 6 11:44:27 2019 +0000
match.pd (nop_convert): Remove empty match.
commit 496f4f884716ae061f771a62e44868a32dbd502f
Author: Jakub Jelinek <jakub@redhat.com>
Date: Mon May 4 11:01:08 2020 +0200
match.pd: Decrease number of nop conversions around bitwise ops [PR94718]
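For illustration, a minimal sketch of the kind of mixed-signedness arithmetic this series lets forwprop fold away; it is condensed from the gcc.dg/tree-ssa/pr92734.c testcase added further down in this patch and is not itself part of the backport:

/* 1U - t wraps in unsigned arithmetic and the cast back to int is a
   nop conversion, so with the new nop_convert? patterns the whole
   expression simplifies to plain t.  */
int
f1 (int t)
{
  return 1 - (int) (1U - t);
}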
diff -Nurp a/gcc/genmatch.c b/gcc/genmatch.c
--- a/gcc/genmatch.c 2020-03-12 19:07:21.000000000 +0800
+++ b/gcc/genmatch.c 2020-11-24 14:49:12.792000000 +0800
@@ -224,12 +224,6 @@ output_line_directive (FILE *f, location
#define DEFTREECODE(SYM, STRING, TYPE, NARGS) SYM,
enum tree_code {
#include "tree.def"
-CONVERT0,
-CONVERT1,
-CONVERT2,
-VIEW_CONVERT0,
-VIEW_CONVERT1,
-VIEW_CONVERT2,
MAX_TREE_CODES
};
#undef DEFTREECODE
@@ -695,11 +689,12 @@ struct expr : public operand
expr (id_base *operation_, location_t loc, bool is_commutative_ = false)
: operand (OP_EXPR, loc), operation (operation_),
ops (vNULL), expr_type (NULL), is_commutative (is_commutative_),
- is_generic (false), force_single_use (false) {}
+ is_generic (false), force_single_use (false), opt_grp (0) {}
expr (expr *e)
: operand (OP_EXPR, e->location), operation (e->operation),
ops (vNULL), expr_type (e->expr_type), is_commutative (e->is_commutative),
- is_generic (e->is_generic), force_single_use (e->force_single_use) {}
+ is_generic (e->is_generic), force_single_use (e->force_single_use),
+ opt_grp (e->opt_grp) {}
void append_op (operand *op) { ops.safe_push (op); }
/* The operator and its operands. */
id_base *operation;
@@ -714,6 +709,8 @@ struct expr : public operand
/* Whether pushing any stmt to the sequence should be conditional
on this expression having a single-use. */
bool force_single_use;
+ /* If non-zero, the group for optional handling. */
+ unsigned char opt_grp;
virtual void gen_transform (FILE *f, int, const char *, bool, int,
const char *, capture_info *,
dt_operand ** = 0, int = 0);
@@ -1079,18 +1076,17 @@ lower_commutative (simplify *s, vec<simp
}
}
-/* Strip conditional conversios using operator OPER from O and its
- children if STRIP, else replace them with an unconditional convert. */
+/* Strip conditional operations using group GRP from O and its
+ children if STRIP, else replace them with an unconditional operation. */
operand *
-lower_opt_convert (operand *o, enum tree_code oper,
- enum tree_code to_oper, bool strip)
+lower_opt (operand *o, unsigned char grp, bool strip)
{
if (capture *c = dyn_cast<capture *> (o))
{
if (c->what)
return new capture (c->location, c->where,
- lower_opt_convert (c->what, oper, to_oper, strip),
+ lower_opt (c->what, grp, strip),
c->value_match);
else
return c;
@@ -1100,36 +1096,34 @@ lower_opt_convert (operand *o, enum tree
if (!e)
return o;
- if (*e->operation == oper)
+ if (e->opt_grp == grp)
{
if (strip)
- return lower_opt_convert (e->ops[0], oper, to_oper, strip);
+ return lower_opt (e->ops[0], grp, strip);
expr *ne = new expr (e);
- ne->operation = (to_oper == CONVERT_EXPR
- ? get_operator ("CONVERT_EXPR")
- : get_operator ("VIEW_CONVERT_EXPR"));
- ne->append_op (lower_opt_convert (e->ops[0], oper, to_oper, strip));
+ ne->opt_grp = 0;
+ ne->append_op (lower_opt (e->ops[0], grp, strip));
return ne;
}
expr *ne = new expr (e);
for (unsigned i = 0; i < e->ops.length (); ++i)
- ne->append_op (lower_opt_convert (e->ops[i], oper, to_oper, strip));
+ ne->append_op (lower_opt (e->ops[i], grp, strip));
return ne;
}
-/* Determine whether O or its children uses the conditional conversion
- operator OPER. */
+/* Determine whether O or its children uses the conditional operation
+ group GRP. */
static bool
-has_opt_convert (operand *o, enum tree_code oper)
+has_opt (operand *o, unsigned char grp)
{
if (capture *c = dyn_cast<capture *> (o))
{
if (c->what)
- return has_opt_convert (c->what, oper);
+ return has_opt (c->what, grp);
else
return false;
}
@@ -1138,11 +1132,11 @@ has_opt_convert (operand *o, enum tree_c
if (!e)
return false;
- if (*e->operation == oper)
+ if (e->opt_grp == grp)
return true;
for (unsigned i = 0; i < e->ops.length (); ++i)
- if (has_opt_convert (e->ops[i], oper))
+ if (has_opt (e->ops[i], grp))
return true;
return false;
@@ -1152,34 +1146,24 @@ has_opt_convert (operand *o, enum tree_c
if required. */
static vec<operand *>
-lower_opt_convert (operand *o)
+lower_opt (operand *o)
{
vec<operand *> v1 = vNULL, v2;
v1.safe_push (o);
- enum tree_code opers[]
- = { CONVERT0, CONVERT_EXPR,
- CONVERT1, CONVERT_EXPR,
- CONVERT2, CONVERT_EXPR,
- VIEW_CONVERT0, VIEW_CONVERT_EXPR,
- VIEW_CONVERT1, VIEW_CONVERT_EXPR,
- VIEW_CONVERT2, VIEW_CONVERT_EXPR };
-
- /* Conditional converts are lowered to a pattern with the
- conversion and one without. The three different conditional
- convert codes are lowered separately. */
+ /* Conditional operations are lowered to a pattern with the
+ operation and one without. All different conditional operation
+ groups are lowered separately. */
- for (unsigned i = 0; i < sizeof (opers) / sizeof (enum tree_code); i += 2)
+ for (unsigned i = 1; i <= 10; ++i)
{
v2 = vNULL;
for (unsigned j = 0; j < v1.length (); ++j)
- if (has_opt_convert (v1[j], opers[i]))
+ if (has_opt (v1[j], i))
{
- v2.safe_push (lower_opt_convert (v1[j],
- opers[i], opers[i+1], false));
- v2.safe_push (lower_opt_convert (v1[j],
- opers[i], opers[i+1], true));
+ v2.safe_push (lower_opt (v1[j], i, false));
+ v2.safe_push (lower_opt (v1[j], i, true));
}
if (v2 != vNULL)
@@ -1197,9 +1181,9 @@ lower_opt_convert (operand *o)
the resulting multiple patterns to SIMPLIFIERS. */
static void
-lower_opt_convert (simplify *s, vec<simplify *>& simplifiers)
+lower_opt (simplify *s, vec<simplify *>& simplifiers)
{
- vec<operand *> matchers = lower_opt_convert (s->match);
+ vec<operand *> matchers = lower_opt (s->match);
for (unsigned i = 0; i < matchers.length (); ++i)
{
simplify *ns = new simplify (s->kind, s->id, matchers[i], s->result,
@@ -1543,7 +1527,7 @@ lower (vec<simplify *>& simplifiers, boo
{
auto_vec<simplify *> out_simplifiers;
for (unsigned i = 0; i < simplifiers.length (); ++i)
- lower_opt_convert (simplifiers[i], out_simplifiers);
+ lower_opt (simplifiers[i], out_simplifiers);
simplifiers.truncate (0);
for (unsigned i = 0; i < out_simplifiers.length (); ++i)
@@ -3927,7 +3911,7 @@ private:
unsigned get_internal_capture_id ();
- id_base *parse_operation ();
+ id_base *parse_operation (unsigned char &);
operand *parse_capture (operand *, bool);
operand *parse_expr ();
c_expr *parse_c_expr (cpp_ttype);
@@ -4118,47 +4102,36 @@ parser::record_operlist (location_t loc,
convert2? */
id_base *
-parser::parse_operation ()
+parser::parse_operation (unsigned char &opt_grp)
{
const cpp_token *id_tok = peek ();
+ char *alt_id = NULL;
const char *id = get_ident ();
const cpp_token *token = peek ();
- if (strcmp (id, "convert0") == 0)
- fatal_at (id_tok, "use 'convert?' here");
- else if (strcmp (id, "view_convert0") == 0)
- fatal_at (id_tok, "use 'view_convert?' here");
+ opt_grp = 0;
if (token->type == CPP_QUERY
&& !(token->flags & PREV_WHITE))
{
- if (strcmp (id, "convert") == 0)
- id = "convert0";
- else if (strcmp (id, "convert1") == 0)
- ;
- else if (strcmp (id, "convert2") == 0)
- ;
- else if (strcmp (id, "view_convert") == 0)
- id = "view_convert0";
- else if (strcmp (id, "view_convert1") == 0)
- ;
- else if (strcmp (id, "view_convert2") == 0)
- ;
- else
- fatal_at (id_tok, "non-convert operator conditionalized");
-
if (!parsing_match_operand)
fatal_at (id_tok, "conditional convert can only be used in "
"match expression");
+ if (ISDIGIT (id[strlen (id) - 1]))
+ {
+ opt_grp = id[strlen (id) - 1] - '0' + 1;
+ alt_id = xstrdup (id);
+ alt_id[strlen (id) - 1] = '\0';
+ if (opt_grp == 1)
+ fatal_at (id_tok, "use '%s?' here", alt_id);
+ }
+ else
+ opt_grp = 1;
eat_token (CPP_QUERY);
}
- else if (strcmp (id, "convert1") == 0
- || strcmp (id, "convert2") == 0
- || strcmp (id, "view_convert1") == 0
- || strcmp (id, "view_convert2") == 0)
- fatal_at (id_tok, "expected '?' after conditional operator");
- id_base *op = get_operator (id);
+ id_base *op = get_operator (alt_id ? alt_id : id);
if (!op)
- fatal_at (id_tok, "unknown operator %s", id);
-
+ fatal_at (id_tok, "unknown operator %s", alt_id ? alt_id : id);
+ if (alt_id)
+ free (alt_id);
user_id *p = dyn_cast<user_id *> (op);
if (p && p->is_oper_list)
{
@@ -4214,7 +4187,8 @@ struct operand *
parser::parse_expr ()
{
const cpp_token *token = peek ();
- expr *e = new expr (parse_operation (), token->src_loc);
+ unsigned char opt_grp;
+ expr *e = new expr (parse_operation (opt_grp), token->src_loc);
token = peek ();
operand *op;
bool is_commutative = false;
@@ -4310,6 +4284,12 @@ parser::parse_expr ()
"commutative");
}
e->expr_type = expr_type;
+ if (opt_grp != 0)
+ {
+ if (e->ops.length () != 1)
+ fatal_at (token, "only unary operations can be conditional");
+ e->opt_grp = opt_grp;
+ }
return op;
}
else if (!(token->flags & PREV_WHITE))
@@ -4692,10 +4672,6 @@ parser::parse_for (location_t)
id_base *idb = get_operator (oper, true);
if (idb == NULL)
fatal_at (token, "no such operator '%s'", oper);
- if (*idb == CONVERT0 || *idb == CONVERT1 || *idb == CONVERT2
- || *idb == VIEW_CONVERT0 || *idb == VIEW_CONVERT1
- || *idb == VIEW_CONVERT2)
- fatal_at (token, "conditional operators cannot be used inside for");
if (arity == -1)
arity = idb->nargs;
@@ -5102,12 +5078,6 @@ main (int argc, char **argv)
add_operator (SYM, # SYM, # TYPE, NARGS);
#define END_OF_BASE_TREE_CODES
#include "tree.def"
-add_operator (CONVERT0, "convert0", "tcc_unary", 1);
-add_operator (CONVERT1, "convert1", "tcc_unary", 1);
-add_operator (CONVERT2, "convert2", "tcc_unary", 1);
-add_operator (VIEW_CONVERT0, "view_convert0", "tcc_unary", 1);
-add_operator (VIEW_CONVERT1, "view_convert1", "tcc_unary", 1);
-add_operator (VIEW_CONVERT2, "view_convert2", "tcc_unary", 1);
#undef END_OF_BASE_TREE_CODES
#undef DEFTREECODE
diff -Nurp a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
--- a/gcc/gimple-loop-versioning.cc 2020-03-12 19:07:21.000000000 +0800
+++ b/gcc/gimple-loop-versioning.cc 2020-11-24 14:49:12.792000000 +0800
@@ -1264,6 +1264,12 @@ loop_versioning::record_address_fragment
continue;
}
}
+ if (CONVERT_EXPR_CODE_P (code))
+ {
+ tree op1 = gimple_assign_rhs1 (assign);
+ address->terms[i].expr = strip_casts (op1);
+ continue;
+ }
}
i += 1;
}
diff -Nurp a/gcc/match.pd b/gcc/match.pd
--- a/gcc/match.pd 2020-11-24 14:54:43.576000000 +0800
+++ b/gcc/match.pd 2020-11-24 14:49:12.792000000 +0800
@@ -97,8 +97,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(define_operator_list COND_TERNARY
IFN_COND_FMA IFN_COND_FMS IFN_COND_FNMA IFN_COND_FNMS)
-/* As opposed to convert?, this still creates a single pattern, so
- it is not a suitable replacement for convert? in all cases. */
+/* With nop_convert? combine convert? and view_convert? in one pattern
+ plus conditionalize on tree_nop_conversion_p conversions. */
(match (nop_convert @0)
(convert @0)
(if (tree_nop_conversion_p (type, TREE_TYPE (@0)))))
@@ -108,9 +108,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& known_eq (TYPE_VECTOR_SUBPARTS (type),
TYPE_VECTOR_SUBPARTS (TREE_TYPE (@0)))
&& tree_nop_conversion_p (TREE_TYPE (type), TREE_TYPE (TREE_TYPE (@0))))))
-/* This one has to be last, or it shadows the others. */
-(match (nop_convert @0)
- @0)
/* Transform likes of (char) ABS_EXPR <(int) x> into (char) ABSU_EXPR <x>
ABSU_EXPR returns unsigned absolute value of the operand and the operand
@@ -1260,7 +1257,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
We combine the above two cases by using a conditional convert. */
(for bitop (bit_and bit_ior bit_xor)
(simplify
- (bitop (convert @0) (convert? @1))
+ (bitop (convert@2 @0) (convert?@3 @1))
(if (((TREE_CODE (@1) == INTEGER_CST
&& INTEGRAL_TYPE_P (TREE_TYPE (@0))
&& int_fits_type_p (@1, TREE_TYPE (@0)))
@@ -1279,8 +1276,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|| GET_MODE_CLASS (TYPE_MODE (type)) != MODE_INT
/* Or if the precision of TO is not the same as the precision
of its mode. */
- || !type_has_mode_precision_p (type)))
- (convert (bitop @0 (convert @1))))))
+ || !type_has_mode_precision_p (type)
+ /* In GIMPLE, getting rid of 2 conversions for one new results
+ in smaller IL. */
+ || (GIMPLE
+ && TREE_CODE (@1) != INTEGER_CST
+ && tree_nop_conversion_p (type, TREE_TYPE (@0))
+ && single_use (@2)
+ && single_use (@3))))
+ (convert (bitop @0 (convert @1)))))
+ /* In GIMPLE, getting rid of 2 conversions for one new results
+ in smaller IL. */
+ (simplify
+ (convert (bitop:cs@2 (nop_convert:s @0) @1))
+ (if (GIMPLE
+ && TREE_CODE (@1) != INTEGER_CST
+ && tree_nop_conversion_p (type, TREE_TYPE (@2))
+ && types_match (type, @0))
+ (bitop @0 (convert @1)))))
(for bitop (bit_and bit_ior)
rbitop (bit_ior bit_and)
@@ -1374,7 +1387,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
/* Convert - (~A) to A + 1. */
(simplify
- (negate (nop_convert (bit_not @0)))
+ (negate (nop_convert? (bit_not @0)))
(plus (view_convert @0) { build_each_one_cst (type); }))
/* Convert ~ (A - 1) or ~ (A + -1) to -A. */
@@ -1401,7 +1414,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
/* Otherwise prefer ~(X ^ Y) to ~X ^ Y as more canonical. */
(simplify
- (bit_xor:c (nop_convert:s (bit_not:s @0)) @1)
+ (bit_xor:c (nop_convert?:s (bit_not:s @0)) @1)
(if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
(bit_not (bit_xor (view_convert @0) @1))))
@@ -1614,7 +1627,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
/* For equality, this is also true with wrapping overflow. */
(for op (eq ne)
(simplify
- (op:c (nop_convert@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1))
+ (op:c (nop_convert?@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1))
(if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
&& (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
|| TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
@@ -1623,7 +1636,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
(op @0 { build_zero_cst (TREE_TYPE (@0)); })))
(simplify
- (op:c (nop_convert@3 (pointer_plus@2 (convert1? @0) @1)) (convert2? @0))
+ (op:c (nop_convert?@3 (pointer_plus@2 (convert1? @0) @1)) (convert2? @0))
(if (tree_nop_conversion_p (TREE_TYPE (@2), TREE_TYPE (@0))
&& tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@0))
&& (CONSTANT_CLASS_P (@1) || (single_use (@2) && single_use (@3))))
@@ -1866,7 +1879,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|| !HONOR_SIGN_DEPENDENT_ROUNDING (type)))
(convert (negate @1))))
(simplify
- (negate (nop_convert (negate @1)))
+ (negate (nop_convert? (negate @1)))
(if (!TYPE_OVERFLOW_SANITIZED (type)
&& !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1)))
(view_convert @1)))
@@ -1883,20 +1896,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
/* A - (A +- B) -> -+ B */
/* A +- (B -+ A) -> +- B */
(simplify
- (minus (plus:c @0 @1) @0)
- @1)
- (simplify
- (minus (minus @0 @1) @0)
- (negate @1))
+ (minus (nop_convert1? (plus:c (nop_convert2? @0) @1)) @0)
+ (view_convert @1))
(simplify
- (plus:c (minus @0 @1) @1)
- @0)
+ (minus (nop_convert1? (minus (nop_convert2? @0) @1)) @0)
+ (if (!ANY_INTEGRAL_TYPE_P (type)
+ || TYPE_OVERFLOW_WRAPS (type))
+ (negate (view_convert @1))
+ (view_convert (negate @1))))
+ (simplify
+ (plus:c (nop_convert1? (minus @0 (nop_convert2? @1))) @1)
+ (view_convert @0))
+ (simplify
+ (minus @0 (nop_convert1? (plus:c (nop_convert2? @0) @1)))
+ (if (!ANY_INTEGRAL_TYPE_P (type)
+ || TYPE_OVERFLOW_WRAPS (type))
+ (negate (view_convert @1))
+ (view_convert (negate @1))))
(simplify
- (minus @0 (plus:c @0 @1))
- (negate @1))
- (simplify
- (minus @0 (minus @0 @1))
- @1)
+ (minus @0 (nop_convert1? (minus (nop_convert2? @0) @1)))
+ (view_convert @1))
/* (A +- B) + (C - A) -> C +- B */
/* (A + B) - (A - C) -> B + C */
/* More cases are handled with comparisons. */
@@ -1922,7 +1941,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(for inner_op (plus minus)
neg_inner_op (minus plus)
(simplify
- (outer_op (nop_convert (inner_op @0 CONSTANT_CLASS_P@1))
+ (outer_op (nop_convert? (inner_op @0 CONSTANT_CLASS_P@1))
CONSTANT_CLASS_P@2)
/* If one of the types wraps, use that one. */
(if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type))
@@ -1961,17 +1980,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
/* (CST1 - A) +- CST2 -> CST3 - A */
(for outer_op (plus minus)
(simplify
- (outer_op (minus CONSTANT_CLASS_P@1 @0) CONSTANT_CLASS_P@2)
- (with { tree cst = const_binop (outer_op, type, @1, @2); }
- (if (cst && !TREE_OVERFLOW (cst))
- (minus { cst; } @0)))))
-
- /* CST1 - (CST2 - A) -> CST3 + A */
- (simplify
- (minus CONSTANT_CLASS_P@1 (minus CONSTANT_CLASS_P@2 @0))
- (with { tree cst = const_binop (MINUS_EXPR, type, @1, @2); }
- (if (cst && !TREE_OVERFLOW (cst))
- (plus { cst; } @0))))
+ (outer_op (nop_convert? (minus CONSTANT_CLASS_P@1 @0)) CONSTANT_CLASS_P@2)
+ /* If one of the types wraps, use that one. */
+ (if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type))
+ /* If all 3 captures are CONSTANT_CLASS_P, punt, as we might recurse
+ forever if something doesn't simplify into a constant. */
+ (if (!CONSTANT_CLASS_P (@0))
+ (minus (outer_op (view_convert @1) @2) (view_convert @0)))
+ (if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
+ (view_convert (minus (outer_op @1 (view_convert @2)) @0))
+ (if (types_match (type, @0))
+ (with { tree cst = const_binop (outer_op, type, @1, @2); }
+ (if (cst && !TREE_OVERFLOW (cst))
+ (minus { cst; } @0))))))))
+
+ /* CST1 - (CST2 - A) -> CST3 + A
+ Use view_convert because it is safe for vectors and equivalent for
+ scalars. */
+ (simplify
+ (minus CONSTANT_CLASS_P@1 (nop_convert? (minus CONSTANT_CLASS_P@2 @0)))
+ /* If one of the types wraps, use that one. */
+ (if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type))
+ /* If all 3 captures are CONSTANT_CLASS_P, punt, as we might recurse
+ forever if something doesn't simplify into a constant. */
+ (if (!CONSTANT_CLASS_P (@0))
+ (plus (view_convert @0) (minus @1 (view_convert @2))))
+ (if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
+ (view_convert (plus @0 (minus (view_convert @1) @2)))
+ (if (types_match (type, @0))
+ (with { tree cst = const_binop (MINUS_EXPR, type, @1, @2); }
+ (if (cst && !TREE_OVERFLOW (cst))
+ (plus { cst; } @0)))))))
+
+/* ((T)(A)) + CST -> (T)(A + CST) */
+#if GIMPLE
+ (simplify
+ (plus (convert SSA_NAME@0) INTEGER_CST@1)
+ (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
+ && TREE_CODE (type) == INTEGER_TYPE
+ && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
+ && int_fits_type_p (@1, TREE_TYPE (@0)))
+ /* Perform binary operation inside the cast if the constant fits
+ and (A + CST)'s range does not overflow. */
+ (with
+ {
+ wi::overflow_type min_ovf = wi::OVF_OVERFLOW,
+ max_ovf = wi::OVF_OVERFLOW;
+ tree inner_type = TREE_TYPE (@0);
+
+ wide_int w1 = wide_int::from (wi::to_wide (@1), TYPE_PRECISION (inner_type),
+ TYPE_SIGN (inner_type));
+
+ wide_int wmin0, wmax0;
+ if (get_range_info (@0, &wmin0, &wmax0) == VR_RANGE)
+ {
+ wi::add (wmin0, w1, TYPE_SIGN (inner_type), &min_ovf);
+ wi::add (wmax0, w1, TYPE_SIGN (inner_type), &max_ovf);
+ }
+ }
+ (if (min_ovf == wi::OVF_NONE && max_ovf == wi::OVF_NONE)
+ (convert (plus @0 { wide_int_to_tree (TREE_TYPE (@0), w1); } )))
+ )))
+#endif
/* ~A + A -> -1 */
(simplify
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c 2020-03-12 19:07:22.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c 2020-11-24 14:49:14.568000000 +0800
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-ch2-details" } */
+/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-ch2-details" } */
int is_sorted(int *a, int n)
{
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c 2020-03-12 19:07:22.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c 2020-11-24 14:49:14.568000000 +0800
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-ch2-details --param logical-op-non-short-circuit=0" } */
+/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-ch2-details --param logical-op-non-short-circuit=0" } */
int is_sorted(int *a, int n, int m, int k)
{
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c 2020-03-12 19:07:22.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c 2020-11-24 14:49:14.568000000 +0800
@@ -19,7 +19,7 @@ int bla(void)
}
/* Since the loop is removed, there should be no addition. */
-/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" } } */
/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
/* The if from the loop header copying remains in the code. */
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c b/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c 2020-03-12 19:07:22.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c 2020-11-24 14:49:14.568000000 +0800
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-ccp -fdisable-tree-evrp -fdump-tree-vrp1" } */
+/* { dg-options "-O2 -fno-tree-ccp -fdisable-tree-evrp -fdump-tree-vrp1-details" } */
void h (void);
@@ -17,4 +17,4 @@ int g (int i, int j)
return 1;
}
-/* { dg-final { scan-tree-dump-times "Folding predicate.*to 1" 1 "vrp1" } } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified" 1 "vrp1" } } */
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr92734-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92734-2.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr92734-2.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92734-2.c 2020-11-24 14:49:14.568000000 +0800
@@ -0,0 +1,76 @@
+/* PR tree-optimization/92734 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* Verify there are no binary additions or subtractions left. There can
+ be just casts and negations. */
+/* { dg-final { scan-tree-dump-not " \[+-] " "optimized" } } */
+
+int
+f1 (int x, unsigned y)
+{
+ int a = x + y;
+ return a - x;
+}
+
+unsigned
+f2 (unsigned x, int y)
+{
+ unsigned a = (int) x + y;
+ return a - x;
+}
+
+int
+f3 (int x, unsigned y)
+{
+ int a = x - y;
+ return a - x;
+}
+
+unsigned
+f4 (unsigned x, int y)
+{
+ unsigned a = (int) x - y;
+ return a - x;
+}
+
+int
+f5 (unsigned x, int y)
+{
+ int a = x - y;
+ return a + y;
+}
+
+unsigned
+f6 (int x, unsigned y)
+{
+ unsigned a = x - (int) y;
+ return a + y;
+}
+
+int
+f7 (int x, unsigned y)
+{
+ int a = x + y;
+ return x - a;
+}
+
+unsigned
+f8 (unsigned x, int y)
+{
+ unsigned a = (int) x + y;
+ return x - a;
+}
+
+int
+f9 (int x, unsigned y)
+{
+ int a = x - y;
+ return x - a;
+}
+
+unsigned
+f10 (unsigned x, int y)
+{
+ unsigned a = (int) x - y;
+ return x - a;
+}
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr92734.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92734.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr92734.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92734.c 2020-11-24 14:49:14.568000000 +0800
@@ -0,0 +1,31 @@
+/* PR tree-optimization/92734 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop1" } */
+/* { dg-final { scan-tree-dump-times "return t_\[0-9]*\\\(D\\\);" 4 "forwprop1" } } */
+
+int
+f1 (int t)
+{
+ return 1 - (int) (1U - t);
+}
+
+int
+f2 (int t)
+{
+ int a = 7U - t;
+ return 7 - a;
+}
+
+int
+f3 (int t)
+{
+ int a = 32U - t;
+ return 32 - a;
+}
+
+int
+f4 (int t)
+{
+ int a = 32 - t;
+ return (int) (32 - (unsigned) a);
+}
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr94718-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr94718-3.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr94718-3.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr94718-3.c 2020-11-24 14:49:14.568000000 +0800
@@ -0,0 +1,45 @@
+/* PR tree-optimization/94718 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times " \\\(int\\\) " 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\\(unsigned int\\\) " 2 "optimized" } } */
+
+int
+f1 (int x, int y)
+{
+ return (int) ((unsigned) x | (unsigned) y);
+}
+
+int
+f2 (int x, int y)
+{
+ unsigned a = x;
+ unsigned b = y;
+ return a | b;
+}
+
+int
+f3 (int x, unsigned y)
+{
+ return (int) ((unsigned) x | y);
+}
+
+int
+f4 (int x, unsigned y)
+{
+ unsigned a = x;
+ return a | y;
+}
+
+unsigned
+f5 (int x, unsigned y)
+{
+ return (unsigned) (x | (int) y);
+}
+
+unsigned
+f6 (int x, unsigned y)
+{
+ int a = y;
+ return x | a;
+}
diff -Nurp a/gcc/testsuite/gcc.dg/wrapped-binop-simplify.c b/gcc/testsuite/gcc.dg/wrapped-binop-simplify.c
--- a/gcc/testsuite/gcc.dg/wrapped-binop-simplify.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/wrapped-binop-simplify.c 2020-11-24 14:49:14.484000000 +0800
@@ -0,0 +1,43 @@
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* s390*-*-* } && lp64 } } } */
+/* { dg-options "-O2 -fdump-tree-vrp2-details" } */
+/* { dg-final { scan-tree-dump-times "gimple_simplified to" 4 "vrp2" } } */
+
+void v1 (unsigned long *in, unsigned long *out, unsigned int n)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ {
+ out[i] = in[i];
+ }
+}
+
+void v2 (unsigned long *in, unsigned long *out, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ {
+ out[i] = in[i];
+ }
+}
+
+void v3 (unsigned long *in, unsigned long *out, unsigned int n)
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ {
+ out[i] = in[i];
+ }
+}
+
+void v4 (unsigned long *in, unsigned long *out, int n)
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ {
+ out[i] = in[i];
+ }
+}
diff -Nurp a/gcc/tree-ssa-propagate.c b/gcc/tree-ssa-propagate.c
--- a/gcc/tree-ssa-propagate.c 2020-11-24 14:54:42.556000000 +0800
+++ b/gcc/tree-ssa-propagate.c 2020-11-24 14:49:12.792000000 +0800
@@ -814,7 +814,6 @@ ssa_propagation_engine::ssa_propagate (v
ssa_prop_fini ();
}
-
/* Return true if STMT is of the form 'mem_ref = RHS', where 'mem_ref'
is a non-volatile pointer dereference, a structure reference or a
reference to a single _DECL. Ignore volatile memory references
@@ -1071,6 +1070,14 @@ substitute_and_fold_dom_walker::before_d
stmt = gsi_stmt (i);
gimple_set_modified (stmt, true);
}
+ /* Also fold if we want to fold all statements. */
+ else if (substitute_and_fold_engine->fold_all_stmts
+ && fold_stmt (&i, follow_single_use_edges))
+ {
+ did_replace = true;
+ stmt = gsi_stmt (i);
+ gimple_set_modified (stmt, true);
+ }
/* Some statements may be simplified using propagator
specific information. Do this before propagating
diff -Nurp a/gcc/tree-ssa-propagate.h b/gcc/tree-ssa-propagate.h
--- a/gcc/tree-ssa-propagate.h 2020-03-12 19:07:23.000000000 +0800
+++ b/gcc/tree-ssa-propagate.h 2020-11-24 14:49:12.792000000 +0800
@@ -100,6 +100,8 @@ class ssa_propagation_engine
class substitute_and_fold_engine
{
public:
+ substitute_and_fold_engine (bool fold_all_stmts = false)
+ : fold_all_stmts (fold_all_stmts) { }
virtual ~substitute_and_fold_engine (void) { }
virtual bool fold_stmt (gimple_stmt_iterator *) { return false; }
virtual tree get_value (tree) { return NULL_TREE; }
@@ -107,6 +109,10 @@ class substitute_and_fold_engine
bool substitute_and_fold (basic_block = NULL);
bool replace_uses_in (gimple *);
bool replace_phi_args_in (gphi *);
+
+ /* Users like VRP can set this when they want to perform
+ folding for every propagation. */
+ bool fold_all_stmts;
};
#endif /* _TREE_SSA_PROPAGATE_H */
diff -Nurp a/gcc/tree-vrp.c b/gcc/tree-vrp.c
--- a/gcc/tree-vrp.c 2020-11-24 14:54:43.564000000 +0800
+++ b/gcc/tree-vrp.c 2020-11-24 14:49:12.792000000 +0800
@@ -6384,6 +6384,7 @@ vrp_prop::visit_phi (gphi *phi)
class vrp_folder : public substitute_and_fold_engine
{
public:
+ vrp_folder () : substitute_and_fold_engine (/* Fold all stmts. */ true) { }
tree get_value (tree) FINAL OVERRIDE;
bool fold_stmt (gimple_stmt_iterator *) FINAL OVERRIDE;
bool fold_predicate_in (gimple_stmt_iterator *);

View File

@ -0,0 +1,108 @@
commit f6e1a4cd83190746b6544917f7526fa480ca5f18
Author: Bin Cheng <bin.cheng@linux.alibaba.com>
Date: Wed May 13 11:37:47 2020 +0800
Add missing unit dependence vector in data dependence analysis
Current data dependence analysis misses unit distant vector if DRs in
DDR have the same invariant access functions. This adds the vector as
the constant access function case.
2020-05-13 Bin Cheng <bin.cheng@linux.alibaba.com>
PR tree-optimization/94969
gcc/
* tree-data-dependence.c (constant_access_functions): Rename to...
(invariant_access_functions): ...this. Add parameter. Check for
invariant access function, rather than constant.
(build_classic_dist_vector): Call above function.
* tree-loop-distribution.c (pg_add_dependence_edges): Add comment.
gcc/testsuite/
* gcc.dg/tree-ssa/pr94969.c: New test.
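For illustration, a minimal sketch of the situation the patch fixes, condensed from the pr94969.c testcase added below (not itself part of the backport): both statements in the loop body access f[b] through the same loop-invariant subscript, so the dependence between them needs a unit distance vector, otherwise loop distribution may split them into separate loops and reorder them.

struct S { signed m : 7; signed e : 2; };
struct S f[2], g;
int b, c;

void
k (void)
{
  /* Both accesses use the loop-invariant subscript f[b]; the copy and
     the bit flip must stay in the same loop, in this order.  */
  for (; c <= 1; c++)
    {
      f[b] = g;
      f[b].e ^= 1;
    }
}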
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr94969.c b/gcc/testsuite/gcc.dg/tree-ssa/pr94969.c
new file mode 100644
index 00000000000..056b015f97c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr94969.c
@@ -0,0 +1,28 @@
+/* PR tree-optimization/52267 */
+/* { dg-do run } */
+/* { dg-options "-O3 -fdump-tree-ldist-details" } */
+
+int a = 0, b = 0, c = 0;
+struct S {
+ signed m : 7;
+ signed e : 2;
+};
+struct S f[2] = {{0, 0}, {0, 0}};
+struct S g = {0, 0};
+
+void __attribute__((noinline))
+k()
+{
+ for (; c <= 1; c++) {
+ f[b] = g;
+ f[b].e ^= 1;
+ }
+}
+int main()
+{
+ k();
+ if (f[b].e != 1)
+ __builtin_abort ();
+}
+
+/* { dg-final { scan-tree-dump-not "ldist" "Loop 1 distributed: split to 3 loops"} } */
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index 851225e1171..5505ba46778 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -4821,17 +4821,19 @@ build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
return true;
}
-/* Return true when the DDR contains only constant access functions. */
+/* Return true when the DDR contains only invariant access functions wrto. loop
+ number LNUM. */
static bool
-constant_access_functions (const struct data_dependence_relation *ddr)
+invariant_access_functions (const struct data_dependence_relation *ddr,
+ int lnum)
{
unsigned i;
subscript *sub;
FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
- if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
- || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
+ if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
+ || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
return false;
return true;
@@ -5030,7 +5032,7 @@ build_classic_dist_vector (struct data_dependence_relation *ddr,
dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
save_dist_v (ddr, dist_v);
- if (constant_access_functions (ddr))
+ if (invariant_access_functions (ddr, loop_nest->num))
add_distance_for_zero_overlaps (ddr);
if (DDR_NB_LOOPS (ddr) > 1)
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 44423215332..b122c3964a0 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -2080,7 +2080,8 @@ loop_distribution::pg_add_dependence_edges (struct graph *rdg, int dir,
this_dir = -this_dir;
/* Known dependences can still be unordered througout the
- iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
+ iteration space, see gcc.dg/tree-ssa/ldist-16.c and
+ gcc.dg/tree-ssa/pr94969.c. */
if (DDR_NUM_DIST_VECTS (ddr) != 1)
this_dir = 2;
/* If the overlap is exact preserve stmt order. */

View File

@ -1,23 +0,0 @@
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 36639b697f1..88f14e73d65 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -938,6 +938,18 @@ vect_compute_data_ref_alignment (dr_vec_info *dr_info)
= exact_div (vect_calculate_target_alignment (dr_info), BITS_PER_UNIT);
DR_TARGET_ALIGNMENT (dr_info) = vector_alignment;
+ /* If the main loop has peeled for alignment we have no way of knowing
+ whether the data accesses in the epilogues are aligned. We can't at
+ compile time answer the question whether we have entered the main loop or
+ not. Fixes PR 92351. */
+ if (loop_vinfo)
+ {
+ loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
+ if (orig_loop_vinfo
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
+ return;
+ }
+
unsigned HOST_WIDE_INT vect_align_c;
if (!vector_alignment.is_constant (&vect_align_c))
return;

181
gcc.spec
View File

@ -1,4 +1,4 @@
%global DATE 20200922
%global DATE 20201229
%global gcc_version 9.3.1
%global gcc_major 9.3.1
@ -59,7 +59,7 @@
Summary: Various compilers (C, C++, Objective-C, ...)
Name: gcc
Version: %{gcc_version}
Release: %{DATE}.12
Release: %{DATE}.13
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
URL: https://gcc.gnu.org
@ -114,26 +114,26 @@ Provides: bundled(libiberty)
Provides: gcc(major) = %{gcc_major}
Patch0: enable-aarch64-libquadmath.patch
Patch1: medium-code-mode.patch
Patch2: generate-csel.patch
Patch3: delete-incorrect-smw.patch
Patch4: remove-array-index-inliner-hint.patch
Patch5: ivopts-1.patch
Patch6: ivopts-2.patch
Patch7: dont-generate-IF_THEN_ELSE.patch
Patch8: fix-cost-of-plus.patch
Patch9: div-opti.patch
Patch10: fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch
Patch11: fix-ICE-during-pass-ccp.patch
Patch12: loop-split.patch
Patch13: loop-finite.patch
Patch14: loop-finite-bugfix.patch
Patch15: fix-regno-out-of-range.patch
Patch16: fix-ICE-in-vectorizable-load.patch
Patch17: address-calculation-optimization-within-loop.patch
Patch18: skip-debug-insns-when-computing-inline-costs.patch
Patch19: ipa-const-prop.patch
Patch20: ipa-const-prop-self-recursion-bugfix.patch
Patch1: generate-csel.patch
Patch2: delete-incorrect-smw.patch
Patch3: remove-array-index-inliner-hint.patch
Patch4: ivopts-1.patch
Patch5: ivopts-2.patch
Patch6: dont-generate-IF_THEN_ELSE.patch
Patch7: fix-cost-of-plus.patch
Patch8: div-opti.patch
Patch9: fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch
Patch10: fix-ICE-during-pass-ccp.patch
Patch11: loop-split.patch
Patch12: loop-finite.patch
Patch13: loop-finite-bugfix.patch
Patch14: fix-regno-out-of-range.patch
Patch15: fix-ICE-in-vectorizable-load.patch
Patch16: address-calculation-optimization-within-loop.patch
Patch17: skip-debug-insns-when-computing-inline-costs.patch
Patch18: ipa-const-prop.patch
Patch19: ipa-const-prop-self-recursion-bugfix.patch
Patch20: ipa-const-prop-null-point-check-bugfix.patch
Patch21: change-gcc-BASE-VER.patch
Patch22: add-option-fallow-store-data-races.patch
Patch23: tighten-range-for-generating-csel.patch
@ -177,16 +177,49 @@ Patch60: fix-load-eliding-in-SM.patch
Patch61: fix-SSA-update-for-vectorizer-epilogue.patch
Patch62: fix-ICE-when-vectorizing-nested-cycles.patch
Patch63: fix-avoid-bogus-uninit-warning-with-store-motion.patch
Patch64: ipa-const-prop-null-point-check-bugfix.patch
Patch65: avoid-cycling-on-vertain-subreg-reloads.patch
Patch66: fix-ICE-in-verify_target_availability.patch
Patch67: fix-ICE-vect_slp_analyze_node_operations.patch
Patch68: fix-ICE-in-extract_constrain_insn.patch
Patch69: fix-ICE-during-GIMPLE-pass-dse.patch
Patch70: ipa-const-prop-buffer-overflow-bugfix.patch
Patch71: fix-ICE-in-eliminate_stmt.patch
Patch72: fix-make-ifcvt-clean-up-dead-comparisons.patch
Patch73: fix-when-peeling-for-alignment.patch
Patch64: avoid-cycling-on-vertain-subreg-reloads.patch
Patch65: fix-ICE-in-verify_target_availability.patch
Patch66: fix-ICE-vect_slp_analyze_node_operations.patch
Patch67: fix-ICE-in-extract_constrain_insn.patch
Patch68: fix-ICE-during-GIMPLE-pass-dse.patch
Patch69: ipa-const-prop-buffer-overflow-bugfix.patch
Patch70: fix-ICE-in-eliminate_stmt.patch
Patch71: fix-make-ifcvt-clean-up-dead-comparisons.patch
Patch72: fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch
Patch73: fix-ICE-in-vect_update_misalignment_for_peel.patch
Patch74: redundant-loop-elimination.patch
Patch75: bf16-and-matrix-characteristic.patch
Patch76: medium-code-mode.patch
Patch77: tree-optimization-96920-another-ICE-when-vectorizing.patch
Patch78: reduction-paths-with-unhandled-live-stmt.patch
Patch79: aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch
Patch80: tree-optimization-97812-fix-range-query-in-VRP-asser.patch
Patch81: aarch64-Fix-bf16-and-matrix-g++-gfortran.patch
Patch82: IRA-Handle-fully-tied-destinations.patch
Patch83: fix-ICE-in-pass-vect.patch
Patch84: SLP-VECT-Add-check-to-fix-96837.patch
Patch85: adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch
Patch86: fix-issue499-add-nop-convert.patch
Patch87: aarch64-fix-sve-acle-error.patch
Patch88: fix-ICE-IPA-compare-VRP-types.patch
Patch89: vectorizable-comparison-Swap-operands-only-once.patch
Patch90: sccvn-Improve-handling-of-load-masked-with-integer.patch
Patch91: speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch
Patch92: x86-Fix-bf16-and-matrix.patch
Patch93: Fix-up-push_partial_def-little-endian-bitfield.patch
Patch94: modulo-sched-Carefully-process-loop-counter-initiali.patch
Patch95: fix-ICE-in-affine-combination.patch
Patch96: aarch64-Fix-mismatched-SVE-predicate-modes.patch
Patch97: Fix-EXTRACT_LAST_REDUCTION-segfault.patch
Patch98: fix-PR-92351-When-peeling-for-alignment.patch
Patch99: fix-addlosymdi-ICE-in-pass-reload.patch
Patch100: store-merging-Consider-also-overlapping-stores-earlier.patch
Patch101: AArch64-Fix-constraints-for-CPY-M.patch
Patch102: Fix-zero-masking-for-vcvtps2ph.patch
Patch103: re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch
Patch104: fix-avx512vl-vcvttpd2dq-2-fail.patch
Patch105: fix-issue604-ldist-dependency-fixup.patch
Patch106: Apply-maximum-nunits-for-BB-SLP.patch
%global gcc_target_platform %{_arch}-linux-gnu
@ -703,6 +736,39 @@ not stable, so plugins must be rebuilt any time GCC is updated.
%patch71 -p1
%patch72 -p1
%patch73 -p1
%patch74 -p1
%patch75 -p1
%patch76 -p1
%patch77 -p1
%patch78 -p1
%patch79 -p1
%patch80 -p1
%patch81 -p1
%patch82 -p1
%patch83 -p1
%patch84 -p1
%patch85 -p1
%patch86 -p1
%patch87 -p1
%patch88 -p1
%patch89 -p1
%patch90 -p1
%patch91 -p1
%patch92 -p1
%patch93 -p1
%patch94 -p1
%patch95 -p1
%patch96 -p1
%patch97 -p1
%patch98 -p1
%patch99 -p1
%patch100 -p1
%patch101 -p1
%patch102 -p1
%patch103 -p1
%patch104 -p1
%patch105 -p1
%patch106 -p1
%build
@ -2631,6 +2697,57 @@ end
%doc rpm.doc/changelogs/libcc1/ChangeLog*
%changelog
* Tue Dec 29 2020 eastb233 <xiezhiheng@huawei.com> - 9.3.1-20201229.13
- avoid-cycling-on-vertain-subreg-reloads.patch: Add patch source comment
- change-gcc-BASE-VER.patch: Likewise
- dont-generate-IF_THEN_ELSE.patch: Likewise
- fix-ICE-in-compute_live_loop_exits.patch: Likewise
- fix-ICE-in-eliminate_stmt.patch: Likewise
- fix-ICE-in-vect_create_epilog_for_reduction.patch: Likewise
- fix-ICE-in-vect_stmt_to_vectorize.patch: Likewise
- fix-ICE-in-verify_ssa.patch: Likewise
- fix-ICE-when-vectorizing-nested-cycles.patch: Likewise
- fix-cost-of-plus.patch: Likewise
- ipa-const-prop-self-recursion-bugfix.patch: Likewise
- simplify-removing-subregs.patch: Likewise
- medium-code-mode.patch: Bugfix
- fix-when-peeling-for-alignment.patch: Move to ...
- fix-PR-92351-When-peeling-for-alignment.patch: ... this
- AArch64-Fix-constraints-for-CPY-M.patch: New file
- Apply-maximum-nunits-for-BB-SLP.patch: New file
- Fix-EXTRACT_LAST_REDUCTION-segfault.patch: New file
- Fix-up-push_partial_def-little-endian-bitfield.patch: New file
- Fix-zero-masking-for-vcvtps2ph.patch: New file
- IRA-Handle-fully-tied-destinations.patch: New file
- SLP-VECT-Add-check-to-fix-96837.patch: New file
- aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch: New file
- aarch64-Fix-bf16-and-matrix-g++-gfortran.patch: New file
- aarch64-Fix-mismatched-SVE-predicate-modes.patch: New file
- aarch64-fix-sve-acle-error.patch: New file
- adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch: New file
- bf16-and-matrix-characteristic.patch: New file
- fix-ICE-IPA-compare-VRP-types.patch: New file
- fix-ICE-in-affine-combination.patch: New file
- fix-ICE-in-pass-vect.patch: New file
- fix-ICE-in-vect_update_misalignment_for_peel.patch: New file
- fix-addlosymdi-ICE-in-pass-reload.patch: New file
- fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch: New file
- fix-avx512vl-vcvttpd2dq-2-fail.patch: New file
- fix-issue499-add-nop-convert.patch: New file
- fix-issue604-ldist-dependency-fixup.patch: New file
- modulo-sched-Carefully-process-loop-counter-initiali.patch: New file
- re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch: New file
- reduction-paths-with-unhandled-live-stmt.patch: New file
- redundant-loop-elimination.patch: New file
- sccvn-Improve-handling-of-load-masked-with-integer.patch: New file
- speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch: New file
- store-merging-Consider-also-overlapping-stores-earlier.patch: New file
- tree-optimization-96920-another-ICE-when-vectorizing.patch: New file
- tree-optimization-97812-fix-range-query-in-VRP-asser.patch: New file
- vectorizable-comparison-Swap-operands-only-once.patch: New file
- x86-Fix-bf16-and-matrix.patch: New file
- gcc.spec: Add uploaded patch
* Tue Sep 22 2020 eastb233 <xiezhiheng@huawei.com> - 9.3.1-20200922.12
- fix-when-peeling-for-alignment.patch: New file

View File

@ -1,14 +1,11 @@
This patch is backport from gcc-trunk. It is a combined patch from
This backport contains 2 patches from the GCC mainstream tree.
The commit IDs of these patches are listed below in chronological order.
Find matched aggregate lattice for self-recursive CP (PR ipa/93084)
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=42d73fa9d575e3c8c21e88bd7f65922e17b052f1
0001-Find-matched-aggregate-lattice-for-self-recursive-CP.patch
709d7838e753bbb6f16e2ed88a118ed81c367040
and
Do not propagate self-dependent value (PR ipa/93763)
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=47772af10c00f7e1e95cd52557fc893dc602a420
adapted the using of parameter to gcc9 style.
0002-Do-not-propagate-self-dependent-value-PR-ipa-93763.patch
47772af10c00f7e1e95cd52557fc893dc602a420
diff -Nurp a/gcc/ipa-cp.c b/gcc/ipa-cp.c
--- a/gcc/ipa-cp.c 2020-05-23 16:16:58.032000000 +0800

View File

@ -194,8 +194,8 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
case AARCH64_CMODEL_SMALL:
+ AARCH64_SMALL_ROUTINE:
/* Same reasoning as the tiny code model, but the offset cap here is
4G. */
if ((SYMBOL_REF_WEAK (x)
1MB, allowing +/-3.9GB for the offset to the symbol. */
@@ -13121,7 +13225,48 @@ aarch64_classify_symbol (rtx x, HOST_WID
? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
return SYMBOL_SMALL_ABSOLUTE;
@ -300,7 +300,7 @@ diff -Nurp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+ UNSPEC_LOAD_SYMBOL_MEDIUM
UNSPEC_LD1_SVE
UNSPEC_ST1_SVE
UNSPEC_LD1RQ
UNSPEC_LDNT1_SVE
@@ -6548,6 +6553,39 @@
[(set_attr "type" "load_4")]
)

View File

@ -0,0 +1,251 @@
This backport contains 1 patch from the GCC mainstream tree.
The commit ID of the patch is listed below.
0001-modulo-sched-Carefully-process-loop-counter-initiali.patch
4eb8f93d026eaa1de9b4820337069f3ce3465cd0
diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
index 6f699a874e3..4568674aa6c 100644
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -210,8 +210,6 @@ static int sms_order_nodes (ddg_ptr, int, int *, int *);
static void set_node_sched_params (ddg_ptr);
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
-static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
- rtx, rtx);
static int calculate_stage_count (partial_schedule_ptr, int);
static void calculate_must_precede_follow (ddg_node_ptr, int, int,
int, int, sbitmap, sbitmap, sbitmap);
@@ -391,30 +389,40 @@ doloop_register_get (rtx_insn *head, rtx_insn *tail)
this constant. Otherwise return 0. */
static rtx_insn *
const_iteration_count (rtx count_reg, basic_block pre_header,
- int64_t * count)
+ int64_t *count, bool* adjust_inplace)
{
rtx_insn *insn;
rtx_insn *head, *tail;
+ *adjust_inplace = false;
+ bool read_after = false;
+
if (! pre_header)
return NULL;
get_ebb_head_tail (pre_header, pre_header, &head, &tail);
for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
- if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
- rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
+ if (single_set (insn) && rtx_equal_p (count_reg,
+ SET_DEST (single_set (insn))))
{
rtx pat = single_set (insn);
if (CONST_INT_P (SET_SRC (pat)))
{
*count = INTVAL (SET_SRC (pat));
+ *adjust_inplace = !read_after;
return insn;
}
return NULL;
}
+ else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn))
+ {
+ read_after = true;
+ if (reg_set_p (count_reg, insn))
+ break;
+ }
return NULL;
}
@@ -1126,7 +1134,7 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
/* Generate the instructions (including reg_moves) for prolog & epilog. */
static void
generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
- rtx count_reg, rtx count_init)
+ rtx count_reg, bool adjust_init)
{
int i;
int last_stage = PS_STAGE_COUNT (ps) - 1;
@@ -1135,12 +1143,12 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
/* Generate the prolog, inserting its insns on the loop-entry edge. */
start_sequence ();
- if (!count_init)
+ if (adjust_init)
{
/* Generate instructions at the beginning of the prolog to
- adjust the loop count by STAGE_COUNT. If loop count is constant
- (count_init), this constant is adjusted by STAGE_COUNT in
- generate_prolog_epilog function. */
+ adjust the loop count by STAGE_COUNT. If loop count is constant
+ and it not used anywhere in prologue, this constant is adjusted by
+ STAGE_COUNT outside of generate_prolog_epilog function. */
rtx sub_reg = NULL_RTX;
sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
@@ -1528,7 +1536,8 @@ sms_schedule (void)
rtx_insn *count_init;
int mii, rec_mii, stage_count, min_cycle;
int64_t loop_count = 0;
- bool opt_sc_p;
+ bool opt_sc_p, adjust_inplace = false;
+ basic_block pre_header;
if (! (g = g_arr[loop->num]))
continue;
@@ -1569,19 +1578,13 @@ sms_schedule (void)
}
- /* In case of th loop have doloop register it gets special
- handling. */
- count_init = NULL;
- if ((count_reg = doloop_register_get (head, tail)))
- {
- basic_block pre_header;
-
- pre_header = loop_preheader_edge (loop)->src;
- count_init = const_iteration_count (count_reg, pre_header,
- &loop_count);
- }
+ count_reg = doloop_register_get (head, tail);
gcc_assert (count_reg);
+ pre_header = loop_preheader_edge (loop)->src;
+ count_init = const_iteration_count (count_reg, pre_header, &loop_count,
+ &adjust_inplace);
+
if (dump_file && count_init)
{
fprintf (dump_file, "SMS const-doloop ");
@@ -1701,9 +1704,20 @@ sms_schedule (void)
print_partial_schedule (ps, dump_file);
}
- /* case the BCT count is not known , Do loop-versioning */
- if (count_reg && ! count_init)
+ if (count_init)
+ {
+ if (adjust_inplace)
+ {
+ /* When possible, set new iteration count of loop kernel in
+ place. Otherwise, generate_prolog_epilog creates an insn
+ to adjust. */
+ SET_SRC (single_set (count_init)) = GEN_INT (loop_count
+ - stage_count + 1);
+ }
+ }
+ else
{
+ /* case the BCT count is not known , Do loop-versioning */
rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
gen_int_mode (stage_count,
GET_MODE (count_reg)));
@@ -1713,12 +1727,7 @@ sms_schedule (void)
loop_version (loop, comp_rtx, &condition_bb,
prob, prob.invert (),
prob, prob.invert (), true);
- }
-
- /* Set new iteration count of loop kernel. */
- if (count_reg && count_init)
- SET_SRC (single_set (count_init)) = GEN_INT (loop_count
- - stage_count + 1);
+ }
/* Now apply the scheduled kernel to the RTL of the loop. */
permute_partial_schedule (ps, g->closing_branch->first_note);
@@ -1735,7 +1744,7 @@ sms_schedule (void)
if (dump_file)
print_node_sched_params (dump_file, g->num_nodes, ps);
/* Generate prolog and epilog. */
- generate_prolog_epilog (ps, loop, count_reg, count_init);
+ generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace);
break;
}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c
new file mode 100644
index 00000000000..e32fb129f18
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c
@@ -0,0 +1,23 @@
+/* PR rtl-optimization/97421 */
+/* { dg-additional-options "-fmodulo-sched" } */
+
+int a, b, d, e;
+int *volatile c = &a;
+
+__attribute__((noinline))
+void f(void)
+{
+ for (int g = 2; g >= 0; g--) {
+ d = 0;
+ for (b = 0; b <= 2; b++)
+ ;
+ e = *c;
+ }
+}
+
+int main(void)
+{
+ f();
+ if (b != 3)
+ __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c
new file mode 100644
index 00000000000..142bcbcee91
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c
@@ -0,0 +1,18 @@
+/* PR rtl-optimization/97421 */
+/* { dg-additional-options "-fmodulo-sched -fno-dce -fno-strict-aliasing" } */
+
+static int a, b, c;
+int *d = &c;
+int **e = &d;
+int ***f = &e;
+int main()
+{
+ int h;
+ for (a = 2; a; a--)
+ for (h = 0; h <= 2; h++)
+ for (b = 0; b <= 2; b++)
+ ***f = 6;
+
+ if (b != 3)
+ __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c
new file mode 100644
index 00000000000..3f1485a4a3d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c
@@ -0,0 +1,22 @@
+/* PR rtl-optimization/97421 */
+/* { dg-additional-options "-fmodulo-sched" } */
+
+int a, b, c;
+short d;
+void e(void) {
+ unsigned f = 0;
+ for (; f <= 2; f++) {
+ int g[1];
+ int h = (long)g;
+ c = 0;
+ for (; c < 10; c++)
+ g[0] = a = 0;
+ for (; a <= 2; a++)
+ b = d;
+ }
+}
+int main(void) {
+ e();
+ if (a != 3)
+ __builtin_abort();
+}

View File

@ -0,0 +1,215 @@
This backport contains 1 patch from the GCC mainstream tree.
The commit ID of the patch is listed below.
491b0b4015a70071a05e0faa5c2082c43a51a0d3
0001-re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch
diff -urpN a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
--- a/gcc/config/i386/i386-builtin.def 2020-03-12 07:07:21.000000000 -0400
+++ b/gcc/config/i386/i386-builtin.def 2020-12-17 20:46:53.868000000 -0500
@@ -2516,60 +2516,60 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPT
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v2di_mask, "__builtin_ia32_vpshld_v2di_mask", IX86_BUILTIN_VPSHLDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT)
BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi, "__builtin_ia32_vpshrdv_v32hi", IX86_BUILTIN_VPSHRDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi, "__builtin_ia32_vpshrdv_v16hi", IX86_BUILTIN_VPSHRDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_mask, "__builtin_ia32_vpshrdv_v16hi_mask", IX86_BUILTIN_VPSHRDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_maskz, "__builtin_ia32_vpshrdv_v16hi_maskz", IX86_BUILTIN_VPSHRDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_mask, "__builtin_ia32_vpshrdv_v16hi_mask", IX86_BUILTIN_VPSHRDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_maskz, "__builtin_ia32_vpshrdv_v16hi_maskz", IX86_BUILTIN_VPSHRDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi, "__builtin_ia32_vpshrdv_v8hi", IX86_BUILTIN_VPSHRDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_mask, "__builtin_ia32_vpshrdv_v8hi_mask", IX86_BUILTIN_VPSHRDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_maskz, "__builtin_ia32_vpshrdv_v8hi_maskz", IX86_BUILTIN_VPSHRDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_mask, "__builtin_ia32_vpshrdv_v8hi_mask", IX86_BUILTIN_VPSHRDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_maskz, "__builtin_ia32_vpshrdv_v8hi_maskz", IX86_BUILTIN_VPSHRDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si, "__builtin_ia32_vpshrdv_v16si", IX86_BUILTIN_VPSHRDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si, "__builtin_ia32_vpshrdv_v8si", IX86_BUILTIN_VPSHRDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_mask, "__builtin_ia32_vpshrdv_v8si_mask", IX86_BUILTIN_VPSHRDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_maskz, "__builtin_ia32_vpshrdv_v8si_maskz", IX86_BUILTIN_VPSHRDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_mask, "__builtin_ia32_vpshrdv_v8si_mask", IX86_BUILTIN_VPSHRDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_maskz, "__builtin_ia32_vpshrdv_v8si_maskz", IX86_BUILTIN_VPSHRDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si, "__builtin_ia32_vpshrdv_v4si", IX86_BUILTIN_VPSHRDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_mask, "__builtin_ia32_vpshrdv_v4si_mask", IX86_BUILTIN_VPSHRDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_maskz, "__builtin_ia32_vpshrdv_v4si_maskz", IX86_BUILTIN_VPSHRDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_mask, "__builtin_ia32_vpshrdv_v4si_mask", IX86_BUILTIN_VPSHRDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_maskz, "__builtin_ia32_vpshrdv_v4si_maskz", IX86_BUILTIN_VPSHRDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di, "__builtin_ia32_vpshrdv_v8di", IX86_BUILTIN_VPSHRDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di, "__builtin_ia32_vpshrdv_v4di", IX86_BUILTIN_VPSHRDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_mask, "__builtin_ia32_vpshrdv_v4di_mask", IX86_BUILTIN_VPSHRDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_maskz, "__builtin_ia32_vpshrdv_v4di_maskz", IX86_BUILTIN_VPSHRDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_mask, "__builtin_ia32_vpshrdv_v4di_mask", IX86_BUILTIN_VPSHRDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_maskz, "__builtin_ia32_vpshrdv_v4di_maskz", IX86_BUILTIN_VPSHRDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di, "__builtin_ia32_vpshrdv_v2di", IX86_BUILTIN_VPSHRDVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_mask, "__builtin_ia32_vpshrdv_v2di_mask", IX86_BUILTIN_VPSHRDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_maskz, "__builtin_ia32_vpshrdv_v2di_maskz", IX86_BUILTIN_VPSHRDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_mask, "__builtin_ia32_vpshrdv_v2di_mask", IX86_BUILTIN_VPSHRDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_maskz, "__builtin_ia32_vpshrdv_v2di_maskz", IX86_BUILTIN_VPSHRDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi, "__builtin_ia32_vpshldv_v32hi", IX86_BUILTIN_VPSHLDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi, "__builtin_ia32_vpshldv_v16hi", IX86_BUILTIN_VPSHLDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_mask, "__builtin_ia32_vpshldv_v16hi_mask", IX86_BUILTIN_VPSHLDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_maskz, "__builtin_ia32_vpshldv_v16hi_maskz", IX86_BUILTIN_VPSHLDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_mask, "__builtin_ia32_vpshldv_v16hi_mask", IX86_BUILTIN_VPSHLDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_maskz, "__builtin_ia32_vpshldv_v16hi_maskz", IX86_BUILTIN_VPSHLDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi, "__builtin_ia32_vpshldv_v8hi", IX86_BUILTIN_VPSHLDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_mask, "__builtin_ia32_vpshldv_v8hi_mask", IX86_BUILTIN_VPSHLDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_maskz, "__builtin_ia32_vpshldv_v8hi_maskz", IX86_BUILTIN_VPSHLDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_mask, "__builtin_ia32_vpshldv_v8hi_mask", IX86_BUILTIN_VPSHLDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_maskz, "__builtin_ia32_vpshldv_v8hi_maskz", IX86_BUILTIN_VPSHLDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si, "__builtin_ia32_vpshldv_v16si", IX86_BUILTIN_VPSHLDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si, "__builtin_ia32_vpshldv_v8si", IX86_BUILTIN_VPSHLDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_mask, "__builtin_ia32_vpshldv_v8si_mask", IX86_BUILTIN_VPSHLDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_maskz, "__builtin_ia32_vpshldv_v8si_maskz", IX86_BUILTIN_VPSHLDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_mask, "__builtin_ia32_vpshldv_v8si_mask", IX86_BUILTIN_VPSHLDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_maskz, "__builtin_ia32_vpshldv_v8si_maskz", IX86_BUILTIN_VPSHLDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si, "__builtin_ia32_vpshldv_v4si", IX86_BUILTIN_VPSHLDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_mask, "__builtin_ia32_vpshldv_v4si_mask", IX86_BUILTIN_VPSHLDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_maskz, "__builtin_ia32_vpshldv_v4si_maskz", IX86_BUILTIN_VPSHLDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_mask, "__builtin_ia32_vpshldv_v4si_mask", IX86_BUILTIN_VPSHLDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_maskz, "__builtin_ia32_vpshldv_v4si_maskz", IX86_BUILTIN_VPSHLDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di, "__builtin_ia32_vpshldv_v8di", IX86_BUILTIN_VPSHLDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di, "__builtin_ia32_vpshldv_v4di", IX86_BUILTIN_VPSHLDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_mask, "__builtin_ia32_vpshldv_v4di_mask", IX86_BUILTIN_VPSHLDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_maskz, "__builtin_ia32_vpshldv_v4di_maskz", IX86_BUILTIN_VPSHLDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_mask, "__builtin_ia32_vpshldv_v4di_mask", IX86_BUILTIN_VPSHLDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_maskz, "__builtin_ia32_vpshldv_v4di_maskz", IX86_BUILTIN_VPSHLDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di, "__builtin_ia32_vpshldv_v2di", IX86_BUILTIN_VPSHLDVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_mask, "__builtin_ia32_vpshldv_v2di_mask", IX86_BUILTIN_VPSHLDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_maskz, "__builtin_ia32_vpshldv_v2di_maskz", IX86_BUILTIN_VPSHLDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_mask, "__builtin_ia32_vpshldv_v2di_mask", IX86_BUILTIN_VPSHLDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_maskz, "__builtin_ia32_vpshldv_v2di_maskz", IX86_BUILTIN_VPSHLDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
/* GFNI */
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8affineinvqb_v64qi, "__builtin_ia32_vgf2p8affineinvqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEINVQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT)
@@ -2594,44 +2594,44 @@ BDESC (OPTION_MASK_ISA_GFNI | OPTION_MAS
/* VNNI */
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si, "__builtin_ia32_vpdpbusd_v8si", IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_mask, "__builtin_ia32_vpdpbusd_v8si_mask", IX86_BUILTIN_VPDPBUSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_maskz, "__builtin_ia32_vpdpbusd_v8si_maskz", IX86_BUILTIN_VPDPBUSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_mask, "__builtin_ia32_vpdpbusd_v8si_mask", IX86_BUILTIN_VPDPBUSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_maskz, "__builtin_ia32_vpdpbusd_v8si_maskz", IX86_BUILTIN_VPDPBUSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si, "__builtin_ia32_vpdpbusd_v4si", IX86_BUILTIN_VPDPBUSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_mask, "__builtin_ia32_vpdpbusd_v4si_mask", IX86_BUILTIN_VPDPBUSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_maskz, "__builtin_ia32_vpdpbusd_v4si_maskz", IX86_BUILTIN_VPDPBUSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_mask, "__builtin_ia32_vpdpbusd_v4si_mask", IX86_BUILTIN_VPDPBUSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_maskz, "__builtin_ia32_vpdpbusd_v4si_maskz", IX86_BUILTIN_VPDPBUSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si, "__builtin_ia32_vpdpbusds_v8si", IX86_BUILTIN_VPDPBUSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_mask, "__builtin_ia32_vpdpbusds_v8si_mask", IX86_BUILTIN_VPDPBUSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_maskz, "__builtin_ia32_vpdpbusds_v8si_maskz", IX86_BUILTIN_VPDPBUSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_mask, "__builtin_ia32_vpdpbusds_v8si_mask", IX86_BUILTIN_VPDPBUSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_maskz, "__builtin_ia32_vpdpbusds_v8si_maskz", IX86_BUILTIN_VPDPBUSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si, "__builtin_ia32_vpdpbusds_v4si", IX86_BUILTIN_VPDPBUSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_mask, "__builtin_ia32_vpdpbusds_v4si_mask", IX86_BUILTIN_VPDPBUSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_maskz, "__builtin_ia32_vpdpbusds_v4si_maskz", IX86_BUILTIN_VPDPBUSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_mask, "__builtin_ia32_vpdpbusds_v4si_mask", IX86_BUILTIN_VPDPBUSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_maskz, "__builtin_ia32_vpdpbusds_v4si_maskz", IX86_BUILTIN_VPDPBUSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si, "__builtin_ia32_vpdpwssd_v8si", IX86_BUILTIN_VPDPWSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_mask, "__builtin_ia32_vpdpwssd_v8si_mask", IX86_BUILTIN_VPDPWSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_maskz, "__builtin_ia32_vpdpwssd_v8si_maskz", IX86_BUILTIN_VPDPWSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_mask, "__builtin_ia32_vpdpwssd_v8si_mask", IX86_BUILTIN_VPDPWSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_maskz, "__builtin_ia32_vpdpwssd_v8si_maskz", IX86_BUILTIN_VPDPWSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si, "__builtin_ia32_vpdpwssd_v4si", IX86_BUILTIN_VPDPWSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_mask, "__builtin_ia32_vpdpwssd_v4si_mask", IX86_BUILTIN_VPDPWSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_maskz, "__builtin_ia32_vpdpwssd_v4si_maskz", IX86_BUILTIN_VPDPWSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_mask, "__builtin_ia32_vpdpwssd_v4si_mask", IX86_BUILTIN_VPDPWSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_maskz, "__builtin_ia32_vpdpwssd_v4si_maskz", IX86_BUILTIN_VPDPWSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si, "__builtin_ia32_vpdpwssds_v8si", IX86_BUILTIN_VPDPWSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_mask, "__builtin_ia32_vpdpwssds_v8si_mask", IX86_BUILTIN_VPDPWSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_maskz, "__builtin_ia32_vpdpwssds_v8si_maskz", IX86_BUILTIN_VPDPWSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_mask, "__builtin_ia32_vpdpwssds_v8si_mask", IX86_BUILTIN_VPDPWSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_maskz, "__builtin_ia32_vpdpwssds_v8si_maskz", IX86_BUILTIN_VPDPWSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si, "__builtin_ia32_vpdpwssds_v4si", IX86_BUILTIN_VPDPWSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_mask, "__builtin_ia32_vpdpwssds_v4si_mask", IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_mask, "__builtin_ia32_vpdpwssds_v4si_mask", IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
/* VPCLMULQDQ */
BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT)
diff -urpN a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
--- a/gcc/config/i386/i386-builtin-types.def 2020-03-12 07:07:21.000000000 -0400
+++ b/gcc/config/i386/i386-builtin-types.def 2020-12-17 20:46:53.868000000 -0500
@@ -1246,17 +1246,8 @@ DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, INT
DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, INT, V4SI, INT)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, INT, V2DI, INT)
DEF_FUNCTION_TYPE (V32HI, V32HI, V32HI, V32HI)
-DEF_FUNCTION_TYPE (V32HI, V32HI, V32HI, V32HI, INT)
-DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, V16HI, INT)
-DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, V8HI, INT)
-DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI, INT)
-DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, V4SI, INT)
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI)
-DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI, INT)
-DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI, INT)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI)
-DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, INT)
-DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI, INT)
# BITALG builtins
DEF_FUNCTION_TYPE (V4DI, V4DI)
diff -urpN a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
--- a/gcc/config/i386/i386-expand.c 2020-12-17 20:44:55.508000000 -0500
+++ b/gcc/config/i386/i386-expand.c 2020-12-17 20:46:53.872000000 -0500
@@ -9437,15 +9437,6 @@ ix86_expand_args_builtin (const struct b
case USI_FTYPE_V32HI_V32HI_INT_USI:
case UHI_FTYPE_V16HI_V16HI_INT_UHI:
case UQI_FTYPE_V8HI_V8HI_INT_UQI:
- case V32HI_FTYPE_V32HI_V32HI_V32HI_INT:
- case V16HI_FTYPE_V16HI_V16HI_V16HI_INT:
- case V8HI_FTYPE_V8HI_V8HI_V8HI_INT:
- case V8SI_FTYPE_V8SI_V8SI_V8SI_INT:
- case V4DI_FTYPE_V4DI_V4DI_V4DI_INT:
- case V8DI_FTYPE_V8DI_V8DI_V8DI_INT:
- case V16SI_FTYPE_V16SI_V16SI_V16SI_INT:
- case V2DI_FTYPE_V2DI_V2DI_V2DI_INT:
- case V4SI_FTYPE_V4SI_V4SI_V4SI_INT:
nargs = 4;
mask_pos = 1;
nargs_constant = 1;

@@ -0,0 +1,64 @@
This backport contains 1 patch from the gcc mainstream tree.
The commit id of the patch is listed below.
2686de5617bfb572343933be2883e8274c9735b5
0001-tree-optimization-97760-reduction-paths-with-unhandl.patch
diff --git a/gcc/testsuite/gcc.dg/vect/pr97760.c b/gcc/testsuite/gcc.dg/vect/pr97760.c
new file mode 100644
index 00000000000..da5ac937a43
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr97760.c
@@ -0,0 +1,26 @@
+#include "tree-vect.h"
+
+int b=1;
+static int *g = &b;
+
+void __attribute__((noipa))
+h (unsigned int n)
+{
+ int i = 3;
+ int f = 3;
+ for (; f <= 50; f += 4) {
+ i += 4;
+ *g = i;
+ i += n;
+ }
+}
+
+int main ()
+{
+ check_vect ();
+
+ h (9);
+ if (*g != 150 || b != 150)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 977633a3ce3..39b7319e825 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -3326,14 +3326,17 @@ pop:
fail = true;
break;
}
- /* Check there's only a single stmt the op is used on inside
- of the loop. */
+ /* Check there's only a single stmt the op is used on. For the
+ not value-changing tail and the last stmt allow out-of-loop uses.
+ ??? We could relax this and handle arbitrary live stmts by
+ forcing a scalar epilogue for example. */
imm_use_iterator imm_iter;
gimple *op_use_stmt;
unsigned cnt = 0;
FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op)
if (!is_gimple_debug (op_use_stmt)
- && flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt)))
+ && (*code != ERROR_MARK
+ || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt))))
{
/* We want to allow x + x but not x < 1 ? x : 2. */
if (is_gimple_assign (op_use_stmt)

@@ -0,0 +1,486 @@
diff -Nurp a/gcc/common.opt b/gcc/common.opt
--- a/gcc/common.opt 2020-11-23 03:24:54.760000000 -0500
+++ b/gcc/common.opt 2020-11-23 03:23:59.716000000 -0500
@@ -1150,6 +1150,10 @@ fcompare-elim
Common Report Var(flag_compare_elim_after_reload) Optimization
Perform comparison elimination after register allocation has finished.
+floop-elim
+Common Report Var(flag_loop_elim) Init(0) Optimization
+Perform redundant loop elimination.
+
fconserve-stack
Common Var(flag_conserve_stack) Optimization
Do not perform optimizations increasing noticeably stack usage.
diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
--- a/gcc/tree-ssa-phiopt.c 2020-11-23 03:24:54.760000000 -0500
+++ b/gcc/tree-ssa-phiopt.c 2020-11-23 03:27:42.824000000 -0500
@@ -71,6 +71,7 @@ static hash_set<tree> * get_non_trapping
static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree);
static void hoist_adjacent_loads (basic_block, basic_block,
basic_block, basic_block);
+static bool do_phiopt_pattern (basic_block, basic_block, basic_block);
static bool gate_hoist_loads (void);
/* This pass tries to transform conditional stores into unconditional
@@ -259,6 +260,10 @@ tree_ssa_phiopt_worker (bool do_store_el
hoist_adjacent_loads (bb, bb1, bb2, bb3);
continue;
}
+ else if (flag_loop_elim && do_phiopt_pattern (bb, bb1, bb2))
+ {
+ continue;
+ }
else
continue;
@@ -2899,6 +2904,449 @@ hoist_adjacent_loads (basic_block bb0, b
}
}
+static bool check_uses (tree, hash_set<tree> *);
+
+/* Check SSA_NAME is used in
+ if (SSA_NAME == 0)
+ ...
+ or
+ if (SSA_NAME != 0)
+ ...
+*/
+static bool
+check_uses_cond (tree ssa_name, gimple *stmt,
+ hash_set<tree> *hset ATTRIBUTE_UNUSED)
+{
+ tree_code code = gimple_cond_code (stmt);
+ if (code != EQ_EXPR && code != NE_EXPR)
+ {
+ return false;
+ }
+
+ tree lhs = gimple_cond_lhs (stmt);
+ tree rhs = gimple_cond_rhs (stmt);
+ if ((lhs == ssa_name && integer_zerop (rhs))
+ || (rhs == ssa_name && integer_zerop (lhs)))
+ {
+ return true;
+ }
+
+ return false;
+}
+
+/* Check SSA_NAME is used in
+ _tmp = SSA_NAME == 0;
+ or
+ _tmp = SSA_NAME != 0;
+ or
+ _tmp = SSA_NAME | _tmp2;
+*/
+static bool
+check_uses_assign (tree ssa_name, gimple *stmt, hash_set<tree> *hset)
+{
+ tree_code code = gimple_assign_rhs_code (stmt);
+ tree lhs, rhs1, rhs2;
+
+ switch (code)
+ {
+ case EQ_EXPR:
+ case NE_EXPR:
+ rhs1 = gimple_assign_rhs1 (stmt);
+ rhs2 = gimple_assign_rhs2 (stmt);
+ if ((rhs1 == ssa_name && integer_zerop (rhs2))
+ || (rhs2 == ssa_name && integer_zerop (rhs1)))
+ {
+ return true;
+ }
+ break;
+
+ case BIT_IOR_EXPR:
+ lhs = gimple_assign_lhs (stmt);
+ if (hset->contains (lhs))
+ {
+ return false;
+ }
+ /* We should check the use of _tmp further. */
+ return check_uses (lhs, hset);
+
+ default:
+ break;
+ }
+ return false;
+}
+
+/* Check SSA_NAME is used in
+ # result = PHI <SSA_NAME (bb1), 0 (bb2), 0 (bb3)>
+*/
+static bool
+check_uses_phi (tree ssa_name, gimple *stmt, hash_set<tree> *hset)
+{
+ for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++)
+ {
+ tree arg = gimple_phi_arg_def (stmt, i);
+ if (!integer_zerop (arg) && arg != ssa_name)
+ {
+ return false;
+ }
+ }
+
+ tree result = gimple_phi_result (stmt);
+
+ /* It is used to avoid infinite recursion,
+ <bb 1>
+ if (cond)
+ goto <bb 2>
+ else
+ goto <bb 3>
+
+ <bb 2>
+ # _tmp2 = PHI <0 (bb 1), _tmp3 (bb 3)>
+ {BODY}
+ if (cond)
+ goto <bb 3>
+ else
+ goto <bb 4>
+
+ <bb 3>
+ # _tmp3 = PHI <0 (bb 1), _tmp2 (bb 2)>
+ {BODY}
+ if (cond)
+ goto <bb 2>
+ else
+ goto <bb 4>
+
+ <bb 4>
+ ...
+ */
+ if (hset->contains (result))
+ {
+ return false;
+ }
+
+ return check_uses (result, hset);
+}
+
+/* Check the use of SSA_NAME, it should only be used in comparison
+ operation and PHI node. HSET is used to record the ssa_names
+ that have been already checked. */
+static bool
+check_uses (tree ssa_name, hash_set<tree> *hset)
+{
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+
+ if (TREE_CODE (ssa_name) != SSA_NAME)
+ {
+ return false;
+ }
+
+ if (SSA_NAME_VAR (ssa_name)
+ && is_global_var (SSA_NAME_VAR (ssa_name)))
+ {
+ return false;
+ }
+
+ hset->add (ssa_name);
+
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name)
+ {
+ gimple *stmt = USE_STMT (use_p);
+
+ /* Ignore debug gimple statements. */
+ if (is_gimple_debug (stmt))
+ {
+ continue;
+ }
+
+ switch (gimple_code (stmt))
+ {
+ case GIMPLE_COND:
+ if (!check_uses_cond (ssa_name, stmt, hset))
+ {
+ return false;
+ }
+ break;
+
+ case GIMPLE_ASSIGN:
+ if (!check_uses_assign (ssa_name, stmt, hset))
+ {
+ return false;
+ }
+ break;
+
+ case GIMPLE_PHI:
+ if (!check_uses_phi (ssa_name, stmt, hset))
+ {
+ return false;
+ }
+ break;
+
+ default:
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool
+check_def_gimple (gimple *def1, gimple *def2, tree result)
+{
+ /* def1 and def2 should be POINTER_PLUS_EXPR. */
+ if (!is_gimple_assign (def1) || !is_gimple_assign (def2)
+ || gimple_assign_rhs_code (def1) != POINTER_PLUS_EXPR
+ || gimple_assign_rhs_code (def2) != POINTER_PLUS_EXPR)
+ {
+ return false;
+ }
+
+ tree rhs12 = gimple_assign_rhs2 (def1);
+
+ tree rhs21 = gimple_assign_rhs1 (def2);
+ tree rhs22 = gimple_assign_rhs2 (def2);
+
+ if (rhs21 != result)
+ {
+ return false;
+ }
+
+ /* We should have a positive pointer-plus constant to ensure
+ that the pointer value is continuously increasing. */
+ if (TREE_CODE (rhs12) != INTEGER_CST || TREE_CODE (rhs22) != INTEGER_CST
+ || compare_tree_int (rhs12, 0) <= 0 || compare_tree_int (rhs22, 0) <= 0)
+ {
+ return false;
+ }
+
+ return true;
+}
+
+static bool
+check_loop_body (basic_block bb0, basic_block bb2, tree result)
+{
+ gimple *g01 = first_stmt (bb0);
+ if (!g01 || !is_gimple_assign (g01)
+ || gimple_assign_rhs_code (g01) != MEM_REF
+ || TREE_OPERAND (gimple_assign_rhs1 (g01), 0) != result)
+ {
+ return false;
+ }
+
+ gimple *g02 = g01->next;
+ /* GIMPLE_COND would be the last gimple in a basic block,
+ and have no other side effects on RESULT. */
+ if (!g02 || gimple_code (g02) != GIMPLE_COND)
+ {
+ return false;
+ }
+
+ if (first_stmt (bb2) != last_stmt (bb2))
+ {
+ return false;
+ }
+
+ return true;
+}
+
+/* Pattern is like
+ <pre bb>
+ arg1 = base (rhs11) + cst (rhs12); [def1]
+ goto <bb 0>
+
+ <bb 2>
+ arg2 = result (rhs21) + cst (rhs22); [def2]
+
+ <bb 0>
+ # result = PHI <arg1 (pre bb), arg2 (bb 2)>
+ _v = *result; [g01]
+ if (_v == 0) [g02]
+ goto <bb 1>
+ else
+ goto <bb 2>
+
+ <bb 1>
+ _1 = result - base; [g1]
+ _2 = _1 /[ex] cst; [g2]
+ _3 = (unsigned int) _2; [g3]
+ if (_3 == 0)
+ ...
+*/
+static bool
+check_bb_order (basic_block bb0, basic_block &bb1, basic_block &bb2,
+ gphi *phi_stmt, gimple *&output)
+{
+ /* Start check from PHI node in BB0. */
+ if (gimple_phi_num_args (phi_stmt) != 2
+ || virtual_operand_p (gimple_phi_result (phi_stmt)))
+ {
+ return false;
+ }
+
+ tree result = gimple_phi_result (phi_stmt);
+ tree arg1 = gimple_phi_arg_def (phi_stmt, 0);
+ tree arg2 = gimple_phi_arg_def (phi_stmt, 1);
+
+ if (TREE_CODE (arg1) != SSA_NAME
+ || TREE_CODE (arg2) != SSA_NAME
+ || SSA_NAME_IS_DEFAULT_DEF (arg1)
+ || SSA_NAME_IS_DEFAULT_DEF (arg2))
+ {
+ return false;
+ }
+
+ gimple *def1 = SSA_NAME_DEF_STMT (arg1);
+ gimple *def2 = SSA_NAME_DEF_STMT (arg2);
+
+ /* Swap bb1 and bb2 if pattern is like
+ if (_v != 0)
+ goto <bb 2>
+ else
+ goto <bb 1>
+ */
+ if (gimple_bb (def2) == bb1 && EDGE_SUCC (bb1, 0)->dest == bb0)
+ {
+ std::swap (bb1, bb2);
+ }
+
+ /* prebb[def1] --> bb0 <-- bb2[def2] */
+ if (!gimple_bb (def1)
+ || EDGE_SUCC (gimple_bb (def1), 0)->dest != bb0
+ || gimple_bb (def2) != bb2 || EDGE_SUCC (bb2, 0)->dest != bb0)
+ {
+ return false;
+ }
+
+ /* Check whether define gimple meets the pattern requirements. */
+ if (!check_def_gimple (def1, def2, result))
+ {
+ return false;
+ }
+
+ if (!check_loop_body (bb0, bb2, result))
+ {
+ return false;
+ }
+
+ output = def1;
+ return true;
+}
+
+/* Check pattern
+ <bb 1>
+ _1 = result - base; [g1]
+ _2 = _1 /[ex] cst; [g2]
+ _3 = (unsigned int) _2; [g3]
+ if (_3 == 0)
+ ...
+*/
+static bool
+check_gimple_order (basic_block bb1, tree base, tree cst, tree result,
+ gimple *&output)
+{
+ gimple *g1 = first_stmt (bb1);
+ if (!g1 || !is_gimple_assign (g1)
+ || gimple_assign_rhs_code (g1) != POINTER_DIFF_EXPR
+ || gimple_assign_rhs1 (g1) != result
+ || gimple_assign_rhs2 (g1) != base)
+ {
+ return false;
+ }
+
+ gimple *g2 = g1->next;
+ if (!g2 || !is_gimple_assign (g2)
+ || gimple_assign_rhs_code (g2) != EXACT_DIV_EXPR
+ || gimple_assign_lhs (g1) != gimple_assign_rhs1 (g2)
+ || TREE_CODE (gimple_assign_rhs2 (g2)) != INTEGER_CST)
+ {
+ return false;
+ }
+
+ /* INTEGER_CST cst in gimple def1. */
+ HOST_WIDE_INT num1 = TREE_INT_CST_LOW (cst);
+ /* INTEGER_CST cst in gimple g2. */
+ HOST_WIDE_INT num2 = TREE_INT_CST_LOW (gimple_assign_rhs2 (g2));
+ /* _2 must be at least a positive number. */
+ if (num2 == 0 || num1 / num2 <= 0)
+ {
+ return false;
+ }
+
+ gimple *g3 = g2->next;
+ if (!g3 || !is_gimple_assign (g3)
+ || gimple_assign_rhs_code (g3) != NOP_EXPR
+ || gimple_assign_lhs (g2) != gimple_assign_rhs1 (g3)
+ || TREE_CODE (gimple_assign_lhs (g3)) != SSA_NAME)
+ {
+ return false;
+ }
+
+ /* _3 should only be used in comparison operation or PHI node. */
+ hash_set<tree> *hset = new hash_set<tree>;
+ if (!check_uses (gimple_assign_lhs (g3), hset))
+ {
+ delete hset;
+ return false;
+ }
+ delete hset;
+
+ output = g3;
+ return true;
+}
+
+static bool
+do_phiopt_pattern (basic_block bb0, basic_block bb1, basic_block bb2)
+{
+ gphi_iterator gsi;
+
+ for (gsi = gsi_start_phis (bb0); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gphi *phi_stmt = gsi.phi ();
+ gimple *def1 = NULL;
+ tree base, cst, result;
+
+ if (!check_bb_order (bb0, bb1, bb2, phi_stmt, def1))
+ {
+ continue;
+ }
+
+ base = gimple_assign_rhs1 (def1);
+ cst = gimple_assign_rhs2 (def1);
+ result = gimple_phi_result (phi_stmt);
+
+ gimple *stmt = NULL;
+ if (!check_gimple_order (bb1, base, cst, result, stmt))
+ {
+ continue;
+ }
+
+ gcc_assert (stmt);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "PHIOPT pattern optimization (1) - Rewrite:\n");
+ print_gimple_stmt (dump_file, stmt, 0);
+ fprintf (dump_file, "to\n");
+ }
+
+ /* Rewrite statement
+ _3 = (unsigned int) _2;
+ to
+ _3 = (unsigned int) 1;
+ */
+ tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
+ gimple_assign_set_rhs1 (stmt, build_int_cst (type, 1));
+ update_stmt (stmt);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ print_gimple_stmt (dump_file, stmt, 0);
+ fprintf (dump_file, "\n");
+ }
+
+ return true;
+ }
+ return false;
+}
+
/* Determine whether we should attempt to hoist adjacent loads out of
diamond patterns in pass_phiopt. Always hoist loads if
-fhoist-adjacent-loads is specified and the target machine has

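For readers skimming the pattern comments above, here is a hypothetical source-level shape of the loop that this -floop-elim phiopt pattern targets. It is an illustration under assumed names, not code taken from the patch or from any GCC testcase.

/* The scanned pointer starts a positive constant past BASE and only moves
   forward, so on loop exit (p - base) is at least one element.  Because the
   resulting count is only ever compared against zero, rewriting it to the
   constant 1 preserves the program's behaviour.  */
unsigned int
scan_is_empty (int *base)
{
  int *p = base + 1;           /* pre bb: arg1 = base + cst           */
  while (*p != 0)              /* bb 0:  _v = *result; if (_v == 0)   */
    p = p + 1;                 /* bb 2:  arg2 = result + cst          */
  unsigned int n = p - base;   /* bb 1:  POINTER_DIFF, EXACT_DIV, NOP */
  return n == 0;               /* a zero test the pattern can fold    */
}

Since n is provably non-zero here, check_uses only has to confirm that n feeds nothing but zero comparisons (or PHIs of such values) before the rewrite of the NOP_EXPR statement to 1 is applied.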
File diff suppressed because it is too large.

@@ -1,3 +1,9 @@
This backport contains 1 patch from the gcc mainstream tree.
The commit id of the patch is listed below.
0001-expand-Simplify-removing-subregs-when-expanding-a-co.patch
9a182ef9ee011935d827ab5c6c9a7cd8e22257d8
diff -Nurp a/gcc/expr.c b/gcc/expr.c
--- a/gcc/expr.c 2020-08-05 20:33:04.068000000 +0800
+++ b/gcc/expr.c 2020-08-05 20:33:21.420000000 +0800

@@ -0,0 +1,718 @@
This backport contains 2 patches from the gcc mainstream tree.
The commit ids of these patches are listed below in chronological order.
728c2e5eeaa91cf708f2b1b1f996653a7eebae59
0001-modulo-sched-speed-up-DDG-analysis-PR90001.patch
06d5d63d9944691bb4286e5f6b2422cc97148336
0001-modulo-sched-fix-bootstrap-compare-debug-issue.patch
diff -Nurp a/gcc/ddg.c b/gcc/ddg.c
--- a/gcc/ddg.c 2020-11-28 18:40:12.371633230 +0800
+++ b/gcc/ddg.c 2020-11-28 18:38:33.835633230 +0800
@@ -32,9 +32,6 @@ along with GCC; see the file COPYING3.
#ifdef INSN_SCHEDULING
-/* A flag indicating that a ddg edge belongs to an SCC or not. */
-enum edge_flag {NOT_IN_SCC = 0, IN_SCC};
-
/* Forward declarations. */
static void add_backarc_to_ddg (ddg_ptr, ddg_edge_ptr);
static void add_backarc_to_scc (ddg_scc_ptr, ddg_edge_ptr);
@@ -188,9 +185,6 @@ create_ddg_dep_from_intra_loop_link (ddg
else if (DEP_TYPE (link) == REG_DEP_OUTPUT)
t = OUTPUT_DEP;
- gcc_assert (!DEBUG_INSN_P (dest_node->insn) || t == ANTI_DEP);
- gcc_assert (!DEBUG_INSN_P (src_node->insn) || t == ANTI_DEP);
-
/* We currently choose not to create certain anti-deps edges and
compensate for that by generating reg-moves based on the life-range
analysis. The anti-deps that will be deleted are the ones which
@@ -225,9 +219,9 @@ create_ddg_dep_from_intra_loop_link (ddg
}
}
- latency = dep_cost (link);
- e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
- add_edge_to_ddg (g, e);
+ latency = dep_cost (link);
+ e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
+ add_edge_to_ddg (g, e);
}
/* The same as the above function, but it doesn't require a link parameter. */
@@ -240,9 +234,6 @@ create_ddg_dep_no_link (ddg_ptr g, ddg_n
enum reg_note dep_kind;
struct _dep _dep, *dep = &_dep;
- gcc_assert (!DEBUG_INSN_P (to->insn) || d_t == ANTI_DEP);
- gcc_assert (!DEBUG_INSN_P (from->insn) || d_t == ANTI_DEP);
-
if (d_t == ANTI_DEP)
dep_kind = REG_DEP_ANTI;
else if (d_t == OUTPUT_DEP)
@@ -275,16 +266,15 @@ create_ddg_dep_no_link (ddg_ptr g, ddg_n
static void
add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
{
- int regno = DF_REF_REGNO (last_def);
struct df_link *r_use;
int has_use_in_bb_p = false;
- rtx_insn *def_insn = DF_REF_INSN (last_def);
- ddg_node_ptr last_def_node = get_node_of_insn (g, def_insn);
- ddg_node_ptr use_node;
+ int regno = DF_REF_REGNO (last_def);
+ ddg_node_ptr last_def_node = get_node_of_insn (g, DF_REF_INSN (last_def));
df_ref first_def = df_bb_regno_first_def_find (g->bb, regno);
+ ddg_node_ptr first_def_node = get_node_of_insn (g, DF_REF_INSN (first_def));
+ ddg_node_ptr use_node;
- gcc_assert (last_def_node);
- gcc_assert (first_def);
+ gcc_assert (last_def_node && first_def && first_def_node);
if (flag_checking && DF_REF_ID (last_def) != DF_REF_ID (first_def))
{
@@ -303,6 +293,9 @@ add_cross_iteration_register_deps (ddg_p
rtx_insn *use_insn = DF_REF_INSN (r_use->ref);
+ if (DEBUG_INSN_P (use_insn))
+ continue;
+
/* ??? Do not handle uses with DF_REF_IN_NOTE notes. */
use_node = get_node_of_insn (g, use_insn);
gcc_assert (use_node);
@@ -313,35 +306,28 @@ add_cross_iteration_register_deps (ddg_p
iteration. Any such upwards exposed use appears before
the last_def def. */
create_ddg_dep_no_link (g, last_def_node, use_node,
- DEBUG_INSN_P (use_insn) ? ANTI_DEP : TRUE_DEP,
- REG_DEP, 1);
+ TRUE_DEP, REG_DEP, 1);
}
- else if (!DEBUG_INSN_P (use_insn))
+ else
{
/* Add anti deps from last_def's uses in the current iteration
to the first def in the next iteration. We do not add ANTI
dep when there is an intra-loop TRUE dep in the opposite
direction, but use regmoves to fix such disregarded ANTI
deps when broken. If the first_def reaches the USE then
- there is such a dep. */
- ddg_node_ptr first_def_node = get_node_of_insn (g,
- DF_REF_INSN (first_def));
-
- gcc_assert (first_def_node);
-
- /* Always create the edge if the use node is a branch in
- order to prevent the creation of reg-moves.
- If the address that is being auto-inc or auto-dec in LAST_DEF
- is used in USE_INSN then do not remove the edge to make sure
- reg-moves will not be created for that address. */
- if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
- || !flag_modulo_sched_allow_regmoves
+ there is such a dep.
+ Always create the edge if the use node is a branch in
+ order to prevent the creation of reg-moves.
+ If the address that is being auto-inc or auto-dec in LAST_DEF
+ is used in USE_INSN then do not remove the edge to make sure
+ reg-moves will not be created for that address. */
+ if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
+ || !flag_modulo_sched_allow_regmoves
|| JUMP_P (use_node->insn)
- || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)
+ || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)
|| def_has_ccmode_p (DF_REF_INSN (last_def)))
- create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
- REG_DEP, 1);
-
+ create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
+ REG_DEP, 1);
}
}
/* Create an inter-loop output dependence between LAST_DEF (which is the
@@ -351,19 +337,11 @@ add_cross_iteration_register_deps (ddg_p
defs starting with a true dependence to a use which can be in the
next iteration; followed by an anti dependence of that use to the
first def (i.e. if there is a use between the two defs.) */
- if (!has_use_in_bb_p)
- {
- ddg_node_ptr dest_node;
-
- if (DF_REF_ID (last_def) == DF_REF_ID (first_def))
- return;
-
- dest_node = get_node_of_insn (g, DF_REF_INSN (first_def));
- gcc_assert (dest_node);
- create_ddg_dep_no_link (g, last_def_node, dest_node,
- OUTPUT_DEP, REG_DEP, 1);
- }
+ if (!has_use_in_bb_p && DF_REF_ID (last_def) != DF_REF_ID (first_def))
+ create_ddg_dep_no_link (g, last_def_node, first_def_node,
+ OUTPUT_DEP, REG_DEP, 1);
}
+
/* Build inter-loop dependencies, by looking at DF analysis backwards. */
static void
build_inter_loop_deps (ddg_ptr g)
@@ -420,13 +398,9 @@ add_intra_loop_mem_dep (ddg_ptr g, ddg_n
if (mem_write_insn_p (from->insn))
{
if (mem_read_insn_p (to->insn))
- create_ddg_dep_no_link (g, from, to,
- DEBUG_INSN_P (to->insn)
- ? ANTI_DEP : TRUE_DEP, MEM_DEP, 0);
+ create_ddg_dep_no_link (g, from, to, TRUE_DEP, MEM_DEP, 0);
else
- create_ddg_dep_no_link (g, from, to,
- DEBUG_INSN_P (to->insn)
- ? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 0);
+ create_ddg_dep_no_link (g, from, to, OUTPUT_DEP, MEM_DEP, 0);
}
else if (!mem_read_insn_p (to->insn))
create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 0);
@@ -444,13 +418,9 @@ add_inter_loop_mem_dep (ddg_ptr g, ddg_n
if (mem_write_insn_p (from->insn))
{
if (mem_read_insn_p (to->insn))
- create_ddg_dep_no_link (g, from, to,
- DEBUG_INSN_P (to->insn)
- ? ANTI_DEP : TRUE_DEP, MEM_DEP, 1);
+ create_ddg_dep_no_link (g, from, to, TRUE_DEP, MEM_DEP, 1);
else if (from->cuid != to->cuid)
- create_ddg_dep_no_link (g, from, to,
- DEBUG_INSN_P (to->insn)
- ? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 1);
+ create_ddg_dep_no_link (g, from, to, OUTPUT_DEP, MEM_DEP, 1);
}
else
{
@@ -459,13 +429,9 @@ add_inter_loop_mem_dep (ddg_ptr g, ddg_n
else if (from->cuid != to->cuid)
{
create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 1);
- if (DEBUG_INSN_P (from->insn) || DEBUG_INSN_P (to->insn))
- create_ddg_dep_no_link (g, to, from, ANTI_DEP, MEM_DEP, 1);
- else
- create_ddg_dep_no_link (g, to, from, TRUE_DEP, MEM_DEP, 1);
+ create_ddg_dep_no_link (g, to, from, TRUE_DEP, MEM_DEP, 1);
}
}
-
}
/* Perform intra-block Data Dependency analysis and connect the nodes in
@@ -494,20 +460,10 @@ build_intra_loop_deps (ddg_ptr g)
sd_iterator_def sd_it;
dep_t dep;
- if (! INSN_P (dest_node->insn))
- continue;
-
FOR_EACH_DEP (dest_node->insn, SD_LIST_BACK, sd_it, dep)
{
rtx_insn *src_insn = DEP_PRO (dep);
- ddg_node_ptr src_node;
-
- /* Don't add dependencies on debug insns to non-debug insns
- to avoid codegen differences between -g and -g0. */
- if (DEBUG_INSN_P (src_insn) && !DEBUG_INSN_P (dest_node->insn))
- continue;
-
- src_node = get_node_of_insn (g, src_insn);
+ ddg_node_ptr src_node = get_node_of_insn (g, src_insn);
if (!src_node)
continue;
@@ -524,8 +480,7 @@ build_intra_loop_deps (ddg_ptr g)
for (j = 0; j <= i; j++)
{
ddg_node_ptr j_node = &g->nodes[j];
- if (DEBUG_INSN_P (j_node->insn))
- continue;
+
if (mem_access_insn_p (j_node->insn))
{
/* Don't bother calculating inter-loop dep if an intra-loop dep
@@ -564,7 +519,7 @@ create_ddg (basic_block bb, int closing_
{
ddg_ptr g;
rtx_insn *insn, *first_note;
- int i;
+ int i, j;
int num_nodes = 0;
g = (ddg_ptr) xcalloc (1, sizeof (struct ddg));
@@ -576,23 +531,21 @@ create_ddg (basic_block bb, int closing_
for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
insn = NEXT_INSN (insn))
{
- if (! INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
+ if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
continue;
- if (DEBUG_INSN_P (insn))
- g->num_debug++;
- else
+ if (NONDEBUG_INSN_P (insn))
{
if (mem_read_insn_p (insn))
g->num_loads++;
if (mem_write_insn_p (insn))
g->num_stores++;
+ num_nodes++;
}
- num_nodes++;
}
/* There is nothing to do for this BB. */
- if ((num_nodes - g->num_debug) <= 1)
+ if (num_nodes <= 1)
{
free (g);
return NULL;
@@ -607,32 +560,39 @@ create_ddg (basic_block bb, int closing_
for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
insn = NEXT_INSN (insn))
{
- if (! INSN_P (insn))
- {
- if (! first_note && NOTE_P (insn)
- && NOTE_KIND (insn) != NOTE_INSN_BASIC_BLOCK)
- first_note = insn;
- continue;
- }
+ if (LABEL_P (insn) || NOTE_INSN_BASIC_BLOCK_P (insn))
+ continue;
+
+ if (!first_note && (INSN_P (insn) || NOTE_P (insn)))
+ first_note = insn;
+
+ if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
+ continue;
+
if (JUMP_P (insn))
{
gcc_assert (!g->closing_branch);
g->closing_branch = &g->nodes[i];
}
- else if (GET_CODE (PATTERN (insn)) == USE)
+
+ if (NONDEBUG_INSN_P (insn))
{
- if (! first_note)
- first_note = insn;
- continue;
- }
+ g->nodes[i].cuid = i;
+ g->nodes[i].successors = sbitmap_alloc (num_nodes);
+ bitmap_clear (g->nodes[i].successors);
+ g->nodes[i].predecessors = sbitmap_alloc (num_nodes);
+ bitmap_clear (g->nodes[i].predecessors);
+
+ gcc_checking_assert (first_note);
+ g->nodes[i].first_note = first_note;
+
+ g->nodes[i].aux.count = -1;
+ g->nodes[i].max_dist = XCNEWVEC (int, num_nodes);
+ for (j = 0; j < num_nodes; j++)
+ g->nodes[i].max_dist[j] = -1;
- g->nodes[i].cuid = i;
- g->nodes[i].successors = sbitmap_alloc (num_nodes);
- bitmap_clear (g->nodes[i].successors);
- g->nodes[i].predecessors = sbitmap_alloc (num_nodes);
- bitmap_clear (g->nodes[i].predecessors);
- g->nodes[i].first_note = (first_note ? first_note : insn);
- g->nodes[i++].insn = insn;
+ g->nodes[i++].insn = insn;
+ }
first_note = NULL;
}
@@ -668,6 +628,7 @@ free_ddg (ddg_ptr g)
}
sbitmap_free (g->nodes[i].successors);
sbitmap_free (g->nodes[i].predecessors);
+ free (g->nodes[i].max_dist);
}
if (g->num_backarcs > 0)
free (g->backarcs);
@@ -792,7 +753,7 @@ create_ddg_edge (ddg_node_ptr src, ddg_n
e->latency = l;
e->distance = d;
e->next_in = e->next_out = NULL;
- e->aux.info = 0;
+ e->in_scc = false;
return e;
}
@@ -820,7 +781,7 @@ add_edge_to_ddg (ddg_ptr g ATTRIBUTE_UNU
for now that cycles in the data dependence graph contain a single backarc.
This simplifies the algorithm, and can be generalized later. */
static void
-set_recurrence_length (ddg_scc_ptr scc, ddg_ptr g)
+set_recurrence_length (ddg_scc_ptr scc)
{
int j;
int result = -1;
@@ -828,17 +789,14 @@ set_recurrence_length (ddg_scc_ptr scc,
for (j = 0; j < scc->num_backarcs; j++)
{
ddg_edge_ptr backarc = scc->backarcs[j];
- int length;
int distance = backarc->distance;
ddg_node_ptr src = backarc->dest;
ddg_node_ptr dest = backarc->src;
+ int length = src->max_dist[dest->cuid];
+
+ if (length < 0)
+ continue;
- length = longest_simple_path (g, src->cuid, dest->cuid, scc->nodes);
- if (length < 0 )
- {
- /* fprintf (stderr, "Backarc not on simple cycle in SCC.\n"); */
- continue;
- }
length += backarc->latency;
result = MAX (result, (length / distance));
}
@@ -846,9 +804,9 @@ set_recurrence_length (ddg_scc_ptr scc,
}
/* Create a new SCC given the set of its nodes. Compute its recurrence_length
- and mark edges that belong to this scc as IN_SCC. */
+ and mark edges that belong to this scc. */
static ddg_scc_ptr
-create_scc (ddg_ptr g, sbitmap nodes)
+create_scc (ddg_ptr g, sbitmap nodes, int id)
{
ddg_scc_ptr scc;
unsigned int u = 0;
@@ -866,16 +824,18 @@ create_scc (ddg_ptr g, sbitmap nodes)
ddg_edge_ptr e;
ddg_node_ptr n = &g->nodes[u];
+ gcc_assert (n->aux.count == -1);
+ n->aux.count = id;
+
for (e = n->out; e; e = e->next_out)
if (bitmap_bit_p (nodes, e->dest->cuid))
{
- e->aux.count = IN_SCC;
+ e->in_scc = true;
if (e->distance > 0)
add_backarc_to_scc (scc, e);
}
}
- set_recurrence_length (scc, g);
return scc;
}
@@ -1018,7 +978,7 @@ check_sccs (ddg_all_sccs_ptr sccs, int n
ddg_all_sccs_ptr
create_ddg_all_sccs (ddg_ptr g)
{
- int i;
+ int i, j, k, scc, way;
int num_nodes = g->num_nodes;
auto_sbitmap from (num_nodes);
auto_sbitmap to (num_nodes);
@@ -1038,7 +998,7 @@ create_ddg_all_sccs (ddg_ptr g)
ddg_node_ptr dest = backarc->dest;
/* If the backarc already belongs to an SCC, continue. */
- if (backarc->aux.count == IN_SCC)
+ if (backarc->in_scc)
continue;
bitmap_clear (scc_nodes);
@@ -1049,10 +1009,52 @@ create_ddg_all_sccs (ddg_ptr g)
if (find_nodes_on_paths (scc_nodes, g, from, to))
{
- scc = create_scc (g, scc_nodes);
+ scc = create_scc (g, scc_nodes, sccs->num_sccs);
add_scc_to_ddg (sccs, scc);
}
}
+
+ /* Init max_dist arrays for Floyd–Warshall-like
+ longest patch calculation algorithm. */
+ for (k = 0; k < num_nodes; k++)
+ {
+ ddg_edge_ptr e;
+ ddg_node_ptr n = &g->nodes[k];
+
+ if (n->aux.count == -1)
+ continue;
+
+ n->max_dist[k] = 0;
+ for (e = n->out; e; e = e->next_out)
+ if (e->distance == 0 && g->nodes[e->dest->cuid].aux.count == n->aux.count)
+ n->max_dist[e->dest->cuid] = e->latency;
+ }
+
+ /* Run main Floid-Warshall loop. We use only non-backarc edges
+ inside each scc. */
+ for (k = 0; k < num_nodes; k++)
+ {
+ scc = g->nodes[k].aux.count;
+ if (scc != -1)
+ {
+ for (i = 0; i < num_nodes; i++)
+ if (g->nodes[i].aux.count == scc)
+ for (j = 0; j < num_nodes; j++)
+ if (g->nodes[j].aux.count == scc
+ && g->nodes[i].max_dist[k] >= 0
+ && g->nodes[k].max_dist[j] >= 0)
+ {
+ way = g->nodes[i].max_dist[k] + g->nodes[k].max_dist[j];
+ if (g->nodes[i].max_dist[j] < way)
+ g->nodes[i].max_dist[j] = way;
+ }
+ }
+ }
+
+ /* Calculate recurrence_length using max_dist info. */
+ for (i = 0; i < sccs->num_sccs; i++)
+ set_recurrence_length (sccs->sccs[i]);
+
order_sccs (sccs);
if (flag_checking)
@@ -1155,72 +1157,4 @@ find_nodes_on_paths (sbitmap result, ddg
return bitmap_and (result, reachable_from, reach_to);
}
-
-/* Updates the counts of U_NODE's successors (that belong to NODES) to be
- at-least as large as the count of U_NODE plus the latency between them.
- Sets a bit in TMP for each successor whose count was changed (increased).
- Returns nonzero if any count was changed. */
-static int
-update_dist_to_successors (ddg_node_ptr u_node, sbitmap nodes, sbitmap tmp)
-{
- ddg_edge_ptr e;
- int result = 0;
-
- for (e = u_node->out; e; e = e->next_out)
- {
- ddg_node_ptr v_node = e->dest;
- int v = v_node->cuid;
-
- if (bitmap_bit_p (nodes, v)
- && (e->distance == 0)
- && (v_node->aux.count < u_node->aux.count + e->latency))
- {
- v_node->aux.count = u_node->aux.count + e->latency;
- bitmap_set_bit (tmp, v);
- result = 1;
- }
- }
- return result;
-}
-
-
-/* Find the length of a longest path from SRC to DEST in G,
- going only through NODES, and disregarding backarcs. */
-int
-longest_simple_path (struct ddg * g, int src, int dest, sbitmap nodes)
-{
- int i;
- unsigned int u = 0;
- int change = 1;
- int num_nodes = g->num_nodes;
- auto_sbitmap workset (num_nodes);
- auto_sbitmap tmp (num_nodes);
-
-
- /* Data will hold the distance of the longest path found so far from
- src to each node. Initialize to -1 = less than minimum. */
- for (i = 0; i < g->num_nodes; i++)
- g->nodes[i].aux.count = -1;
- g->nodes[src].aux.count = 0;
-
- bitmap_clear (tmp);
- bitmap_set_bit (tmp, src);
-
- while (change)
- {
- sbitmap_iterator sbi;
-
- change = 0;
- bitmap_copy (workset, tmp);
- bitmap_clear (tmp);
- EXECUTE_IF_SET_IN_BITMAP (workset, 0, u, sbi)
- {
- ddg_node_ptr u_node = &g->nodes[u];
-
- change |= update_dist_to_successors (u_node, nodes, tmp);
- }
- }
- return g->nodes[dest].aux.count;
-}
-
#endif /* INSN_SCHEDULING */
diff -Nurp a/gcc/ddg.h b/gcc/ddg.h
--- a/gcc/ddg.h 2020-03-12 19:07:21.000000000 +0800
+++ b/gcc/ddg.h 2020-11-28 18:38:33.835633230 +0800
@@ -64,6 +64,10 @@ struct ddg_node
sbitmap successors;
sbitmap predecessors;
+ /* Temporary array used for Floyd-Warshall algorithm to find
+ scc recurrence length. */
+ int *max_dist;
+
/* For general use by algorithms manipulating the ddg. */
union {
int count;
@@ -95,11 +99,8 @@ struct ddg_edge
ddg_edge_ptr next_in;
ddg_edge_ptr next_out;
- /* For general use by algorithms manipulating the ddg. */
- union {
- int count;
- void *info;
- } aux;
+ /* Is true when edge is already in scc. */
+ bool in_scc;
};
/* This structure holds the Data Dependence Graph for a basic block. */
@@ -115,9 +116,6 @@ struct ddg
int num_loads;
int num_stores;
- /* Number of debug instructions in the BB. */
- int num_debug;
-
/* This array holds the nodes in the graph; it is indexed by the node
cuid, which follows the order of the instructions in the BB. */
ddg_node_ptr nodes;
@@ -178,7 +176,6 @@ ddg_all_sccs_ptr create_ddg_all_sccs (dd
void free_ddg_all_sccs (ddg_all_sccs_ptr);
int find_nodes_on_paths (sbitmap result, ddg_ptr, sbitmap from, sbitmap to);
-int longest_simple_path (ddg_ptr, int from, int to, sbitmap via);
bool autoinc_var_is_used_p (rtx_insn *, rtx_insn *);
diff -Nurp a/gcc/modulo-sched.c b/gcc/modulo-sched.c
--- a/gcc/modulo-sched.c 2020-03-12 19:07:21.000000000 +0800
+++ b/gcc/modulo-sched.c 2020-11-28 18:38:33.835633230 +0800
@@ -370,7 +370,7 @@ doloop_register_get (rtx_insn *head, rtx
: prev_nondebug_insn (tail));
for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
- if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
+ if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
{
if (dump_file)
{
@@ -429,7 +429,7 @@ res_MII (ddg_ptr g)
if (targetm.sched.sms_res_mii)
return targetm.sched.sms_res_mii (g);
- return ((g->num_nodes - g->num_debug) / issue_rate);
+ return g->num_nodes / issue_rate;
}
@@ -2156,11 +2156,7 @@ sms_schedule_by_order (ddg_ptr g, int mi
ddg_node_ptr u_node = &ps->g->nodes[u];
rtx_insn *insn = u_node->insn;
- if (!NONDEBUG_INSN_P (insn))
- {
- bitmap_clear_bit (tobe_scheduled, u);
- continue;
- }
+ gcc_checking_assert (NONDEBUG_INSN_P (insn));
if (bitmap_bit_p (sched_nodes, u))
continue;
@@ -3162,9 +3158,6 @@ ps_has_conflicts (partial_schedule_ptr p
{
rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
- if (!NONDEBUG_INSN_P (insn))
- continue;
-
/* Check if there is room for the current insn. */
if (!can_issue_more || state_dead_lock_p (curr_state))
return true;
diff -Nurp a/gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c b/gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c
--- a/gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c 2020-11-28 18:38:33.835633230 +0800
@@ -0,0 +1,23 @@
+/* { dg-additional-options "-fcompare-debug -fmodulo-sched" } */
+
+struct S { int f; signed int g : 2; } a[1], c = {5, 1}, d;
+short b;
+
+__attribute__((noinline, noclone)) void
+foo (int x)
+{
+ if (x != 1)
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ while (b++ <= 0)
+ {
+ struct S e = {1, 1};
+ d = e = a[0] = c;
+ }
+ foo (a[0].g);
+ return 0;
+}
diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c b/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c
--- a/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c 2020-11-28 18:38:33.835633230 +0800
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fcompare-debug -fmodulo-sched --param sms-min-sc=1" } */
+
+int a, c, e, f, g;
+void
+h (int i)
+{
+ a = i;
+}
+void
+j (char *i, long k)
+{
+ while (k--)
+ c = *i++;
+}
+void
+l (unsigned char *i, long k)
+{
+ unsigned char *b = i + k;
+ while (i < b)
+ {
+ h (*i);
+ i++;
+ }
+}
+void
+m ()
+{
+ while (e)
+ {
+ float d = g;
+ l ((char *) &d, sizeof (g));
+ if (f)
+ j ((char *) &d, sizeof (g));
+ }
+}

View File

@@ -0,0 +1,359 @@
This backport contains 1 patch from the GCC mainstream tree.
The commit ID of the patch is listed below.
0001-store-merging-Consider-also-overlapping-stores-earli.patch
bd909071ac04e94f4b6f0baab64d0687ec55681d
diff -uprN a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c
--- a/gcc/gimple-ssa-store-merging.c 2020-12-16 17:03:16.155633230 +0800
+++ b/gcc/gimple-ssa-store-merging.c 2020-12-16 11:15:58.575633230 +0800
@@ -2021,7 +2021,8 @@ struct imm_store_chain_info
}
}
bool terminate_and_process_chain ();
- bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
+ bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
+ unsigned int);
bool coalesce_immediate_stores ();
bool output_merged_store (merged_store_group *);
bool output_merged_stores ();
@@ -2342,14 +2343,39 @@ gather_bswap_load_refs (vec<tree> *refs,
into the group. That way it will be its own store group and will
not be touched. If ALL_INTEGER_CST_P and there are overlapping
INTEGER_CST stores, those are mergeable using merge_overlapping,
- so don't return false for those. */
+ so don't return false for those.
+
+ Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
+ (exclusive), whether they don't overlap the bitrange START to END
+ and have order in between FIRST_ORDER and LAST_ORDER. This is to
+ prevent merging in cases like:
+ MEM <char[12]> [&b + 8B] = {};
+ MEM[(short *) &b] = 5;
+ _5 = *x_4(D);
+ MEM <long long unsigned int> [&b + 2B] = _5;
+ MEM[(char *)&b + 16B] = 88;
+ MEM[(int *)&b + 20B] = 1;
+ The = {} store comes in sort_by_bitpos before the = 88 store, and can't
+ be merged with it, because the = _5 store overlaps these and is in between
+ them in sort_by_order ordering. If it was merged, the merged store would
+ go after the = _5 store and thus change behavior. */
static bool
check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
- bool all_integer_cst_p, unsigned int last_order,
- unsigned HOST_WIDE_INT end)
+ bool all_integer_cst_p, unsigned int first_order,
+ unsigned int last_order, unsigned HOST_WIDE_INT start,
+ unsigned HOST_WIDE_INT end, unsigned int first_earlier,
+ unsigned end_earlier)
{
unsigned int len = m_store_info.length ();
+ for (unsigned int j = first_earlier; j < end_earlier; j++)
+ {
+ store_immediate_info *info = m_store_info[j];
+ if (info->order > first_order
+ && info->order < last_order
+ && info->bitpos + info->bitsize > start)
+ return false;
+ }
for (++i; i < len; ++i)
{
store_immediate_info *info = m_store_info[i];
@@ -2370,7 +2396,8 @@ check_no_overlap (vec<store_immediate_in
bool
imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
unsigned int first,
- unsigned int try_size)
+ unsigned int try_size,
+ unsigned int first_earlier)
{
unsigned int len = m_store_info.length (), last = first;
unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
@@ -2509,7 +2536,8 @@ imm_store_chain_info::try_coalesce_bswap
if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
return false;
- if (!check_no_overlap (m_store_info, last, false, last_order, end))
+ if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
+ merged_store->start, end, first_earlier, first))
return false;
/* Don't handle memory copy this way if normal non-bswap processing
@@ -2601,6 +2629,8 @@ imm_store_chain_info::coalesce_immediate
store_immediate_info *info;
unsigned int i, ignore = 0;
+ unsigned int first_earlier = 0;
+ unsigned int end_earlier = 0;
/* Order the stores by the bitposition they write to. */
m_store_info.qsort (sort_by_bitpos);
@@ -2615,6 +2645,12 @@ imm_store_chain_info::coalesce_immediate
if (i <= ignore)
goto done;
+ while (first_earlier < end_earlier
+ && (m_store_info[first_earlier]->bitpos
+ + m_store_info[first_earlier]->bitsize
+ <= merged_store->start))
+ first_earlier++;
+
/* First try to handle group of stores like:
p[0] = data >> 24;
p[1] = data >> 16;
@@ -2628,7 +2664,8 @@ imm_store_chain_info::coalesce_immediate
{
unsigned int try_size;
for (try_size = 64; try_size >= 16; try_size >>= 1)
- if (try_coalesce_bswap (merged_store, i - 1, try_size))
+ if (try_coalesce_bswap (merged_store, i - 1, try_size,
+ first_earlier))
break;
if (try_size >= 16)
@@ -2636,7 +2673,10 @@ imm_store_chain_info::coalesce_immediate
ignore = i + merged_store->stores.length () - 1;
m_merged_store_groups.safe_push (merged_store);
if (ignore < m_store_info.length ())
- merged_store = new merged_store_group (m_store_info[ignore]);
+ {
+ merged_store = new merged_store_group (m_store_info[ignore]);
+ end_earlier = ignore;
+ }
else
merged_store = NULL;
goto done;
@@ -2662,12 +2702,16 @@ imm_store_chain_info::coalesce_immediate
/* Only allow overlapping stores of constants. */
if (info->rhs_code == INTEGER_CST && merged_store->only_constants)
{
+ unsigned int first_order
+ = MIN (merged_store->first_order, info->order);
unsigned int last_order
= MAX (merged_store->last_order, info->order);
unsigned HOST_WIDE_INT end
= MAX (merged_store->start + merged_store->width,
info->bitpos + info->bitsize);
- if (check_no_overlap (m_store_info, i, true, last_order, end))
+ if (check_no_overlap (m_store_info, i, true, first_order,
+ last_order, merged_store->start, end,
+ first_earlier, end_earlier))
{
/* check_no_overlap call above made sure there are no
overlapping stores with non-INTEGER_CST rhs_code
@@ -2696,6 +2740,7 @@ imm_store_chain_info::coalesce_immediate
do
{
unsigned int max_order = 0;
+ unsigned int min_order = first_order;
unsigned first_nonmergeable_int_order = ~0U;
unsigned HOST_WIDE_INT this_end = end;
k = i;
@@ -2721,6 +2766,7 @@ imm_store_chain_info::coalesce_immediate
break;
}
k = j;
+ min_order = MIN (min_order, info2->order);
this_end = MAX (this_end,
info2->bitpos + info2->bitsize);
}
@@ -2736,6 +2782,12 @@ imm_store_chain_info::coalesce_immediate
first_nonmergeable_order
= MIN (first_nonmergeable_order, info2->order);
}
+ if (k > i
+ && !check_no_overlap (m_store_info, len - 1, true,
+ min_order, try_order,
+ merged_store->start, this_end,
+ first_earlier, end_earlier))
+ k = 0;
if (k == 0)
{
if (last_order == try_order)
@@ -2821,9 +2873,12 @@ imm_store_chain_info::coalesce_immediate
info->ops_swapped_p = true;
}
if (check_no_overlap (m_store_info, i, false,
+ MIN (merged_store->first_order, info->order),
MAX (merged_store->last_order, info->order),
+ merged_store->start,
MAX (merged_store->start + merged_store->width,
- info->bitpos + info->bitsize)))
+ info->bitpos + info->bitsize),
+ first_earlier, end_earlier))
{
/* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
@@ -2868,6 +2923,7 @@ imm_store_chain_info::coalesce_immediate
delete merged_store;
merged_store = new merged_store_group (info);
+ end_earlier = i;
if (dump_file && (dump_flags & TDF_DETAILS))
fputs ("New store group\n", dump_file);
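A minimal standalone sketch (hypothetical names, not the patch itself) of the new earlier-stores scan added to check_no_overlap above: a merge is rejected when a store from the [first_earlier, end_earlier) window is ordered strictly between the group's first and last stores and overlaps the merged bitrange, exactly the situation described in the comment about the "= _5" store.

#include <stdbool.h>
#include <stdio.h>

struct store_info { unsigned order; unsigned long bitpos; unsigned long bitsize; };

static bool
check_no_overlap_earlier (const struct store_info *stores, unsigned first_earlier,
                          unsigned end_earlier, unsigned first_order,
                          unsigned last_order, unsigned long start)
{
  for (unsigned j = first_earlier; j < end_earlier; j++)
    if (stores[j].order > first_order
        && stores[j].order < last_order
        && stores[j].bitpos + stores[j].bitsize > start)
      return false;
  return true;
}

int
main (void)
{
  /* Mirrors the comment's example: the "= _5" store (order 2, bits 16..80)
     sits between the "= {}" store (order 0) and the "= 88" store (order 3)
     and overlaps the candidate group starting at bit 64, so reject.  */
  struct store_info earlier[] = { { 2, 16, 64 } };
  bool ok = check_no_overlap_earlier (earlier, 0, 1, 0, 3, 64);
  printf ("merge allowed: %s\n", ok ? "yes" : "no");  /* prints "no" */
  return 0;
}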
diff -uprN a/gcc/testsuite/gcc.dg/store_merging_31.c b/gcc/testsuite/gcc.dg/store_merging_31.c
--- a/gcc/testsuite/gcc.dg/store_merging_31.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/store_merging_31.c 2020-12-16 11:15:58.575633230 +0800
@@ -0,0 +1,27 @@
+/* PR tree-optimization/97053 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+struct S { short a; char b[9]; int c; char d; int e; };
+
+__attribute__((noipa)) void
+foo (char *x, char *y)
+{
+ if (__builtin_strcmp (x, "ABCDXXXX") != 0
+ || __builtin_strcmp (y, "ABCDXXXX") != 0)
+ __builtin_abort ();
+}
+
+int
+main ()
+{
+ char a[9] = "XXXXXXXX";
+ struct S b = {};
+ __builtin_memcpy (a, "ABCD", 4);
+ b.a = 5;
+ __builtin_memcpy (b.b, a, 8);
+ b.d = 'X';
+ b.e = 1;
+ foo (a, b.b);
+ return 0;
+}
diff -uprN a/gcc/testsuite/gcc.dg/store_merging_32.c b/gcc/testsuite/gcc.dg/store_merging_32.c
--- a/gcc/testsuite/gcc.dg/store_merging_32.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/store_merging_32.c 2020-12-16 11:15:58.575633230 +0800
@@ -0,0 +1,129 @@
+/* PR tree-optimization/97053 */
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-tree-dse" } */
+
+struct __attribute__((packed, may_alias)) S { long long s; };
+struct __attribute__((packed, may_alias)) T { short t; };
+
+__attribute__((noipa)) void
+test (char *p, char *q, int s)
+{
+ if ((s & 1) == 0)
+ {
+ if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
+ != *(short __attribute__((may_alias)) *) &q[sizeof (short)]
+ || (((struct S __attribute__((may_alias)) *) &p[1])->s
+ != ((struct S __attribute__((may_alias)) *) &q[1])->s)
+ || (*(short __attribute__((may_alias)) *) &p[2 * sizeof (short)]
+ != *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)]))
+ __builtin_abort ();
+ }
+ else
+ {
+ if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
+ != *(short __attribute__((may_alias)) *) &q[sizeof (short)]
+ || (((struct S __attribute__((may_alias)) *) &p[1])->s
+ != ((struct S __attribute__((may_alias)) *) &q[1])->s)
+ || (((struct T __attribute__((may_alias)) *) &p[2 * sizeof (short) - 1])->t
+ != ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t)
+ || p[3 * sizeof (short) - 2] != q[3 * sizeof (short) - 2])
+ __builtin_abort ();
+ }
+}
+
+__attribute__((noipa)) void
+foo (long long *p, char *q, char *r, char *s)
+{
+ char a[64] __attribute__((aligned (__alignof (short))));
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
+ *(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+ *(short __attribute__((may_alias)) *) &s[2 * sizeof (short)] = 2;
+ test (a, q, 0);
+}
+
+__attribute__((noipa)) void
+bar (long long *p, char *q, char *r, char *s, char *t)
+{
+ char a[64] __attribute__((aligned (__alignof (short))));
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+ ((struct T __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1])->t = 2;
+ a[3 * sizeof (short) - 2] = 3;
+ *(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+ ((struct T __attribute__((may_alias)) *) &s[2 * sizeof (short) - 1])->t = 2;
+ t[3 * sizeof (short) - 2] = 3;
+ test (a, q, 1);
+}
+
+__attribute__((noipa)) void
+baz (long long *p, char *q, char *r, char *s)
+{
+ char a[64] __attribute__((aligned (__alignof (short))));
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
+ *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = 2;
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+ *(short __attribute__((may_alias)) *) &s[sizeof (short)] = 1;
+ test (a, q, 2);
+}
+
+__attribute__((noipa)) void
+qux (long long *p, char *q, char *r, char *s, char *t)
+{
+ char a[64] __attribute__((aligned (__alignof (short))));
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1] = 2;
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+ a[3 * sizeof (short) - 2] = 3;
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
+ ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t = 2;
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+ s[3 * sizeof (short) - 2] = 3;
+ ((struct T __attribute__((may_alias)) *) &t[sizeof (short)])->t = 1;
+ test (a, q, 3);
+}
+
+__attribute__((noipa)) void
+corge (long long *p, char *q, char *r, char *s, short u[3])
+{
+ char a[64] __attribute__((aligned (__alignof (short))));
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
+ *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+ *(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
+ test (a, q, 4);
+}
+
+__attribute__((noipa)) void
+garply (long long *p, char *q, char *r, char *s, short u[3])
+{
+ char a[64] __attribute__((aligned (__alignof (short))));
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
+ *(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
+ *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
+ test (a, q, 6);
+}
+
+int
+main ()
+{
+ char a[64] __attribute__((aligned (__alignof (short))));
+ long long p = -1LL;
+ short u[] = { 1, 2, 3 };
+ foo (&p, &a[0], &a[0], &a[0]);
+ bar (&p, &a[0], &a[0], &a[0], &a[0]);
+ baz (&p, &a[0], &a[0], &a[0]);
+ qux (&p, &a[0], &a[0], &a[0], &a[0]);
+ corge (&p, &a[0], &a[0], &a[0], u);
+ garply (&p, &a[0], &a[0], &a[0], u);
+ return 0;
+}

View File

@@ -0,0 +1,316 @@
This backport contains 1 patch from the GCC mainstream tree.
The commit ID of the patch is listed below.
46a58c779af3055a4b10b285a1f4be28abe4351c
0001-tree-optimization-96920-another-ICE-when-vectorizing.patch
diff -uprN a/gcc/testsuite/gcc.dg/vect/pr96920.c b/gcc/testsuite/gcc.dg/vect/pr96920.c
--- a/gcc/testsuite/gcc.dg/vect/pr96920.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/vect/pr96920.c 2020-10-26 21:46:25.316000000 +0800
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+
+int a[1024];
+int b[2048];
+
+void foo (int x, int y)
+{
+ for (int i = 0; i < 1024; ++i)
+ {
+ int tem0 = b[2*i];
+ int tem1 = b[2*i+1];
+ for (int j = 0; j < 32; ++j)
+ {
+ int tem = tem0;
+ tem0 = tem1;
+ tem1 = tem;
+ a[i] += tem0;
+ }
+ }
+}
diff -uprN a/gcc/testsuite/gfortran.dg/vect/pr96920.f90 b/gcc/testsuite/gfortran.dg/vect/pr96920.f90
--- a/gcc/testsuite/gfortran.dg/vect/pr96920.f90 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gfortran.dg/vect/pr96920.f90 2020-10-26 21:46:25.316000000 +0800
@@ -0,0 +1,37 @@
+! { dg-do compile }
+ subroutine ice(npoint, nterm, x, g)
+ implicit none
+ integer norder
+ parameter (norder=10)
+ integer j
+ integer k
+ integer ii
+ integer nterm
+ integer npoint
+ real b(norder)
+ real c(norder)
+ real d(norder)
+ real x(npoint)
+ real g(npoint)
+ real gg
+ real prev
+ real prev2
+
+ j = 1
+ 100 continue
+ j = j+1
+ if (nterm == j) then
+ do ii=1,npoint
+ k = nterm
+ gg= d(k)
+ prev= 0.0
+ do k=k-1,1,-1
+ prev2= prev
+ prev= gg
+ gg = d(k)+(x(ii)-b(k))*prev-c(k+1)*prev2
+ enddo
+ g(ii) = gg
+ enddo
+ endif
+ go to 100
+ end
diff -uprN a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
--- a/gcc/tree-vect-loop.c 2020-10-26 21:45:23.056000000 +0800
+++ b/gcc/tree-vect-loop.c 2020-10-26 21:49:02.884000000 +0800
@@ -8166,6 +8166,47 @@ scale_profile_for_vect_loop (struct loop
scale_bbs_frequencies (&loop->latch, 1, exit_l->probability / prob);
}
+/* For a vectorized stmt DEF_STMT_INFO adjust all vectorized PHI
+ latch edge values originally defined by it. */
+
+static void
+maybe_set_vectorized_backedge_value (loop_vec_info loop_vinfo,
+ stmt_vec_info def_stmt_info)
+{
+ tree def = gimple_get_lhs (vect_orig_stmt (def_stmt_info)->stmt);
+ if (!def || TREE_CODE (def) != SSA_NAME)
+ return;
+ stmt_vec_info phi_info;
+ imm_use_iterator iter;
+ use_operand_p use_p;
+ FOR_EACH_IMM_USE_FAST (use_p, iter, def)
+ if (gphi *phi = dyn_cast <gphi *> (USE_STMT (use_p)))
+ if (gimple_bb (phi)->loop_father->header == gimple_bb (phi)
+ && (phi_info = loop_vinfo->lookup_stmt (phi))
+ && VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (phi_info))
+ && STMT_VINFO_REDUC_TYPE (phi_info) != FOLD_LEFT_REDUCTION
+ && STMT_VINFO_REDUC_TYPE (phi_info) != EXTRACT_LAST_REDUCTION)
+ {
+ loop_p loop = gimple_bb (phi)->loop_father;
+ edge e = loop_latch_edge (loop);
+ if (PHI_ARG_DEF_FROM_EDGE (phi, e) == def)
+ {
+ stmt_vec_info phi_defs = STMT_VINFO_VEC_STMT (phi_info);
+ stmt_vec_info latch_defs = STMT_VINFO_VEC_STMT (def_stmt_info);
+ while (phi_defs && latch_defs)
+ {
+ add_phi_arg (as_a <gphi *> (phi_defs->stmt),
+ gimple_get_lhs (latch_defs->stmt), e,
+ gimple_phi_arg_location (phi, e->dest_idx));
+ phi_defs = STMT_VINFO_RELATED_STMT (phi_defs);
+ latch_defs = STMT_VINFO_RELATED_STMT (latch_defs);
+ }
+ gcc_assert (!latch_defs);
+ gcc_assert (!phi_defs);
+ }
+ }
+}
+
/* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI.
When vectorizing STMT_INFO as a store, set *SEEN_STORE to its
stmt_vec_info. */
@@ -8533,7 +8574,7 @@ vect_transform_loop (loop_vec_info loop_
for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
gsi_next (&si))
- {
+ {
gphi *phi = si.phi ();
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
@@ -8568,6 +8609,27 @@ vect_transform_loop (loop_vec_info loop_
}
}
+ for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
+ gsi_next (&si))
+ {
+ gphi *phi = si.phi ();
+ stmt_info = loop_vinfo->lookup_stmt (phi);
+ if (!stmt_info)
+ continue;
+
+ if (!STMT_VINFO_RELEVANT_P (stmt_info)
+ && !STMT_VINFO_LIVE_P (stmt_info))
+ continue;
+
+ if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
+ && ! PURE_SLP_STMT (stmt_info))
+ maybe_set_vectorized_backedge_value (loop_vinfo, stmt_info);
+ }
+
for (gimple_stmt_iterator si = gsi_start_bb (bb);
!gsi_end_p (si);)
{
@@ -8604,9 +8666,16 @@ vect_transform_loop (loop_vec_info loop_
= STMT_VINFO_RELATED_STMT (stmt_info);
vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, &si,
&seen_store);
+ maybe_set_vectorized_backedge_value (loop_vinfo,
+ pat_stmt_info);
+ }
+ else
+ {
+ vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
+ &seen_store);
+ maybe_set_vectorized_backedge_value (loop_vinfo,
+ stmt_info);
}
- vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
- &seen_store);
}
gsi_next (&si);
if (seen_store)
@@ -8623,43 +8692,6 @@ vect_transform_loop (loop_vec_info loop_
}
}
- /* Fill in backedge defs of reductions. */
- for (unsigned i = 0; i < loop_vinfo->reduc_latch_defs.length (); ++i)
- {
- stmt_vec_info stmt_info = loop_vinfo->reduc_latch_defs[i];
- stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
- stmt_vec_info phi_info
- = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
- stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
- gphi *phi
- = dyn_cast <gphi *> (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt);
- edge e = loop_latch_edge (gimple_bb (phi_info->stmt)->loop_father);
- do
- {
- add_phi_arg (as_a <gphi *> (phi_info->stmt),
- gimple_get_lhs (vec_stmt->stmt), e,
- gimple_phi_arg_location (phi, e->dest_idx));
- phi_info = STMT_VINFO_RELATED_STMT (phi_info);
- vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
- }
- while (phi_info);
- gcc_assert (!vec_stmt);
- }
- for (unsigned i = 0; i < loop_vinfo->reduc_latch_slp_defs.length (); ++i)
- {
- slp_tree slp_node = loop_vinfo->reduc_latch_slp_defs[i].first;
- slp_tree phi_node = loop_vinfo->reduc_latch_slp_defs[i].second;
- gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
- e = loop_latch_edge (gimple_bb (phi)->loop_father);
- gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
- == SLP_TREE_VEC_STMTS (slp_node).length ());
- for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
- add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]->stmt),
- gimple_get_lhs
- (SLP_TREE_VEC_STMTS (slp_node)[j]->stmt),
- e, gimple_phi_arg_location (phi, e->dest_idx));
- }
-
/* Stub out scalar statements that must not survive vectorization.
Doing this here helps with grouped statements, or statements that
are involved in patterns. */
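To illustrate what maybe_set_vectorized_backedge_value above is responsible for, here is a hand-written analogue (illustrative only, not GCC internals): in the vectorized loop the accumulator plays the role of the loop-header PHI, and the value it receives at the bottom of the body corresponds to the latch-edge argument the new function fills in on the vectorized PHIs.

#include <stdio.h>

typedef int v4si __attribute__ ((vector_size (16)));

static int
sum_scalar (const int *a, int n)
{
  int s = 0;                          /* scalar loop-header PHI */
  for (int i = 0; i < n; i++)
    s = s + a[i];                     /* value flowing back over the latch edge */
  return s;
}

static int
sum_vector (const int *a, int n)      /* assumes n % 4 == 0 */
{
  v4si vacc = (v4si) { 0, 0, 0, 0 };  /* vectorized PHI */
  for (int i = 0; i < n; i += 4)
    {
      v4si v;
      __builtin_memcpy (&v, a + i, sizeof v);
      vacc = vacc + v;                /* its latch-edge value */
    }
  return vacc[0] + vacc[1] + vacc[2] + vacc[3];
}

int
main (void)
{
  int a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  printf ("%d %d\n", sum_scalar (a, 8), sum_vector (a, 8));  /* 36 36 */
  return 0;
}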
diff -uprN a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
--- a/gcc/tree-vectorizer.h 2020-10-26 21:45:23.052000000 +0800
+++ b/gcc/tree-vectorizer.h 2020-10-26 21:46:25.316000000 +0800
@@ -575,11 +575,6 @@ typedef struct _loop_vec_info : public v
stmt in the chain. */
auto_vec<stmt_vec_info> reduction_chains;
- /* The vectorized stmts defining the latch values of the reduction
- they are involved with. */
- auto_vec<stmt_vec_info> reduc_latch_defs;
- auto_vec<std::pair<slp_tree, slp_tree> > reduc_latch_slp_defs;
-
/* Cost vector for a single scalar iteration. */
auto_vec<stmt_info_for_cost> scalar_cost_vec;
diff -uprN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
--- a/gcc/tree-vect-slp.c 2020-10-26 21:45:23.052000000 +0800
+++ b/gcc/tree-vect-slp.c 2020-10-26 21:46:25.320000000 +0800
@@ -2189,6 +2189,7 @@ vect_analyze_slp_instance (vec_info *vin
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
SLP_INSTANCE_LOADS (new_instance) = vNULL;
SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
+ new_instance->reduc_phis = NULL;
vect_gather_slp_loads (new_instance, node);
if (dump_enabled_p ())
@@ -4282,6 +4283,26 @@ vect_schedule_slp (vec_info *vinfo)
stmt_vec_info store_info;
unsigned int j;
+ /* For reductions set the latch values of the vectorized PHIs. */
+ if (instance->reduc_phis
+ && STMT_VINFO_REDUC_TYPE (SLP_TREE_SCALAR_STMTS
+ (instance->reduc_phis)[0]) != FOLD_LEFT_REDUCTION
+ && STMT_VINFO_REDUC_TYPE (SLP_TREE_SCALAR_STMTS
+ (instance->reduc_phis)[0]) != EXTRACT_LAST_REDUCTION)
+ {
+ slp_tree slp_node = root;
+ slp_tree phi_node = instance->reduc_phis;
+ gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
+ edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
+ gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
+ == SLP_TREE_VEC_STMTS (slp_node).length ());
+ for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
+ add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]->stmt),
+ gimple_get_lhs
+ (SLP_TREE_VEC_STMTS (slp_node)[j]->stmt),
+ e, gimple_phi_arg_location (phi, e->dest_idx));
+ }
+
/* Remove scalar call stmts. Do not do this for basic-block
vectorization as not all uses may be vectorized.
??? Why should this be necessary? DCE should be able to
diff -uprN a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
--- a/gcc/tree-vect-stmts.c 2020-10-26 21:45:23.012000000 +0800
+++ b/gcc/tree-vect-stmts.c 2020-10-26 21:46:25.320000000 +0800
@@ -10229,37 +10229,6 @@ vect_transform_stmt (stmt_vec_info stmt_
if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
return is_store;
- /* If this stmt defines a value used on a backedge, record it so
- we can update the vectorized PHIs later. */
- stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
- stmt_vec_info reduc_info;
- if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
- && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
- && (reduc_info = info_for_reduction (orig_stmt_info))
- && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
- && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
- {
- gphi *phi;
- edge e;
- if (!slp_node
- && (phi = dyn_cast <gphi *>
- (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
- && dominated_by_p (CDI_DOMINATORS,
- gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
- && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
- && (PHI_ARG_DEF_FROM_EDGE (phi, e)
- == gimple_get_lhs (orig_stmt_info->stmt)))
- {
- as_a <loop_vec_info> (vinfo)->reduc_latch_defs.safe_push (stmt_info);
- }
- else if (slp_node
- && slp_node != slp_node_instance->reduc_phis)
- {
- as_a <loop_vec_info> (vinfo)->reduc_latch_slp_defs.safe_push
- (std::make_pair (slp_node, slp_node_instance->reduc_phis));
- }
- }
-
/* Handle stmts whose DEF is used outside the loop-nest that is
being vectorized. */
done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,

View File

@@ -0,0 +1,48 @@
This backport contains 1 patch from the GCC mainstream tree.
The commit ID of the patch is listed below.
dcfd302a79a5e2ea3bb16fc4fc45a5ee31cc0eab
0001-tree-optimization-97812-fix-range-query-in-VRP-asser.patch
diff --git a/gcc/testsuite/gcc.dg/torture/pr97812.c b/gcc/testsuite/gcc.dg/torture/pr97812.c
new file mode 100644
index 00000000000..4d468adf8fa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr97812.c
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+/* { dg-additional-options "-fdisable-tree-evrp" } */
+
+unsigned char c;
+
+int main() {
+volatile short b = 4066;
+ unsigned short bp = b;
+ unsigned d = bp & 2305;
+ signed char e = d;
+ c = e ? : e;
+ if (!d)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 54ce017e8b2..d661866630e 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -1740,8 +1740,14 @@ register_edge_assert_for_2 (tree name, edge e,
&& ((TYPE_PRECISION (TREE_TYPE (name))
> TYPE_PRECISION (TREE_TYPE (rhs1)))
|| (get_range_info (rhs1, &rmin, &rmax) == VR_RANGE
- && wi::fits_to_tree_p (rmin, TREE_TYPE (name))
- && wi::fits_to_tree_p (rmax, TREE_TYPE (name)))))
+ && wi::fits_to_tree_p
+ (widest_int::from (rmin,
+ TYPE_SIGN (TREE_TYPE (rhs1))),
+ TREE_TYPE (name))
+ && wi::fits_to_tree_p
+ (widest_int::from (rmax,
+ TYPE_SIGN (TREE_TYPE (rhs1))),
+ TREE_TYPE (name)))))
add_assert_info (asserts, rhs1, rhs1,
comp_code, fold_convert (TREE_TYPE (rhs1), val));
}
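The following standalone program (illustrative values taken from pr97812.c above, not part of the fix) shows the semantic hazard the corrected range check guards against: a range that is valid for the wide operand cannot be carried over to the narrowed result, because the conversion truncates.

#include <stdio.h>

int
main (void)
{
  unsigned int d = 2304;            /* 4066 & 2305, as in pr97812.c above */
  signed char e = (signed char) d;  /* GCC truncates modulo 256, giving 0 */
  printf ("d = %u, e = %d\n", d, e);
  /* e == 0 even though d != 0, so an assertion derived from a test on e must
     only be registered for d when d's whole range provably fits e's type.  */
  return 0;
}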

View File

@@ -0,0 +1,19 @@
This backport contains 1 patch from the GCC mainstream tree.
The commit ID of the patch is listed below.
a0aeb7fb93da156b64fd08391c79ff35a69af7ba
0001-tree-vect-stmts.c-vectorizable_comparison-Swap-opera.patch
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index e921225b5ec..601a6f55fbf 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -10369,7 +10369,7 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
if (!slp_node)
{
- if (swap_p)
+ if (swap_p && j == 0)
std::swap (vec_rhs1, vec_rhs2);
vec_oprnds0.quick_push (vec_rhs1);
vec_oprnds1.quick_push (vec_rhs2);
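A standalone sketch (illustrative names) of why the one-line change above guards the swap with j == 0: the operand swap must happen exactly once, for the first vector copy; re-running it on later copies would flip the operands back. The assumption here is that copies for j > 0 are derived from the already-swapped defs pushed on the first iteration, which the surrounding code (not shown in the diff) provides.

#include <stdio.h>

int
main (void)
{
  const char *rhs1 = "op1", *rhs2 = "op2";
  int swap_p = 1, ncopies = 2;

  for (int j = 0; j < ncopies; j++)
    {
      if (swap_p && j == 0)         /* buggy form: if (swap_p) */
        {
          const char *tmp = rhs1;
          rhs1 = rhs2;
          rhs2 = tmp;
        }
      printf ("copy %d: (%s, %s)\n", j, rhs1, rhs2);
    }
  return 0;
}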

View File

@@ -0,0 +1,321 @@
This backport contains 4 patches from the GCC mainstream tree.
The commit IDs of these patches are listed below in chronological order.
0001-re-PR-target-90424-memcpy-into-vector-builtin-not-op.patch
1bf2a0b90f2457f6d9301535560eb5e05978261b
0002-testsuite-aarch64-arm-Add-missing-quotes-to-expected.patch
0ec537f3500924f29505977aa89c2a1d4671c584
0003-x86-Tweak-testcases-for-PR82361.patch
ad4644f378fe2f731cd987a4aff14b935f530b88
0004-x86-Robustify-vzeroupper-handling-across-calls.patch
2a2e3a0dfcbe0861915f421d11b828f0c35023f0
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 9282a8fb6..ba72da1ec 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -95,6 +95,7 @@ along with GCC; see the file COPYING3. If not see
#include "i386-builtins.h"
#include "i386-expand.h"
#include "i386-features.h"
+#include "function-abi.h"
/* This file should be included last. */
#include "target-def.h"
@@ -13529,6 +13530,15 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
}
}
+ /* If the function is known to preserve some SSE registers,
+ RA and previous passes can legitimately rely on that for
+ modes wider than 256 bits. It's only safe to issue a
+ vzeroupper if all SSE registers are clobbered. */
+ const function_abi &abi = insn_callee_abi (insn);
+ if (!hard_reg_set_subset_p (reg_class_contents[ALL_SSE_REGS],
+ abi.mode_clobbers (V4DImode)))
+ return AVX_U128_ANY;
+
return AVX_U128_CLEAN;
}
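A minimal sketch (hypothetical masks, not the i386 backend data structures) of the guard added above: a call may only be treated as leaving the upper AVX state clean when its ABI clobbers every SSE register; if the callee preserves any of them, nothing can be assumed.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool
subset_p (uint32_t subset, uint32_t set)
{
  return (subset & ~set) == 0;
}

int
main (void)
{
  uint32_t all_sse_regs = 0xffff;   /* pretend xmm0..xmm15 */
  uint32_t call_clobbers = 0x00ff;  /* callee preserves xmm8..xmm15 */

  if (subset_p (all_sse_regs, call_clobbers))
    puts ("AVX_U128_CLEAN");
  else
    puts ("AVX_U128_ANY");          /* printed here */
  return 0;
}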
diff --git a/gcc/testsuite/g++.target/i386/pr90424-1.C b/gcc/testsuite/g++.target/i386/pr90424-1.C
new file mode 100644
index 000000000..9df8c089b
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr90424-1.C
@@ -0,0 +1,32 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */
+
+template <class T>
+using V [[gnu::vector_size(16)]] = T;
+
+template <class T, unsigned M = sizeof(V<T>)>
+V<T> load(const void *p) {
+ using W = V<T>;
+ W r;
+ __builtin_memcpy(&r, p, M);
+ return r;
+}
+
+// movq or movsd
+template V<char> load<char, 8>(const void *); // bad
+template V<short> load<short, 8>(const void *); // bad
+template V<int> load<int, 8>(const void *); // bad
+template V<long> load<long, 8>(const void *); // good
+// the following is disabled because V2SF isn't a supported mode
+// template V<float> load<float, 8>(const void *); // bad
+template V<double> load<double, 8>(const void *); // good (movsd?)
+
+// movd or movss
+template V<char> load<char, 4>(const void *); // bad
+template V<short> load<short, 4>(const void *); // bad
+template V<int> load<int, 4>(const void *); // good
+template V<float> load<float, 4>(const void *); // good
+
+/* We should end up with one load and one insert for each function. */
+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 9 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "MEM" 9 "optimized" } } */
diff --git a/gcc/testsuite/g++.target/i386/pr90424-2.C b/gcc/testsuite/g++.target/i386/pr90424-2.C
new file mode 100644
index 000000000..3abb65f45
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr90424-2.C
@@ -0,0 +1,31 @@
+/* { dg-do compile { target c++11 } } */
+/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */
+
+template <class T>
+using V [[gnu::vector_size(16)]] = T;
+
+template <class T, unsigned M = sizeof(V<T>)>
+V<T> load(const void *p) {
+ V<T> r = {};
+ __builtin_memcpy(&r, p, M);
+ return r;
+}
+
+// movq or movsd
+template V<char> load<char, 8>(const void *); // bad
+template V<short> load<short, 8>(const void *); // bad
+template V<int> load<int, 8>(const void *); // bad
+template V<long> load<long, 8>(const void *); // good
+// the following is disabled because V2SF isn't a supported mode
+// template V<float> load<float, 8>(const void *); // bad
+template V<double> load<double, 8>(const void *); // good (movsd?)
+
+// movd or movss
+template V<char> load<char, 4>(const void *); // bad
+template V<short> load<short, 4>(const void *); // bad
+template V<int> load<int, 4>(const void *); // good
+template V<float> load<float, 4>(const void *); // good
+
+/* We should end up with one load and one insert for each function. */
+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 9 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "MEM" 9 "optimized" } } */
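The pr90424 tests above exercise the following source-level pattern; a standalone version (illustrative, relying only on GNU vector extensions) shows the 8-byte memcpy into a 16-byte vector that should become a single load plus a sub-vector insert rather than element-wise code.

#include <string.h>
#include <stdio.h>

typedef int v4si __attribute__ ((vector_size (16)));

static v4si
load8 (const void *p)
{
  v4si r = (v4si) { 0, 0, 0, 0 };
  memcpy (&r, p, 8);                /* writes only the low two lanes */
  return r;
}

int
main (void)
{
  int buf[2] = { 42, 7 };
  v4si v = load8 (buf);
  printf ("%d %d %d %d\n", v[0], v[1], v[2], v[3]);  /* 42 7 0 0 */
  return 0;
}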
diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_10.c b/gcc/testsuite/gcc.target/aarch64/target_attr_10.c
index 184990471..d96a8733a 100644
--- a/gcc/testsuite/gcc.target/aarch64/target_attr_10.c
+++ b/gcc/testsuite/gcc.target/aarch64/target_attr_10.c
@@ -13,4 +13,4 @@ foo (uint8x16_t a, uint8x16_t b, uint8x16_t c)
return vbslq_u8 (a, b, c); /* { dg-message "called from here" } */
}
-/* { dg-error "inlining failed in call to always_inline" "" { target *-*-* } 0 } */
+/* { dg-error "inlining failed in call to 'always_inline'" "" { target *-*-* } 0 } */
diff --git a/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c b/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c
index 05dc579f2..fb6e0b9cd 100644
--- a/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c
+++ b/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c
@@ -14,5 +14,5 @@ foo (uint8x16_t *p)
*p = vmovq_n_u8 (3); /* { dg-message "called from here" } */
}
-/* { dg-error "inlining failed in call to always_inline" "" { target *-*-* } 0 } */
+/* { dg-error "inlining failed in call to 'always_inline'" "" { target *-*-* } 0 } */
diff --git a/gcc/testsuite/gcc.target/i386/pr82361-1.c b/gcc/testsuite/gcc.target/i386/pr82361-1.c
index e7c356557..dec1792ae 100644
--- a/gcc/testsuite/gcc.target/i386/pr82361-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr82361-1.c
@@ -4,50 +4,50 @@
/* We should be able to optimize all %eax to %rax zero extensions, because
div and idiv instructions with 32-bit operands zero-extend both results. */
/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
-/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
- one. */
+/* FIXME: The compiler does not merge zero-extension to the modulo part
+ of f1 and f2. */
/* { dg-final { scan-assembler-times "movl\t%edx" 2 } } */
void
f1 (unsigned int a, unsigned int b)
{
- unsigned long long c = a / b;
- unsigned long long d = a % b;
+ register unsigned long long c asm ("rax") = a / b;
+ register unsigned long long d asm ("rdx") = a % b;
asm volatile ("" : : "r" (c), "r" (d));
}
void
f2 (int a, int b)
{
- unsigned long long c = (unsigned int) (a / b);
- unsigned long long d = (unsigned int) (a % b);
+ register unsigned long long c asm ("rax") = (unsigned int) (a / b);
+ register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
asm volatile ("" : : "r" (c), "r" (d));
}
void
f3 (unsigned int a, unsigned int b)
{
- unsigned long long c = a / b;
+ register unsigned long long c asm ("rax") = a / b;
asm volatile ("" : : "r" (c));
}
void
f4 (int a, int b)
{
- unsigned long long c = (unsigned int) (a / b);
+ register unsigned long long c asm ("rax") = (unsigned int) (a / b);
asm volatile ("" : : "r" (c));
}
void
f5 (unsigned int a, unsigned int b)
{
- unsigned long long d = a % b;
+ register unsigned long long d asm ("rdx") = a % b;
asm volatile ("" : : "r" (d));
}
void
f6 (int a, int b)
{
- unsigned long long d = (unsigned int) (a % b);
+ register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
asm volatile ("" : : "r" (d));
}
diff --git a/gcc/testsuite/gcc.target/i386/pr82361-2.c b/gcc/testsuite/gcc.target/i386/pr82361-2.c
index c1e484d6e..2d87de182 100644
--- a/gcc/testsuite/gcc.target/i386/pr82361-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr82361-2.c
@@ -4,7 +4,8 @@
/* We should be able to optimize all %eax to %rax zero extensions, because
div and idiv instructions with 32-bit operands zero-extend both results. */
/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
-/* Ditto %edx to %rdx zero extensions. */
-/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */
+/* FIXME: The compiler does not merge zero-extension to the modulo part
+ of f1 and f2. */
+/* { dg-final { scan-assembler-times "movl\t%edx" 4 } } */
#include "pr82361-1.c"
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 527deffe4..be47519bc 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4297,8 +4297,17 @@ verify_gimple_assign_ternary (gassign *stmt)
}
if (! ((INTEGRAL_TYPE_P (rhs1_type)
&& INTEGRAL_TYPE_P (rhs2_type))
+ /* Vector element insert. */
|| (VECTOR_TYPE_P (rhs1_type)
- && types_compatible_p (TREE_TYPE (rhs1_type), rhs2_type))))
+ && types_compatible_p (TREE_TYPE (rhs1_type), rhs2_type))
+ /* Aligned sub-vector insert. */
+ || (VECTOR_TYPE_P (rhs1_type)
+ && VECTOR_TYPE_P (rhs2_type)
+ && types_compatible_p (TREE_TYPE (rhs1_type),
+ TREE_TYPE (rhs2_type))
+ && multiple_p (TYPE_VECTOR_SUBPARTS (rhs1_type),
+ TYPE_VECTOR_SUBPARTS (rhs2_type))
+ && multiple_of_p (bitsizetype, rhs3, TYPE_SIZE (rhs2_type)))))
{
error ("not allowed type combination in BIT_INSERT_EXPR");
debug_generic_expr (rhs1_type);
diff --git a/gcc/tree-ssa.c b/gcc/tree-ssa.c
index 1dc544b6d..a149f5e79 100644
--- a/gcc/tree-ssa.c
+++ b/gcc/tree-ssa.c
@@ -1522,8 +1522,6 @@ non_rewritable_lvalue_p (tree lhs)
if (DECL_P (decl)
&& VECTOR_TYPE_P (TREE_TYPE (decl))
&& TYPE_MODE (TREE_TYPE (decl)) != BLKmode
- && operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (lhs)),
- TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (decl))), 0)
&& known_ge (mem_ref_offset (lhs), 0)
&& known_gt (wi::to_poly_offset (TYPE_SIZE_UNIT (TREE_TYPE (decl))),
mem_ref_offset (lhs))
@@ -1531,7 +1529,24 @@ non_rewritable_lvalue_p (tree lhs)
TYPE_SIZE_UNIT (TREE_TYPE (lhs)))
&& known_ge (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (decl))),
wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (lhs)))))
- return false;
+ {
+ poly_uint64 lhs_bits, nelts;
+ if (poly_int_tree_p (TYPE_SIZE (TREE_TYPE (lhs)), &lhs_bits)
+ && multiple_p (lhs_bits,
+ tree_to_uhwi
+ (TYPE_SIZE (TREE_TYPE (TREE_TYPE (decl)))),
+ &nelts))
+ {
+ if (known_eq (nelts, 1u))
+ return false;
+ /* For sub-vector inserts the insert vector mode has to be
+ supported. */
+ tree vtype = build_vector_type (TREE_TYPE (TREE_TYPE (decl)),
+ nelts);
+ if (TYPE_MODE (vtype) != BLKmode)
+ return false;
+ }
+ }
}
/* A vector-insert using a BIT_FIELD_REF is rewritable using
@@ -1869,20 +1884,30 @@ execute_update_addresses_taken (void)
&& bitmap_bit_p (suitable_for_renaming, DECL_UID (sym))
&& VECTOR_TYPE_P (TREE_TYPE (sym))
&& TYPE_MODE (TREE_TYPE (sym)) != BLKmode
- && operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (lhs)),
- TYPE_SIZE_UNIT
- (TREE_TYPE (TREE_TYPE (sym))), 0)
- && tree_fits_uhwi_p (TREE_OPERAND (lhs, 1))
- && tree_int_cst_lt (TREE_OPERAND (lhs, 1),
- TYPE_SIZE_UNIT (TREE_TYPE (sym)))
- && (tree_to_uhwi (TREE_OPERAND (lhs, 1))
- % tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (lhs)))) == 0)
+ && known_ge (mem_ref_offset (lhs), 0)
+ && known_gt (wi::to_poly_offset
+ (TYPE_SIZE_UNIT (TREE_TYPE (sym))),
+ mem_ref_offset (lhs))
+ && multiple_of_p (sizetype,
+ TREE_OPERAND (lhs, 1),
+ TYPE_SIZE_UNIT (TREE_TYPE (lhs))))
{
tree val = gimple_assign_rhs1 (stmt);
if (! types_compatible_p (TREE_TYPE (val),
TREE_TYPE (TREE_TYPE (sym))))
{
- tree tem = make_ssa_name (TREE_TYPE (TREE_TYPE (sym)));
+ poly_uint64 lhs_bits, nelts;
+ tree temtype = TREE_TYPE (TREE_TYPE (sym));
+ if (poly_int_tree_p (TYPE_SIZE (TREE_TYPE (lhs)),
+ &lhs_bits)
+ && multiple_p (lhs_bits,
+ tree_to_uhwi
+ (TYPE_SIZE (TREE_TYPE
+ (TREE_TYPE (sym)))),
+ &nelts)
+ && maybe_ne (nelts, 1u))
+ temtype = build_vector_type (temtype, nelts);
+ tree tem = make_ssa_name (temtype);
gimple *pun
= gimple_build_assign (tem,
build1 (VIEW_CONVERT_EXPR,