Upload GCC feature and bugfix patches.
- avoid-cycling-on-vertain-subreg-reloads.patch: Add patch source comment - change-gcc-BASE-VER.patch: Likewise - dont-generate-IF_THEN_ELSE.patch: Likewise - fix-ICE-in-compute_live_loop_exits.patch: Likewise - fix-ICE-in-eliminate_stmt.patch: Likewise - fix-ICE-in-vect_create_epilog_for_reduction.patch: Likewise - fix-ICE-in-vect_stmt_to_vectorize.patch: Likewise - fix-ICE-in-verify_ssa.patch: Likewise - fix-ICE-when-vectorizing-nested-cycles.patch: Likewise - fix-cost-of-plus.patch: Likewise - ipa-const-prop-self-recursion-bugfix.patch: Likewise - simplify-removing-subregs.patch: Likewise - medium-code-mode.patch: Bugfix - fix-when-peeling-for-alignment.patch: Move to ... - fix-PR-92351-When-peeling-for-alignment.patch: ... this - AArch64-Fix-constraints-for-CPY-M.patch: New file - Apply-maximum-nunits-for-BB-SLP.patch: New file - Fix-EXTRACT_LAST_REDUCTION-segfault.patch: New file - Fix-up-push_partial_def-little-endian-bitfield.patch: New file - Fix-zero-masking-for-vcvtps2ph.patch: New file - IRA-Handle-fully-tied-destinations.patch: New file - SLP-VECT-Add-check-to-fix-96837.patch: New file - aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch: New file - aarch64-Fix-bf16-and-matrix-g++-gfortran.patch: New file - aarch64-Fix-mismatched-SVE-predicate-modes.patch: New file - aarch64-fix-sve-acle-error.patch: New file - adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch: New file - bf16-and-matrix-characteristic.patch: New file - fix-ICE-IPA-compare-VRP-types.patch: New file - fix-ICE-in-affine-combination.patch: New file - fix-ICE-in-pass-vect.patch: New file - fix-ICE-in-vect_update_misalignment_for_peel.patch: New file - fix-addlosymdi-ICE-in-pass-reload.patch: New file - fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch: New file - fix-avx512vl-vcvttpd2dq-2-fail.patch: New file - fix-issue499-add-nop-convert.patch: New file - fix-issue604-ldist-dependency-fixup.patch: New file - modulo-sched-Carefully-process-loop-counter-initiali.patch: New 
file - re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch: New file - reduction-paths-with-unhandled-live-stmt.patch: New file - redundant-loop-elimination.patch: New file - sccvn-Improve-handling-of-load-masked-with-integer.patch: New file - speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch: New file - store-merging-Consider-also-overlapping-stores-earlier.patch: New file - tree-optimization-96920-another-ICE-when-vectorizing.patch: New file - tree-optimization-97812-fix-range-query-in-VRP-asser.patch: New file - vectorizable-comparison-Swap-operands-only-once.patch: New file - x86-Fix-bf16-and-matrix.patch: New file
This commit is contained in:
parent
7305b43b46
commit
01e0ec8ea6
67
AArch64-Fix-constraints-for-CPY-M.patch
Normal file
67
AArch64-Fix-constraints-for-CPY-M.patch
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-AArch64-Fix-constraints-for-CPY-M.patch
|
||||||
|
3c2707f33af46ac145769872b65e25fd0b870903
|
||||||
|
|
||||||
|
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
|
||||||
|
index cbf29a82e28..59bf4a69507 100644
|
||||||
|
--- a/gcc/config/aarch64/aarch64-sve.md
|
||||||
|
+++ b/gcc/config/aarch64/aarch64-sve.md
|
||||||
|
@@ -6523,7 +6523,7 @@
|
||||||
|
(define_insn "@aarch64_sel_dup<mode>"
|
||||||
|
[(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
|
||||||
|
(unspec:SVE_FULL
|
||||||
|
- [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upl, Upl, Upl, Upl")
|
||||||
|
+ [(match_operand:<VPRED> 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
|
||||||
|
(vec_duplicate:SVE_FULL
|
||||||
|
(match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
|
||||||
|
(match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..1d8f429caeb
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general/cpy_1.c
|
||||||
|
@@ -0,0 +1,42 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O" } */
|
||||||
|
+/* { dg-final { check-function-bodies "**" "" } } */
|
||||||
|
+
|
||||||
|
+#include <arm_sve.h>
|
||||||
|
+
|
||||||
|
+#ifdef __cplusplus
|
||||||
|
+extern "C" {
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+** dup_x0_m:
|
||||||
|
+** add (x[0-9]+), x0, #?1
|
||||||
|
+** mov (p[0-7])\.b, p15\.b
|
||||||
|
+** mov z0\.d, \2/m, \1
|
||||||
|
+** ret
|
||||||
|
+*/
|
||||||
|
+svuint64_t
|
||||||
|
+dup_x0_m (svuint64_t z0, uint64_t x0)
|
||||||
|
+{
|
||||||
|
+ register svbool_t pg asm ("p15");
|
||||||
|
+ asm volatile ("" : "=Upa" (pg));
|
||||||
|
+ return svdup_u64_m (z0, pg, x0 + 1);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+** dup_d1_z:
|
||||||
|
+** mov (p[0-7])\.b, p15\.b
|
||||||
|
+** mov z0\.d, \1/m, d1
|
||||||
|
+** ret
|
||||||
|
+*/
|
||||||
|
+svfloat64_t
|
||||||
|
+dup_d1_z (svfloat64_t z0, float64_t d1)
|
||||||
|
+{
|
||||||
|
+ register svbool_t pg asm ("p15");
|
||||||
|
+ asm volatile ("" : "=Upa" (pg));
|
||||||
|
+ return svdup_f64_m (z0, pg, d1);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+#ifdef __cplusplus
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
694
Apply-maximum-nunits-for-BB-SLP.patch
Normal file
694
Apply-maximum-nunits-for-BB-SLP.patch
Normal file
@@ -0,0 +1,694 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-Apply-maximum-nunits-for-BB-SLP.patch
|
||||||
|
9b75f56d4b7951c60a656396dddd4a65787b95bc
|
||||||
|
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-4.c b/gcc/testsuite/gcc.dg/vect/bb-slp-4.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-4.c 2020-12-20 18:46:19.539633230 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-4.c 2020-12-20 18:48:12.799633230 +0800
|
||||||
|
@@ -38,5 +38,4 @@ int main (void)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* { dg-final { scan-tree-dump-times "basic block vectorized" 0 "slp2" } } */
|
||||||
|
-
|
||||||
|
+/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp2" } } */
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-bool-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-bool-1.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-bool-1.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-bool-1.c 2020-12-20 18:48:12.799633230 +0800
|
||||||
|
@@ -0,0 +1,44 @@
|
||||||
|
+#include "tree-vect.h"
|
||||||
|
+
|
||||||
|
+void __attribute__ ((noipa))
|
||||||
|
+f1 (_Bool *x, unsigned short *y)
|
||||||
|
+{
|
||||||
|
+ x[0] = (y[0] == 1);
|
||||||
|
+ x[1] = (y[1] == 1);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void __attribute__ ((noipa))
|
||||||
|
+f2 (_Bool *x, unsigned short *y)
|
||||||
|
+{
|
||||||
|
+ x[0] = (y[0] == 1);
|
||||||
|
+ x[1] = (y[1] == 1);
|
||||||
|
+ x[2] = (y[2] == 1);
|
||||||
|
+ x[3] = (y[3] == 1);
|
||||||
|
+ x[4] = (y[4] == 1);
|
||||||
|
+ x[5] = (y[5] == 1);
|
||||||
|
+ x[6] = (y[6] == 1);
|
||||||
|
+ x[7] = (y[7] == 1);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+_Bool x[8];
|
||||||
|
+unsigned short y[8] = { 11, 1, 9, 5, 1, 44, 1, 1 };
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+main (void)
|
||||||
|
+{
|
||||||
|
+ check_vect ();
|
||||||
|
+
|
||||||
|
+ f1 (x, y);
|
||||||
|
+
|
||||||
|
+ if (x[0] || !x[1])
|
||||||
|
+ __builtin_abort ();
|
||||||
|
+
|
||||||
|
+ x[1] = 0;
|
||||||
|
+
|
||||||
|
+ f2 (x, y);
|
||||||
|
+
|
||||||
|
+ if (x[0] || !x[1] || x[2] | x[3] || !x[4] || x[5] || !x[6] || !x[7])
|
||||||
|
+ __builtin_abort ();
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_14.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_14.c
|
||||||
|
--- a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_14.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_14.c 2020-12-20 18:48:11.811633230 +0800
|
||||||
|
@@ -0,0 +1,26 @@
|
||||||
|
+/* { dg-options "-O2 -ftree-vectorize" } */
|
||||||
|
+/* { dg-final { check-function-bodies "**" "" } } */
|
||||||
|
+
|
||||||
|
+/*
|
||||||
|
+** foo:
|
||||||
|
+** (
|
||||||
|
+** ldr d([0-9]+), \[x1\]
|
||||||
|
+** ldr q([0-9]+), \[x0\]
|
||||||
|
+** saddw v([0-9]+)\.4s, v\2\.4s, v\1\.4h
|
||||||
|
+** str q\3, \[x0\]
|
||||||
|
+** |
|
||||||
|
+** ldr q([0-9]+), \[x0\]
|
||||||
|
+** ldr d([0-9]+), \[x1\]
|
||||||
|
+** saddw v([0-9]+)\.4s, v\4\.4s, v\5\.4h
|
||||||
|
+** str q\6, \[x0\]
|
||||||
|
+** )
|
||||||
|
+** ret
|
||||||
|
+*/
|
||||||
|
+void
|
||||||
|
+foo (int *x, short *y)
|
||||||
|
+{
|
||||||
|
+ x[0] += y[0];
|
||||||
|
+ x[1] += y[1];
|
||||||
|
+ x[2] += y[2];
|
||||||
|
+ x[3] += y[3];
|
||||||
|
+}
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.target/i386/pr84101.c b/gcc/testsuite/gcc.target/i386/pr84101.c
|
||||||
|
--- a/gcc/testsuite/gcc.target/i386/pr84101.c 2020-12-20 18:46:18.383633230 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.target/i386/pr84101.c 2020-12-20 18:48:11.611633230 +0800
|
||||||
|
@@ -18,4 +18,5 @@ uint64_pair_t pair(int num)
|
||||||
|
return p ;
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp2" } } */
|
||||||
|
+/* See PR92266 for the XFAIL. */
|
||||||
|
+/* { dg-final { scan-tree-dump-not "basic block vectorized" "slp2" { xfail ilp32 } } } */
|
||||||
|
diff -Nurp a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
|
||||||
|
--- a/gcc/tree-vect-data-refs.c 2020-12-20 18:46:19.911633230 +0800
|
||||||
|
+++ b/gcc/tree-vect-data-refs.c 2020-12-20 18:48:11.047633230 +0800
|
||||||
|
@@ -4312,9 +4312,8 @@ vect_analyze_data_refs (vec_info *vinfo,
|
||||||
|
|
||||||
|
/* Set vectype for STMT. */
|
||||||
|
scalar_type = TREE_TYPE (DR_REF (dr));
|
||||||
|
- STMT_VINFO_VECTYPE (stmt_info)
|
||||||
|
- = get_vectype_for_scalar_type (vinfo, scalar_type);
|
||||||
|
- if (!STMT_VINFO_VECTYPE (stmt_info))
|
||||||
|
+ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
|
||||||
|
+ if (!vectype)
|
||||||
|
{
|
||||||
|
if (dump_enabled_p ())
|
||||||
|
{
|
||||||
|
@@ -4345,14 +4344,19 @@ vect_analyze_data_refs (vec_info *vinfo,
|
||||||
|
if (dump_enabled_p ())
|
||||||
|
dump_printf_loc (MSG_NOTE, vect_location,
|
||||||
|
"got vectype for stmt: %G%T\n",
|
||||||
|
- stmt_info->stmt, STMT_VINFO_VECTYPE (stmt_info));
|
||||||
|
+ stmt_info->stmt, vectype);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Adjust the minimal vectorization factor according to the
|
||||||
|
vector type. */
|
||||||
|
- vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
|
||||||
|
+ vf = TYPE_VECTOR_SUBPARTS (vectype);
|
||||||
|
*min_vf = upper_bound (*min_vf, vf);
|
||||||
|
|
||||||
|
+ /* Leave the BB vectorizer to pick the vector type later, based on
|
||||||
|
+ the final dataref group size and SLP node size. */
|
||||||
|
+ if (is_a <loop_vec_info> (vinfo))
|
||||||
|
+ STMT_VINFO_VECTYPE (stmt_info) = vectype;
|
||||||
|
+
|
||||||
|
if (gatherscatter != SG_NONE)
|
||||||
|
{
|
||||||
|
gather_scatter_info gs_info;
|
||||||
|
diff -Nurp a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
|
||||||
|
--- a/gcc/tree-vect-patterns.c 2020-12-20 18:46:19.979633230 +0800
|
||||||
|
+++ b/gcc/tree-vect-patterns.c 2020-12-20 18:48:11.227633230 +0800
|
||||||
|
@@ -4142,9 +4142,10 @@ vect_recog_bool_pattern (stmt_vec_info s
|
||||||
|
&& STMT_VINFO_DATA_REF (stmt_vinfo))
|
||||||
|
{
|
||||||
|
stmt_vec_info pattern_stmt_info;
|
||||||
|
- vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
|
||||||
|
- gcc_assert (vectype != NULL_TREE);
|
||||||
|
- if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
|
||||||
|
+ tree nunits_vectype;
|
||||||
|
+ if (!vect_get_vector_types_for_stmt (stmt_vinfo, &vectype,
|
||||||
|
+ &nunits_vectype)
|
||||||
|
+ || !VECTOR_MODE_P (TYPE_MODE (vectype)))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (check_bool_pattern (var, vinfo, bool_stmts))
|
||||||
|
diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
|
||||||
|
--- a/gcc/tree-vect-slp.c 2020-12-20 18:46:17.763633230 +0800
|
||||||
|
+++ b/gcc/tree-vect-slp.c 2020-12-20 18:48:11.227633230 +0800
|
||||||
|
@@ -606,6 +606,77 @@ again:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
|
||||||
|
+ Return true if we can, meaning that this choice doesn't conflict with
|
||||||
|
+ existing SLP nodes that use STMT_INFO. */
|
||||||
|
+
|
||||||
|
+static bool
|
||||||
|
+vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
|
||||||
|
+{
|
||||||
|
+ tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||||||
|
+ if (old_vectype && useless_type_conversion_p (vectype, old_vectype))
|
||||||
|
+ return true;
|
||||||
|
+
|
||||||
|
+ if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
|
||||||
|
+ && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
|
||||||
|
+ {
|
||||||
|
+ /* We maintain the invariant that if any statement in the group is
|
||||||
|
+ used, all other members of the group have the same vector type. */
|
||||||
|
+ stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
|
||||||
|
+ stmt_vec_info member_info = first_info;
|
||||||
|
+ for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
|
||||||
|
+ if (STMT_VINFO_NUM_SLP_USES (member_info) > 0
|
||||||
|
+ || is_pattern_stmt_p (member_info))
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ if (!member_info)
|
||||||
|
+ {
|
||||||
|
+ for (member_info = first_info; member_info;
|
||||||
|
+ member_info = DR_GROUP_NEXT_ELEMENT (member_info))
|
||||||
|
+ STMT_VINFO_VECTYPE (member_info) = vectype;
|
||||||
|
+ return true;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0
|
||||||
|
+ && !is_pattern_stmt_p (stmt_info))
|
||||||
|
+ {
|
||||||
|
+ STMT_VINFO_VECTYPE (stmt_info) = vectype;
|
||||||
|
+ return true;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (dump_enabled_p ())
|
||||||
|
+ {
|
||||||
|
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||||
|
+ "Build SLP failed: incompatible vector"
|
||||||
|
+ " types for: %G", stmt_info->stmt);
|
||||||
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
||||||
|
+ " old vector type: %T\n", old_vectype);
|
||||||
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
||||||
|
+ " new vector type: %T\n", vectype);
|
||||||
|
+ }
|
||||||
|
+ return false;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Try to infer and assign a vector type to all the statements in STMTS.
|
||||||
|
+ Used only for BB vectorization. */
|
||||||
|
+
|
||||||
|
+static bool
|
||||||
|
+vect_update_all_shared_vectypes (vec<stmt_vec_info> stmts)
|
||||||
|
+{
|
||||||
|
+ tree vectype, nunits_vectype;
|
||||||
|
+ if (!vect_get_vector_types_for_stmt (stmts[0], &vectype,
|
||||||
|
+ &nunits_vectype, stmts.length ()))
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ stmt_vec_info stmt_info;
|
||||||
|
+ unsigned int i;
|
||||||
|
+ FOR_EACH_VEC_ELT (stmts, i, stmt_info)
|
||||||
|
+ if (!vect_update_shared_vectype (stmt_info, vectype))
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Return true if call statements CALL1 and CALL2 are similar enough
|
||||||
|
to be combined into the same SLP group. */
|
||||||
|
|
||||||
|
@@ -751,6 +822,7 @@ vect_build_slp_tree_1 (unsigned char *sw
|
||||||
|
stmt_vec_info stmt_info;
|
||||||
|
FOR_EACH_VEC_ELT (stmts, i, stmt_info)
|
||||||
|
{
|
||||||
|
+ vec_info *vinfo = stmt_info->vinfo;
|
||||||
|
gimple *stmt = stmt_info->stmt;
|
||||||
|
swap[i] = 0;
|
||||||
|
matches[i] = false;
|
||||||
|
@@ -784,7 +856,7 @@ vect_build_slp_tree_1 (unsigned char *sw
|
||||||
|
|
||||||
|
tree nunits_vectype;
|
||||||
|
if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
|
||||||
|
- &nunits_vectype)
|
||||||
|
+ &nunits_vectype, group_size)
|
||||||
|
|| (nunits_vectype
|
||||||
|
&& !vect_record_max_nunits (stmt_info, group_size,
|
||||||
|
nunits_vectype, max_nunits)))
|
||||||
|
@@ -796,6 +868,10 @@ vect_build_slp_tree_1 (unsigned char *sw
|
||||||
|
|
||||||
|
gcc_assert (vectype);
|
||||||
|
|
||||||
|
+ if (is_a <bb_vec_info> (vinfo)
|
||||||
|
+ && !vect_update_shared_vectype (stmt_info, vectype))
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
|
||||||
|
{
|
||||||
|
rhs_code = CALL_EXPR;
|
||||||
|
@@ -1328,7 +1404,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
||||||
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
|
||||||
|
if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
|
||||||
|
break;
|
||||||
|
- if (!grandchild)
|
||||||
|
+ if (!grandchild
|
||||||
|
+ && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
|
||||||
|
{
|
||||||
|
/* Roll back. */
|
||||||
|
this_tree_size = old_tree_size;
|
||||||
|
@@ -1369,7 +1446,8 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
||||||
|
do extra work to cancel the pattern so the uses see the
|
||||||
|
scalar version. */
|
||||||
|
&& !is_pattern_stmt_p (stmt_info)
|
||||||
|
- && !oprnd_info->any_pattern)
|
||||||
|
+ && !oprnd_info->any_pattern
|
||||||
|
+ && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
|
||||||
|
{
|
||||||
|
if (dump_enabled_p ())
|
||||||
|
dump_printf_loc (MSG_NOTE, vect_location,
|
||||||
|
@@ -1488,7 +1566,9 @@ vect_build_slp_tree_2 (vec_info *vinfo,
|
||||||
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
|
||||||
|
if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
|
||||||
|
break;
|
||||||
|
- if (!grandchild)
|
||||||
|
+ if (!grandchild
|
||||||
|
+ && (vect_update_all_shared_vectypes
|
||||||
|
+ (oprnd_info->def_stmts)))
|
||||||
|
{
|
||||||
|
/* Roll back. */
|
||||||
|
this_tree_size = old_tree_size;
|
||||||
|
@@ -2026,8 +2106,8 @@ vect_analyze_slp_instance (vec_info *vin
|
||||||
|
if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
|
||||||
|
{
|
||||||
|
scalar_type = TREE_TYPE (DR_REF (dr));
|
||||||
|
- vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
|
||||||
|
group_size = DR_GROUP_SIZE (stmt_info);
|
||||||
|
+ vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
|
||||||
|
}
|
||||||
|
else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
|
||||||
|
{
|
||||||
|
@@ -2669,22 +2749,13 @@ vect_slp_analyze_node_operations_1 (vec_
|
||||||
|
Memory accesses already got their vector type assigned
|
||||||
|
in vect_analyze_data_refs. */
|
||||||
|
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
|
||||||
|
- if (bb_vinfo
|
||||||
|
- && ! STMT_VINFO_DATA_REF (stmt_info))
|
||||||
|
+ if (bb_vinfo && STMT_VINFO_VECTYPE (stmt_info) == boolean_type_node)
|
||||||
|
{
|
||||||
|
- tree vectype, nunits_vectype;
|
||||||
|
- if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
|
||||||
|
- &nunits_vectype))
|
||||||
|
- /* We checked this when building the node. */
|
||||||
|
- gcc_unreachable ();
|
||||||
|
- if (vectype == boolean_type_node)
|
||||||
|
- {
|
||||||
|
- vectype = vect_get_mask_type_for_stmt (stmt_info);
|
||||||
|
- if (!vectype)
|
||||||
|
- /* vect_get_mask_type_for_stmt has already explained the
|
||||||
|
- failure. */
|
||||||
|
- return false;
|
||||||
|
- }
|
||||||
|
+ tree vectype = vect_get_mask_type_for_stmt (stmt_info, node);
|
||||||
|
+ if (!vectype)
|
||||||
|
+ /* vect_get_mask_type_for_stmt has already explained the
|
||||||
|
+ failure. */
|
||||||
|
+ return false;
|
||||||
|
|
||||||
|
stmt_vec_info sstmt_info;
|
||||||
|
unsigned int i;
|
||||||
|
@@ -3585,7 +3656,7 @@ vect_get_constant_vectors (slp_tree op_n
|
||||||
|
&& vect_mask_constant_operand_p (stmt_vinfo))
|
||||||
|
vector_type = truth_type_for (stmt_vectype);
|
||||||
|
else
|
||||||
|
- vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
|
||||||
|
+ vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);
|
||||||
|
|
||||||
|
unsigned int number_of_vectors
|
||||||
|
= vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
|
||||||
|
diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
|
||||||
|
--- a/gcc/tree-vect-stmts.c 2020-12-20 18:46:17.707633230 +0800
|
||||||
|
+++ b/gcc/tree-vect-stmts.c 2020-12-20 18:48:11.227633230 +0800
|
||||||
|
@@ -798,7 +798,7 @@ vect_prologue_cost_for_slp_op (slp_tree
|
||||||
|
/* Without looking at the actual initializer a vector of
|
||||||
|
constants can be implemented as load from the constant pool.
|
||||||
|
When all elements are the same we can use a splat. */
|
||||||
|
- tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
|
||||||
|
+ tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
|
||||||
|
unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
|
||||||
|
unsigned num_vects_to_check;
|
||||||
|
unsigned HOST_WIDE_INT const_nunits;
|
||||||
|
@@ -3308,7 +3308,7 @@ vectorizable_call (stmt_vec_info stmt_in
|
||||||
|
/* If all arguments are external or constant defs, infer the vector type
|
||||||
|
from the scalar type. */
|
||||||
|
if (!vectype_in)
|
||||||
|
- vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type);
|
||||||
|
+ vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
|
||||||
|
if (vec_stmt)
|
||||||
|
gcc_assert (vectype_in);
|
||||||
|
if (!vectype_in)
|
||||||
|
@@ -4106,7 +4106,8 @@ vectorizable_simd_clone_call (stmt_vec_i
|
||||||
|
&& bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
|
||||||
|
{
|
||||||
|
tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
|
||||||
|
- arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type);
|
||||||
|
+ arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
|
||||||
|
+ slp_node);
|
||||||
|
if (arginfo[i].vectype == NULL
|
||||||
|
|| (simd_clone_subparts (arginfo[i].vectype)
|
||||||
|
> bestn->simdclone->simdlen))
|
||||||
|
@@ -4805,7 +4806,7 @@ vectorizable_conversion (stmt_vec_info s
|
||||||
|
/* If op0 is an external or constant def, infer the vector type
|
||||||
|
from the scalar type. */
|
||||||
|
if (!vectype_in)
|
||||||
|
- vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type);
|
||||||
|
+ vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
|
||||||
|
if (vec_stmt)
|
||||||
|
gcc_assert (vectype_in);
|
||||||
|
if (!vectype_in)
|
||||||
|
@@ -5558,7 +5559,7 @@ vectorizable_shift (stmt_vec_info stmt_i
|
||||||
|
/* If op0 is an external or constant def, infer the vector type
|
||||||
|
from the scalar type. */
|
||||||
|
if (!vectype)
|
||||||
|
- vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0));
|
||||||
|
+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
|
||||||
|
if (vec_stmt)
|
||||||
|
gcc_assert (vectype);
|
||||||
|
if (!vectype)
|
||||||
|
@@ -5656,7 +5657,8 @@ vectorizable_shift (stmt_vec_info stmt_i
|
||||||
|
"vector/vector shift/rotate found.\n");
|
||||||
|
|
||||||
|
if (!op1_vectype)
|
||||||
|
- op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1));
|
||||||
|
+ op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
|
||||||
|
+ slp_node);
|
||||||
|
incompatible_op1_vectype_p
|
||||||
|
= (op1_vectype == NULL_TREE
|
||||||
|
|| maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
|
||||||
|
@@ -6000,7 +6002,8 @@ vectorizable_operation (stmt_vec_info st
|
||||||
|
vectype = vectype_out;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
- vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0));
|
||||||
|
+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
|
||||||
|
+ slp_node);
|
||||||
|
}
|
||||||
|
if (vec_stmt)
|
||||||
|
gcc_assert (vectype);
|
||||||
|
@@ -8903,7 +8906,7 @@ vectorizable_load (stmt_vec_info stmt_in
|
||||||
|
condition operands are supportable using vec_is_simple_use. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
-vect_is_simple_cond (tree cond, vec_info *vinfo,
|
||||||
|
+vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
|
||||||
|
tree *comp_vectype, enum vect_def_type *dts,
|
||||||
|
tree vectype)
|
||||||
|
{
|
||||||
|
@@ -8966,7 +8969,8 @@ vect_is_simple_cond (tree cond, vec_info
|
||||||
|
scalar_type = build_nonstandard_integer_type
|
||||||
|
(tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
|
||||||
|
TYPE_UNSIGNED (scalar_type));
|
||||||
|
- *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
|
||||||
|
+ *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
|
||||||
|
+ slp_node);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
@@ -9073,7 +9077,7 @@ vectorizable_condition (stmt_vec_info st
|
||||||
|
then_clause = gimple_assign_rhs2 (stmt);
|
||||||
|
else_clause = gimple_assign_rhs3 (stmt);
|
||||||
|
|
||||||
|
- if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
|
||||||
|
+ if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, slp_node,
|
||||||
|
&comp_vectype, &dts[0], slp_node ? NULL : vectype)
|
||||||
|
|| !comp_vectype)
|
||||||
|
return false;
|
||||||
|
@@ -9564,7 +9568,8 @@ vectorizable_comparison (stmt_vec_info s
|
||||||
|
/* Invariant comparison. */
|
||||||
|
if (!vectype)
|
||||||
|
{
|
||||||
|
- vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
|
||||||
|
+ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
|
||||||
|
+ slp_node);
|
||||||
|
if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
@@ -10322,31 +10327,93 @@ get_related_vectype_for_scalar_type (mac
|
||||||
|
/* Function get_vectype_for_scalar_type.
|
||||||
|
|
||||||
|
Returns the vector type corresponding to SCALAR_TYPE as supported
|
||||||
|
- by the target. */
|
||||||
|
+ by the target. If GROUP_SIZE is nonzero and we're performing BB
|
||||||
|
+ vectorization, make sure that the number of elements in the vector
|
||||||
|
+ is no bigger than GROUP_SIZE. */
|
||||||
|
|
||||||
|
tree
|
||||||
|
-get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type)
|
||||||
|
+get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
|
||||||
|
+ unsigned int group_size)
|
||||||
|
{
|
||||||
|
+ /* For BB vectorization, we should always have a group size once we've
|
||||||
|
+ constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
|
||||||
|
+ are tentative requests during things like early data reference
|
||||||
|
+ analysis and pattern recognition. */
|
||||||
|
+ if (is_a <bb_vec_info> (vinfo))
|
||||||
|
+ gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
|
||||||
|
+ else
|
||||||
|
+ group_size = 0;
|
||||||
|
+
|
||||||
|
tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
|
||||||
|
scalar_type);
|
||||||
|
if (vectype && vinfo->vector_mode == VOIDmode)
|
||||||
|
vinfo->vector_mode = TYPE_MODE (vectype);
|
||||||
|
|
||||||
|
+ /* Register the natural choice of vector type, before the group size
|
||||||
|
+ has been applied. */
|
||||||
|
if (vectype)
|
||||||
|
vinfo->used_vector_modes.add (TYPE_MODE (vectype));
|
||||||
|
|
||||||
|
+ /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
|
||||||
|
+ try again with an explicit number of elements. */
|
||||||
|
+ if (vectype
|
||||||
|
+ && group_size
|
||||||
|
+ && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
|
||||||
|
+ {
|
||||||
|
+ /* Start with the biggest number of units that fits within
|
||||||
|
+ GROUP_SIZE and halve it until we find a valid vector type.
|
||||||
|
+ Usually either the first attempt will succeed or all will
|
||||||
|
+ fail (in the latter case because GROUP_SIZE is too small
|
||||||
|
+ for the target), but it's possible that a target could have
|
||||||
|
+ a hole between supported vector types.
|
||||||
|
+
|
||||||
|
+ If GROUP_SIZE is not a power of 2, this has the effect of
|
||||||
|
+ trying the largest power of 2 that fits within the group,
|
||||||
|
+ even though the group is not a multiple of that vector size.
|
||||||
|
+ The BB vectorizer will then try to carve up the group into
|
||||||
|
+ smaller pieces. */
|
||||||
|
+ unsigned int nunits = 1 << floor_log2 (group_size);
|
||||||
|
+ do
|
||||||
|
+ {
|
||||||
|
+ vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
|
||||||
|
+ scalar_type, nunits);
|
||||||
|
+ nunits /= 2;
|
||||||
|
+ }
|
||||||
|
+ while (nunits > 1 && !vectype);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
return vectype;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Return the vector type corresponding to SCALAR_TYPE as supported
|
||||||
|
+ by the target. NODE, if nonnull, is the SLP tree node that will
|
||||||
|
+ use the returned vector type. */
|
||||||
|
+
|
||||||
|
+tree
|
||||||
|
+get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
|
||||||
|
+{
|
||||||
|
+ unsigned int group_size = 0;
|
||||||
|
+ if (node)
|
||||||
|
+ {
|
||||||
|
+ group_size = SLP_TREE_SCALAR_OPS (node).length ();
|
||||||
|
+ if (group_size == 0)
|
||||||
|
+ group_size = SLP_TREE_SCALAR_STMTS (node).length ();
|
||||||
|
+ }
|
||||||
|
+ return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Function get_mask_type_for_scalar_type.
|
||||||
|
|
||||||
|
Returns the mask type corresponding to a result of comparison
|
||||||
|
- of vectors of specified SCALAR_TYPE as supported by target. */
|
||||||
|
+ of vectors of specified SCALAR_TYPE as supported by target.
|
||||||
|
+ NODE, if nonnull, is the SLP tree node that will use the returned
|
||||||
|
+ vector type. */
|
||||||
|
|
||||||
|
tree
|
||||||
|
-get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type)
|
||||||
|
+get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
|
||||||
|
+ slp_tree node)
|
||||||
|
{
|
||||||
|
- tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
|
||||||
|
+ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
|
||||||
|
|
||||||
|
if (!vectype)
|
||||||
|
return NULL;
|
||||||
|
@@ -11033,6 +11100,9 @@ vect_gen_while_not (gimple_seq *seq, tre
|
||||||
|
|
||||||
|
/* Try to compute the vector types required to vectorize STMT_INFO,
|
||||||
|
returning true on success and false if vectorization isn't possible.
|
||||||
|
+ If GROUP_SIZE is nonzero and we're performing BB vectorization,
|
||||||
|
+ make sure that the number of elements in the vectors is no bigger
|
||||||
|
+ than GROUP_SIZE.
|
||||||
|
|
||||||
|
On success:
|
||||||
|
|
||||||
|
@@ -11050,11 +11120,21 @@ vect_gen_while_not (gimple_seq *seq, tre
|
||||||
|
opt_result
|
||||||
|
vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
|
||||||
|
tree *stmt_vectype_out,
|
||||||
|
- tree *nunits_vectype_out)
|
||||||
|
+ tree *nunits_vectype_out,
|
||||||
|
+ unsigned int group_size)
|
||||||
|
{
|
||||||
|
vec_info *vinfo = stmt_info->vinfo;
|
||||||
|
gimple *stmt = stmt_info->stmt;
|
||||||
|
|
||||||
|
+ /* For BB vectorization, we should always have a group size once we've
|
||||||
|
+ constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
|
||||||
|
+ are tentative requests during things like early data reference
|
||||||
|
+ analysis and pattern recognition. */
|
||||||
|
+ if (is_a <bb_vec_info> (vinfo))
|
||||||
|
+ gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
|
||||||
|
+ else
|
||||||
|
+ group_size = 0;
|
||||||
|
+
|
||||||
|
*stmt_vectype_out = NULL_TREE;
|
||||||
|
*nunits_vectype_out = NULL_TREE;
|
||||||
|
|
||||||
|
@@ -11085,7 +11165,7 @@ vect_get_vector_types_for_stmt (stmt_vec
|
||||||
|
|
||||||
|
tree vectype;
|
||||||
|
tree scalar_type = NULL_TREE;
|
||||||
|
- if (STMT_VINFO_VECTYPE (stmt_info))
|
||||||
|
+ if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
|
||||||
|
{
|
||||||
|
*stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||||||
|
if (dump_enabled_p ())
|
||||||
|
@@ -11094,15 +11174,17 @@ vect_get_vector_types_for_stmt (stmt_vec
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
- gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
|
||||||
|
- if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
|
||||||
|
+ if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
|
||||||
|
+ scalar_type = TREE_TYPE (DR_REF (dr));
|
||||||
|
+ else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
|
||||||
|
scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
|
||||||
|
else
|
||||||
|
scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
|
||||||
|
|
||||||
|
/* Pure bool ops don't participate in number-of-units computation.
|
||||||
|
For comparisons use the types being compared. */
|
||||||
|
- if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
|
||||||
|
+ if (!STMT_VINFO_DATA_REF (stmt_info)
|
||||||
|
+ && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
|
||||||
|
&& is_gimple_assign (stmt)
|
||||||
|
&& gimple_assign_rhs_code (stmt) != COND_EXPR)
|
||||||
|
{
|
||||||
|
@@ -11122,9 +11204,16 @@ vect_get_vector_types_for_stmt (stmt_vec
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dump_enabled_p ())
|
||||||
|
- dump_printf_loc (MSG_NOTE, vect_location,
|
||||||
|
- "get vectype for scalar type: %T\n", scalar_type);
|
||||||
|
- vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
|
||||||
|
+ {
|
||||||
|
+ if (group_size)
|
||||||
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
||||||
|
+ "get vectype for scalar type (group size %d):"
|
||||||
|
+ " %T\n", group_size, scalar_type);
|
||||||
|
+ else
|
||||||
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
||||||
|
+ "get vectype for scalar type: %T\n", scalar_type);
|
||||||
|
+ }
|
||||||
|
+ vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
|
||||||
|
if (!vectype)
|
||||||
|
return opt_result::failure_at (stmt,
|
||||||
|
"not vectorized:"
|
||||||
|
@@ -11155,7 +11244,8 @@ vect_get_vector_types_for_stmt (stmt_vec
|
||||||
|
dump_printf_loc (MSG_NOTE, vect_location,
|
||||||
|
"get vectype for smallest scalar type: %T\n",
|
||||||
|
scalar_type);
|
||||||
|
- nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
|
||||||
|
+ nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
|
||||||
|
+ group_size);
|
||||||
|
if (!nunits_vectype)
|
||||||
|
return opt_result::failure_at
|
||||||
|
(stmt, "not vectorized: unsupported data-type %T\n",
|
||||||
|
@@ -11183,10 +11273,11 @@ vect_get_vector_types_for_stmt (stmt_vec
|
||||||
|
|
||||||
|
/* Try to determine the correct vector type for STMT_INFO, which is a
|
||||||
|
statement that produces a scalar boolean result. Return the vector
|
||||||
|
- type on success, otherwise return NULL_TREE. */
|
||||||
|
+ type on success, otherwise return NULL_TREE. NODE, if nonnull,
|
||||||
|
+ is the SLP tree node that will use the returned vector type. */
|
||||||
|
|
||||||
|
opt_tree
|
||||||
|
-vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
|
||||||
|
+vect_get_mask_type_for_stmt (stmt_vec_info stmt_info, slp_tree node)
|
||||||
|
{
|
||||||
|
vec_info *vinfo = stmt_info->vinfo;
|
||||||
|
gimple *stmt = stmt_info->stmt;
|
||||||
|
@@ -11198,7 +11289,7 @@ vect_get_mask_type_for_stmt (stmt_vec_in
|
||||||
|
&& !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
|
||||||
|
{
|
||||||
|
scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
|
||||||
|
- mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type);
|
||||||
|
+ mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type, node);
|
||||||
|
|
||||||
|
if (!mask_type)
|
||||||
|
return opt_tree::failure_at (stmt,
|
||||||
|
diff -Nurp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
|
||||||
|
--- a/gcc/tree-vectorizer.h 2020-12-20 18:46:17.851633230 +0800
|
||||||
|
+++ b/gcc/tree-vectorizer.h 2020-12-20 18:48:11.227633230 +0800
|
||||||
|
@@ -1618,8 +1618,9 @@ extern void vect_update_inits_of_drs (lo
|
||||||
|
/* In tree-vect-stmts.c. */
|
||||||
|
extern tree get_related_vectype_for_scalar_type (machine_mode, tree,
|
||||||
|
poly_uint64 = 0);
|
||||||
|
-extern tree get_vectype_for_scalar_type (vec_info *, tree);
|
||||||
|
-extern tree get_mask_type_for_scalar_type (vec_info *, tree);
|
||||||
|
+extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0);
|
||||||
|
+extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree);
|
||||||
|
+extern tree get_mask_type_for_scalar_type (vec_info *, tree, slp_tree = 0);
|
||||||
|
extern tree get_same_sized_vectype (tree, tree);
|
||||||
|
extern bool vect_chooses_same_modes_p (vec_info *, machine_mode);
|
||||||
|
extern bool vect_get_loop_mask_type (loop_vec_info);
|
||||||
|
@@ -1671,8 +1672,8 @@ extern void optimize_mask_stores (struct
|
||||||
|
extern gcall *vect_gen_while (tree, tree, tree);
|
||||||
|
extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree);
|
||||||
|
extern opt_result vect_get_vector_types_for_stmt (stmt_vec_info, tree *,
|
||||||
|
- tree *);
|
||||||
|
-extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info);
|
||||||
|
+ tree *, unsigned int = 0);
|
||||||
|
+extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, slp_tree = 0);
|
||||||
|
|
||||||
|
/* In tree-vect-data-refs.c. */
|
||||||
|
extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64);
|
||||||
82
Fix-EXTRACT_LAST_REDUCTION-segfault.patch
Normal file
82
Fix-EXTRACT_LAST_REDUCTION-segfault.patch
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
This backport contains 2 patches from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-Fix-EXTRACT_LAST_REDUCTION-handling-of-pattern-stmts.patch
|
||||||
|
9ec35478ccf0f3539988a054b7996278706a7710
|
||||||
|
|
||||||
|
0001-Fix-EXTRACT_LAST_REDUCTION-segfault.patch
|
||||||
|
dc176c3ccd6a8cd3f809f3c1549ad00674061eb5
|
||||||
|
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-6.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-6.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-6.c 1969-12-31 19:00:00.000000000 -0500
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-6.c 2020-12-14 21:16:26.492000000 -0500
|
||||||
|
@@ -0,0 +1,10 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+f (int *y)
|
||||||
|
+{
|
||||||
|
+ int res = 0;
|
||||||
|
+ for (int i = 0; i < 100; ++i)
|
||||||
|
+ res = (y[i] & 1) == 0 && (y[i] < 10) ? res : 1;
|
||||||
|
+ return res;
|
||||||
|
+}
|
||||||
|
diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
|
||||||
|
--- a/gcc/tree-vect-stmts.c 2020-12-14 21:15:27.004000000 -0500
|
||||||
|
+++ b/gcc/tree-vect-stmts.c 2020-12-14 21:16:26.492000000 -0500
|
||||||
|
@@ -1777,9 +1777,10 @@ vect_finish_stmt_generation_1 (stmt_vec_
|
||||||
|
stmt_vec_info
|
||||||
|
vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
|
||||||
|
{
|
||||||
|
- gcc_assert (gimple_get_lhs (stmt_info->stmt) == gimple_get_lhs (vec_stmt));
|
||||||
|
+ gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
|
||||||
|
+ gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
|
||||||
|
|
||||||
|
- gimple_stmt_iterator gsi = gsi_for_stmt (stmt_info->stmt);
|
||||||
|
+ gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
|
||||||
|
gsi_replace (&gsi, vec_stmt, true);
|
||||||
|
|
||||||
|
return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
|
||||||
|
@@ -9118,10 +9119,12 @@ vectorizable_condition (stmt_vec_info st
|
||||||
|
if (new_code == ERROR_MARK)
|
||||||
|
must_invert_cmp_result = true;
|
||||||
|
else
|
||||||
|
- cond_code = new_code;
|
||||||
|
+ {
|
||||||
|
+ cond_code = new_code;
|
||||||
|
+ /* Make sure we don't accidentally use the old condition. */
|
||||||
|
+ cond_expr = NULL_TREE;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
- /* Make sure we don't accidentally use the old condition. */
|
||||||
|
- cond_expr = NULL_TREE;
|
||||||
|
std::swap (then_clause, else_clause);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -9426,20 +9429,21 @@ vectorizable_condition (stmt_vec_info st
|
||||||
|
vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
|
||||||
|
vec_compare = vec_compare_name;
|
||||||
|
}
|
||||||
|
+ gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
|
||||||
|
+ tree lhs = gimple_get_lhs (old_stmt);
|
||||||
|
gcall *new_stmt = gimple_build_call_internal
|
||||||
|
(IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
|
||||||
|
vec_then_clause);
|
||||||
|
- gimple_call_set_lhs (new_stmt, scalar_dest);
|
||||||
|
- SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
|
||||||
|
- if (stmt_info->stmt == gsi_stmt (*gsi))
|
||||||
|
+ gimple_call_set_lhs (new_stmt, lhs);
|
||||||
|
+ SSA_NAME_DEF_STMT (lhs) = new_stmt;
|
||||||
|
+ if (old_stmt == gsi_stmt (*gsi))
|
||||||
|
new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* In this case we're moving the definition to later in the
|
||||||
|
block. That doesn't matter because the only uses of the
|
||||||
|
lhs are in phi statements. */
|
||||||
|
- gimple_stmt_iterator old_gsi
|
||||||
|
- = gsi_for_stmt (stmt_info->stmt);
|
||||||
|
+ gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
|
||||||
|
gsi_remove (&old_gsi, true);
|
||||||
|
new_stmt_info
|
||||||
|
= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
|
||||||
51
Fix-up-push_partial_def-little-endian-bitfield.patch
Normal file
51
Fix-up-push_partial_def-little-endian-bitfield.patch
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
This backport contains 1 patch from gcc main stream tree.
|
||||||
|
The commit ID of this patch is listed below.
|
||||||
|
|
||||||
|
c69325a5db450dbac198f76f1162734af05a1061
|
||||||
|
0001-sccvn-Fix-up-push_partial_def-little-endian-bitfield.patch
|
||||||
|
|
||||||
|
diff -urpN a/gcc/testsuite/gcc.c-torture/execute/pr97764.c b/gcc/testsuite/gcc.c-torture/execute/pr97764.c
|
||||||
|
--- a/gcc/testsuite/gcc.c-torture/execute/pr97764.c 1969-12-31 19:00:00.000000000 -0500
|
||||||
|
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97764.c 2020-12-07 03:42:13.404000000 -0500
|
||||||
|
@@ -0,0 +1,14 @@
|
||||||
|
+/* PR tree-optimization/97764 */
|
||||||
|
+/* { dg-require-effective-target int32plus } */
|
||||||
|
+
|
||||||
|
+struct S { int b : 3; int c : 28; int d : 1; };
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+main ()
|
||||||
|
+{
|
||||||
|
+ struct S e = {};
|
||||||
|
+ e.c = -1;
|
||||||
|
+ if (e.d)
|
||||||
|
+ __builtin_abort ();
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
diff -urpN a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
|
||||||
|
--- a/gcc/tree-ssa-sccvn.c 2020-12-07 03:43:37.792000000 -0500
|
||||||
|
+++ b/gcc/tree-ssa-sccvn.c 2020-12-07 03:42:13.404000000 -0500
|
||||||
|
@@ -2013,12 +2013,12 @@ vn_walk_cb_data::push_partial_def (const
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
- size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT);
|
||||||
|
if (pd.offset >= 0)
|
||||||
|
{
|
||||||
|
/* LSB of this_buffer[0] byte should be at pd.offset bits
|
||||||
|
in buffer. */
|
||||||
|
unsigned int msk;
|
||||||
|
+ size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT);
|
||||||
|
amnt = pd.offset % BITS_PER_UNIT;
|
||||||
|
if (amnt)
|
||||||
|
shift_bytes_in_array_left (this_buffer, len + 1, amnt);
|
||||||
|
@@ -2046,6 +2046,9 @@ vn_walk_cb_data::push_partial_def (const
|
||||||
|
{
|
||||||
|
amnt = (unsigned HOST_WIDE_INT) pd.offset % BITS_PER_UNIT;
|
||||||
|
if (amnt)
|
||||||
|
+ size -= BITS_PER_UNIT - amnt;
|
||||||
|
+ size = MIN (size, (HOST_WIDE_INT) needed_len * BITS_PER_UNIT);
|
||||||
|
+ if (amnt)
|
||||||
|
shift_bytes_in_array_left (this_buffer, len + 1, amnt);
|
||||||
|
}
|
||||||
|
memcpy (p, this_buffer + (amnt != 0), size / BITS_PER_UNIT);
|
||||||
139
Fix-zero-masking-for-vcvtps2ph.patch
Normal file
139
Fix-zero-masking-for-vcvtps2ph.patch
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
This backport contains 1 patch from gcc main stream tree.
|
||||||
|
The commit ID of this patch is listed below.
|
||||||
|
|
||||||
|
0001-Fix-zero-masking-for-vcvtps2ph-when-dest-operand-is-.patch
|
||||||
|
43088bb4dadd3d14b6b594c5f9363fe879f3d7f7
|
||||||
|
|
||||||
|
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
|
||||||
|
index 87354451c58..7815d77bcbf 100644
|
||||||
|
--- a/gcc/config/i386/sse.md
|
||||||
|
+++ b/gcc/config/i386/sse.md
|
||||||
|
@@ -21775,19 +21775,19 @@
|
||||||
|
(set_attr "prefix" "maybe_evex")
|
||||||
|
(set_attr "mode" "V4SF")])
|
||||||
|
|
||||||
|
-(define_insn "*vcvtps2ph_store<mask_name>"
|
||||||
|
+(define_insn "*vcvtps2ph_store<merge_mask_name>"
|
||||||
|
[(set (match_operand:V4HI 0 "memory_operand" "=m")
|
||||||
|
(unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
|
||||||
|
(match_operand:SI 2 "const_0_to_255_operand" "N")]
|
||||||
|
UNSPEC_VCVTPS2PH))]
|
||||||
|
"TARGET_F16C || TARGET_AVX512VL"
|
||||||
|
- "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
|
||||||
|
+ "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
|
||||||
|
[(set_attr "type" "ssecvt")
|
||||||
|
(set_attr "prefix" "maybe_evex")
|
||||||
|
(set_attr "mode" "V4SF")])
|
||||||
|
|
||||||
|
(define_insn "vcvtps2ph256<mask_name>"
|
||||||
|
- [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm")
|
||||||
|
+ [(set (match_operand:V8HI 0 "register_operand" "=v")
|
||||||
|
(unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
|
||||||
|
(match_operand:SI 2 "const_0_to_255_operand" "N")]
|
||||||
|
UNSPEC_VCVTPS2PH))]
|
||||||
|
@@ -21798,8 +21798,20 @@
|
||||||
|
(set_attr "btver2_decode" "vector")
|
||||||
|
(set_attr "mode" "V8SF")])
|
||||||
|
|
||||||
|
+(define_insn "*vcvtps2ph256<merge_mask_name>"
|
||||||
|
+ [(set (match_operand:V8HI 0 "memory_operand" "=m")
|
||||||
|
+ (unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
|
||||||
|
+ (match_operand:SI 2 "const_0_to_255_operand" "N")]
|
||||||
|
+ UNSPEC_VCVTPS2PH))]
|
||||||
|
+ "TARGET_F16C || TARGET_AVX512VL"
|
||||||
|
+ "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
|
||||||
|
+ [(set_attr "type" "ssecvt")
|
||||||
|
+ (set_attr "prefix" "maybe_evex")
|
||||||
|
+ (set_attr "btver2_decode" "vector")
|
||||||
|
+ (set_attr "mode" "V8SF")])
|
||||||
|
+
|
||||||
|
(define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
|
||||||
|
- [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
|
||||||
|
+ [(set (match_operand:V16HI 0 "register_operand" "=v")
|
||||||
|
(unspec:V16HI
|
||||||
|
[(match_operand:V16SF 1 "register_operand" "v")
|
||||||
|
(match_operand:SI 2 "const_0_to_255_operand" "N")]
|
||||||
|
@@ -21810,6 +21822,18 @@
|
||||||
|
(set_attr "prefix" "evex")
|
||||||
|
(set_attr "mode" "V16SF")])
|
||||||
|
|
||||||
|
+(define_insn "*avx512f_vcvtps2ph512<merge_mask_name>"
|
||||||
|
+ [(set (match_operand:V16HI 0 "memory_operand" "=m")
|
||||||
|
+ (unspec:V16HI
|
||||||
|
+ [(match_operand:V16SF 1 "register_operand" "v")
|
||||||
|
+ (match_operand:SI 2 "const_0_to_255_operand" "N")]
|
||||||
|
+ UNSPEC_VCVTPS2PH))]
|
||||||
|
+ "TARGET_AVX512F"
|
||||||
|
+ "vcvtps2ph\t{%2, %1, %0<merge_mask_operand3>|%0<merge_mask_operand3>, %1, %2}"
|
||||||
|
+ [(set_attr "type" "ssecvt")
|
||||||
|
+ (set_attr "prefix" "evex")
|
||||||
|
+ (set_attr "mode" "V16SF")])
|
||||||
|
+
|
||||||
|
;; For gather* insn patterns
|
||||||
|
(define_mode_iterator VEC_GATHER_MODE
|
||||||
|
[V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
|
||||||
|
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
|
||||||
|
index a5ca144c7f7..58ea9dc83e2 100644
|
||||||
|
--- a/gcc/config/i386/subst.md
|
||||||
|
+++ b/gcc/config/i386/subst.md
|
||||||
|
@@ -73,6 +73,18 @@
|
||||||
|
(match_operand:SUBST_V 2 "nonimm_or_0_operand" "0C")
|
||||||
|
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))])
|
||||||
|
|
||||||
|
+(define_subst_attr "merge_mask_name" "merge_mask" "" "_merge_mask")
|
||||||
|
+(define_subst_attr "merge_mask_operand3" "merge_mask" "" "%{%3%}")
|
||||||
|
+(define_subst "merge_mask"
|
||||||
|
+ [(set (match_operand:SUBST_V 0)
|
||||||
|
+ (match_operand:SUBST_V 1))]
|
||||||
|
+ "TARGET_AVX512F"
|
||||||
|
+ [(set (match_dup 0)
|
||||||
|
+ (vec_merge:SUBST_V
|
||||||
|
+ (match_dup 1)
|
||||||
|
+ (match_dup 0)
|
||||||
|
+ (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))])
|
||||||
|
+
|
||||||
|
(define_subst_attr "mask_scalar_merge_name" "mask_scalar_merge" "" "_mask")
|
||||||
|
(define_subst_attr "mask_scalar_merge_operand3" "mask_scalar_merge" "" "%{%3%}")
|
||||||
|
(define_subst_attr "mask_scalar_merge_operand4" "mask_scalar_merge" "" "%{%4%}")
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..9e0da947368
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vcvtps2ph-pr95254.c
|
||||||
|
@@ -0,0 +1,12 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mavx512f" } */
|
||||||
|
+
|
||||||
|
+#include<immintrin.h>
|
||||||
|
+extern __m256i res;
|
||||||
|
+void
|
||||||
|
+foo (__m512 a, __mmask16 m)
|
||||||
|
+{
|
||||||
|
+ res = _mm512_maskz_cvtps_ph (m, a, 10);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler-not "vcvtps2ph\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]\[^\n\]*res\[^\n\]*\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"} } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..0c685ea66fd
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-vcvtps2ph-pr95254.c
|
||||||
|
@@ -0,0 +1,18 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mavx512vl -mavx512f" } */
|
||||||
|
+
|
||||||
|
+#include<immintrin.h>
|
||||||
|
+extern __m128i res;
|
||||||
|
+void
|
||||||
|
+foo (__m256 a, __mmask8 m)
|
||||||
|
+{
|
||||||
|
+ res = _mm256_maskz_cvtps_ph (m, a, 10);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+foo1 (__m128 a, __mmask8 m)
|
||||||
|
+{
|
||||||
|
+ res = _mm_maskz_cvtps_ph (m, a, 10);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler-not "vcvtps2ph\[ \\t\]+\[^\{\n\]*%\[xy\]mm\[0-9\]\[^\n\]*res\[^\n\]*\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)"} } */
|
||||||
155
IRA-Handle-fully-tied-destinations.patch
Normal file
155
IRA-Handle-fully-tied-destinations.patch
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
This backport contains 1 patch from gcc main stream tree.
|
||||||
|
The commit ID of this patch is listed below.
|
||||||
|
|
||||||
|
0001-IRA-Handle-fully-tied-destinations-in-a-similar-way-.patch
|
||||||
|
9b0365879b3c4917f5a2485a1fca8bb678484bfe
|
||||||
|
|
||||||
|
diff --git a/gcc/ira-lives.c b/gcc/ira-lives.c
|
||||||
|
index cce73a1c3d4..098b0e73953 100644
|
||||||
|
--- a/gcc/ira-lives.c
|
||||||
|
+++ b/gcc/ira-lives.c
|
||||||
|
@@ -633,9 +633,28 @@ check_and_make_def_use_conflict (rtx dreg, rtx orig_dreg,
|
||||||
|
|
||||||
|
/* Check and make if necessary conflicts for definition DEF of class
|
||||||
|
DEF_CL of the current insn with input operands. Process only
|
||||||
|
- constraints of alternative ALT. */
|
||||||
|
+ constraints of alternative ALT.
|
||||||
|
+
|
||||||
|
+ One of three things is true when this function is called:
|
||||||
|
+
|
||||||
|
+ (1) DEF is an earlyclobber for alternative ALT. Input operands then
|
||||||
|
+ conflict with DEF in ALT unless they explicitly match DEF via 0-9
|
||||||
|
+ constraints.
|
||||||
|
+
|
||||||
|
+ (2) DEF matches (via 0-9 constraints) an operand that is an
|
||||||
|
+ earlyclobber for alternative ALT. Other input operands then
|
||||||
|
+ conflict with DEF in ALT.
|
||||||
|
+
|
||||||
|
+ (3) [FOR_TIE_P] Some input operand X matches DEF for alternative ALT.
|
||||||
|
+ Input operands with a different value from X then conflict with
|
||||||
|
+ DEF in ALT.
|
||||||
|
+
|
||||||
|
+ However, there's still a judgement call to make when deciding
|
||||||
|
+ whether a conflict in ALT is important enough to be reflected
|
||||||
|
+ in the pan-alternative allocno conflict set. */
|
||||||
|
static void
|
||||||
|
-check_and_make_def_conflict (int alt, int def, enum reg_class def_cl)
|
||||||
|
+check_and_make_def_conflict (int alt, int def, enum reg_class def_cl,
|
||||||
|
+ bool for_tie_p)
|
||||||
|
{
|
||||||
|
int use, use_match;
|
||||||
|
ira_allocno_t a;
|
||||||
|
@@ -669,14 +688,40 @@ check_and_make_def_conflict (int alt, int def, enum reg_class def_cl)
|
||||||
|
if (use == def || recog_data.operand_type[use] == OP_OUT)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
+ /* An earlyclobber on DEF doesn't apply to an input operand X if X
|
||||||
|
+ explicitly matches DEF, but it applies to other input operands
|
||||||
|
+ even if they happen to be the same value as X.
|
||||||
|
+
|
||||||
|
+ In contrast, if an input operand X is tied to a non-earlyclobber
|
||||||
|
+ DEF, there's no conflict with other input operands that have the
|
||||||
|
+ same value as X. */
|
||||||
|
+ if (op_alt[use].matches == def
|
||||||
|
+ || (for_tie_p
|
||||||
|
+ && rtx_equal_p (recog_data.operand[use],
|
||||||
|
+ recog_data.operand[op_alt[def].matched])))
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
if (op_alt[use].anything_ok)
|
||||||
|
use_cl = ALL_REGS;
|
||||||
|
else
|
||||||
|
use_cl = op_alt[use].cl;
|
||||||
|
+ if (use_cl == NO_REGS)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ /* If DEF is simply a tied operand, ignore cases in which this
|
||||||
|
+ alternative requires USE to have a likely-spilled class.
|
||||||
|
+ Adding a conflict would just constrain USE further if DEF
|
||||||
|
+ happens to be allocated first. */
|
||||||
|
+ if (for_tie_p && targetm.class_likely_spilled_p (use_cl))
|
||||||
|
+ continue;
|
||||||
|
|
||||||
|
/* If there's any alternative that allows USE to match DEF, do not
|
||||||
|
record a conflict. If that causes us to create an invalid
|
||||||
|
- instruction due to the earlyclobber, reload must fix it up. */
|
||||||
|
+ instruction due to the earlyclobber, reload must fix it up.
|
||||||
|
+
|
||||||
|
+ Likewise, if we're treating a tied DEF like a partial earlyclobber,
|
||||||
|
+ do not record a conflict if there's another alternative in which
|
||||||
|
+ DEF is neither tied nor earlyclobber. */
|
||||||
|
for (alt1 = 0; alt1 < recog_data.n_alternatives; alt1++)
|
||||||
|
{
|
||||||
|
if (!TEST_BIT (preferred_alternatives, alt1))
|
||||||
|
@@ -691,6 +736,12 @@ check_and_make_def_conflict (int alt, int def, enum reg_class def_cl)
|
||||||
|
&& recog_data.constraints[use - 1][0] == '%'
|
||||||
|
&& op_alt1[use - 1].matches == def))
|
||||||
|
break;
|
||||||
|
+ if (for_tie_p
|
||||||
|
+ && !op_alt1[def].earlyclobber
|
||||||
|
+ && op_alt1[def].matched < 0
|
||||||
|
+ && alternative_class (op_alt1, def) != NO_REGS
|
||||||
|
+ && alternative_class (op_alt1, use) != NO_REGS)
|
||||||
|
+ break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (alt1 < recog_data.n_alternatives)
|
||||||
|
@@ -701,8 +752,7 @@ check_and_make_def_conflict (int alt, int def, enum reg_class def_cl)
|
||||||
|
|
||||||
|
if ((use_match = op_alt[use].matches) >= 0)
|
||||||
|
{
|
||||||
|
- if (use_match == def)
|
||||||
|
- continue;
|
||||||
|
+ gcc_checking_assert (use_match != def);
|
||||||
|
|
||||||
|
if (op_alt[use_match].anything_ok)
|
||||||
|
use_cl = ALL_REGS;
|
||||||
|
@@ -717,7 +767,11 @@ check_and_make_def_conflict (int alt, int def, enum reg_class def_cl)
|
||||||
|
/* Make conflicts of early clobber pseudo registers of the current
|
||||||
|
insn with its inputs. Avoid introducing unnecessary conflicts by
|
||||||
|
checking classes of the constraints and pseudos because otherwise
|
||||||
|
- significant code degradation is possible for some targets. */
|
||||||
|
+ significant code degradation is possible for some targets.
|
||||||
|
+
|
||||||
|
+ For these purposes, tying an input to an output makes that output act
|
||||||
|
+ like an earlyclobber for inputs with a different value, since the output
|
||||||
|
+ register then has a predetermined purpose on input to the instruction. */
|
||||||
|
static void
|
||||||
|
make_early_clobber_and_input_conflicts (void)
|
||||||
|
{
|
||||||
|
@@ -732,15 +786,19 @@ make_early_clobber_and_input_conflicts (void)
|
||||||
|
if (TEST_BIT (preferred_alternatives, alt))
|
||||||
|
for (def = 0; def < n_operands; def++)
|
||||||
|
{
|
||||||
|
- def_cl = NO_REGS;
|
||||||
|
- if (op_alt[def].earlyclobber)
|
||||||
|
+ if (op_alt[def].anything_ok)
|
||||||
|
+ def_cl = ALL_REGS;
|
||||||
|
+ else
|
||||||
|
+ def_cl = op_alt[def].cl;
|
||||||
|
+ if (def_cl != NO_REGS)
|
||||||
|
{
|
||||||
|
- if (op_alt[def].anything_ok)
|
||||||
|
- def_cl = ALL_REGS;
|
||||||
|
- else
|
||||||
|
- def_cl = op_alt[def].cl;
|
||||||
|
- check_and_make_def_conflict (alt, def, def_cl);
|
||||||
|
+ if (op_alt[def].earlyclobber)
|
||||||
|
+ check_and_make_def_conflict (alt, def, def_cl, false);
|
||||||
|
+ else if (op_alt[def].matched >= 0
|
||||||
|
+ && !targetm.class_likely_spilled_p (def_cl))
|
||||||
|
+ check_and_make_def_conflict (alt, def, def_cl, true);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
if ((def_match = op_alt[def].matches) >= 0
|
||||||
|
&& (op_alt[def_match].earlyclobber
|
||||||
|
|| op_alt[def].earlyclobber))
|
||||||
|
@@ -749,7 +807,7 @@ make_early_clobber_and_input_conflicts (void)
|
||||||
|
def_cl = ALL_REGS;
|
||||||
|
else
|
||||||
|
def_cl = op_alt[def_match].cl;
|
||||||
|
- check_and_make_def_conflict (alt, def, def_cl);
|
||||||
|
+ check_and_make_def_conflict (alt, def, def_cl, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
99
SLP-VECT-Add-check-to-fix-96837.patch
Normal file
99
SLP-VECT-Add-check-to-fix-96837.patch
Normal file
@ -0,0 +1,99 @@
|
|||||||
|
This backport contains 2 patches from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
97b798d80baf945ea28236eef3fa69f36626b579
|
||||||
|
0001-SLP-VECT-Add-check-to-fix-96837.patch
|
||||||
|
|
||||||
|
373b99dc40949efa697326f378e5022a02e0328b
|
||||||
|
0002-Add-a-testcase-for-PR-target-96827.patch
|
||||||
|
|
||||||
|
diff -uprN a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c 2020-11-17 15:58:12.118126065 +0800
|
||||||
|
@@ -0,0 +1,28 @@
|
||||||
|
+/* This checks that vectorized constructors have the correct ordering. */
|
||||||
|
+/* { dg-require-effective-target vect_int } */
|
||||||
|
+
|
||||||
|
+typedef int V __attribute__((__vector_size__(16)));
|
||||||
|
+
|
||||||
|
+__attribute__((__noipa__)) void
|
||||||
|
+foo (unsigned int x, V *y)
|
||||||
|
+{
|
||||||
|
+ unsigned int a[4] = { x + 0, x + 2, x + 4, x + 6 };
|
||||||
|
+ for (unsigned int i = 0; i < 3; ++i)
|
||||||
|
+ if (a[i] == 1234)
|
||||||
|
+ a[i]--;
|
||||||
|
+ *y = (V) { a[3], a[2], a[1], a[0] };
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+main ()
|
||||||
|
+{
|
||||||
|
+ V b;
|
||||||
|
+ foo (0, &b);
|
||||||
|
+ if (b[0] != 6 || b[1] != 4 || b[2] != 2 || b[3] != 0)
|
||||||
|
+ __builtin_abort ();
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* See that we vectorize an SLP instance. */
|
||||||
|
+/* { dg-final { scan-tree-dump "Analyzing vectorizable constructor" "slp1" } } */
|
||||||
|
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */
|
||||||
|
diff -uprN a/gcc/testsuite/gcc.target/i386/pr96827.c b/gcc/testsuite/gcc.target/i386/pr96827.c
|
||||||
|
--- a/gcc/testsuite/gcc.target/i386/pr96827.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.target/i386/pr96827.c 2020-11-17 15:58:15.182126065 +0800
|
||||||
|
@@ -0,0 +1,41 @@
|
||||||
|
+/* { dg-do run { target sse2_runtime } } */
|
||||||
|
+/* { dg-options "-O3 -msse2 -mfpmath=sse" } */
|
||||||
|
+
|
||||||
|
+typedef unsigned short int __uint16_t;
|
||||||
|
+typedef unsigned int __uint32_t;
|
||||||
|
+typedef __uint16_t uint16_t;
|
||||||
|
+typedef __uint32_t uint32_t;
|
||||||
|
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
|
||||||
|
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
|
||||||
|
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
+_mm_store_si128 (__m128i *__P, __m128i __B)
|
||||||
|
+{
|
||||||
|
+ *__P = __B;
|
||||||
|
+}
|
||||||
|
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
+_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
|
||||||
|
+{
|
||||||
|
+ return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
|
||||||
|
+}
|
||||||
|
+typedef uint16_t u16;
|
||||||
|
+typedef uint32_t u32;
|
||||||
|
+extern int printf (const char *__restrict __format, ...);
|
||||||
|
+void do_the_thing(u32 idx, __m128i *dude)
|
||||||
|
+{
|
||||||
|
+ u32 dude_[4] = { idx+0, idx+2, idx+4, idx+6 };
|
||||||
|
+ for (u32 i = 0; i < 3; ++i)
|
||||||
|
+ if (dude_[i] == 1234)
|
||||||
|
+ dude_[i]--;
|
||||||
|
+ *dude = _mm_set_epi32(dude_[0], dude_[1], dude_[2], dude_[3]);
|
||||||
|
+}
|
||||||
|
+int main()
|
||||||
|
+{
|
||||||
|
+ __m128i dude;
|
||||||
|
+ u32 idx = 0;
|
||||||
|
+ do_the_thing(idx, &dude);
|
||||||
|
+ __attribute__((aligned(16))) u32 dude_[4];
|
||||||
|
+ _mm_store_si128((__m128i*)dude_, dude);
|
||||||
|
+ if (!(6 == dude_[0] && 4 == dude_[1] && 2 == dude_[2] && 0 == dude_[3]))
|
||||||
|
+ __builtin_abort ();
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
diff -uprN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
|
||||||
|
--- a/gcc/tree-vect-slp.c 2020-11-17 15:55:57.098126065 +0800
|
||||||
|
+++ b/gcc/tree-vect-slp.c 2020-11-17 15:59:25.862126065 +0800
|
||||||
|
@@ -1842,7 +1842,8 @@ vect_supported_load_permutation_p (slp_i
|
||||||
|
/* Reduction (there are no data-refs in the root).
|
||||||
|
In reduction chain the order of the loads is not important. */
|
||||||
|
if (!STMT_VINFO_DATA_REF (stmt_info)
|
||||||
|
- && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
|
||||||
|
+ && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)
|
||||||
|
+ && !SLP_INSTANCE_ROOT_STMT (slp_instn))
|
||||||
|
vect_attempt_slp_rearrange_stmts (slp_instn);
|
||||||
|
|
||||||
|
/* In basic block vectorization we allow any subchain of an interleaving
|
||||||
165
aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch
Normal file
165
aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch
Normal file
@ -0,0 +1,165 @@
|
|||||||
|
This backport contains 1 patch from gcc main stream tree.
|
||||||
|
The commit ID of this patch is listed below.
|
||||||
|
|
||||||
|
7a6588fe65432c0f1a8b5fdefba81700ebf88711
|
||||||
|
0001-aarch64-Fix-ash-lr-lshr-mode-3-expanders-PR94488.patch
|
||||||
|
|
||||||
|
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
|
||||||
|
index 24a11fb5040..9f0e2bd1e6f 100644
|
||||||
|
--- a/gcc/config/aarch64/aarch64-simd.md
|
||||||
|
+++ b/gcc/config/aarch64/aarch64-simd.md
|
||||||
|
@@ -1105,31 +1105,17 @@
|
||||||
|
tmp));
|
||||||
|
DONE;
|
||||||
|
}
|
||||||
|
- else
|
||||||
|
- {
|
||||||
|
- operands[2] = force_reg (SImode, operands[2]);
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
- else if (MEM_P (operands[2]))
|
||||||
|
- {
|
||||||
|
- operands[2] = force_reg (SImode, operands[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (REG_P (operands[2]))
|
||||||
|
- {
|
||||||
|
- rtx tmp = gen_reg_rtx (<MODE>mode);
|
||||||
|
- emit_insn (gen_aarch64_simd_dup<mode> (tmp,
|
||||||
|
- convert_to_mode (<VEL>mode,
|
||||||
|
- operands[2],
|
||||||
|
- 0)));
|
||||||
|
- emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
|
||||||
|
- tmp));
|
||||||
|
- DONE;
|
||||||
|
- }
|
||||||
|
- else
|
||||||
|
- FAIL;
|
||||||
|
-}
|
||||||
|
-)
|
||||||
|
+ operands[2] = force_reg (SImode, operands[2]);
|
||||||
|
+
|
||||||
|
+ rtx tmp = gen_reg_rtx (<MODE>mode);
|
||||||
|
+ emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
|
||||||
|
+ operands[2],
|
||||||
|
+ 0)));
|
||||||
|
+ emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
|
||||||
|
(define_expand "lshr<mode>3"
|
||||||
|
[(match_operand:VDQ_I 0 "register_operand")
|
||||||
|
@@ -1152,31 +1138,19 @@
|
||||||
|
tmp));
|
||||||
|
DONE;
|
||||||
|
}
|
||||||
|
- else
|
||||||
|
- operands[2] = force_reg (SImode, operands[2]);
|
||||||
|
- }
|
||||||
|
- else if (MEM_P (operands[2]))
|
||||||
|
- {
|
||||||
|
- operands[2] = force_reg (SImode, operands[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (REG_P (operands[2]))
|
||||||
|
- {
|
||||||
|
- rtx tmp = gen_reg_rtx (SImode);
|
||||||
|
- rtx tmp1 = gen_reg_rtx (<MODE>mode);
|
||||||
|
- emit_insn (gen_negsi2 (tmp, operands[2]));
|
||||||
|
- emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
|
||||||
|
- convert_to_mode (<VEL>mode,
|
||||||
|
- tmp, 0)));
|
||||||
|
- emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
|
||||||
|
- operands[1],
|
||||||
|
- tmp1));
|
||||||
|
- DONE;
|
||||||
|
- }
|
||||||
|
- else
|
||||||
|
- FAIL;
|
||||||
|
-}
|
||||||
|
-)
|
||||||
|
+ operands[2] = force_reg (SImode, operands[2]);
|
||||||
|
+
|
||||||
|
+ rtx tmp = gen_reg_rtx (SImode);
|
||||||
|
+ rtx tmp1 = gen_reg_rtx (<MODE>mode);
|
||||||
|
+ emit_insn (gen_negsi2 (tmp, operands[2]));
|
||||||
|
+ emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
|
||||||
|
+ convert_to_mode (<VEL>mode, tmp, 0)));
|
||||||
|
+ emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
|
||||||
|
+ tmp1));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
|
||||||
|
(define_expand "ashr<mode>3"
|
||||||
|
[(match_operand:VDQ_I 0 "register_operand")
|
||||||
|
@@ -1199,31 +1173,19 @@
|
||||||
|
tmp));
|
||||||
|
DONE;
|
||||||
|
}
|
||||||
|
- else
|
||||||
|
- operands[2] = force_reg (SImode, operands[2]);
|
||||||
|
- }
|
||||||
|
- else if (MEM_P (operands[2]))
|
||||||
|
- {
|
||||||
|
- operands[2] = force_reg (SImode, operands[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (REG_P (operands[2]))
|
||||||
|
- {
|
||||||
|
- rtx tmp = gen_reg_rtx (SImode);
|
||||||
|
- rtx tmp1 = gen_reg_rtx (<MODE>mode);
|
||||||
|
- emit_insn (gen_negsi2 (tmp, operands[2]));
|
||||||
|
- emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
|
||||||
|
- convert_to_mode (<VEL>mode,
|
||||||
|
- tmp, 0)));
|
||||||
|
- emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
|
||||||
|
- operands[1],
|
||||||
|
- tmp1));
|
||||||
|
- DONE;
|
||||||
|
- }
|
||||||
|
- else
|
||||||
|
- FAIL;
|
||||||
|
-}
|
||||||
|
-)
|
||||||
|
+ operands[2] = force_reg (SImode, operands[2]);
|
||||||
|
+
|
||||||
|
+ rtx tmp = gen_reg_rtx (SImode);
|
||||||
|
+ rtx tmp1 = gen_reg_rtx (<MODE>mode);
|
||||||
|
+ emit_insn (gen_negsi2 (tmp, operands[2]));
|
||||||
|
+ emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
|
||||||
|
+ tmp, 0)));
|
||||||
|
+ emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
|
||||||
|
+ tmp1));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
|
||||||
|
(define_expand "vashl<mode>3"
|
||||||
|
[(match_operand:VDQ_I 0 "register_operand")
|
||||||
|
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr94488.c b/gcc/testsuite/gcc.c-torture/compile/pr94488.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..6e20a4168de
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.c-torture/compile/pr94488.c
|
||||||
|
@@ -0,0 +1,22 @@
|
||||||
|
+/* PR target/94488 */
|
||||||
|
+
|
||||||
|
+typedef unsigned long V __attribute__((__vector_size__(16)));
|
||||||
|
+typedef long W __attribute__((__vector_size__(16)));
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+foo (V *x, unsigned long y)
|
||||||
|
+{
|
||||||
|
+ *x = *x >> (unsigned int) y;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+bar (V *x, unsigned long y)
|
||||||
|
+{
|
||||||
|
+ *x = *x << (unsigned int) y;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+baz (W *x, unsigned long y)
|
||||||
|
+{
|
||||||
|
+ *x = *x >> (unsigned int) y;
|
||||||
|
+}
|
||||||
1613
aarch64-Fix-bf16-and-matrix-g++-gfortran.patch
Normal file
1613
aarch64-Fix-bf16-and-matrix-g++-gfortran.patch
Normal file
File diff suppressed because it is too large
Load Diff
34
aarch64-Fix-mismatched-SVE-predicate-modes.patch
Normal file
34
aarch64-Fix-mismatched-SVE-predicate-modes.patch
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-aarch64-Fix-mismatched-SVE-predicate-modes.patch
|
||||||
|
26bebf576ddcdcfb596f07e8c2896f17c48516e7
|
||||||
|
|
||||||
|
diff -urpN a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||||||
|
--- a/gcc/config/aarch64/aarch64.c 2020-12-14 00:57:20.128000000 -0500
|
||||||
|
+++ b/gcc/config/aarch64/aarch64.c 2020-12-14 01:00:15.080000000 -0500
|
||||||
|
@@ -4328,6 +4328,7 @@ aarch64_expand_sve_const_pred_eor (rtx t
|
||||||
|
/* EOR the result with an ELT_SIZE PTRUE. */
|
||||||
|
rtx mask = aarch64_ptrue_all (elt_size);
|
||||||
|
mask = force_reg (VNx16BImode, mask);
|
||||||
|
+ inv = gen_lowpart (VNx16BImode, inv);
|
||||||
|
target = aarch64_target_reg (target, VNx16BImode);
|
||||||
|
emit_insn (gen_aarch64_pred_z (XOR, VNx16BImode, target, mask, inv, mask));
|
||||||
|
return target;
|
||||||
|
diff -urpN a/gcc/testsuite/gcc.dg/vect/pr94606.c b/gcc/testsuite/gcc.dg/vect/pr94606.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/pr94606.c 1969-12-31 19:00:00.000000000 -0500
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/pr94606.c 2020-12-14 01:00:15.080000000 -0500
|
||||||
|
@@ -0,0 +1,13 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-additional-options "-march=armv8.2-a+sve -msve-vector-bits=256" { target aarch64*-*-* } } */
|
||||||
|
+
|
||||||
|
+const short mask[] = { 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
+ 0, 0, 0, 1, 1, 1, 1, 1 };
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+foo (short *restrict x, short *restrict y)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 16; ++i)
|
||||||
|
+ if (mask[i])
|
||||||
|
+ x[i] += y[i];
|
||||||
|
+}
|
||||||
2128
aarch64-fix-sve-acle-error.patch
Normal file
2128
aarch64-fix-sve-acle-error.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,88 @@
|
|||||||
|
This backport contains 2 patches from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
4bf29d15f2e01348a45a1f4e1a135962f123fdd6
|
||||||
|
0001-AArch64-PR79262-Adjust-vector-cost.patch
|
||||||
|
|
||||||
|
27071013521b015d17a2666448f27a6ff0c55aca
|
||||||
|
0001-Move-EXTRACT_LAST_REDUCTION-costing-to-vectorizable_.patch
|
||||||
|
|
||||||
|
diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||||||
|
--- a/gcc/config/aarch64/aarch64.c 2020-11-20 04:36:33.988000000 +0800
|
||||||
|
+++ b/gcc/config/aarch64/aarch64.c 2020-11-20 04:32:20.984000000 +0800
|
||||||
|
@@ -448,7 +448,7 @@ static const struct cpu_vector_cost gene
|
||||||
|
1, /* vec_int_stmt_cost */
|
||||||
|
1, /* vec_fp_stmt_cost */
|
||||||
|
2, /* vec_permute_cost */
|
||||||
|
- 1, /* vec_to_scalar_cost */
|
||||||
|
+ 2, /* vec_to_scalar_cost */
|
||||||
|
1, /* scalar_to_vec_cost */
|
||||||
|
1, /* vec_align_load_cost */
|
||||||
|
1, /* vec_unalign_load_cost */
|
||||||
|
diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
|
||||||
|
--- a/gcc/tree-vect-loop.c 2020-11-20 04:36:34.016000000 +0800
|
||||||
|
+++ b/gcc/tree-vect-loop.c 2020-11-20 04:32:20.984000000 +0800
|
||||||
|
@@ -3926,8 +3926,11 @@ vect_model_reduction_cost (stmt_vec_info
|
||||||
|
|
||||||
|
code = gimple_assign_rhs_code (orig_stmt_info->stmt);
|
||||||
|
|
||||||
|
- if (reduction_type == EXTRACT_LAST_REDUCTION
|
||||||
|
- || reduction_type == FOLD_LEFT_REDUCTION)
|
||||||
|
+ if (reduction_type == EXTRACT_LAST_REDUCTION)
|
||||||
|
+ /* No extra instructions are needed in the prologue. The loop body
|
||||||
|
+ operations are costed in vectorizable_condition. */
|
||||||
|
+ inside_cost = 0;
|
||||||
|
+ else if (reduction_type == FOLD_LEFT_REDUCTION)
|
||||||
|
{
|
||||||
|
/* No extra instructions needed in the prologue. */
|
||||||
|
prologue_cost = 0;
|
||||||
|
diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
|
||||||
|
--- a/gcc/tree-vect-stmts.c 2020-11-20 04:36:33.996000000 +0800
|
||||||
|
+++ b/gcc/tree-vect-stmts.c 2020-11-20 04:32:20.984000000 +0800
|
||||||
|
@@ -859,7 +859,8 @@ vect_model_simple_cost (stmt_vec_info st
|
||||||
|
enum vect_def_type *dt,
|
||||||
|
int ndts,
|
||||||
|
slp_tree node,
|
||||||
|
- stmt_vector_for_cost *cost_vec)
|
||||||
|
+ stmt_vector_for_cost *cost_vec,
|
||||||
|
+ vect_cost_for_stmt kind = vector_stmt)
|
||||||
|
{
|
||||||
|
int inside_cost = 0, prologue_cost = 0;
|
||||||
|
|
||||||
|
@@ -906,7 +907,7 @@ vect_model_simple_cost (stmt_vec_info st
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Pass the inside-of-loop statements to the target-specific cost model. */
|
||||||
|
- inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
|
||||||
|
+ inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
|
||||||
|
stmt_info, 0, vect_body);
|
||||||
|
|
||||||
|
if (dump_enabled_p ())
|
||||||
|
@@ -9194,15 +9195,18 @@ vectorizable_condition (stmt_vec_info st
|
||||||
|
" EXTRACT_LAST_REDUCTION.\n");
|
||||||
|
LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
|
||||||
|
}
|
||||||
|
- if (expand_vec_cond_expr_p (vectype, comp_vectype,
|
||||||
|
- cond_code))
|
||||||
|
- {
|
||||||
|
- STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
|
||||||
|
- vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
|
||||||
|
- cost_vec);
|
||||||
|
- return true;
|
||||||
|
- }
|
||||||
|
- return false;
|
||||||
|
+
|
||||||
|
+ vect_cost_for_stmt kind = vector_stmt;
|
||||||
|
+ if (reduction_type == EXTRACT_LAST_REDUCTION)
|
||||||
|
+ /* Count one reduction-like operation per vector. */
|
||||||
|
+ kind = vec_to_scalar;
|
||||||
|
+ else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
+ STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
|
||||||
|
+ vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
|
||||||
|
+ cost_vec, kind);
|
||||||
|
+ return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Transform. */
|
||||||
@ -1,3 +1,9 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-lra-Avoid-cycling-on-certain-subreg-reloads-PR96796.patch
|
||||||
|
6001db79c477b03eacc7e7049560921fb54b7845
|
||||||
|
|
||||||
diff -uprN a/gcc/lra-constraints.c b/gcc/lra-constraints.c
|
diff -uprN a/gcc/lra-constraints.c b/gcc/lra-constraints.c
|
||||||
--- a/gcc/lra-constraints.c 2020-03-12 19:07:21.000000000 +0800
|
--- a/gcc/lra-constraints.c 2020-03-12 19:07:21.000000000 +0800
|
||||||
+++ b/gcc/lra-constraints.c 2020-09-08 10:02:52.308147305 +0800
|
+++ b/gcc/lra-constraints.c 2020-09-08 10:02:52.308147305 +0800
|
||||||
|
|||||||
466067
bf16-and-matrix-characteristic.patch
Normal file
466067
bf16-and-matrix-characteristic.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,9 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-Bump-BASE-VER-to-9.3.1.patch
|
||||||
|
9f26e5863a75744bbee1479792ecae084a3ceb20
|
||||||
|
|
||||||
diff -Nurp a/gcc/BASE-VER b/gcc/BASE-VER
|
diff -Nurp a/gcc/BASE-VER b/gcc/BASE-VER
|
||||||
--- a/gcc/BASE-VER 2020-08-19 10:47:14.100000000 +0800
|
--- a/gcc/BASE-VER 2020-08-19 10:47:14.100000000 +0800
|
||||||
+++ b/gcc/BASE-VER 2020-08-19 10:32:30.380000000 +0800
|
+++ b/gcc/BASE-VER 2020-08-19 10:32:30.380000000 +0800
|
||||||
|
|||||||
@ -1,3 +1,9 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-combine-Don-t-generate-IF_THEN_ELSE.patch
|
||||||
|
ddbb5da5199fb421dc398911c37fa7f896efc13f
|
||||||
|
|
||||||
diff --git a/gcc/combine.c b/gcc/combine.c
|
diff --git a/gcc/combine.c b/gcc/combine.c
|
||||||
index 4de759a8e6b..ce7aeecb5c2 100644
|
index 4de759a8e6b..ce7aeecb5c2 100644
|
||||||
--- a/gcc/combine.c
|
--- a/gcc/combine.c
|
||||||
|
|||||||
51
fix-ICE-IPA-compare-VRP-types.patch
Normal file
51
fix-ICE-IPA-compare-VRP-types.patch
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-IPA-compare-VRP-types.patch
|
||||||
|
a86623902767122c71c7229150a8b8a79cbb3673
|
||||||
|
|
||||||
|
diff -Nurp a/gcc/ipa-prop.c b/gcc/ipa-prop.c
|
||||||
|
--- a/gcc/ipa-prop.c 2020-11-28 00:19:34.340000000 +0800
|
||||||
|
+++ b/gcc/ipa-prop.c 2020-11-28 00:21:24.680000000 +0800
|
||||||
|
@@ -122,7 +122,8 @@ struct ipa_vr_ggc_hash_traits : public g
|
||||||
|
static bool
|
||||||
|
equal (const value_range_base *a, const value_range_base *b)
|
||||||
|
{
|
||||||
|
- return a->equal_p (*b);
|
||||||
|
+ return (a->equal_p (*b)
|
||||||
|
+ && types_compatible_p (a->type (), b->type ()));
|
||||||
|
}
|
||||||
|
static void
|
||||||
|
mark_empty (value_range_base *&p)
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.c-torture/execute/pr97404.c b/gcc/testsuite/gcc.c-torture/execute/pr97404.c
|
||||||
|
--- a/gcc/testsuite/gcc.c-torture/execute/pr97404.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97404.c 2020-11-28 00:21:24.680000000 +0800
|
||||||
|
@@ -0,0 +1,28 @@
|
||||||
|
+/* PR ipa/97404 */
|
||||||
|
+/* { dg-additional-options "-fno-inline" } */
|
||||||
|
+
|
||||||
|
+char a, b;
|
||||||
|
+long c;
|
||||||
|
+short d, e;
|
||||||
|
+long *f = &c;
|
||||||
|
+int g;
|
||||||
|
+char h(signed char i) { return 0; }
|
||||||
|
+static short j(short i, int k) { return i < 0 ? 0 : i >> k; }
|
||||||
|
+void l(void);
|
||||||
|
+void m(void)
|
||||||
|
+{
|
||||||
|
+ e = j(d | 9766, 11);
|
||||||
|
+ *f = e;
|
||||||
|
+}
|
||||||
|
+void l(void)
|
||||||
|
+{
|
||||||
|
+ a = 5 | g;
|
||||||
|
+ b = h(a);
|
||||||
|
+}
|
||||||
|
+int main()
|
||||||
|
+{
|
||||||
|
+ m();
|
||||||
|
+ if (c != 4)
|
||||||
|
+ __builtin_abort();
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
396
fix-ICE-in-affine-combination.patch
Normal file
396
fix-ICE-in-affine-combination.patch
Normal file
@ -0,0 +1,396 @@
|
|||||||
|
This backport contains 2 patches from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-tree-affine.c-expr_to_aff_combination-New-function-s.patch
|
||||||
|
5120e0d8d48f4590a275e60565de6c5a4e772fc1
|
||||||
|
|
||||||
|
0001-PR-tree-optimization-94574-aarch64-ICE-during-GIMPLE.patch
|
||||||
|
0447929f11e6a3e1b076841712b90a8b6bc7d33a
|
||||||
|
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c 2020-12-08 14:54:11.467633230 +0800
|
||||||
|
@@ -0,0 +1,8 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
|
||||||
|
+
|
||||||
|
+#define TYPE unsigned int
|
||||||
|
+
|
||||||
|
+#include "pr83403.h"
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c 2020-12-08 14:54:11.467633230 +0800
|
||||||
|
@@ -0,0 +1,8 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */
|
||||||
|
+
|
||||||
|
+#define TYPE int
|
||||||
|
+
|
||||||
|
+#include "pr83403.h"
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h
|
||||||
|
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h 2020-12-08 14:54:11.467633230 +0800
|
||||||
|
@@ -0,0 +1,30 @@
|
||||||
|
+__attribute__ ((noinline)) void
|
||||||
|
+calculate (const double *__restrict__ A, const double *__restrict__ B,
|
||||||
|
+ double *__restrict__ C)
|
||||||
|
+{
|
||||||
|
+ TYPE m = 0;
|
||||||
|
+ TYPE n = 0;
|
||||||
|
+ TYPE k = 0;
|
||||||
|
+
|
||||||
|
+ A = (const double *) __builtin_assume_aligned (A, 16);
|
||||||
|
+ B = (const double *) __builtin_assume_aligned (B, 16);
|
||||||
|
+ C = (double *) __builtin_assume_aligned (C, 16);
|
||||||
|
+
|
||||||
|
+ for (n = 0; n < 9; n++)
|
||||||
|
+ {
|
||||||
|
+ for (m = 0; m < 10; m++)
|
||||||
|
+ {
|
||||||
|
+ C[(n * 10) + m] = 0.0;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ for (k = 0; k < 17; k++)
|
||||||
|
+ {
|
||||||
|
+#pragma simd
|
||||||
|
+ for (m = 0; m < 10; m++)
|
||||||
|
+ {
|
||||||
|
+ C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k];
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
diff -Nurp a/gcc/tree-affine.c b/gcc/tree-affine.c
|
||||||
|
--- a/gcc/tree-affine.c 2020-12-09 09:01:13.179633230 +0800
|
||||||
|
+++ b/gcc/tree-affine.c 2020-12-08 14:54:11.467633230 +0800
|
||||||
|
@@ -259,104 +259,66 @@ aff_combination_convert (aff_tree *comb,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Splits EXPR into an affine combination of parts. */
|
||||||
|
+/* Tries to handle OP0 CODE OP1 as affine combination of parts. Returns
|
||||||
|
+ true when that was successful and returns the combination in COMB. */
|
||||||
|
|
||||||
|
-void
|
||||||
|
-tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
|
||||||
|
+static bool
|
||||||
|
+expr_to_aff_combination (aff_tree *comb, tree_code code, tree type,
|
||||||
|
+ tree op0, tree op1 = NULL_TREE)
|
||||||
|
{
|
||||||
|
aff_tree tmp;
|
||||||
|
- enum tree_code code;
|
||||||
|
- tree cst, core, toffset;
|
||||||
|
poly_int64 bitpos, bitsize, bytepos;
|
||||||
|
- machine_mode mode;
|
||||||
|
- int unsignedp, reversep, volatilep;
|
||||||
|
-
|
||||||
|
- STRIP_NOPS (expr);
|
||||||
|
|
||||||
|
- code = TREE_CODE (expr);
|
||||||
|
switch (code)
|
||||||
|
{
|
||||||
|
case POINTER_PLUS_EXPR:
|
||||||
|
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
|
||||||
|
- tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp);
|
||||||
|
+ tree_to_aff_combination (op0, type, comb);
|
||||||
|
+ tree_to_aff_combination (op1, sizetype, &tmp);
|
||||||
|
aff_combination_add (comb, &tmp);
|
||||||
|
- return;
|
||||||
|
+ return true;
|
||||||
|
|
||||||
|
case PLUS_EXPR:
|
||||||
|
case MINUS_EXPR:
|
||||||
|
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
|
||||||
|
- tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
|
||||||
|
+ tree_to_aff_combination (op0, type, comb);
|
||||||
|
+ tree_to_aff_combination (op1, type, &tmp);
|
||||||
|
if (code == MINUS_EXPR)
|
||||||
|
aff_combination_scale (&tmp, -1);
|
||||||
|
aff_combination_add (comb, &tmp);
|
||||||
|
- return;
|
||||||
|
+ return true;
|
||||||
|
|
||||||
|
case MULT_EXPR:
|
||||||
|
- cst = TREE_OPERAND (expr, 1);
|
||||||
|
- if (TREE_CODE (cst) != INTEGER_CST)
|
||||||
|
+ if (TREE_CODE (op1) != INTEGER_CST)
|
||||||
|
break;
|
||||||
|
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
|
||||||
|
- aff_combination_scale (comb, wi::to_widest (cst));
|
||||||
|
- return;
|
||||||
|
+ tree_to_aff_combination (op0, type, comb);
|
||||||
|
+ aff_combination_scale (comb, wi::to_widest (op1));
|
||||||
|
+ return true;
|
||||||
|
|
||||||
|
case NEGATE_EXPR:
|
||||||
|
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
|
||||||
|
+ tree_to_aff_combination (op0, type, comb);
|
||||||
|
aff_combination_scale (comb, -1);
|
||||||
|
- return;
|
||||||
|
+ return true;
|
||||||
|
|
||||||
|
case BIT_NOT_EXPR:
|
||||||
|
/* ~x = -x - 1 */
|
||||||
|
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
|
||||||
|
+ tree_to_aff_combination (op0, type, comb);
|
||||||
|
aff_combination_scale (comb, -1);
|
||||||
|
aff_combination_add_cst (comb, -1);
|
||||||
|
- return;
|
||||||
|
-
|
||||||
|
- case ADDR_EXPR:
|
||||||
|
- /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */
|
||||||
|
- if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF)
|
||||||
|
- {
|
||||||
|
- expr = TREE_OPERAND (expr, 0);
|
||||||
|
- tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
|
||||||
|
- tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp);
|
||||||
|
- aff_combination_add (comb, &tmp);
|
||||||
|
- return;
|
||||||
|
- }
|
||||||
|
- core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
|
||||||
|
- &toffset, &mode, &unsignedp, &reversep,
|
||||||
|
- &volatilep);
|
||||||
|
- if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
|
||||||
|
- break;
|
||||||
|
- aff_combination_const (comb, type, bytepos);
|
||||||
|
- if (TREE_CODE (core) == MEM_REF)
|
||||||
|
- {
|
||||||
|
- tree mem_offset = TREE_OPERAND (core, 1);
|
||||||
|
- aff_combination_add_cst (comb, wi::to_poly_widest (mem_offset));
|
||||||
|
- core = TREE_OPERAND (core, 0);
|
||||||
|
- }
|
||||||
|
- else
|
||||||
|
- core = build_fold_addr_expr (core);
|
||||||
|
-
|
||||||
|
- if (TREE_CODE (core) == ADDR_EXPR)
|
||||||
|
- aff_combination_add_elt (comb, core, 1);
|
||||||
|
- else
|
||||||
|
- {
|
||||||
|
- tree_to_aff_combination (core, type, &tmp);
|
||||||
|
- aff_combination_add (comb, &tmp);
|
||||||
|
- }
|
||||||
|
- if (toffset)
|
||||||
|
- {
|
||||||
|
- tree_to_aff_combination (toffset, type, &tmp);
|
||||||
|
- aff_combination_add (comb, &tmp);
|
||||||
|
- }
|
||||||
|
- return;
|
||||||
|
+ return true;
|
||||||
|
|
||||||
|
CASE_CONVERT:
|
||||||
|
{
|
||||||
|
- tree otype = TREE_TYPE (expr);
|
||||||
|
- tree inner = TREE_OPERAND (expr, 0);
|
||||||
|
+ tree otype = type;
|
||||||
|
+ tree inner = op0;
|
||||||
|
tree itype = TREE_TYPE (inner);
|
||||||
|
enum tree_code icode = TREE_CODE (inner);
|
||||||
|
|
||||||
|
+ /* STRIP_NOPS */
|
||||||
|
+ if (tree_nop_conversion_p (otype, itype))
|
||||||
|
+ {
|
||||||
|
+ tree_to_aff_combination (op0, type, comb);
|
||||||
|
+ return true;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
/* In principle this is a valid folding, but it isn't necessarily
|
||||||
|
an optimization, so do it here and not in fold_unary. */
|
||||||
|
if ((icode == PLUS_EXPR || icode == MINUS_EXPR || icode == MULT_EXPR)
|
||||||
|
@@ -376,38 +338,127 @@ tree_to_aff_combination (tree expr, tree
|
||||||
|
{
|
||||||
|
op0 = fold_convert (otype, op0);
|
||||||
|
op1 = fold_convert (otype, op1);
|
||||||
|
- expr = fold_build2 (icode, otype, op0, op1);
|
||||||
|
- tree_to_aff_combination (expr, type, comb);
|
||||||
|
- return;
|
||||||
|
+ return expr_to_aff_combination (comb, icode, otype, op0, op1);
|
||||||
|
}
|
||||||
|
wide_int minv, maxv;
|
||||||
|
/* If inner type has wrapping overflow behavior, fold conversion
|
||||||
|
for below case:
|
||||||
|
- (T1)(X - CST) -> (T1)X - (T1)CST
|
||||||
|
- if X - CST doesn't overflow by range information. Also handle
|
||||||
|
- (T1)(X + CST) as (T1)(X - (-CST)). */
|
||||||
|
+ (T1)(X *+- CST) -> (T1)X *+- (T1)CST
|
||||||
|
+ if X *+- CST doesn't overflow by range information. */
|
||||||
|
if (TYPE_UNSIGNED (itype)
|
||||||
|
&& TYPE_OVERFLOW_WRAPS (itype)
|
||||||
|
- && TREE_CODE (op0) == SSA_NAME
|
||||||
|
&& TREE_CODE (op1) == INTEGER_CST
|
||||||
|
- && icode != MULT_EXPR
|
||||||
|
- && get_range_info (op0, &minv, &maxv) == VR_RANGE)
|
||||||
|
+ && determine_value_range (op0, &minv, &maxv) == VR_RANGE)
|
||||||
|
{
|
||||||
|
+ wi::overflow_type overflow = wi::OVF_NONE;
|
||||||
|
+ signop sign = UNSIGNED;
|
||||||
|
if (icode == PLUS_EXPR)
|
||||||
|
- op1 = wide_int_to_tree (itype, -wi::to_wide (op1));
|
||||||
|
- if (wi::geu_p (minv, wi::to_wide (op1)))
|
||||||
|
+ wi::add (maxv, wi::to_wide (op1), sign, &overflow);
|
||||||
|
+ else if (icode == MULT_EXPR)
|
||||||
|
+ wi::mul (maxv, wi::to_wide (op1), sign, &overflow);
|
||||||
|
+ else
|
||||||
|
+ wi::sub (minv, wi::to_wide (op1), sign, &overflow);
|
||||||
|
+
|
||||||
|
+ if (overflow == wi::OVF_NONE)
|
||||||
|
{
|
||||||
|
op0 = fold_convert (otype, op0);
|
||||||
|
op1 = fold_convert (otype, op1);
|
||||||
|
- expr = fold_build2 (MINUS_EXPR, otype, op0, op1);
|
||||||
|
- tree_to_aff_combination (expr, type, comb);
|
||||||
|
- return;
|
||||||
|
+ return expr_to_aff_combination (comb, icode, otype, op0,
|
||||||
|
+ op1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
+ default:;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return false;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Splits EXPR into an affine combination of parts. */
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
|
||||||
|
+{
|
||||||
|
+ aff_tree tmp;
|
||||||
|
+ enum tree_code code;
|
||||||
|
+ tree core, toffset;
|
||||||
|
+ poly_int64 bitpos, bitsize, bytepos;
|
||||||
|
+ machine_mode mode;
|
||||||
|
+ int unsignedp, reversep, volatilep;
|
||||||
|
+
|
||||||
|
+ STRIP_NOPS (expr);
|
||||||
|
+
|
||||||
|
+ code = TREE_CODE (expr);
|
||||||
|
+ switch (code)
|
||||||
|
+ {
|
||||||
|
+ case POINTER_PLUS_EXPR:
|
||||||
|
+ case PLUS_EXPR:
|
||||||
|
+ case MINUS_EXPR:
|
||||||
|
+ case MULT_EXPR:
|
||||||
|
+ if (expr_to_aff_combination (comb, code, type, TREE_OPERAND (expr, 0),
|
||||||
|
+ TREE_OPERAND (expr, 1)))
|
||||||
|
+ return;
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ case NEGATE_EXPR:
|
||||||
|
+ case BIT_NOT_EXPR:
|
||||||
|
+ if (expr_to_aff_combination (comb, code, type, TREE_OPERAND (expr, 0)))
|
||||||
|
+ return;
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ CASE_CONVERT:
|
||||||
|
+ /* ??? TREE_TYPE (expr) should be equal to type here, but IVOPTS
|
||||||
|
+ calls this with not showing an outer widening cast. */
|
||||||
|
+ if (expr_to_aff_combination (comb, code,
|
||||||
|
+ TREE_TYPE (expr), TREE_OPERAND (expr, 0)))
|
||||||
|
+ {
|
||||||
|
+ aff_combination_convert (comb, type);
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ case ADDR_EXPR:
|
||||||
|
+ /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */
|
||||||
|
+ if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF)
|
||||||
|
+ {
|
||||||
|
+ expr = TREE_OPERAND (expr, 0);
|
||||||
|
+ tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
|
||||||
|
+ tree_to_aff_combination (TREE_OPERAND (expr, 1), sizetype, &tmp);
|
||||||
|
+ aff_combination_add (comb, &tmp);
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+ core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
|
||||||
|
+ &toffset, &mode, &unsignedp, &reversep,
|
||||||
|
+ &volatilep);
|
||||||
|
+ if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
|
||||||
|
+ break;
|
||||||
|
+ aff_combination_const (comb, type, bytepos);
|
||||||
|
+ if (TREE_CODE (core) == MEM_REF)
|
||||||
|
+ {
|
||||||
|
+ tree mem_offset = TREE_OPERAND (core, 1);
|
||||||
|
+ aff_combination_add_cst (comb, wi::to_poly_widest (mem_offset));
|
||||||
|
+ core = TREE_OPERAND (core, 0);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ core = build_fold_addr_expr (core);
|
||||||
|
+
|
||||||
|
+ if (TREE_CODE (core) == ADDR_EXPR)
|
||||||
|
+ aff_combination_add_elt (comb, core, 1);
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ tree_to_aff_combination (core, type, &tmp);
|
||||||
|
+ aff_combination_add (comb, &tmp);
|
||||||
|
+ }
|
||||||
|
+ if (toffset)
|
||||||
|
+ {
|
||||||
|
+ tree_to_aff_combination (toffset, type, &tmp);
|
||||||
|
+ aff_combination_add (comb, &tmp);
|
||||||
|
+ }
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
default:
|
||||||
|
{
|
||||||
|
if (poly_int_tree_p (expr))
|
||||||
|
@@ -665,7 +716,7 @@ aff_combination_expand (aff_tree *comb A
|
||||||
|
{
|
||||||
|
unsigned i;
|
||||||
|
aff_tree to_add, current, curre;
|
||||||
|
- tree e, rhs;
|
||||||
|
+ tree e;
|
||||||
|
gimple *def;
|
||||||
|
widest_int scale;
|
||||||
|
struct name_expansion *exp;
|
||||||
|
@@ -715,20 +766,38 @@ aff_combination_expand (aff_tree *comb A
|
||||||
|
case PLUS_EXPR:
|
||||||
|
case MINUS_EXPR:
|
||||||
|
case MULT_EXPR:
|
||||||
|
+ if (!expr_to_aff_combination (¤t, code, TREE_TYPE (name),
|
||||||
|
+ gimple_assign_rhs1 (def),
|
||||||
|
+ gimple_assign_rhs2 (def)))
|
||||||
|
+ continue;
|
||||||
|
+ break;
|
||||||
|
case NEGATE_EXPR:
|
||||||
|
case BIT_NOT_EXPR:
|
||||||
|
+ if (!expr_to_aff_combination (¤t, code, TREE_TYPE (name),
|
||||||
|
+ gimple_assign_rhs1 (def)))
|
||||||
|
+ continue;
|
||||||
|
+ break;
|
||||||
|
CASE_CONVERT:
|
||||||
|
- rhs = gimple_assign_rhs_to_tree (def);
|
||||||
|
+ if (!expr_to_aff_combination (¤t, code, TREE_TYPE (name),
|
||||||
|
+ gimple_assign_rhs1 (def)))
|
||||||
|
+ /* This makes us always expand conversions which we did
|
||||||
|
+ in the past and makes gcc.dg/tree-ssa/ivopts-lt-2.c
|
||||||
|
+ PASS, eliminating one induction variable in IVOPTs.
|
||||||
|
+ ??? But it is really excessive and we should try
|
||||||
|
+ harder to do without it. */
|
||||||
|
+ aff_combination_elt (¤t, TREE_TYPE (name),
|
||||||
|
+ fold_convert (TREE_TYPE (name),
|
||||||
|
+ gimple_assign_rhs1 (def)));
|
||||||
|
break;
|
||||||
|
case ADDR_EXPR:
|
||||||
|
case INTEGER_CST:
|
||||||
|
case POLY_INT_CST:
|
||||||
|
- rhs = gimple_assign_rhs1 (def);
|
||||||
|
+ tree_to_aff_combination (gimple_assign_rhs1 (def),
|
||||||
|
+ TREE_TYPE (name), ¤t);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
- tree_to_aff_combination (rhs, TREE_TYPE (name), ¤t);
|
||||||
|
exp = XNEW (struct name_expansion);
|
||||||
|
exp->in_progress = 1;
|
||||||
|
if (!*cache)
|
||||||
@ -1,3 +1,9 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-re-PR-tree-optimization-92085-ICE-tree-check-expecte.patch
|
||||||
|
3c8e341b996546607fa1f39a0fd9a9d7c2c38214
|
||||||
|
|
||||||
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c
|
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c
|
||||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c 1970-01-01 08:00:00.000000000 +0800
|
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c 2020-07-09 11:05:23.136000000 +0800
|
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c 2020-07-09 11:05:23.136000000 +0800
|
||||||
|
|||||||
37
fix-ICE-in-pass-vect.patch
Normal file
37
fix-ICE-in-pass-vect.patch
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
diff -uprN a/gcc/testsuite/gcc.target/aarch64/sve/slp_fix_1.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_fix_1.c
|
||||||
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_fix_1.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_fix_1.c 2020-11-17 02:38:45.284000000 +0800
|
||||||
|
@@ -0,0 +1,19 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=256 -funsafe-math-optimizations" } */
|
||||||
|
+
|
||||||
|
+long a, b;
|
||||||
|
+float c, e;
|
||||||
|
+float *d;
|
||||||
|
+void f() {
|
||||||
|
+ float g, h, i, j;
|
||||||
|
+ b = 0;
|
||||||
|
+ for (; b < a; b++) {
|
||||||
|
+ i = d[0];
|
||||||
|
+ g = g + i * e;
|
||||||
|
+ j = d[1];
|
||||||
|
+ h = h - j * e;
|
||||||
|
+ d = d + 2;
|
||||||
|
+ }
|
||||||
|
+ c = g;
|
||||||
|
+ e = h;
|
||||||
|
+}
|
||||||
|
diff -uprN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
|
||||||
|
--- a/gcc/tree-vect-slp.c 2020-11-16 10:59:36.000000000 +0800
|
||||||
|
+++ b/gcc/tree-vect-slp.c 2020-11-16 23:30:19.560000000 +0800
|
||||||
|
@@ -4140,8 +4140,8 @@ vect_schedule_slp_instance (slp_tree nod
|
||||||
|
gimple *vstmt;
|
||||||
|
vstmt = gimple_build_assign (make_ssa_name (vectype),
|
||||||
|
VEC_PERM_EXPR,
|
||||||
|
- gimple_assign_lhs (v0[j]->stmt),
|
||||||
|
- gimple_assign_lhs (v1[j]->stmt),
|
||||||
|
+ gimple_get_lhs (v0[j]->stmt),
|
||||||
|
+ gimple_get_lhs (v1[j]->stmt),
|
||||||
|
tmask);
|
||||||
|
SLP_TREE_VEC_STMTS (node).quick_push
|
||||||
|
(vect_finish_stmt_generation (stmt_info, vstmt, &si));
|
||||||
@ -1,3 +1,9 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-Don-t-assign-a-cost-to-vectorizable_assignment.patch
|
||||||
|
e4020b28d02a00d478a3a769855ae6a8d9cc6b26
|
||||||
|
|
||||||
diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
|
diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
|
||||||
--- a/gcc/tree-vect-loop.c 2020-07-09 10:42:35.824000000 +0800
|
--- a/gcc/tree-vect-loop.c 2020-07-09 10:42:35.824000000 +0800
|
||||||
+++ b/gcc/tree-vect-loop.c 2020-07-09 10:43:23.920000000 +0800
|
+++ b/gcc/tree-vect-loop.c 2020-07-09 10:43:23.920000000 +0800
|
||||||
|
|||||||
@ -1,3 +1,9 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-re-PR-tree-optimization-92252-ICE-Segmentation-fault.patch
|
||||||
|
97c6bea819ec0a773041308e62a7c05c33f093b0
|
||||||
|
|
||||||
diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr92252.c b/gcc/testsuite/gcc.dg/torture/pr92252.c
|
diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr92252.c b/gcc/testsuite/gcc.dg/torture/pr92252.c
|
||||||
--- a/gcc/testsuite/gcc.dg/torture/pr92252.c 1970-01-01 08:00:00.000000000 +0800
|
--- a/gcc/testsuite/gcc.dg/torture/pr92252.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
+++ b/gcc/testsuite/gcc.dg/torture/pr92252.c 2020-07-03 10:39:44.808000000 +0800
|
+++ b/gcc/testsuite/gcc.dg/torture/pr92252.c 2020-07-03 10:39:44.808000000 +0800
|
||||||
|
|||||||
784
fix-ICE-in-vect_update_misalignment_for_peel.patch
Normal file
784
fix-ICE-in-vect_update_misalignment_for_peel.patch
Normal file
@ -0,0 +1,784 @@
|
|||||||
|
This backport contains 5 patches from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
8801ca5c28c3a9e9f36fa39a6a4455b48c8221fa
|
||||||
|
9ac1403ca2c65ba4f28cf051b5326617fa9298d1
|
||||||
|
7e99af4816cfad578094fcf08e2377f3ed76e201
|
||||||
|
ef8777c14ce8694f53eab7a88d24513cbf541ba4
|
||||||
|
dccbf1e2a6e544f71b4a5795f0c79015db019fc3
|
||||||
|
|
||||||
|
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/vect/pr92677.c b/gcc/testsuite/gcc.dg/vect/pr92677.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/pr92677.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/pr92677.c 2020-10-26 18:31:50.980000000 +0800
|
||||||
|
@@ -0,0 +1,26 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-additional-options "-O3" } */
|
||||||
|
+
|
||||||
|
+int a, c;
|
||||||
|
+int *b;
|
||||||
|
+long d;
|
||||||
|
+double *e;
|
||||||
|
+
|
||||||
|
+void fn1() {
|
||||||
|
+ long f;
|
||||||
|
+ double g, h;
|
||||||
|
+ while (c) {
|
||||||
|
+ if (d) {
|
||||||
|
+ g = *e;
|
||||||
|
+ *(b + 4) = g;
|
||||||
|
+ }
|
||||||
|
+ if (f) {
|
||||||
|
+ h = *(e + 2);
|
||||||
|
+ *(b + 6) = h;
|
||||||
|
+ }
|
||||||
|
+ e += a;
|
||||||
|
+ b += 8;
|
||||||
|
+ c--;
|
||||||
|
+ d += 2;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/vect/slp-46.c b/gcc/testsuite/gcc.dg/vect/slp-46.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/slp-46.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/slp-46.c 2020-10-26 18:31:56.512000000 +0800
|
||||||
|
@@ -0,0 +1,96 @@
|
||||||
|
+/* { dg-require-effective-target vect_double } */
|
||||||
|
+
|
||||||
|
+#include "tree-vect.h"
|
||||||
|
+
|
||||||
|
+double x[1024], y[1024];
|
||||||
|
+
|
||||||
|
+void __attribute__((noipa)) foo()
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 512; ++i)
|
||||||
|
+ {
|
||||||
|
+ x[2*i] = y[i];
|
||||||
|
+ x[2*i+1] = y[i];
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void __attribute__((noipa)) bar()
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 512; ++i)
|
||||||
|
+ {
|
||||||
|
+ x[2*i] = y[2*i];
|
||||||
|
+ x[2*i+1] = y[2*i];
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void __attribute__((noipa)) baz()
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 512; ++i)
|
||||||
|
+ {
|
||||||
|
+ x[2*i] = y[511-i];
|
||||||
|
+ x[2*i+1] = y[511-i];
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void __attribute__((noipa)) boo()
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 512; ++i)
|
||||||
|
+ {
|
||||||
|
+ x[2*i] = y[2*(511-i)];
|
||||||
|
+ x[2*i+1] = y[2*(511-i)];
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+main ()
|
||||||
|
+{
|
||||||
|
+ check_vect ();
|
||||||
|
+
|
||||||
|
+ for (int i = 0; i < 1024; ++i)
|
||||||
|
+ {
|
||||||
|
+ x[i] = 0;
|
||||||
|
+ y[i] = i;
|
||||||
|
+ __asm__ volatile ("");
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ foo ();
|
||||||
|
+ for (int i = 0; i < 1024; ++i)
|
||||||
|
+ if (x[i] != y[i/2])
|
||||||
|
+ abort ();
|
||||||
|
+
|
||||||
|
+ for (int i = 0; i < 1024; ++i)
|
||||||
|
+ {
|
||||||
|
+ x[i] = 0;
|
||||||
|
+ __asm__ volatile ("");
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ bar ();
|
||||||
|
+ for (int i = 0; i < 1024; ++i)
|
||||||
|
+ if (x[i] != y[2*(i/2)])
|
||||||
|
+ abort ();
|
||||||
|
+
|
||||||
|
+ for (int i = 0; i < 1024; ++i)
|
||||||
|
+ {
|
||||||
|
+ x[i] = 0;
|
||||||
|
+ __asm__ volatile ("");
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ baz ();
|
||||||
|
+ for (int i = 0; i < 1024; ++i)
|
||||||
|
+ if (x[i] != y[511 - i/2])
|
||||||
|
+ abort ();
|
||||||
|
+
|
||||||
|
+ for (int i = 0; i < 1024; ++i)
|
||||||
|
+ {
|
||||||
|
+ x[i] = 0;
|
||||||
|
+ __asm__ volatile ("");
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ boo ();
|
||||||
|
+ for (int i = 0; i < 1024; ++i)
|
||||||
|
+ if (x[i] != y[2*(511 - i/2)])
|
||||||
|
+ abort ();
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-5.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-5.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-5.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-5.c 2020-10-26 18:31:53.584000000 +0800
|
||||||
|
@@ -0,0 +1,36 @@
|
||||||
|
+#include "tree-vect.h"
|
||||||
|
+
|
||||||
|
+#define N 512
|
||||||
|
+
|
||||||
|
+int a[N], b[N];
|
||||||
|
+
|
||||||
|
+int __attribute__((noipa))
|
||||||
|
+foo (int aval, int bval)
|
||||||
|
+{
|
||||||
|
+ int i, res = 0;
|
||||||
|
+ for (i=0; i<N; i++)
|
||||||
|
+ {
|
||||||
|
+ if (a[i] != 0)
|
||||||
|
+ res = aval;
|
||||||
|
+ if (b[i] != 0)
|
||||||
|
+ res = bval;
|
||||||
|
+ }
|
||||||
|
+ return res;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int main()
|
||||||
|
+{
|
||||||
|
+ check_vect ();
|
||||||
|
+ if (foo (1, 2) != 0)
|
||||||
|
+ abort ();
|
||||||
|
+ a[3] = 1;
|
||||||
|
+ b[4] = 1;
|
||||||
|
+ if (foo (1, 2) != 2)
|
||||||
|
+ abort ();
|
||||||
|
+ a[7] = 1;
|
||||||
|
+ if (foo (1, 2) != 1)
|
||||||
|
+ abort ();
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_condition } } } */
|
||||||
|
diff -Nurp a/gcc/testsuite/g++.dg/pr91221.C b/gcc/testsuite/g++.dg/pr91221.C
|
||||||
|
--- a/gcc/testsuite/g++.dg/pr91221.C 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/g++.dg/pr91221.C 2020-10-26 18:31:45.768000000 +0800
|
||||||
|
@@ -0,0 +1,13 @@
|
||||||
|
+// { dg-do compile }
|
||||||
|
+// { dg-options "-O2 -fno-ipa-pure-const -fpack-struct -Wno-address-of-packed-member" }
|
||||||
|
+
|
||||||
|
+void printf(...);
|
||||||
|
+struct A {
|
||||||
|
+ A() : bar_(), dbar_() {
|
||||||
|
+ for (int i;; i++)
|
||||||
|
+ printf(i, bar_[i]);
|
||||||
|
+ }
|
||||||
|
+ int bar_[5];
|
||||||
|
+ double dbar_[5];
|
||||||
|
+};
|
||||||
|
+void fn1() { A a; }
|
||||||
|
diff -Nurp a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
|
||||||
|
--- a/gcc/tree-scalar-evolution.c 2020-10-26 18:28:58.720000000 +0800
|
||||||
|
+++ b/gcc/tree-scalar-evolution.c 2020-10-26 18:31:48.472000000 +0800
|
||||||
|
@@ -933,8 +933,8 @@ enum t_bool {
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
-static t_bool follow_ssa_edge (struct loop *loop, gimple *, gphi *,
|
||||||
|
- tree *, int);
|
||||||
|
+static t_bool follow_ssa_edge_expr (struct loop *loop, gimple *, tree, gphi *,
|
||||||
|
+ tree *, int);
|
||||||
|
|
||||||
|
/* Follow the ssa edge into the binary expression RHS0 CODE RHS1.
|
||||||
|
Return true if the strongly connected component has been found. */
|
||||||
|
@@ -969,8 +969,8 @@ follow_ssa_edge_binary (struct loop *loo
|
||||||
|
(loop->num,
|
||||||
|
chrec_convert (type, evol, at_stmt),
|
||||||
|
code, rhs1, at_stmt);
|
||||||
|
- res = follow_ssa_edge
|
||||||
|
- (loop, SSA_NAME_DEF_STMT (rhs0), halting_phi, &evol, limit);
|
||||||
|
+ res = follow_ssa_edge_expr
|
||||||
|
+ (loop, at_stmt, rhs0, halting_phi, &evol, limit);
|
||||||
|
if (res == t_true)
|
||||||
|
*evolution_of_loop = evol;
|
||||||
|
else if (res == t_false)
|
||||||
|
@@ -979,8 +979,8 @@ follow_ssa_edge_binary (struct loop *loo
|
||||||
|
(loop->num,
|
||||||
|
chrec_convert (type, *evolution_of_loop, at_stmt),
|
||||||
|
code, rhs0, at_stmt);
|
||||||
|
- res = follow_ssa_edge
|
||||||
|
- (loop, SSA_NAME_DEF_STMT (rhs1), halting_phi,
|
||||||
|
+ res = follow_ssa_edge_expr
|
||||||
|
+ (loop, at_stmt, rhs1, halting_phi,
|
||||||
|
evolution_of_loop, limit);
|
||||||
|
if (res == t_true)
|
||||||
|
;
|
||||||
|
@@ -1000,8 +1000,8 @@ follow_ssa_edge_binary (struct loop *loo
|
||||||
|
(loop->num, chrec_convert (type, *evolution_of_loop,
|
||||||
|
at_stmt),
|
||||||
|
code, rhs1, at_stmt);
|
||||||
|
- res = follow_ssa_edge
|
||||||
|
- (loop, SSA_NAME_DEF_STMT (rhs0), halting_phi,
|
||||||
|
+ res = follow_ssa_edge_expr
|
||||||
|
+ (loop, at_stmt, rhs0, halting_phi,
|
||||||
|
evolution_of_loop, limit);
|
||||||
|
if (res == t_true)
|
||||||
|
;
|
||||||
|
@@ -1018,8 +1018,8 @@ follow_ssa_edge_binary (struct loop *loo
|
||||||
|
(loop->num, chrec_convert (type, *evolution_of_loop,
|
||||||
|
at_stmt),
|
||||||
|
code, rhs0, at_stmt);
|
||||||
|
- res = follow_ssa_edge
|
||||||
|
- (loop, SSA_NAME_DEF_STMT (rhs1), halting_phi,
|
||||||
|
+ res = follow_ssa_edge_expr
|
||||||
|
+ (loop, at_stmt, rhs1, halting_phi,
|
||||||
|
evolution_of_loop, limit);
|
||||||
|
if (res == t_true)
|
||||||
|
;
|
||||||
|
@@ -1050,8 +1050,8 @@ follow_ssa_edge_binary (struct loop *loo
|
||||||
|
*evolution_of_loop = add_to_evolution
|
||||||
|
(loop->num, chrec_convert (type, *evolution_of_loop, at_stmt),
|
||||||
|
MINUS_EXPR, rhs1, at_stmt);
|
||||||
|
- res = follow_ssa_edge (loop, SSA_NAME_DEF_STMT (rhs0), halting_phi,
|
||||||
|
- evolution_of_loop, limit);
|
||||||
|
+ res = follow_ssa_edge_expr (loop, at_stmt, rhs0, halting_phi,
|
||||||
|
+ evolution_of_loop, limit);
|
||||||
|
if (res == t_true)
|
||||||
|
;
|
||||||
|
else if (res == t_dont_know)
|
||||||
|
@@ -1071,140 +1071,6 @@ follow_ssa_edge_binary (struct loop *loo
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Follow the ssa edge into the expression EXPR.
|
||||||
|
- Return true if the strongly connected component has been found. */
|
||||||
|
-
|
||||||
|
-static t_bool
|
||||||
|
-follow_ssa_edge_expr (struct loop *loop, gimple *at_stmt, tree expr,
|
||||||
|
- gphi *halting_phi, tree *evolution_of_loop,
|
||||||
|
- int limit)
|
||||||
|
-{
|
||||||
|
- enum tree_code code = TREE_CODE (expr);
|
||||||
|
- tree type = TREE_TYPE (expr), rhs0, rhs1;
|
||||||
|
- t_bool res;
|
||||||
|
-
|
||||||
|
- /* The EXPR is one of the following cases:
|
||||||
|
- - an SSA_NAME,
|
||||||
|
- - an INTEGER_CST,
|
||||||
|
- - a PLUS_EXPR,
|
||||||
|
- - a POINTER_PLUS_EXPR,
|
||||||
|
- - a MINUS_EXPR,
|
||||||
|
- - an ASSERT_EXPR,
|
||||||
|
- - other cases are not yet handled. */
|
||||||
|
-
|
||||||
|
- switch (code)
|
||||||
|
- {
|
||||||
|
- CASE_CONVERT:
|
||||||
|
- /* This assignment is under the form "a_1 = (cast) rhs. */
|
||||||
|
- res = follow_ssa_edge_expr (loop, at_stmt, TREE_OPERAND (expr, 0),
|
||||||
|
- halting_phi, evolution_of_loop, limit);
|
||||||
|
- *evolution_of_loop = chrec_convert (type, *evolution_of_loop, at_stmt);
|
||||||
|
- break;
|
||||||
|
-
|
||||||
|
- case INTEGER_CST:
|
||||||
|
- /* This assignment is under the form "a_1 = 7". */
|
||||||
|
- res = t_false;
|
||||||
|
- break;
|
||||||
|
-
|
||||||
|
- case SSA_NAME:
|
||||||
|
- /* This assignment is under the form: "a_1 = b_2". */
|
||||||
|
- res = follow_ssa_edge
|
||||||
|
- (loop, SSA_NAME_DEF_STMT (expr), halting_phi, evolution_of_loop, limit);
|
||||||
|
- break;
|
||||||
|
-
|
||||||
|
- case POINTER_PLUS_EXPR:
|
||||||
|
- case PLUS_EXPR:
|
||||||
|
- case MINUS_EXPR:
|
||||||
|
- /* This case is under the form "rhs0 +- rhs1". */
|
||||||
|
- rhs0 = TREE_OPERAND (expr, 0);
|
||||||
|
- rhs1 = TREE_OPERAND (expr, 1);
|
||||||
|
- type = TREE_TYPE (rhs0);
|
||||||
|
- STRIP_USELESS_TYPE_CONVERSION (rhs0);
|
||||||
|
- STRIP_USELESS_TYPE_CONVERSION (rhs1);
|
||||||
|
- res = follow_ssa_edge_binary (loop, at_stmt, type, rhs0, code, rhs1,
|
||||||
|
- halting_phi, evolution_of_loop, limit);
|
||||||
|
- break;
|
||||||
|
-
|
||||||
|
- case ADDR_EXPR:
|
||||||
|
- /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */
|
||||||
|
- if (TREE_CODE (TREE_OPERAND (expr, 0)) == MEM_REF)
|
||||||
|
- {
|
||||||
|
- expr = TREE_OPERAND (expr, 0);
|
||||||
|
- rhs0 = TREE_OPERAND (expr, 0);
|
||||||
|
- rhs1 = TREE_OPERAND (expr, 1);
|
||||||
|
- type = TREE_TYPE (rhs0);
|
||||||
|
- STRIP_USELESS_TYPE_CONVERSION (rhs0);
|
||||||
|
- STRIP_USELESS_TYPE_CONVERSION (rhs1);
|
||||||
|
- res = follow_ssa_edge_binary (loop, at_stmt, type,
|
||||||
|
- rhs0, POINTER_PLUS_EXPR, rhs1,
|
||||||
|
- halting_phi, evolution_of_loop, limit);
|
||||||
|
- }
|
||||||
|
- else
|
||||||
|
- res = t_false;
|
||||||
|
- break;
|
||||||
|
-
|
||||||
|
- case ASSERT_EXPR:
|
||||||
|
- /* This assignment is of the form: "a_1 = ASSERT_EXPR <a_2, ...>"
|
||||||
|
- It must be handled as a copy assignment of the form a_1 = a_2. */
|
||||||
|
- rhs0 = ASSERT_EXPR_VAR (expr);
|
||||||
|
- if (TREE_CODE (rhs0) == SSA_NAME)
|
||||||
|
- res = follow_ssa_edge (loop, SSA_NAME_DEF_STMT (rhs0),
|
||||||
|
- halting_phi, evolution_of_loop, limit);
|
||||||
|
- else
|
||||||
|
- res = t_false;
|
||||||
|
- break;
|
||||||
|
-
|
||||||
|
- default:
|
||||||
|
- res = t_false;
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return res;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-/* Follow the ssa edge into the right hand side of an assignment STMT.
|
||||||
|
- Return true if the strongly connected component has been found. */
|
||||||
|
-
|
||||||
|
-static t_bool
|
||||||
|
-follow_ssa_edge_in_rhs (struct loop *loop, gimple *stmt,
|
||||||
|
- gphi *halting_phi, tree *evolution_of_loop,
|
||||||
|
- int limit)
|
||||||
|
-{
|
||||||
|
- enum tree_code code = gimple_assign_rhs_code (stmt);
|
||||||
|
- tree type = gimple_expr_type (stmt), rhs1, rhs2;
|
||||||
|
- t_bool res;
|
||||||
|
-
|
||||||
|
- switch (code)
|
||||||
|
- {
|
||||||
|
- CASE_CONVERT:
|
||||||
|
- /* This assignment is under the form "a_1 = (cast) rhs. */
|
||||||
|
- res = follow_ssa_edge_expr (loop, stmt, gimple_assign_rhs1 (stmt),
|
||||||
|
- halting_phi, evolution_of_loop, limit);
|
||||||
|
- *evolution_of_loop = chrec_convert (type, *evolution_of_loop, stmt);
|
||||||
|
- break;
|
||||||
|
-
|
||||||
|
- case POINTER_PLUS_EXPR:
|
||||||
|
- case PLUS_EXPR:
|
||||||
|
- case MINUS_EXPR:
|
||||||
|
- rhs1 = gimple_assign_rhs1 (stmt);
|
||||||
|
- rhs2 = gimple_assign_rhs2 (stmt);
|
||||||
|
- type = TREE_TYPE (rhs1);
|
||||||
|
- res = follow_ssa_edge_binary (loop, stmt, type, rhs1, code, rhs2,
|
||||||
|
- halting_phi, evolution_of_loop, limit);
|
||||||
|
- break;
|
||||||
|
-
|
||||||
|
- default:
|
||||||
|
- if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
|
||||||
|
- res = follow_ssa_edge_expr (loop, stmt, gimple_assign_rhs1 (stmt),
|
||||||
|
- halting_phi, evolution_of_loop, limit);
|
||||||
|
- else
|
||||||
|
- res = t_false;
|
||||||
|
- break;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return res;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
/* Checks whether the I-th argument of a PHI comes from a backedge. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
@@ -1244,8 +1110,8 @@ follow_ssa_edge_in_condition_phi_branch
|
||||||
|
if (TREE_CODE (branch) == SSA_NAME)
|
||||||
|
{
|
||||||
|
*evolution_of_branch = init_cond;
|
||||||
|
- return follow_ssa_edge (loop, SSA_NAME_DEF_STMT (branch), halting_phi,
|
||||||
|
- evolution_of_branch, limit);
|
||||||
|
+ return follow_ssa_edge_expr (loop, condition_phi, branch, halting_phi,
|
||||||
|
+ evolution_of_branch, limit);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This case occurs when one of the condition branches sets
|
||||||
|
@@ -1352,65 +1218,158 @@ follow_ssa_edge_inner_loop_phi (struct l
|
||||||
|
evolution_of_loop, limit);
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Follow an SSA edge from a loop-phi-node to itself, constructing a
|
||||||
|
- path that is analyzed on the return walk. */
|
||||||
|
+/* Follow the ssa edge into the expression EXPR.
|
||||||
|
+ Return true if the strongly connected component has been found. */
|
||||||
|
|
||||||
|
static t_bool
|
||||||
|
-follow_ssa_edge (struct loop *loop, gimple *def, gphi *halting_phi,
|
||||||
|
- tree *evolution_of_loop, int limit)
|
||||||
|
+follow_ssa_edge_expr (struct loop *loop, gimple *at_stmt, tree expr,
|
||||||
|
+ gphi *halting_phi, tree *evolution_of_loop,
|
||||||
|
+ int limit)
|
||||||
|
{
|
||||||
|
- struct loop *def_loop;
|
||||||
|
+ enum tree_code code;
|
||||||
|
+ tree type, rhs0, rhs1 = NULL_TREE;
|
||||||
|
|
||||||
|
- if (gimple_nop_p (def))
|
||||||
|
- return t_false;
|
||||||
|
+ /* The EXPR is one of the following cases:
|
||||||
|
+ - an SSA_NAME,
|
||||||
|
+ - an INTEGER_CST,
|
||||||
|
+ - a PLUS_EXPR,
|
||||||
|
+ - a POINTER_PLUS_EXPR,
|
||||||
|
+ - a MINUS_EXPR,
|
||||||
|
+ - an ASSERT_EXPR,
|
||||||
|
+ - other cases are not yet handled. */
|
||||||
|
|
||||||
|
- /* Give up if the path is longer than the MAX that we allow. */
|
||||||
|
- if (limit > PARAM_VALUE (PARAM_SCEV_MAX_EXPR_COMPLEXITY))
|
||||||
|
- return t_dont_know;
|
||||||
|
-
|
||||||
|
- def_loop = loop_containing_stmt (def);
|
||||||
|
-
|
||||||
|
- switch (gimple_code (def))
|
||||||
|
- {
|
||||||
|
- case GIMPLE_PHI:
|
||||||
|
- if (!loop_phi_node_p (def))
|
||||||
|
- /* DEF is a condition-phi-node. Follow the branches, and
|
||||||
|
- record their evolutions. Finally, merge the collected
|
||||||
|
- information and set the approximation to the main
|
||||||
|
- variable. */
|
||||||
|
- return follow_ssa_edge_in_condition_phi
|
||||||
|
- (loop, as_a <gphi *> (def), halting_phi, evolution_of_loop,
|
||||||
|
- limit);
|
||||||
|
-
|
||||||
|
- /* When the analyzed phi is the halting_phi, the
|
||||||
|
- depth-first search is over: we have found a path from
|
||||||
|
- the halting_phi to itself in the loop. */
|
||||||
|
- if (def == halting_phi)
|
||||||
|
- return t_true;
|
||||||
|
+ /* For SSA_NAME look at the definition statement, handling
|
||||||
|
+ PHI nodes and otherwise expand appropriately for the expression
|
||||||
|
+ handling below. */
|
||||||
|
+ if (TREE_CODE (expr) == SSA_NAME)
|
||||||
|
+ {
|
||||||
|
+ gimple *def = SSA_NAME_DEF_STMT (expr);
|
||||||
|
|
||||||
|
- /* Otherwise, the evolution of the HALTING_PHI depends
|
||||||
|
- on the evolution of another loop-phi-node, i.e. the
|
||||||
|
- evolution function is a higher degree polynomial. */
|
||||||
|
- if (def_loop == loop)
|
||||||
|
+ if (gimple_nop_p (def))
|
||||||
|
return t_false;
|
||||||
|
|
||||||
|
- /* Inner loop. */
|
||||||
|
- if (flow_loop_nested_p (loop, def_loop))
|
||||||
|
- return follow_ssa_edge_inner_loop_phi
|
||||||
|
- (loop, as_a <gphi *> (def), halting_phi, evolution_of_loop,
|
||||||
|
- limit + 1);
|
||||||
|
+ /* Give up if the path is longer than the MAX that we allow. */
|
||||||
|
+ if (limit > PARAM_VALUE (PARAM_SCEV_MAX_EXPR_COMPLEXITY))
|
||||||
|
+ return t_dont_know;
|
||||||
|
|
||||||
|
- /* Outer loop. */
|
||||||
|
- return t_false;
|
||||||
|
+ if (gphi *phi = dyn_cast <gphi *>(def))
|
||||||
|
+ {
|
||||||
|
+ if (!loop_phi_node_p (phi))
|
||||||
|
+ /* DEF is a condition-phi-node. Follow the branches, and
|
||||||
|
+ record their evolutions. Finally, merge the collected
|
||||||
|
+ information and set the approximation to the main
|
||||||
|
+ variable. */
|
||||||
|
+ return follow_ssa_edge_in_condition_phi
|
||||||
|
+ (loop, phi, halting_phi, evolution_of_loop, limit);
|
||||||
|
+
|
||||||
|
+ /* When the analyzed phi is the halting_phi, the
|
||||||
|
+ depth-first search is over: we have found a path from
|
||||||
|
+ the halting_phi to itself in the loop. */
|
||||||
|
+ if (phi == halting_phi)
|
||||||
|
+ return t_true;
|
||||||
|
+
|
||||||
|
+ /* Otherwise, the evolution of the HALTING_PHI depends
|
||||||
|
+ on the evolution of another loop-phi-node, i.e. the
|
||||||
|
+ evolution function is a higher degree polynomial. */
|
||||||
|
+ class loop *def_loop = loop_containing_stmt (def);
|
||||||
|
+ if (def_loop == loop)
|
||||||
|
+ return t_false;
|
||||||
|
+
|
||||||
|
+ /* Inner loop. */
|
||||||
|
+ if (flow_loop_nested_p (loop, def_loop))
|
||||||
|
+ return follow_ssa_edge_inner_loop_phi
|
||||||
|
+ (loop, phi, halting_phi, evolution_of_loop,
|
||||||
|
+ limit + 1);
|
||||||
|
|
||||||
|
- case GIMPLE_ASSIGN:
|
||||||
|
- return follow_ssa_edge_in_rhs (loop, def, halting_phi,
|
||||||
|
- evolution_of_loop, limit);
|
||||||
|
+ /* Outer loop. */
|
||||||
|
+ return t_false;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
- default:
|
||||||
|
/* At this level of abstraction, the program is just a set
|
||||||
|
of GIMPLE_ASSIGNs and PHI_NODEs. In principle there is no
|
||||||
|
- other node to be handled. */
|
||||||
|
+ other def to be handled. */
|
||||||
|
+ if (!is_gimple_assign (def))
|
||||||
|
+ return t_false;
|
||||||
|
+
|
||||||
|
+ code = gimple_assign_rhs_code (def);
|
||||||
|
+ switch (get_gimple_rhs_class (code))
|
||||||
|
+ {
|
||||||
|
+ case GIMPLE_BINARY_RHS:
|
||||||
|
+ rhs0 = gimple_assign_rhs1 (def);
|
||||||
|
+ rhs1 = gimple_assign_rhs2 (def);
|
||||||
|
+ break;
|
||||||
|
+ case GIMPLE_UNARY_RHS:
|
||||||
|
+ case GIMPLE_SINGLE_RHS:
|
||||||
|
+ rhs0 = gimple_assign_rhs1 (def);
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ return t_false;
|
||||||
|
+ }
|
||||||
|
+ type = TREE_TYPE (gimple_assign_lhs (def));
|
||||||
|
+ at_stmt = def;
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ code = TREE_CODE (expr);
|
||||||
|
+ type = TREE_TYPE (expr);
|
||||||
|
+ switch (code)
|
||||||
|
+ {
|
||||||
|
+ CASE_CONVERT:
|
||||||
|
+ rhs0 = TREE_OPERAND (expr, 0);
|
||||||
|
+ break;
|
||||||
|
+ case POINTER_PLUS_EXPR:
|
||||||
|
+ case PLUS_EXPR:
|
||||||
|
+ case MINUS_EXPR:
|
||||||
|
+ rhs0 = TREE_OPERAND (expr, 0);
|
||||||
|
+ rhs1 = TREE_OPERAND (expr, 1);
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ rhs0 = expr;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ switch (code)
|
||||||
|
+ {
|
||||||
|
+ CASE_CONVERT:
|
||||||
|
+ {
|
||||||
|
+ /* This assignment is under the form "a_1 = (cast) rhs. */
|
||||||
|
+ t_bool res = follow_ssa_edge_expr (loop, at_stmt, rhs0, halting_phi,
|
||||||
|
+ evolution_of_loop, limit);
|
||||||
|
+ *evolution_of_loop = chrec_convert (type, *evolution_of_loop, at_stmt);
|
||||||
|
+ return res;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ case INTEGER_CST:
|
||||||
|
+ /* This assignment is under the form "a_1 = 7". */
|
||||||
|
+ return t_false;
|
||||||
|
+
|
||||||
|
+ case ADDR_EXPR:
|
||||||
|
+ {
|
||||||
|
+ /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */
|
||||||
|
+ if (TREE_CODE (TREE_OPERAND (rhs0, 0)) != MEM_REF)
|
||||||
|
+ return t_false;
|
||||||
|
+ tree mem = TREE_OPERAND (rhs0, 0);
|
||||||
|
+ rhs0 = TREE_OPERAND (mem, 0);
|
||||||
|
+ rhs1 = TREE_OPERAND (mem, 1);
|
||||||
|
+ code = POINTER_PLUS_EXPR;
|
||||||
|
+ }
|
||||||
|
+ /* Fallthru. */
|
||||||
|
+ case POINTER_PLUS_EXPR:
|
||||||
|
+ case PLUS_EXPR:
|
||||||
|
+ case MINUS_EXPR:
|
||||||
|
+ /* This case is under the form "rhs0 +- rhs1". */
|
||||||
|
+ STRIP_USELESS_TYPE_CONVERSION (rhs0);
|
||||||
|
+ STRIP_USELESS_TYPE_CONVERSION (rhs1);
|
||||||
|
+ return follow_ssa_edge_binary (loop, at_stmt, type, rhs0, code, rhs1,
|
||||||
|
+ halting_phi, evolution_of_loop, limit);
|
||||||
|
+
|
||||||
|
+ case ASSERT_EXPR:
|
||||||
|
+ /* This assignment is of the form: "a_1 = ASSERT_EXPR <a_2, ...>"
|
||||||
|
+ It must be handled as a copy assignment of the form a_1 = a_2. */
|
||||||
|
+ return follow_ssa_edge_expr (loop, at_stmt, ASSERT_EXPR_VAR (rhs0),
|
||||||
|
+ halting_phi, evolution_of_loop, limit);
|
||||||
|
+
|
||||||
|
+ default:
|
||||||
|
return t_false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -1504,7 +1463,6 @@ analyze_evolution_in_loop (gphi *loop_ph
|
||||||
|
for (i = 0; i < n; i++)
|
||||||
|
{
|
||||||
|
tree arg = PHI_ARG_DEF (loop_phi_node, i);
|
||||||
|
- gimple *ssa_chain;
|
||||||
|
tree ev_fn;
|
||||||
|
t_bool res;
|
||||||
|
|
||||||
|
@@ -1517,11 +1475,10 @@ analyze_evolution_in_loop (gphi *loop_ph
|
||||||
|
{
|
||||||
|
bool val = false;
|
||||||
|
|
||||||
|
- ssa_chain = SSA_NAME_DEF_STMT (arg);
|
||||||
|
-
|
||||||
|
/* Pass in the initial condition to the follow edge function. */
|
||||||
|
ev_fn = init_cond;
|
||||||
|
- res = follow_ssa_edge (loop, ssa_chain, loop_phi_node, &ev_fn, 0);
|
||||||
|
+ res = follow_ssa_edge_expr (loop, loop_phi_node, arg,
|
||||||
|
+ loop_phi_node, &ev_fn, 0);
|
||||||
|
|
||||||
|
/* If ev_fn has no evolution in the inner loop, and the
|
||||||
|
init_cond is not equal to ev_fn, then we have an
|
||||||
|
diff -Nurp a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
|
||||||
|
--- a/gcc/tree-ssa-sccvn.c 2020-10-26 18:28:58.736000000 +0800
|
||||||
|
+++ b/gcc/tree-ssa-sccvn.c 2020-10-26 18:31:45.768000000 +0800
|
||||||
|
@@ -2456,7 +2456,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree
|
||||||
|
(vuse, vr->set, vr->type, vr->operands, val);
|
||||||
|
}
|
||||||
|
/* For now handle clearing memory with partial defs. */
|
||||||
|
- else if (integer_zerop (gimple_call_arg (def_stmt, 1))
|
||||||
|
+ else if (known_eq (ref->size, maxsize)
|
||||||
|
+ && integer_zerop (gimple_call_arg (def_stmt, 1))
|
||||||
|
&& tree_to_poly_int64 (len).is_constant (&leni)
|
||||||
|
&& offset.is_constant (&offseti)
|
||||||
|
&& offset2.is_constant (&offset2i)
|
||||||
|
@@ -2494,7 +2495,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree
|
||||||
|
return vn_reference_lookup_or_insert_for_pieces
|
||||||
|
(vuse, vr->set, vr->type, vr->operands, val);
|
||||||
|
}
|
||||||
|
- else if (maxsize.is_constant (&maxsizei)
|
||||||
|
+ else if (known_eq (ref->size, maxsize)
|
||||||
|
+ && maxsize.is_constant (&maxsizei)
|
||||||
|
&& maxsizei % BITS_PER_UNIT == 0
|
||||||
|
&& offset.is_constant (&offseti)
|
||||||
|
&& offseti % BITS_PER_UNIT == 0
|
||||||
|
diff -Nurp a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
|
||||||
|
--- a/gcc/tree-vect-data-refs.c 2020-10-26 18:28:58.792000000 +0800
|
||||||
|
+++ b/gcc/tree-vect-data-refs.c 2020-10-26 18:31:56.512000000 +0800
|
||||||
|
@@ -1045,7 +1045,7 @@ vect_compute_data_ref_alignment (dr_vec_
|
||||||
|
if (tree_int_cst_sgn (drb->step) < 0)
|
||||||
|
/* PLUS because STEP is negative. */
|
||||||
|
misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
|
||||||
|
- * TREE_INT_CST_LOW (drb->step));
|
||||||
|
+ * -TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
|
||||||
|
|
||||||
|
unsigned int const_misalignment;
|
||||||
|
if (!known_misalignment (misalignment, vect_align_c, &const_misalignment))
|
||||||
|
diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
|
||||||
|
--- a/gcc/tree-vect-loop.c 2020-10-26 18:28:58.728000000 +0800
|
||||||
|
+++ b/gcc/tree-vect-loop.c 2020-10-26 18:31:53.584000000 +0800
|
||||||
|
@@ -1850,7 +1850,10 @@ vect_dissolve_slp_only_groups (loop_vec_
|
||||||
|
DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo;
|
||||||
|
DR_GROUP_NEXT_ELEMENT (vinfo) = NULL;
|
||||||
|
DR_GROUP_SIZE (vinfo) = 1;
|
||||||
|
- DR_GROUP_GAP (vinfo) = group_size - 1;
|
||||||
|
+ if (STMT_VINFO_STRIDED_P (first_element))
|
||||||
|
+ DR_GROUP_GAP (vinfo) = 0;
|
||||||
|
+ else
|
||||||
|
+ DR_GROUP_GAP (vinfo) = group_size - 1;
|
||||||
|
vinfo = next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -4516,18 +4519,26 @@ vect_create_epilog_for_reduction (stmt_v
|
||||||
|
zeroes. */
|
||||||
|
if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION)
|
||||||
|
{
|
||||||
|
+ auto_vec<std::pair<tree, bool>, 2> ccompares;
|
||||||
|
stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info);
|
||||||
|
cond_info = vect_stmt_to_vectorize (cond_info);
|
||||||
|
- while (gimple_assign_rhs_code (cond_info->stmt) != COND_EXPR)
|
||||||
|
+ while (cond_info != reduc_info)
|
||||||
|
{
|
||||||
|
+ if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR)
|
||||||
|
+ {
|
||||||
|
+ gimple *vec_stmt = STMT_VINFO_VEC_STMT (cond_info)->stmt;
|
||||||
|
+ gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
|
||||||
|
+ ccompares.safe_push
|
||||||
|
+ (std::make_pair (unshare_expr (gimple_assign_rhs1 (vec_stmt)),
|
||||||
|
+ STMT_VINFO_REDUC_IDX (cond_info) == 2));
|
||||||
|
+ }
|
||||||
|
cond_info
|
||||||
|
= loop_vinfo->lookup_def (gimple_op (cond_info->stmt,
|
||||||
|
1 + STMT_VINFO_REDUC_IDX
|
||||||
|
(cond_info)));
|
||||||
|
cond_info = vect_stmt_to_vectorize (cond_info);
|
||||||
|
}
|
||||||
|
- gimple *vec_stmt = STMT_VINFO_VEC_STMT (cond_info)->stmt;
|
||||||
|
- gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
|
||||||
|
+ gcc_assert (ccompares.length () != 0);
|
||||||
|
|
||||||
|
tree indx_before_incr, indx_after_incr;
|
||||||
|
poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
|
||||||
|
@@ -4569,37 +4580,35 @@ vect_create_epilog_for_reduction (stmt_v
|
||||||
|
add_phi_arg (as_a <gphi *> (new_phi), vec_zero,
|
||||||
|
loop_preheader_edge (loop), UNKNOWN_LOCATION);
|
||||||
|
|
||||||
|
- /* Now take the condition from the loops original cond_expr
|
||||||
|
- (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for
|
||||||
|
+ /* Now take the condition from the loops original cond_exprs
|
||||||
|
+ and produce a new cond_exprs (INDEX_COND_EXPR) which for
|
||||||
|
every match uses values from the induction variable
|
||||||
|
(INDEX_BEFORE_INCR) otherwise uses values from the phi node
|
||||||
|
(NEW_PHI_TREE).
|
||||||
|
Finally, we update the phi (NEW_PHI_TREE) to take the value of
|
||||||
|
the new cond_expr (INDEX_COND_EXPR). */
|
||||||
|
-
|
||||||
|
- /* Duplicate the condition from vec_stmt. */
|
||||||
|
- tree ccompare = unshare_expr (gimple_assign_rhs1 (vec_stmt));
|
||||||
|
-
|
||||||
|
- /* Create a conditional, where the condition is taken from vec_stmt
|
||||||
|
- (CCOMPARE). The then and else values mirror the main VEC_COND_EXPR:
|
||||||
|
- the reduction phi corresponds to NEW_PHI_TREE and the new values
|
||||||
|
- correspond to INDEX_BEFORE_INCR. */
|
||||||
|
- gcc_assert (STMT_VINFO_REDUC_IDX (cond_info) >= 1);
|
||||||
|
- tree index_cond_expr;
|
||||||
|
- if (STMT_VINFO_REDUC_IDX (cond_info) == 2)
|
||||||
|
- index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
|
||||||
|
- ccompare, indx_before_incr, new_phi_tree);
|
||||||
|
- else
|
||||||
|
- index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
|
||||||
|
- ccompare, new_phi_tree, indx_before_incr);
|
||||||
|
- induction_index = make_ssa_name (cr_index_vector_type);
|
||||||
|
- gimple *index_condition = gimple_build_assign (induction_index,
|
||||||
|
- index_cond_expr);
|
||||||
|
- gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT);
|
||||||
|
- stmt_vec_info index_vec_info = loop_vinfo->add_stmt (index_condition);
|
||||||
|
+ gimple_seq stmts = NULL;
|
||||||
|
+ for (int i = ccompares.length () - 1; i != -1; --i)
|
||||||
|
+ {
|
||||||
|
+ tree ccompare = ccompares[i].first;
|
||||||
|
+ if (ccompares[i].second)
|
||||||
|
+ new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR,
|
||||||
|
+ cr_index_vector_type,
|
||||||
|
+ ccompare,
|
||||||
|
+ indx_before_incr, new_phi_tree);
|
||||||
|
+ else
|
||||||
|
+ new_phi_tree = gimple_build (&stmts, VEC_COND_EXPR,
|
||||||
|
+ cr_index_vector_type,
|
||||||
|
+ ccompare,
|
||||||
|
+ new_phi_tree, indx_before_incr);
|
||||||
|
+ }
|
||||||
|
+ gsi_insert_seq_before (&incr_gsi, stmts, GSI_SAME_STMT);
|
||||||
|
+ stmt_vec_info index_vec_info
|
||||||
|
+ = loop_vinfo->add_stmt (SSA_NAME_DEF_STMT (new_phi_tree));
|
||||||
|
STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
|
||||||
|
|
||||||
|
/* Update the phi with the vec cond. */
|
||||||
|
+ induction_index = new_phi_tree;
|
||||||
|
add_phi_arg (as_a <gphi *> (new_phi), induction_index,
|
||||||
|
loop_latch_edge (loop), UNKNOWN_LOCATION);
|
||||||
|
}
|
||||||
@ -1,3 +1,9 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-re-PR-tree-optimization-92461-ICE-verify_ssa-failed-.patch
|
||||||
|
830d1b18526dd1f085e8a2e1467a6dde18fc6434
|
||||||
|
|
||||||
diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr92461.c b/gcc/testsuite/gcc.dg/torture/pr92461.c
|
diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr92461.c b/gcc/testsuite/gcc.dg/torture/pr92461.c
|
||||||
--- a/gcc/testsuite/gcc.dg/torture/pr92461.c 1970-01-01 08:00:00.000000000 +0800
|
--- a/gcc/testsuite/gcc.dg/torture/pr92461.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
+++ b/gcc/testsuite/gcc.dg/torture/pr92461.c 2020-07-28 19:48:09.324000000 +0800
|
+++ b/gcc/testsuite/gcc.dg/torture/pr92461.c 2020-07-28 19:48:09.324000000 +0800
|
||||||
|
|||||||
@ -1,3 +1,9 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-tree-optimization-96698-fix-ICE-when-vectorizing-nes.patch
|
||||||
|
2130efe6ac7beba72d289e3dd145daa10aeaed54
|
||||||
|
|
||||||
diff -uprN a/gcc/testsuite/gcc.dg/vect/pr96698.c b/gcc/testsuite/gcc.dg/vect/pr96698.c
|
diff -uprN a/gcc/testsuite/gcc.dg/vect/pr96698.c b/gcc/testsuite/gcc.dg/vect/pr96698.c
|
||||||
--- a/gcc/testsuite/gcc.dg/vect/pr96698.c 1970-01-01 08:00:00.000000000 +0800
|
--- a/gcc/testsuite/gcc.dg/vect/pr96698.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
+++ b/gcc/testsuite/gcc.dg/vect/pr96698.c 2020-08-27 17:53:24.396000000 +0800
|
+++ b/gcc/testsuite/gcc.dg/vect/pr96698.c 2020-08-27 17:53:24.396000000 +0800
|
||||||
|
|||||||
152
fix-PR-92351-When-peeling-for-alignment.patch
Normal file
152
fix-PR-92351-When-peeling-for-alignment.patch
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-vect-PR-92351-When-peeling-for-alignment-make-alignm.patch
|
||||||
|
4e9d58d16767b1bc686f0c4b3bd2da25dc71e8f3
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-2-epilogues.c b/gcc/testsuite/gcc.dg/vect/vect-peel-2-epilogues.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..c06fa442faf
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-2-epilogues.c
|
||||||
|
@@ -0,0 +1,3 @@
|
||||||
|
+/* { dg-require-effective-target vect_int } */
|
||||||
|
+
|
||||||
|
+#include "vect-peel-2-src.c"
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-2-src.c b/gcc/testsuite/gcc.dg/vect/vect-peel-2-src.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..f6fc134c870
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-2-src.c
|
||||||
|
@@ -0,0 +1,48 @@
|
||||||
|
+#include <stdarg.h>
|
||||||
|
+#include "tree-vect.h"
|
||||||
|
+
|
||||||
|
+#define N 128
|
||||||
|
+
|
||||||
|
+/* unaligned store. */
|
||||||
|
+
|
||||||
|
+int ib[N+7];
|
||||||
|
+
|
||||||
|
+__attribute__ ((noinline))
|
||||||
|
+int main1 ()
|
||||||
|
+{
|
||||||
|
+ int i;
|
||||||
|
+ int ia[N+1];
|
||||||
|
+
|
||||||
|
+ /* The store is aligned and the loads are misaligned with the same
|
||||||
|
+ misalignment. Cost model is disabled. If misaligned stores are supported,
|
||||||
|
+ we peel according to the loads to align them. */
|
||||||
|
+ for (i = 0; i <= N; i++)
|
||||||
|
+ {
|
||||||
|
+ ia[i] = ib[i+2] + ib[i+6];
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* check results: */
|
||||||
|
+ for (i = 1; i <= N; i++)
|
||||||
|
+ {
|
||||||
|
+ if (ia[i] != ib[i+2] + ib[i+6])
|
||||||
|
+ abort ();
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int main (void)
|
||||||
|
+{
|
||||||
|
+ int i;
|
||||||
|
+
|
||||||
|
+ check_vect ();
|
||||||
|
+
|
||||||
|
+ for (i = 0; i <= N+6; i++)
|
||||||
|
+ {
|
||||||
|
+ asm volatile ("" : "+r" (i));
|
||||||
|
+ ib[i] = i;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return main1 ();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-2.c b/gcc/testsuite/gcc.dg/vect/vect-peel-2.c
|
||||||
|
index b6061c3b855..65e70bd4417 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/vect-peel-2.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-peel-2.c
|
||||||
|
@@ -1,52 +1,8 @@
|
||||||
|
/* { dg-require-effective-target vect_int } */
|
||||||
|
+/* Disabling epilogues until we find a better way to deal with scans. */
|
||||||
|
+/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
||||||
|
|
||||||
|
-#include <stdarg.h>
|
||||||
|
-#include "tree-vect.h"
|
||||||
|
-
|
||||||
|
-#define N 128
|
||||||
|
-
|
||||||
|
-/* unaligned store. */
|
||||||
|
-
|
||||||
|
-int ib[N+7];
|
||||||
|
-
|
||||||
|
-__attribute__ ((noinline))
|
||||||
|
-int main1 ()
|
||||||
|
-{
|
||||||
|
- int i;
|
||||||
|
- int ia[N+1];
|
||||||
|
-
|
||||||
|
- /* The store is aligned and the loads are misaligned with the same
|
||||||
|
- misalignment. Cost model is disabled. If misaligned stores are supported,
|
||||||
|
- we peel according to the loads to align them. */
|
||||||
|
- for (i = 0; i <= N; i++)
|
||||||
|
- {
|
||||||
|
- ia[i] = ib[i+2] + ib[i+6];
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- /* check results: */
|
||||||
|
- for (i = 1; i <= N; i++)
|
||||||
|
- {
|
||||||
|
- if (ia[i] != ib[i+2] + ib[i+6])
|
||||||
|
- abort ();
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return 0;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-int main (void)
|
||||||
|
-{
|
||||||
|
- int i;
|
||||||
|
-
|
||||||
|
- check_vect ();
|
||||||
|
-
|
||||||
|
- for (i = 0; i <= N+6; i++)
|
||||||
|
- {
|
||||||
|
- asm volatile ("" : "+r" (i));
|
||||||
|
- ib[i] = i;
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- return main1 ();
|
||||||
|
-}
|
||||||
|
+#include "vect-peel-2-src.c"
|
||||||
|
|
||||||
|
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||||
|
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { { vect_element_align } && { vect_aligned_arrays } } } } } */
|
||||||
|
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
|
||||||
|
index 36639b697f1..88f14e73d65 100644
|
||||||
|
--- a/gcc/tree-vect-data-refs.c
|
||||||
|
+++ b/gcc/tree-vect-data-refs.c
|
||||||
|
@@ -938,6 +938,18 @@ vect_compute_data_ref_alignment (dr_vec_info *dr_info)
|
||||||
|
= exact_div (vect_calculate_target_alignment (dr_info), BITS_PER_UNIT);
|
||||||
|
DR_TARGET_ALIGNMENT (dr_info) = vector_alignment;
|
||||||
|
|
||||||
|
+ /* If the main loop has peeled for alignment we have no way of knowing
|
||||||
|
+ whether the data accesses in the epilogues are aligned. We can't at
|
||||||
|
+ compile time answer the question whether we have entered the main loop or
|
||||||
|
+ not. Fixes PR 92351. */
|
||||||
|
+ if (loop_vinfo)
|
||||||
|
+ {
|
||||||
|
+ loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
|
||||||
|
+ if (orig_loop_vinfo
|
||||||
|
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
|
||||||
|
+ return;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
unsigned HOST_WIDE_INT vect_align_c;
|
||||||
|
if (!vector_alignment.is_constant (&vect_align_c))
|
||||||
|
return;
|
||||||
30
fix-addlosymdi-ICE-in-pass-reload.patch
Normal file
30
fix-addlosymdi-ICE-in-pass-reload.patch
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
diff -uprN a/gcc/lra.c b/gcc/lra.c
|
||||||
|
--- a/gcc/lra.c 2020-12-14 15:26:36.331633230 +0800
|
||||||
|
+++ b/gcc/lra.c 2020-12-15 18:56:33.699633230 +0800
|
||||||
|
@@ -507,6 +507,26 @@ lra_emit_move (rtx x, rtx y)
|
||||||
|
data. */
|
||||||
|
if (old != max_reg_num ())
|
||||||
|
expand_reg_data (old);
|
||||||
|
+ while (insn != NULL)
|
||||||
|
+ {
|
||||||
|
+ if (GET_CODE (PATTERN (insn)) == SET
|
||||||
|
+ && GET_CODE (SET_SRC (PATTERN (insn))) == LO_SUM
|
||||||
|
+ && GET_CODE (SET_DEST (PATTERN (insn))) == REG
|
||||||
|
+ && strcmp (insn_data[recog_memoized (insn)].name,
|
||||||
|
+ "add_losym_di") == 0)
|
||||||
|
+ {
|
||||||
|
+ rtx add_losym_dest = SET_DEST (PATTERN (insn));
|
||||||
|
+ for (int i = (int) max_reg_num () - 1; i >= old; i--)
|
||||||
|
+ {
|
||||||
|
+ if (regno_reg_rtx[i] == add_losym_dest)
|
||||||
|
+ {
|
||||||
|
+ setup_reg_classes (i, GENERAL_REGS,
|
||||||
|
+ NO_REGS, GENERAL_REGS);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ insn = PREV_INSN (insn);
|
||||||
|
+ }
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
lra_emit_add (x, XEXP (y, 0), XEXP (y, 1));
|
||||||
115
fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch
Normal file
115
fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-aarch64-Fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch:
|
||||||
|
91d80cf4bd2827dd9c40fe6a7c719c909d79083d
|
||||||
|
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.target/aarch64/pr96757.c b/gcc/testsuite/gcc.target/aarch64/pr96757.c
|
||||||
|
--- a/gcc/testsuite/gcc.target/aarch64/pr96757.c 1969-12-31 19:00:00.000000000 -0500
|
||||||
|
+++ b/gcc/testsuite/gcc.target/aarch64/pr96757.c 2020-10-12 08:32:12.192000000 -0400
|
||||||
|
@@ -0,0 +1,23 @@
|
||||||
|
+/* PR target/96757 */
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3" } */
|
||||||
|
+
|
||||||
|
+short
|
||||||
|
+fun1(short i, short j)
|
||||||
|
+{
|
||||||
|
+ return i * j;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+fun(int a, int b, int c)
|
||||||
|
+{
|
||||||
|
+ int *v, z, k, m;
|
||||||
|
+ short f, d;
|
||||||
|
+ for (int i=0; i<c; i++)
|
||||||
|
+ {
|
||||||
|
+ f= 4 <= d;
|
||||||
|
+ k= a > m;
|
||||||
|
+ z = f > k;
|
||||||
|
+ *v += fun1(z,b);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
diff -Nurp a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
|
||||||
|
--- a/gcc/tree-vect-patterns.c 2020-10-12 08:05:18.924000000 -0400
|
||||||
|
+++ b/gcc/tree-vect-patterns.c 2020-10-12 08:50:56.996000000 -0400
|
||||||
|
@@ -3917,6 +3917,8 @@ vect_recog_mask_conversion_pattern (stmt
|
||||||
|
tree vectype1, vectype2;
|
||||||
|
stmt_vec_info pattern_stmt_info;
|
||||||
|
vec_info *vinfo = stmt_vinfo->vinfo;
|
||||||
|
+ tree rhs1_op0 = NULL_TREE, rhs1_op1 = NULL_TREE;
|
||||||
|
+ tree rhs1_op0_type = NULL_TREE, rhs1_op1_type = NULL_TREE;
|
||||||
|
|
||||||
|
/* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
|
||||||
|
if (is_gimple_call (last_stmt)
|
||||||
|
@@ -4016,9 +4018,37 @@ vect_recog_mask_conversion_pattern (stmt
|
||||||
|
|
||||||
|
it is better for b1 and b2 to use the mask type associated
|
||||||
|
with int elements rather bool (byte) elements. */
|
||||||
|
- rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
|
||||||
|
- if (!rhs1_type)
|
||||||
|
- rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
|
||||||
|
+ rhs1_op0 = TREE_OPERAND (rhs1, 0);
|
||||||
|
+ rhs1_op1 = TREE_OPERAND (rhs1, 1);
|
||||||
|
+ if (!rhs1_op0 || !rhs1_op1)
|
||||||
|
+ return NULL;
|
||||||
|
+ rhs1_op0_type = search_type_for_mask (rhs1_op0, vinfo);
|
||||||
|
+ rhs1_op1_type = search_type_for_mask (rhs1_op1, vinfo);
|
||||||
|
+
|
||||||
|
+ if (!rhs1_op0_type)
|
||||||
|
+ rhs1_type = TREE_TYPE (rhs1_op0);
|
||||||
|
+ else if (!rhs1_op1_type)
|
||||||
|
+ rhs1_type = TREE_TYPE (rhs1_op1);
|
||||||
|
+ else if (TYPE_PRECISION (rhs1_op0_type)
|
||||||
|
+ != TYPE_PRECISION (rhs1_op1_type))
|
||||||
|
+ {
|
||||||
|
+ int tmp0 = (int) TYPE_PRECISION (rhs1_op0_type)
|
||||||
|
+ - (int) TYPE_PRECISION (TREE_TYPE (lhs));
|
||||||
|
+ int tmp1 = (int) TYPE_PRECISION (rhs1_op1_type)
|
||||||
|
+ - (int) TYPE_PRECISION (TREE_TYPE (lhs));
|
||||||
|
+ if ((tmp0 > 0 && tmp1 > 0) || (tmp0 < 0 && tmp1 < 0))
|
||||||
|
+ {
|
||||||
|
+ if (abs (tmp0) > abs (tmp1))
|
||||||
|
+ rhs1_type = rhs1_op1_type;
|
||||||
|
+ else
|
||||||
|
+ rhs1_type = rhs1_op0_type;
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ rhs1_type = build_nonstandard_integer_type
|
||||||
|
+ (TYPE_PRECISION (TREE_TYPE (lhs)), 1);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ rhs1_type = rhs1_op0_type;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return NULL;
|
||||||
|
@@ -4036,8 +4066,8 @@ vect_recog_mask_conversion_pattern (stmt
|
||||||
|
name from the outset. */
|
||||||
|
if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
|
||||||
|
TYPE_VECTOR_SUBPARTS (vectype2))
|
||||||
|
- && (TREE_CODE (rhs1) == SSA_NAME
|
||||||
|
- || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
|
||||||
|
+ && !rhs1_op0_type
|
||||||
|
+ && !rhs1_op1_type)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
/* If rhs1 is invariant and we can promote it leave the COND_EXPR
|
||||||
|
@@ -4069,7 +4099,16 @@ vect_recog_mask_conversion_pattern (stmt
|
||||||
|
if (TREE_CODE (rhs1) != SSA_NAME)
|
||||||
|
{
|
||||||
|
tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
|
||||||
|
- pattern_stmt = gimple_build_assign (tmp, rhs1);
|
||||||
|
+ if (rhs1_op0_type
|
||||||
|
+ && TYPE_PRECISION (rhs1_op0_type) != TYPE_PRECISION (rhs1_type))
|
||||||
|
+ rhs1_op0 = build_mask_conversion (rhs1_op0,
|
||||||
|
+ vectype2, stmt_vinfo);
|
||||||
|
+ if (rhs1_op1_type
|
||||||
|
+ && TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION (rhs1_type))
|
||||||
|
+ rhs1_op1 = build_mask_conversion (rhs1_op1,
|
||||||
|
+ vectype2, stmt_vinfo);
|
||||||
|
+ pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
|
||||||
|
+ rhs1_op0, rhs1_op1);
|
||||||
|
rhs1 = tmp;
|
||||||
|
append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
|
||||||
|
}
|
||||||
301
fix-avx512vl-vcvttpd2dq-2-fail.patch
Normal file
301
fix-avx512vl-vcvttpd2dq-2-fail.patch
Normal file
@ -0,0 +1,301 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch
|
||||||
|
946732df902dbb23dd44abe97fea41e154e6e5f9
|
||||||
|
|
||||||
|
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
|
||||||
|
index 3ce22395c65..12d6dc0cb7e 100644
|
||||||
|
--- a/gcc/config/i386/sse.md
|
||||||
|
+++ b/gcc/config/i386/sse.md
|
||||||
|
@@ -5927,16 +5927,16 @@
|
||||||
|
(set_attr "btver2_decode" "vector")
|
||||||
|
(set_attr "mode" "OI")])
|
||||||
|
|
||||||
|
-(define_insn "sse2_cvtpd2dq<mask_name>"
|
||||||
|
+(define_insn "sse2_cvtpd2dq"
|
||||||
|
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||||
|
(vec_concat:V4SI
|
||||||
|
(unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
|
||||||
|
UNSPEC_FIX_NOTRUNC)
|
||||||
|
(const_vector:V2SI [(const_int 0) (const_int 0)])))]
|
||||||
|
- "TARGET_SSE2 && <mask_avx512vl_condition>"
|
||||||
|
+ "TARGET_SSE2"
|
||||||
|
{
|
||||||
|
if (TARGET_AVX)
|
||||||
|
- return "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
|
||||||
|
+ return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
|
||||||
|
else
|
||||||
|
return "cvtpd2dq\t{%1, %0|%0, %1}";
|
||||||
|
}
|
||||||
|
@@ -5949,6 +5949,38 @@
|
||||||
|
(set_attr "athlon_decode" "vector")
|
||||||
|
(set_attr "bdver1_decode" "double")])
|
||||||
|
|
||||||
|
+(define_insn "sse2_cvtpd2dq_mask"
|
||||||
|
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||||
|
+ (vec_concat:V4SI
|
||||||
|
+ (vec_merge:V2SI
|
||||||
|
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
|
||||||
|
+ UNSPEC_FIX_NOTRUNC)
|
||||||
|
+ (vec_select:V2SI
|
||||||
|
+ (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
|
||||||
|
+ (parallel [(const_int 0) (const_int 1)]))
|
||||||
|
+ (match_operand:QI 3 "register_operand" "Yk"))
|
||||||
|
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
|
||||||
|
+ "TARGET_AVX512VL"
|
||||||
|
+ "vcvtpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
|
||||||
|
+ [(set_attr "type" "ssecvt")
|
||||||
|
+ (set_attr "prefix" "evex")
|
||||||
|
+ (set_attr "mode" "TI")])
|
||||||
|
+
|
||||||
|
+(define_insn "*sse2_cvtpd2dq_mask_1"
|
||||||
|
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||||
|
+ (vec_concat:V4SI
|
||||||
|
+ (vec_merge:V2SI
|
||||||
|
+ (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
|
||||||
|
+ UNSPEC_FIX_NOTRUNC)
|
||||||
|
+ (const_vector:V2SI [(const_int 0) (const_int 0)])
|
||||||
|
+ (match_operand:QI 2 "register_operand" "Yk"))
|
||||||
|
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
|
||||||
|
+ "TARGET_AVX512VL"
|
||||||
|
+ "vcvtpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
|
||||||
|
+ [(set_attr "type" "ssecvt")
|
||||||
|
+ (set_attr "prefix" "evex")
|
||||||
|
+ (set_attr "mode" "TI")])
|
||||||
|
+
|
||||||
|
;; For ufix_notrunc* insn patterns
|
||||||
|
(define_mode_attr pd2udqsuff
|
||||||
|
[(V8DF "") (V4DF "{y}")])
|
||||||
|
@@ -5964,15 +5996,49 @@
|
||||||
|
(set_attr "prefix" "evex")
|
||||||
|
(set_attr "mode" "<sseinsnmode>")])
|
||||||
|
|
||||||
|
-(define_insn "ufix_notruncv2dfv2si2<mask_name>"
|
||||||
|
+(define_insn "ufix_notruncv2dfv2si2"
|
||||||
|
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||||
|
(vec_concat:V4SI
|
||||||
|
(unspec:V2SI
|
||||||
|
[(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
|
||||||
|
- UNSPEC_UNSIGNED_FIX_NOTRUNC)
|
||||||
|
+ UNSPEC_UNSIGNED_FIX_NOTRUNC)
|
||||||
|
(const_vector:V2SI [(const_int 0) (const_int 0)])))]
|
||||||
|
"TARGET_AVX512VL"
|
||||||
|
- "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
|
||||||
|
+ "vcvtpd2udq{x}\t{%1, %0|%0, %1}"
|
||||||
|
+ [(set_attr "type" "ssecvt")
|
||||||
|
+ (set_attr "prefix" "evex")
|
||||||
|
+ (set_attr "mode" "TI")])
|
||||||
|
+
|
||||||
|
+(define_insn "ufix_notruncv2dfv2si2_mask"
|
||||||
|
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||||
|
+ (vec_concat:V4SI
|
||||||
|
+ (vec_merge:V2SI
|
||||||
|
+ (unspec:V2SI
|
||||||
|
+ [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
|
||||||
|
+ UNSPEC_UNSIGNED_FIX_NOTRUNC)
|
||||||
|
+ (vec_select:V2SI
|
||||||
|
+ (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
|
||||||
|
+ (parallel [(const_int 0) (const_int 1)]))
|
||||||
|
+ (match_operand:QI 3 "register_operand" "Yk"))
|
||||||
|
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
|
||||||
|
+ "TARGET_AVX512VL"
|
||||||
|
+ "vcvtpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
|
||||||
|
+ [(set_attr "type" "ssecvt")
|
||||||
|
+ (set_attr "prefix" "evex")
|
||||||
|
+ (set_attr "mode" "TI")])
|
||||||
|
+
|
||||||
|
+(define_insn "*ufix_notruncv2dfv2si2_mask_1"
|
||||||
|
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||||
|
+ (vec_concat:V4SI
|
||||||
|
+ (vec_merge:V2SI
|
||||||
|
+ (unspec:V2SI
|
||||||
|
+ [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
|
||||||
|
+ UNSPEC_UNSIGNED_FIX_NOTRUNC)
|
||||||
|
+ (const_vector:V2SI [(const_int 0) (const_int 0)])
|
||||||
|
+ (match_operand:QI 2 "register_operand" "Yk"))
|
||||||
|
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
|
||||||
|
+ "TARGET_AVX512VL"
|
||||||
|
+ "vcvtpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
|
||||||
|
[(set_attr "type" "ssecvt")
|
||||||
|
(set_attr "prefix" "evex")
|
||||||
|
(set_attr "mode" "TI")])
|
||||||
|
@@ -5987,13 +6053,43 @@
|
||||||
|
(set_attr "prefix" "evex")
|
||||||
|
(set_attr "mode" "OI")])
|
||||||
|
|
||||||
|
-(define_insn "ufix_truncv2dfv2si2<mask_name>"
|
||||||
|
+(define_insn "ufix_truncv2dfv2si2"
|
||||||
|
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||||
|
(vec_concat:V4SI
|
||||||
|
(unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
|
||||||
|
(const_vector:V2SI [(const_int 0) (const_int 0)])))]
|
||||||
|
"TARGET_AVX512VL"
|
||||||
|
- "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
|
||||||
|
+ "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
|
||||||
|
+ [(set_attr "type" "ssecvt")
|
||||||
|
+ (set_attr "prefix" "evex")
|
||||||
|
+ (set_attr "mode" "TI")])
|
||||||
|
+
|
||||||
|
+(define_insn "ufix_truncv2dfv2si2_mask"
|
||||||
|
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||||
|
+ (vec_concat:V4SI
|
||||||
|
+ (vec_merge:V2SI
|
||||||
|
+ (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
|
||||||
|
+ (vec_select:V2SI
|
||||||
|
+ (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
|
||||||
|
+ (parallel [(const_int 0) (const_int 1)]))
|
||||||
|
+ (match_operand:QI 3 "register_operand" "Yk"))
|
||||||
|
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
|
||||||
|
+ "TARGET_AVX512VL"
|
||||||
|
+ "vcvttpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
|
||||||
|
+ [(set_attr "type" "ssecvt")
|
||||||
|
+ (set_attr "prefix" "evex")
|
||||||
|
+ (set_attr "mode" "TI")])
|
||||||
|
+
|
||||||
|
+(define_insn "*ufix_truncv2dfv2si2_mask_1"
|
||||||
|
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||||
|
+ (vec_concat:V4SI
|
||||||
|
+ (vec_merge:V2SI
|
||||||
|
+ (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
|
||||||
|
+ (const_vector:V2SI [(const_int 0) (const_int 0)])
|
||||||
|
+ (match_operand:QI 2 "register_operand" "Yk"))
|
||||||
|
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
|
||||||
|
+ "TARGET_AVX512VL"
|
||||||
|
+ "vcvttpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
|
||||||
|
[(set_attr "type" "ssecvt")
|
||||||
|
(set_attr "prefix" "evex")
|
||||||
|
(set_attr "mode" "TI")])
|
||||||
|
@@ -6138,15 +6234,15 @@
|
||||||
|
"TARGET_AVX"
|
||||||
|
"operands[2] = CONST0_RTX (V4SImode);")
|
||||||
|
|
||||||
|
-(define_insn "sse2_cvttpd2dq<mask_name>"
|
||||||
|
+(define_insn "sse2_cvttpd2dq"
|
||||||
|
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||||
|
(vec_concat:V4SI
|
||||||
|
(fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
|
||||||
|
(const_vector:V2SI [(const_int 0) (const_int 0)])))]
|
||||||
|
- "TARGET_SSE2 && <mask_avx512vl_condition>"
|
||||||
|
+ "TARGET_SSE2"
|
||||||
|
{
|
||||||
|
if (TARGET_AVX)
|
||||||
|
- return "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
|
||||||
|
+ return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
|
||||||
|
else
|
||||||
|
return "cvttpd2dq\t{%1, %0|%0, %1}";
|
||||||
|
}
|
||||||
|
@@ -6157,6 +6253,36 @@
|
||||||
|
(set_attr "prefix" "maybe_vex")
|
||||||
|
(set_attr "mode" "TI")])
|
||||||
|
|
||||||
|
+(define_insn "sse2_cvttpd2dq_mask"
|
||||||
|
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||||
|
+ (vec_concat:V4SI
|
||||||
|
+ (vec_merge:V2SI
|
||||||
|
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
|
||||||
|
+ (vec_select:V2SI
|
||||||
|
+ (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
|
||||||
|
+ (parallel [(const_int 0) (const_int 1)]))
|
||||||
|
+ (match_operand:QI 3 "register_operand" "Yk"))
|
||||||
|
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
|
||||||
|
+ "TARGET_AVX512VL"
|
||||||
|
+ "vcvttpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
|
||||||
|
+ [(set_attr "type" "ssecvt")
|
||||||
|
+ (set_attr "prefix" "evex")
|
||||||
|
+ (set_attr "mode" "TI")])
|
||||||
|
+
|
||||||
|
+(define_insn "*sse2_cvttpd2dq_mask_1"
|
||||||
|
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||||
|
+ (vec_concat:V4SI
|
||||||
|
+ (vec_merge:V2SI
|
||||||
|
+ (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
|
||||||
|
+ (const_vector:V2SI [(const_int 0) (const_int 0)])
|
||||||
|
+ (match_operand:QI 2 "register_operand" "Yk"))
|
||||||
|
+ (const_vector:V2SI [(const_int 0) (const_int 0)])))]
|
||||||
|
+ "TARGET_AVX512VL"
|
||||||
|
+ "vcvttpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
|
||||||
|
+ [(set_attr "type" "ssecvt")
|
||||||
|
+ (set_attr "prefix" "evex")
|
||||||
|
+ (set_attr "mode" "TI")])
|
||||||
|
+
|
||||||
|
(define_insn "sse2_cvtsd2ss<round_name>"
|
||||||
|
[(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
|
||||||
|
(vec_merge:V4SF
|
||||||
|
@@ -6276,26 +6402,28 @@
|
||||||
|
|
||||||
|
(define_expand "sse2_cvtpd2ps_mask"
|
||||||
|
[(set (match_operand:V4SF 0 "register_operand")
|
||||||
|
- (vec_merge:V4SF
|
||||||
|
- (vec_concat:V4SF
|
||||||
|
+ (vec_concat:V4SF
|
||||||
|
+ (vec_merge:V2SF
|
||||||
|
(float_truncate:V2SF
|
||||||
|
(match_operand:V2DF 1 "vector_operand"))
|
||||||
|
- (match_dup 4))
|
||||||
|
- (match_operand:V4SF 2 "register_operand")
|
||||||
|
- (match_operand:QI 3 "register_operand")))]
|
||||||
|
+ (vec_select:V2SF
|
||||||
|
+ (match_operand:V4SF 2 "nonimm_or_0_operand")
|
||||||
|
+ (parallel [(const_int 0) (const_int 1)]))
|
||||||
|
+ (match_operand:QI 3 "register_operand"))
|
||||||
|
+ (match_dup 4)))]
|
||||||
|
"TARGET_SSE2"
|
||||||
|
"operands[4] = CONST0_RTX (V2SFmode);")
|
||||||
|
|
||||||
|
-(define_insn "*sse2_cvtpd2ps<mask_name>"
|
||||||
|
+(define_insn "*sse2_cvtpd2ps"
|
||||||
|
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||||
|
(vec_concat:V4SF
|
||||||
|
(float_truncate:V2SF
|
||||||
|
(match_operand:V2DF 1 "vector_operand" "vBm"))
|
||||||
|
- (match_operand:V2SF 2 "const0_operand")))]
|
||||||
|
- "TARGET_SSE2 && <mask_avx512vl_condition>"
|
||||||
|
+ (match_operand:V2SF 2 "const0_operand" "C")))]
|
||||||
|
+ "TARGET_SSE2"
|
||||||
|
{
|
||||||
|
if (TARGET_AVX)
|
||||||
|
- return "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}";
|
||||||
|
+ return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
|
||||||
|
else
|
||||||
|
return "cvtpd2ps\t{%1, %0|%0, %1}";
|
||||||
|
}
|
||||||
|
@@ -6307,6 +6435,38 @@
|
||||||
|
(set_attr "prefix" "maybe_vex")
|
||||||
|
(set_attr "mode" "V4SF")])
|
||||||
|
|
||||||
|
+(define_insn "*sse2_cvtpd2ps_mask"
|
||||||
|
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||||
|
+ (vec_concat:V4SF
|
||||||
|
+ (vec_merge:V2SF
|
||||||
|
+ (float_truncate:V2SF
|
||||||
|
+ (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
|
||||||
|
+ (vec_select:V2SF
|
||||||
|
+ (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
|
||||||
|
+ (parallel [(const_int 0) (const_int 1)]))
|
||||||
|
+ (match_operand:QI 3 "register_operand" "Yk"))
|
||||||
|
+ (match_operand:V2SF 4 "const0_operand" "C")))]
|
||||||
|
+ "TARGET_AVX512VL"
|
||||||
|
+ "vcvtpd2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
|
||||||
|
+ [(set_attr "type" "ssecvt")
|
||||||
|
+ (set_attr "prefix" "evex")
|
||||||
|
+ (set_attr "mode" "V4SF")])
|
||||||
|
+
|
||||||
|
+(define_insn "*sse2_cvtpd2ps_mask_1"
|
||||||
|
+ [(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||||
|
+ (vec_concat:V4SF
|
||||||
|
+ (vec_merge:V2SF
|
||||||
|
+ (float_truncate:V2SF
|
||||||
|
+ (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
|
||||||
|
+ (match_operand:V2SF 3 "const0_operand" "C")
|
||||||
|
+ (match_operand:QI 2 "register_operand" "Yk"))
|
||||||
|
+ (match_operand:V2SF 4 "const0_operand" "C")))]
|
||||||
|
+ "TARGET_AVX512VL"
|
||||||
|
+ "vcvtpd2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
|
||||||
|
+ [(set_attr "type" "ssecvt")
|
||||||
|
+ (set_attr "prefix" "evex")
|
||||||
|
+ (set_attr "mode" "V4SF")])
|
||||||
|
+
|
||||||
|
;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
|
||||||
|
(define_mode_attr sf2dfmode
|
||||||
|
[(V8DF "V8SF") (V4DF "V4SF")])
|
||||||
@ -1,3 +1,6 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
AArch64-Fix-cost-of-plus-.-const_int-C.patch:
|
AArch64-Fix-cost-of-plus-.-const_int-C.patch:
|
||||||
commit 835d50c66aa5bde2f354a6e63a2afa7d2f76a05a
|
commit 835d50c66aa5bde2f354a6e63a2afa7d2f76a05a
|
||||||
|
|
||||||
|
|||||||
928
fix-issue499-add-nop-convert.patch
Normal file
928
fix-issue499-add-nop-convert.patch
Normal file
@ -0,0 +1,928 @@
|
|||||||
|
This patch is a combination of the following 8 commits:
|
||||||
|
|
||||||
|
commit e944354ec05891474b0d204c6c239c04ee7b527b
|
||||||
|
Author: Robin Dapp <rdapp@linux.ibm.com>
|
||||||
|
Date: Mon Aug 26 10:18:24 2019 +0000
|
||||||
|
|
||||||
|
[PATCH 1/2] Allow folding all statements.
|
||||||
|
|
||||||
|
commit df7d46d925c7baca7bf9961aee900876d8aef225
|
||||||
|
Author: Robin Dapp <rdapp@linux.ibm.com>
|
||||||
|
Date: Mon Aug 26 10:24:44 2019 +0000
|
||||||
|
|
||||||
|
[PATCH 2/2] Add simplify rule for wrapped addition.
|
||||||
|
|
||||||
|
commit 6c14d008122fcee4157be79a60f8d6685869ad19
|
||||||
|
Author: Robin Dapp <rdapp@linux.ibm.com>
|
||||||
|
Date: Tue Aug 27 12:08:58 2019 +0000
|
||||||
|
|
||||||
|
re PR testsuite/91549 (gcc.dg/wrapped-binop-simplify.c fails starting with r274925)
|
||||||
|
|
||||||
|
commit 129bd066049f065e522990e63bb10ff92b3c018d
|
||||||
|
Author: Jakub Jelinek <jakub@redhat.com>
|
||||||
|
Date: Tue Dec 3 10:20:43 2019 +0100
|
||||||
|
|
||||||
|
re PR tree-optimization/92734 (Missing match.pd simplification done by fold_binary_loc on generic)
|
||||||
|
|
||||||
|
commit 526b4c716a340ee9464965e63eee2b9954fe21f1
|
||||||
|
Author: Jakub Jelinek <jakub@redhat.com>
|
||||||
|
Date: Wed Dec 4 10:38:48 2019 +0100
|
||||||
|
|
||||||
|
re PR tree-optimization/92734 (Missing match.pd simplification done by fold_binary_loc on generic)
|
||||||
|
|
||||||
|
commit 28fabd43d9d249134244eb9d7815917c7ae44b64
|
||||||
|
Author: Richard Biener <rguenther@suse.de>
|
||||||
|
Date: Fri Dec 6 10:25:08 2019 +0000
|
||||||
|
|
||||||
|
genmatch.c (enum tree_code): Remove CONVERT{0,1,2} and VIEW_CONVERT{0,1,2}.
|
||||||
|
|
||||||
|
commit e150da383346adc762bc904342f9877f2f071265
|
||||||
|
Author: Richard Biener <rguenther@suse.de>
|
||||||
|
Date: Fri Dec 6 11:44:27 2019 +0000
|
||||||
|
|
||||||
|
match.pd (nop_convert): Remove empty match.
|
||||||
|
|
||||||
|
commit 496f4f884716ae061f771a62e44868a32dbd502f
|
||||||
|
Author: Jakub Jelinek <jakub@redhat.com>
|
||||||
|
Date: Mon May 4 11:01:08 2020 +0200
|
||||||
|
|
||||||
|
match.pd: Decrease number of nop conversions around bitwise ops [PR94718]
|
||||||
|
|
||||||
|
diff -Nurp a/gcc/genmatch.c b/gcc/genmatch.c
|
||||||
|
--- a/gcc/genmatch.c 2020-03-12 19:07:21.000000000 +0800
|
||||||
|
+++ b/gcc/genmatch.c 2020-11-24 14:49:12.792000000 +0800
|
||||||
|
@@ -224,12 +224,6 @@ output_line_directive (FILE *f, location
|
||||||
|
#define DEFTREECODE(SYM, STRING, TYPE, NARGS) SYM,
|
||||||
|
enum tree_code {
|
||||||
|
#include "tree.def"
|
||||||
|
-CONVERT0,
|
||||||
|
-CONVERT1,
|
||||||
|
-CONVERT2,
|
||||||
|
-VIEW_CONVERT0,
|
||||||
|
-VIEW_CONVERT1,
|
||||||
|
-VIEW_CONVERT2,
|
||||||
|
MAX_TREE_CODES
|
||||||
|
};
|
||||||
|
#undef DEFTREECODE
|
||||||
|
@@ -695,11 +689,12 @@ struct expr : public operand
|
||||||
|
expr (id_base *operation_, location_t loc, bool is_commutative_ = false)
|
||||||
|
: operand (OP_EXPR, loc), operation (operation_),
|
||||||
|
ops (vNULL), expr_type (NULL), is_commutative (is_commutative_),
|
||||||
|
- is_generic (false), force_single_use (false) {}
|
||||||
|
+ is_generic (false), force_single_use (false), opt_grp (0) {}
|
||||||
|
expr (expr *e)
|
||||||
|
: operand (OP_EXPR, e->location), operation (e->operation),
|
||||||
|
ops (vNULL), expr_type (e->expr_type), is_commutative (e->is_commutative),
|
||||||
|
- is_generic (e->is_generic), force_single_use (e->force_single_use) {}
|
||||||
|
+ is_generic (e->is_generic), force_single_use (e->force_single_use),
|
||||||
|
+ opt_grp (e->opt_grp) {}
|
||||||
|
void append_op (operand *op) { ops.safe_push (op); }
|
||||||
|
/* The operator and its operands. */
|
||||||
|
id_base *operation;
|
||||||
|
@@ -714,6 +709,8 @@ struct expr : public operand
|
||||||
|
/* Whether pushing any stmt to the sequence should be conditional
|
||||||
|
on this expression having a single-use. */
|
||||||
|
bool force_single_use;
|
||||||
|
+ /* If non-zero, the group for optional handling. */
|
||||||
|
+ unsigned char opt_grp;
|
||||||
|
virtual void gen_transform (FILE *f, int, const char *, bool, int,
|
||||||
|
const char *, capture_info *,
|
||||||
|
dt_operand ** = 0, int = 0);
|
||||||
|
@@ -1079,18 +1076,17 @@ lower_commutative (simplify *s, vec<simp
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Strip conditional conversios using operator OPER from O and its
|
||||||
|
- children if STRIP, else replace them with an unconditional convert. */
|
||||||
|
+/* Strip conditional operations using group GRP from O and its
|
||||||
|
+ children if STRIP, else replace them with an unconditional operation. */
|
||||||
|
|
||||||
|
operand *
|
||||||
|
-lower_opt_convert (operand *o, enum tree_code oper,
|
||||||
|
- enum tree_code to_oper, bool strip)
|
||||||
|
+lower_opt (operand *o, unsigned char grp, bool strip)
|
||||||
|
{
|
||||||
|
if (capture *c = dyn_cast<capture *> (o))
|
||||||
|
{
|
||||||
|
if (c->what)
|
||||||
|
return new capture (c->location, c->where,
|
||||||
|
- lower_opt_convert (c->what, oper, to_oper, strip),
|
||||||
|
+ lower_opt (c->what, grp, strip),
|
||||||
|
c->value_match);
|
||||||
|
else
|
||||||
|
return c;
|
||||||
|
@@ -1100,36 +1096,34 @@ lower_opt_convert (operand *o, enum tree
|
||||||
|
if (!e)
|
||||||
|
return o;
|
||||||
|
|
||||||
|
- if (*e->operation == oper)
|
||||||
|
+ if (e->opt_grp == grp)
|
||||||
|
{
|
||||||
|
if (strip)
|
||||||
|
- return lower_opt_convert (e->ops[0], oper, to_oper, strip);
|
||||||
|
+ return lower_opt (e->ops[0], grp, strip);
|
||||||
|
|
||||||
|
expr *ne = new expr (e);
|
||||||
|
- ne->operation = (to_oper == CONVERT_EXPR
|
||||||
|
- ? get_operator ("CONVERT_EXPR")
|
||||||
|
- : get_operator ("VIEW_CONVERT_EXPR"));
|
||||||
|
- ne->append_op (lower_opt_convert (e->ops[0], oper, to_oper, strip));
|
||||||
|
+ ne->opt_grp = 0;
|
||||||
|
+ ne->append_op (lower_opt (e->ops[0], grp, strip));
|
||||||
|
return ne;
|
||||||
|
}
|
||||||
|
|
||||||
|
expr *ne = new expr (e);
|
||||||
|
for (unsigned i = 0; i < e->ops.length (); ++i)
|
||||||
|
- ne->append_op (lower_opt_convert (e->ops[i], oper, to_oper, strip));
|
||||||
|
+ ne->append_op (lower_opt (e->ops[i], grp, strip));
|
||||||
|
|
||||||
|
return ne;
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Determine whether O or its children uses the conditional conversion
|
||||||
|
- operator OPER. */
|
||||||
|
+/* Determine whether O or its children uses the conditional operation
|
||||||
|
+ group GRP. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
-has_opt_convert (operand *o, enum tree_code oper)
|
||||||
|
+has_opt (operand *o, unsigned char grp)
|
||||||
|
{
|
||||||
|
if (capture *c = dyn_cast<capture *> (o))
|
||||||
|
{
|
||||||
|
if (c->what)
|
||||||
|
- return has_opt_convert (c->what, oper);
|
||||||
|
+ return has_opt (c->what, grp);
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
@@ -1138,11 +1132,11 @@ has_opt_convert (operand *o, enum tree_c
|
||||||
|
if (!e)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
- if (*e->operation == oper)
|
||||||
|
+ if (e->opt_grp == grp)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < e->ops.length (); ++i)
|
||||||
|
- if (has_opt_convert (e->ops[i], oper))
|
||||||
|
+ if (has_opt (e->ops[i], grp))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
@@ -1152,34 +1146,24 @@ has_opt_convert (operand *o, enum tree_c
|
||||||
|
if required. */
|
||||||
|
|
||||||
|
static vec<operand *>
|
||||||
|
-lower_opt_convert (operand *o)
|
||||||
|
+lower_opt (operand *o)
|
||||||
|
{
|
||||||
|
vec<operand *> v1 = vNULL, v2;
|
||||||
|
|
||||||
|
v1.safe_push (o);
|
||||||
|
|
||||||
|
- enum tree_code opers[]
|
||||||
|
- = { CONVERT0, CONVERT_EXPR,
|
||||||
|
- CONVERT1, CONVERT_EXPR,
|
||||||
|
- CONVERT2, CONVERT_EXPR,
|
||||||
|
- VIEW_CONVERT0, VIEW_CONVERT_EXPR,
|
||||||
|
- VIEW_CONVERT1, VIEW_CONVERT_EXPR,
|
||||||
|
- VIEW_CONVERT2, VIEW_CONVERT_EXPR };
|
||||||
|
-
|
||||||
|
- /* Conditional converts are lowered to a pattern with the
|
||||||
|
- conversion and one without. The three different conditional
|
||||||
|
- convert codes are lowered separately. */
|
||||||
|
+ /* Conditional operations are lowered to a pattern with the
|
||||||
|
+ operation and one without. All different conditional operation
|
||||||
|
+ groups are lowered separately. */
|
||||||
|
|
||||||
|
- for (unsigned i = 0; i < sizeof (opers) / sizeof (enum tree_code); i += 2)
|
||||||
|
+ for (unsigned i = 1; i <= 10; ++i)
|
||||||
|
{
|
||||||
|
v2 = vNULL;
|
||||||
|
for (unsigned j = 0; j < v1.length (); ++j)
|
||||||
|
- if (has_opt_convert (v1[j], opers[i]))
|
||||||
|
+ if (has_opt (v1[j], i))
|
||||||
|
{
|
||||||
|
- v2.safe_push (lower_opt_convert (v1[j],
|
||||||
|
- opers[i], opers[i+1], false));
|
||||||
|
- v2.safe_push (lower_opt_convert (v1[j],
|
||||||
|
- opers[i], opers[i+1], true));
|
||||||
|
+ v2.safe_push (lower_opt (v1[j], i, false));
|
||||||
|
+ v2.safe_push (lower_opt (v1[j], i, true));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (v2 != vNULL)
|
||||||
|
@@ -1197,9 +1181,9 @@ lower_opt_convert (operand *o)
|
||||||
|
the resulting multiple patterns to SIMPLIFIERS. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
-lower_opt_convert (simplify *s, vec<simplify *>& simplifiers)
|
||||||
|
+lower_opt (simplify *s, vec<simplify *>& simplifiers)
|
||||||
|
{
|
||||||
|
- vec<operand *> matchers = lower_opt_convert (s->match);
|
||||||
|
+ vec<operand *> matchers = lower_opt (s->match);
|
||||||
|
for (unsigned i = 0; i < matchers.length (); ++i)
|
||||||
|
{
|
||||||
|
simplify *ns = new simplify (s->kind, s->id, matchers[i], s->result,
|
||||||
|
@@ -1543,7 +1527,7 @@ lower (vec<simplify *>& simplifiers, boo
|
||||||
|
{
|
||||||
|
auto_vec<simplify *> out_simplifiers;
|
||||||
|
for (unsigned i = 0; i < simplifiers.length (); ++i)
|
||||||
|
- lower_opt_convert (simplifiers[i], out_simplifiers);
|
||||||
|
+ lower_opt (simplifiers[i], out_simplifiers);
|
||||||
|
|
||||||
|
simplifiers.truncate (0);
|
||||||
|
for (unsigned i = 0; i < out_simplifiers.length (); ++i)
|
||||||
|
@@ -3927,7 +3911,7 @@ private:
|
||||||
|
|
||||||
|
unsigned get_internal_capture_id ();
|
||||||
|
|
||||||
|
- id_base *parse_operation ();
|
||||||
|
+ id_base *parse_operation (unsigned char &);
|
||||||
|
operand *parse_capture (operand *, bool);
|
||||||
|
operand *parse_expr ();
|
||||||
|
c_expr *parse_c_expr (cpp_ttype);
|
||||||
|
@@ -4118,47 +4102,36 @@ parser::record_operlist (location_t loc,
|
||||||
|
convert2? */
|
||||||
|
|
||||||
|
id_base *
|
||||||
|
-parser::parse_operation ()
|
||||||
|
+parser::parse_operation (unsigned char &opt_grp)
|
||||||
|
{
|
||||||
|
const cpp_token *id_tok = peek ();
|
||||||
|
+ char *alt_id = NULL;
|
||||||
|
const char *id = get_ident ();
|
||||||
|
const cpp_token *token = peek ();
|
||||||
|
- if (strcmp (id, "convert0") == 0)
|
||||||
|
- fatal_at (id_tok, "use 'convert?' here");
|
||||||
|
- else if (strcmp (id, "view_convert0") == 0)
|
||||||
|
- fatal_at (id_tok, "use 'view_convert?' here");
|
||||||
|
+ opt_grp = 0;
|
||||||
|
if (token->type == CPP_QUERY
|
||||||
|
&& !(token->flags & PREV_WHITE))
|
||||||
|
{
|
||||||
|
- if (strcmp (id, "convert") == 0)
|
||||||
|
- id = "convert0";
|
||||||
|
- else if (strcmp (id, "convert1") == 0)
|
||||||
|
- ;
|
||||||
|
- else if (strcmp (id, "convert2") == 0)
|
||||||
|
- ;
|
||||||
|
- else if (strcmp (id, "view_convert") == 0)
|
||||||
|
- id = "view_convert0";
|
||||||
|
- else if (strcmp (id, "view_convert1") == 0)
|
||||||
|
- ;
|
||||||
|
- else if (strcmp (id, "view_convert2") == 0)
|
||||||
|
- ;
|
||||||
|
- else
|
||||||
|
- fatal_at (id_tok, "non-convert operator conditionalized");
|
||||||
|
-
|
||||||
|
if (!parsing_match_operand)
|
||||||
|
fatal_at (id_tok, "conditional convert can only be used in "
|
||||||
|
"match expression");
|
||||||
|
+ if (ISDIGIT (id[strlen (id) - 1]))
|
||||||
|
+ {
|
||||||
|
+ opt_grp = id[strlen (id) - 1] - '0' + 1;
|
||||||
|
+ alt_id = xstrdup (id);
|
||||||
|
+ alt_id[strlen (id) - 1] = '\0';
|
||||||
|
+ if (opt_grp == 1)
|
||||||
|
+ fatal_at (id_tok, "use '%s?' here", alt_id);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ opt_grp = 1;
|
||||||
|
eat_token (CPP_QUERY);
|
||||||
|
}
|
||||||
|
- else if (strcmp (id, "convert1") == 0
|
||||||
|
- || strcmp (id, "convert2") == 0
|
||||||
|
- || strcmp (id, "view_convert1") == 0
|
||||||
|
- || strcmp (id, "view_convert2") == 0)
|
||||||
|
- fatal_at (id_tok, "expected '?' after conditional operator");
|
||||||
|
- id_base *op = get_operator (id);
|
||||||
|
+ id_base *op = get_operator (alt_id ? alt_id : id);
|
||||||
|
if (!op)
|
||||||
|
- fatal_at (id_tok, "unknown operator %s", id);
|
||||||
|
-
|
||||||
|
+ fatal_at (id_tok, "unknown operator %s", alt_id ? alt_id : id);
|
||||||
|
+ if (alt_id)
|
||||||
|
+ free (alt_id);
|
||||||
|
user_id *p = dyn_cast<user_id *> (op);
|
||||||
|
if (p && p->is_oper_list)
|
||||||
|
{
|
||||||
|
@@ -4214,7 +4187,8 @@ struct operand *
|
||||||
|
parser::parse_expr ()
|
||||||
|
{
|
||||||
|
const cpp_token *token = peek ();
|
||||||
|
- expr *e = new expr (parse_operation (), token->src_loc);
|
||||||
|
+ unsigned char opt_grp;
|
||||||
|
+ expr *e = new expr (parse_operation (opt_grp), token->src_loc);
|
||||||
|
token = peek ();
|
||||||
|
operand *op;
|
||||||
|
bool is_commutative = false;
|
||||||
|
@@ -4310,6 +4284,12 @@ parser::parse_expr ()
|
||||||
|
"commutative");
|
||||||
|
}
|
||||||
|
e->expr_type = expr_type;
|
||||||
|
+ if (opt_grp != 0)
|
||||||
|
+ {
|
||||||
|
+ if (e->ops.length () != 1)
|
||||||
|
+ fatal_at (token, "only unary operations can be conditional");
|
||||||
|
+ e->opt_grp = opt_grp;
|
||||||
|
+ }
|
||||||
|
return op;
|
||||||
|
}
|
||||||
|
else if (!(token->flags & PREV_WHITE))
|
||||||
|
@@ -4692,10 +4672,6 @@ parser::parse_for (location_t)
|
||||||
|
id_base *idb = get_operator (oper, true);
|
||||||
|
if (idb == NULL)
|
||||||
|
fatal_at (token, "no such operator '%s'", oper);
|
||||||
|
- if (*idb == CONVERT0 || *idb == CONVERT1 || *idb == CONVERT2
|
||||||
|
- || *idb == VIEW_CONVERT0 || *idb == VIEW_CONVERT1
|
||||||
|
- || *idb == VIEW_CONVERT2)
|
||||||
|
- fatal_at (token, "conditional operators cannot be used inside for");
|
||||||
|
|
||||||
|
if (arity == -1)
|
||||||
|
arity = idb->nargs;
|
||||||
|
@@ -5102,12 +5078,6 @@ main (int argc, char **argv)
|
||||||
|
add_operator (SYM, # SYM, # TYPE, NARGS);
|
||||||
|
#define END_OF_BASE_TREE_CODES
|
||||||
|
#include "tree.def"
|
||||||
|
-add_operator (CONVERT0, "convert0", "tcc_unary", 1);
|
||||||
|
-add_operator (CONVERT1, "convert1", "tcc_unary", 1);
|
||||||
|
-add_operator (CONVERT2, "convert2", "tcc_unary", 1);
|
||||||
|
-add_operator (VIEW_CONVERT0, "view_convert0", "tcc_unary", 1);
|
||||||
|
-add_operator (VIEW_CONVERT1, "view_convert1", "tcc_unary", 1);
|
||||||
|
-add_operator (VIEW_CONVERT2, "view_convert2", "tcc_unary", 1);
|
||||||
|
#undef END_OF_BASE_TREE_CODES
|
||||||
|
#undef DEFTREECODE
|
||||||
|
|
||||||
|
diff -Nurp a/gcc/gimple-loop-versioning.cc b/gcc/gimple-loop-versioning.cc
|
||||||
|
--- a/gcc/gimple-loop-versioning.cc 2020-03-12 19:07:21.000000000 +0800
|
||||||
|
+++ b/gcc/gimple-loop-versioning.cc 2020-11-24 14:49:12.792000000 +0800
|
||||||
|
@@ -1264,6 +1264,12 @@ loop_versioning::record_address_fragment
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
+ if (CONVERT_EXPR_CODE_P (code))
|
||||||
|
+ {
|
||||||
|
+ tree op1 = gimple_assign_rhs1 (assign);
|
||||||
|
+ address->terms[i].expr = strip_casts (op1);
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
diff -Nurp a/gcc/match.pd b/gcc/match.pd
|
||||||
|
--- a/gcc/match.pd 2020-11-24 14:54:43.576000000 +0800
|
||||||
|
+++ b/gcc/match.pd 2020-11-24 14:49:12.792000000 +0800
|
||||||
|
@@ -97,8 +97,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||||
|
(define_operator_list COND_TERNARY
|
||||||
|
IFN_COND_FMA IFN_COND_FMS IFN_COND_FNMA IFN_COND_FNMS)
|
||||||
|
|
||||||
|
-/* As opposed to convert?, this still creates a single pattern, so
|
||||||
|
- it is not a suitable replacement for convert? in all cases. */
|
||||||
|
+/* With nop_convert? combine convert? and view_convert? in one pattern
|
||||||
|
+ plus conditionalize on tree_nop_conversion_p conversions. */
|
||||||
|
(match (nop_convert @0)
|
||||||
|
(convert @0)
|
||||||
|
(if (tree_nop_conversion_p (type, TREE_TYPE (@0)))))
|
||||||
|
@@ -108,9 +108,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||||
|
&& known_eq (TYPE_VECTOR_SUBPARTS (type),
|
||||||
|
TYPE_VECTOR_SUBPARTS (TREE_TYPE (@0)))
|
||||||
|
&& tree_nop_conversion_p (TREE_TYPE (type), TREE_TYPE (TREE_TYPE (@0))))))
|
||||||
|
-/* This one has to be last, or it shadows the others. */
|
||||||
|
-(match (nop_convert @0)
|
||||||
|
- @0)
|
||||||
|
|
||||||
|
/* Transform likes of (char) ABS_EXPR <(int) x> into (char) ABSU_EXPR <x>
|
||||||
|
ABSU_EXPR returns unsigned absolute value of the operand and the operand
|
||||||
|
@@ -1260,7 +1257,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||||
|
We combine the above two cases by using a conditional convert. */
|
||||||
|
(for bitop (bit_and bit_ior bit_xor)
|
||||||
|
(simplify
|
||||||
|
- (bitop (convert @0) (convert? @1))
|
||||||
|
+ (bitop (convert@2 @0) (convert?@3 @1))
|
||||||
|
(if (((TREE_CODE (@1) == INTEGER_CST
|
||||||
|
&& INTEGRAL_TYPE_P (TREE_TYPE (@0))
|
||||||
|
&& int_fits_type_p (@1, TREE_TYPE (@0)))
|
||||||
|
@@ -1279,8 +1276,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||||
|
|| GET_MODE_CLASS (TYPE_MODE (type)) != MODE_INT
|
||||||
|
/* Or if the precision of TO is not the same as the precision
|
||||||
|
of its mode. */
|
||||||
|
- || !type_has_mode_precision_p (type)))
|
||||||
|
- (convert (bitop @0 (convert @1))))))
|
||||||
|
+ || !type_has_mode_precision_p (type)
|
||||||
|
+ /* In GIMPLE, getting rid of 2 conversions for one new results
|
||||||
|
+ in smaller IL. */
|
||||||
|
+ || (GIMPLE
|
||||||
|
+ && TREE_CODE (@1) != INTEGER_CST
|
||||||
|
+ && tree_nop_conversion_p (type, TREE_TYPE (@0))
|
||||||
|
+ && single_use (@2)
|
||||||
|
+ && single_use (@3))))
|
||||||
|
+ (convert (bitop @0 (convert @1)))))
|
||||||
|
+ /* In GIMPLE, getting rid of 2 conversions for one new results
|
||||||
|
+ in smaller IL. */
|
||||||
|
+ (simplify
|
||||||
|
+ (convert (bitop:cs@2 (nop_convert:s @0) @1))
|
||||||
|
+ (if (GIMPLE
|
||||||
|
+ && TREE_CODE (@1) != INTEGER_CST
|
||||||
|
+ && tree_nop_conversion_p (type, TREE_TYPE (@2))
|
||||||
|
+ && types_match (type, @0))
|
||||||
|
+ (bitop @0 (convert @1)))))
|
||||||
|
|
||||||
|
(for bitop (bit_and bit_ior)
|
||||||
|
rbitop (bit_ior bit_and)
|
||||||
|
@@ -1374,7 +1387,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||||
|
|
||||||
|
/* Convert - (~A) to A + 1. */
|
||||||
|
(simplify
|
||||||
|
- (negate (nop_convert (bit_not @0)))
|
||||||
|
+ (negate (nop_convert? (bit_not @0)))
|
||||||
|
(plus (view_convert @0) { build_each_one_cst (type); }))
|
||||||
|
|
||||||
|
/* Convert ~ (A - 1) or ~ (A + -1) to -A. */
|
||||||
|
@@ -1401,7 +1414,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||||
|
|
||||||
|
/* Otherwise prefer ~(X ^ Y) to ~X ^ Y as more canonical. */
|
||||||
|
(simplify
|
||||||
|
- (bit_xor:c (nop_convert:s (bit_not:s @0)) @1)
|
||||||
|
+ (bit_xor:c (nop_convert?:s (bit_not:s @0)) @1)
|
||||||
|
(if (tree_nop_conversion_p (type, TREE_TYPE (@0)))
|
||||||
|
(bit_not (bit_xor (view_convert @0) @1))))
|
||||||
|
|
||||||
|
@@ -1614,7 +1627,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||||
|
/* For equality, this is also true with wrapping overflow. */
|
||||||
|
(for op (eq ne)
|
||||||
|
(simplify
|
||||||
|
- (op:c (nop_convert@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1))
|
||||||
|
+ (op:c (nop_convert?@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1))
|
||||||
|
(if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
|
||||||
|
&& (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
|
||||||
|
|| TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
|
||||||
|
@@ -1623,7 +1636,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||||
|
&& tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
|
||||||
|
(op @0 { build_zero_cst (TREE_TYPE (@0)); })))
|
||||||
|
(simplify
|
||||||
|
- (op:c (nop_convert@3 (pointer_plus@2 (convert1? @0) @1)) (convert2? @0))
|
||||||
|
+ (op:c (nop_convert?@3 (pointer_plus@2 (convert1? @0) @1)) (convert2? @0))
|
||||||
|
(if (tree_nop_conversion_p (TREE_TYPE (@2), TREE_TYPE (@0))
|
||||||
|
&& tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@0))
|
||||||
|
&& (CONSTANT_CLASS_P (@1) || (single_use (@2) && single_use (@3))))
|
||||||
|
@@ -1866,7 +1879,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||||
|
|| !HONOR_SIGN_DEPENDENT_ROUNDING (type)))
|
||||||
|
(convert (negate @1))))
|
||||||
|
(simplify
|
||||||
|
- (negate (nop_convert (negate @1)))
|
||||||
|
+ (negate (nop_convert? (negate @1)))
|
||||||
|
(if (!TYPE_OVERFLOW_SANITIZED (type)
|
||||||
|
&& !TYPE_OVERFLOW_SANITIZED (TREE_TYPE (@1)))
|
||||||
|
(view_convert @1)))
|
||||||
|
@@ -1883,20 +1896,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||||
|
/* A - (A +- B) -> -+ B */
|
||||||
|
/* A +- (B -+ A) -> +- B */
|
||||||
|
(simplify
|
||||||
|
- (minus (plus:c @0 @1) @0)
|
||||||
|
- @1)
|
||||||
|
- (simplify
|
||||||
|
- (minus (minus @0 @1) @0)
|
||||||
|
- (negate @1))
|
||||||
|
+ (minus (nop_convert1? (plus:c (nop_convert2? @0) @1)) @0)
|
||||||
|
+ (view_convert @1))
|
||||||
|
(simplify
|
||||||
|
- (plus:c (minus @0 @1) @1)
|
||||||
|
- @0)
|
||||||
|
+ (minus (nop_convert1? (minus (nop_convert2? @0) @1)) @0)
|
||||||
|
+ (if (!ANY_INTEGRAL_TYPE_P (type)
|
||||||
|
+ || TYPE_OVERFLOW_WRAPS (type))
|
||||||
|
+ (negate (view_convert @1))
|
||||||
|
+ (view_convert (negate @1))))
|
||||||
|
+ (simplify
|
||||||
|
+ (plus:c (nop_convert1? (minus @0 (nop_convert2? @1))) @1)
|
||||||
|
+ (view_convert @0))
|
||||||
|
+ (simplify
|
||||||
|
+ (minus @0 (nop_convert1? (plus:c (nop_convert2? @0) @1)))
|
||||||
|
+ (if (!ANY_INTEGRAL_TYPE_P (type)
|
||||||
|
+ || TYPE_OVERFLOW_WRAPS (type))
|
||||||
|
+ (negate (view_convert @1))
|
||||||
|
+ (view_convert (negate @1))))
|
||||||
|
(simplify
|
||||||
|
- (minus @0 (plus:c @0 @1))
|
||||||
|
- (negate @1))
|
||||||
|
- (simplify
|
||||||
|
- (minus @0 (minus @0 @1))
|
||||||
|
- @1)
|
||||||
|
+ (minus @0 (nop_convert1? (minus (nop_convert2? @0) @1)))
|
||||||
|
+ (view_convert @1))
|
||||||
|
/* (A +- B) + (C - A) -> C +- B */
|
||||||
|
/* (A + B) - (A - C) -> B + C */
|
||||||
|
/* More cases are handled with comparisons. */
|
||||||
|
@@ -1922,7 +1941,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||||
|
(for inner_op (plus minus)
|
||||||
|
neg_inner_op (minus plus)
|
||||||
|
(simplify
|
||||||
|
- (outer_op (nop_convert (inner_op @0 CONSTANT_CLASS_P@1))
|
||||||
|
+ (outer_op (nop_convert? (inner_op @0 CONSTANT_CLASS_P@1))
|
||||||
|
CONSTANT_CLASS_P@2)
|
||||||
|
/* If one of the types wraps, use that one. */
|
||||||
|
(if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type))
|
||||||
|
@@ -1961,17 +1980,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||||
|
/* (CST1 - A) +- CST2 -> CST3 - A */
|
||||||
|
(for outer_op (plus minus)
|
||||||
|
(simplify
|
||||||
|
- (outer_op (minus CONSTANT_CLASS_P@1 @0) CONSTANT_CLASS_P@2)
|
||||||
|
- (with { tree cst = const_binop (outer_op, type, @1, @2); }
|
||||||
|
- (if (cst && !TREE_OVERFLOW (cst))
|
||||||
|
- (minus { cst; } @0)))))
|
||||||
|
-
|
||||||
|
- /* CST1 - (CST2 - A) -> CST3 + A */
|
||||||
|
- (simplify
|
||||||
|
- (minus CONSTANT_CLASS_P@1 (minus CONSTANT_CLASS_P@2 @0))
|
||||||
|
- (with { tree cst = const_binop (MINUS_EXPR, type, @1, @2); }
|
||||||
|
- (if (cst && !TREE_OVERFLOW (cst))
|
||||||
|
- (plus { cst; } @0))))
|
||||||
|
+ (outer_op (nop_convert? (minus CONSTANT_CLASS_P@1 @0)) CONSTANT_CLASS_P@2)
|
||||||
|
+ /* If one of the types wraps, use that one. */
|
||||||
|
+ (if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type))
|
||||||
|
+ /* If all 3 captures are CONSTANT_CLASS_P, punt, as we might recurse
|
||||||
|
+ forever if something doesn't simplify into a constant. */
|
||||||
|
+ (if (!CONSTANT_CLASS_P (@0))
|
||||||
|
+ (minus (outer_op (view_convert @1) @2) (view_convert @0)))
|
||||||
|
+ (if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
|
||||||
|
+ || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
|
||||||
|
+ (view_convert (minus (outer_op @1 (view_convert @2)) @0))
|
||||||
|
+ (if (types_match (type, @0))
|
||||||
|
+ (with { tree cst = const_binop (outer_op, type, @1, @2); }
|
||||||
|
+ (if (cst && !TREE_OVERFLOW (cst))
|
||||||
|
+ (minus { cst; } @0))))))))
|
||||||
|
+
|
||||||
|
+ /* CST1 - (CST2 - A) -> CST3 + A
|
||||||
|
+ Use view_convert because it is safe for vectors and equivalent for
|
||||||
|
+ scalars. */
|
||||||
|
+ (simplify
|
||||||
|
+ (minus CONSTANT_CLASS_P@1 (nop_convert? (minus CONSTANT_CLASS_P@2 @0)))
|
||||||
|
+ /* If one of the types wraps, use that one. */
|
||||||
|
+ (if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type))
|
||||||
|
+ /* If all 3 captures are CONSTANT_CLASS_P, punt, as we might recurse
|
||||||
|
+ forever if something doesn't simplify into a constant. */
|
||||||
|
+ (if (!CONSTANT_CLASS_P (@0))
|
||||||
|
+ (plus (view_convert @0) (minus @1 (view_convert @2))))
|
||||||
|
+ (if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
|
||||||
|
+ || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
|
||||||
|
+ (view_convert (plus @0 (minus (view_convert @1) @2)))
|
||||||
|
+ (if (types_match (type, @0))
|
||||||
|
+ (with { tree cst = const_binop (MINUS_EXPR, type, @1, @2); }
|
||||||
|
+ (if (cst && !TREE_OVERFLOW (cst))
|
||||||
|
+ (plus { cst; } @0)))))))
|
||||||
|
+
|
||||||
|
+/* ((T)(A)) + CST -> (T)(A + CST) */
|
||||||
|
+#if GIMPLE
|
||||||
|
+ (simplify
|
||||||
|
+ (plus (convert SSA_NAME@0) INTEGER_CST@1)
|
||||||
|
+ (if (TREE_CODE (TREE_TYPE (@0)) == INTEGER_TYPE
|
||||||
|
+ && TREE_CODE (type) == INTEGER_TYPE
|
||||||
|
+ && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
|
||||||
|
+ && int_fits_type_p (@1, TREE_TYPE (@0)))
|
||||||
|
+ /* Perform binary operation inside the cast if the constant fits
|
||||||
|
+ and (A + CST)'s range does not overflow. */
|
||||||
|
+ (with
|
||||||
|
+ {
|
||||||
|
+ wi::overflow_type min_ovf = wi::OVF_OVERFLOW,
|
||||||
|
+ max_ovf = wi::OVF_OVERFLOW;
|
||||||
|
+ tree inner_type = TREE_TYPE (@0);
|
||||||
|
+
|
||||||
|
+ wide_int w1 = wide_int::from (wi::to_wide (@1), TYPE_PRECISION (inner_type),
|
||||||
|
+ TYPE_SIGN (inner_type));
|
||||||
|
+
|
||||||
|
+ wide_int wmin0, wmax0;
|
||||||
|
+ if (get_range_info (@0, &wmin0, &wmax0) == VR_RANGE)
|
||||||
|
+ {
|
||||||
|
+ wi::add (wmin0, w1, TYPE_SIGN (inner_type), &min_ovf);
|
||||||
|
+ wi::add (wmax0, w1, TYPE_SIGN (inner_type), &max_ovf);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ (if (min_ovf == wi::OVF_NONE && max_ovf == wi::OVF_NONE)
|
||||||
|
+ (convert (plus @0 { wide_int_to_tree (TREE_TYPE (@0), w1); } )))
|
||||||
|
+ )))
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
/* ~A + A -> -1 */
|
||||||
|
(simplify
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c 2020-03-12 19:07:22.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-5.c 2020-11-24 14:49:14.568000000 +0800
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
-/* { dg-options "-O2 -fdump-tree-ch2-details" } */
|
||||||
|
+/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-ch2-details" } */
|
||||||
|
|
||||||
|
int is_sorted(int *a, int n)
|
||||||
|
{
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c 2020-03-12 19:07:22.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-headers-7.c 2020-11-24 14:49:14.568000000 +0800
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
-/* { dg-options "-O2 -fdump-tree-ch2-details --param logical-op-non-short-circuit=0" } */
|
||||||
|
+/* { dg-options "-O2 -fno-tree-vrp -fdump-tree-ch2-details --param logical-op-non-short-circuit=0" } */
|
||||||
|
|
||||||
|
int is_sorted(int *a, int n, int m, int k)
|
||||||
|
{
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c 2020-03-12 19:07:22.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-15.c 2020-11-24 14:49:14.568000000 +0800
|
||||||
|
@@ -19,7 +19,7 @@ int bla(void)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Since the loop is removed, there should be no addition. */
|
||||||
|
-/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" { xfail *-*-* } } } */
|
||||||
|
+/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" } } */
|
||||||
|
/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
|
||||||
|
|
||||||
|
/* The if from the loop header copying remains in the code. */
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c b/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c 2020-03-12 19:07:22.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr23744.c 2020-11-24 14:49:14.568000000 +0800
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
-/* { dg-options "-O2 -fno-tree-ccp -fdisable-tree-evrp -fdump-tree-vrp1" } */
|
||||||
|
+/* { dg-options "-O2 -fno-tree-ccp -fdisable-tree-evrp -fdump-tree-vrp1-details" } */
|
||||||
|
|
||||||
|
void h (void);
|
||||||
|
|
||||||
|
@@ -17,4 +17,4 @@ int g (int i, int j)
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* { dg-final { scan-tree-dump-times "Folding predicate.*to 1" 1 "vrp1" } } */
|
||||||
|
+/* { dg-final { scan-tree-dump-times "gimple_simplified" 1 "vrp1" } } */
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr92734-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92734-2.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr92734-2.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92734-2.c 2020-11-24 14:49:14.568000000 +0800
|
||||||
|
@@ -0,0 +1,76 @@
|
||||||
|
+/* PR tree-optimization/92734 */
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||||
|
+/* Verify there are no binary additions or subtractions left. There can
|
||||||
|
+ be just casts and negations. */
|
||||||
|
+/* { dg-final { scan-tree-dump-not " \[+-] " "optimized" } } */
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+f1 (int x, unsigned y)
|
||||||
|
+{
|
||||||
|
+ int a = x + y;
|
||||||
|
+ return a - x;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned
|
||||||
|
+f2 (unsigned x, int y)
|
||||||
|
+{
|
||||||
|
+ unsigned a = (int) x + y;
|
||||||
|
+ return a - x;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+f3 (int x, unsigned y)
|
||||||
|
+{
|
||||||
|
+ int a = x - y;
|
||||||
|
+ return a - x;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned
|
||||||
|
+f4 (unsigned x, int y)
|
||||||
|
+{
|
||||||
|
+ unsigned a = (int) x - y;
|
||||||
|
+ return a - x;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+f5 (unsigned x, int y)
|
||||||
|
+{
|
||||||
|
+ int a = x - y;
|
||||||
|
+ return a + y;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned
|
||||||
|
+f6 (int x, unsigned y)
|
||||||
|
+{
|
||||||
|
+ unsigned a = x - (int) y;
|
||||||
|
+ return a + y;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+f7 (int x, unsigned y)
|
||||||
|
+{
|
||||||
|
+ int a = x + y;
|
||||||
|
+ return x - a;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned
|
||||||
|
+f8 (unsigned x, int y)
|
||||||
|
+{
|
||||||
|
+ unsigned a = (int) x + y;
|
||||||
|
+ return x - a;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+f9 (int x, unsigned y)
|
||||||
|
+{
|
||||||
|
+ int a = x - y;
|
||||||
|
+ return x - a;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned
|
||||||
|
+f10 (unsigned x, int y)
|
||||||
|
+{
|
||||||
|
+ unsigned a = (int) x - y;
|
||||||
|
+ return x - a;
|
||||||
|
+}
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr92734.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92734.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr92734.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92734.c 2020-11-24 14:49:14.568000000 +0800
|
||||||
|
@@ -0,0 +1,31 @@
|
||||||
|
+/* PR tree-optimization/92734 */
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -fdump-tree-forwprop1" } */
|
||||||
|
+/* { dg-final { scan-tree-dump-times "return t_\[0-9]*\\\(D\\\);" 4 "forwprop1" } } */
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+f1 (int t)
|
||||||
|
+{
|
||||||
|
+ return 1 - (int) (1U - t);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+f2 (int t)
|
||||||
|
+{
|
||||||
|
+ int a = 7U - t;
|
||||||
|
+ return 7 - a;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+f3 (int t)
|
||||||
|
+{
|
||||||
|
+ int a = 32U - t;
|
||||||
|
+ return 32 - a;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+f4 (int t)
|
||||||
|
+{
|
||||||
|
+ int a = 32 - t;
|
||||||
|
+ return (int) (32 - (unsigned) a);
|
||||||
|
+}
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr94718-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr94718-3.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr94718-3.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr94718-3.c 2020-11-24 14:49:14.568000000 +0800
|
||||||
|
@@ -0,0 +1,45 @@
|
||||||
|
+/* PR tree-optimization/94718 */
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */
|
||||||
|
+/* { dg-final { scan-tree-dump-times " \\\(int\\\) " 2 "optimized" } } */
|
||||||
|
+/* { dg-final { scan-tree-dump-times " \\\(unsigned int\\\) " 2 "optimized" } } */
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+f1 (int x, int y)
|
||||||
|
+{
|
||||||
|
+ return (int) ((unsigned) x | (unsigned) y);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+f2 (int x, int y)
|
||||||
|
+{
|
||||||
|
+ unsigned a = x;
|
||||||
|
+ unsigned b = y;
|
||||||
|
+ return a | b;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+f3 (int x, unsigned y)
|
||||||
|
+{
|
||||||
|
+ return (int) ((unsigned) x | y);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+f4 (int x, unsigned y)
|
||||||
|
+{
|
||||||
|
+ unsigned a = x;
|
||||||
|
+ return a | y;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned
|
||||||
|
+f5 (int x, unsigned y)
|
||||||
|
+{
|
||||||
|
+ return (unsigned) (x | (int) y);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned
|
||||||
|
+f6 (int x, unsigned y)
|
||||||
|
+{
|
||||||
|
+ int a = y;
|
||||||
|
+ return x | a;
|
||||||
|
+}
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/wrapped-binop-simplify.c b/gcc/testsuite/gcc.dg/wrapped-binop-simplify.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/wrapped-binop-simplify.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/wrapped-binop-simplify.c 2020-11-24 14:49:14.484000000 +0800
|
||||||
|
@@ -0,0 +1,43 @@
|
||||||
|
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* s390*-*-* } && lp64 } } } */
|
||||||
|
+/* { dg-options "-O2 -fdump-tree-vrp2-details" } */
|
||||||
|
+/* { dg-final { scan-tree-dump-times "gimple_simplified to" 4 "vrp2" } } */
|
||||||
|
+
|
||||||
|
+void v1 (unsigned long *in, unsigned long *out, unsigned int n)
|
||||||
|
+{
|
||||||
|
+ int i;
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < n; i++)
|
||||||
|
+ {
|
||||||
|
+ out[i] = in[i];
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void v2 (unsigned long *in, unsigned long *out, int n)
|
||||||
|
+{
|
||||||
|
+ int i;
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < n; i++)
|
||||||
|
+ {
|
||||||
|
+ out[i] = in[i];
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void v3 (unsigned long *in, unsigned long *out, unsigned int n)
|
||||||
|
+{
|
||||||
|
+ unsigned int i;
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < n; i++)
|
||||||
|
+ {
|
||||||
|
+ out[i] = in[i];
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void v4 (unsigned long *in, unsigned long *out, int n)
|
||||||
|
+{
|
||||||
|
+ unsigned int i;
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < n; i++)
|
||||||
|
+ {
|
||||||
|
+ out[i] = in[i];
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
diff -Nurp a/gcc/tree-ssa-propagate.c b/gcc/tree-ssa-propagate.c
|
||||||
|
--- a/gcc/tree-ssa-propagate.c 2020-11-24 14:54:42.556000000 +0800
|
||||||
|
+++ b/gcc/tree-ssa-propagate.c 2020-11-24 14:49:12.792000000 +0800
|
||||||
|
@@ -814,7 +814,6 @@ ssa_propagation_engine::ssa_propagate (v
|
||||||
|
ssa_prop_fini ();
|
||||||
|
}
|
||||||
|
|
||||||
|
-
|
||||||
|
/* Return true if STMT is of the form 'mem_ref = RHS', where 'mem_ref'
|
||||||
|
is a non-volatile pointer dereference, a structure reference or a
|
||||||
|
reference to a single _DECL. Ignore volatile memory references
|
||||||
|
@@ -1071,6 +1070,14 @@ substitute_and_fold_dom_walker::before_d
|
||||||
|
stmt = gsi_stmt (i);
|
||||||
|
gimple_set_modified (stmt, true);
|
||||||
|
}
|
||||||
|
+ /* Also fold if we want to fold all statements. */
|
||||||
|
+ else if (substitute_and_fold_engine->fold_all_stmts
|
||||||
|
+ && fold_stmt (&i, follow_single_use_edges))
|
||||||
|
+ {
|
||||||
|
+ did_replace = true;
|
||||||
|
+ stmt = gsi_stmt (i);
|
||||||
|
+ gimple_set_modified (stmt, true);
|
||||||
|
+ }
|
||||||
|
|
||||||
|
/* Some statements may be simplified using propagator
|
||||||
|
specific information. Do this before propagating
|
||||||
|
diff -Nurp a/gcc/tree-ssa-propagate.h b/gcc/tree-ssa-propagate.h
|
||||||
|
--- a/gcc/tree-ssa-propagate.h 2020-03-12 19:07:23.000000000 +0800
|
||||||
|
+++ b/gcc/tree-ssa-propagate.h 2020-11-24 14:49:12.792000000 +0800
|
||||||
|
@@ -100,6 +100,8 @@ class ssa_propagation_engine
|
||||||
|
class substitute_and_fold_engine
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
+ substitute_and_fold_engine (bool fold_all_stmts = false)
|
||||||
|
+ : fold_all_stmts (fold_all_stmts) { }
|
||||||
|
virtual ~substitute_and_fold_engine (void) { }
|
||||||
|
virtual bool fold_stmt (gimple_stmt_iterator *) { return false; }
|
||||||
|
virtual tree get_value (tree) { return NULL_TREE; }
|
||||||
|
@@ -107,6 +109,10 @@ class substitute_and_fold_engine
|
||||||
|
bool substitute_and_fold (basic_block = NULL);
|
||||||
|
bool replace_uses_in (gimple *);
|
||||||
|
bool replace_phi_args_in (gphi *);
|
||||||
|
+
|
||||||
|
+ /* Users like VRP can set this when they want to perform
|
||||||
|
+ folding for every propagation. */
|
||||||
|
+ bool fold_all_stmts;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* _TREE_SSA_PROPAGATE_H */
|
||||||
|
diff -Nurp a/gcc/tree-vrp.c b/gcc/tree-vrp.c
|
||||||
|
--- a/gcc/tree-vrp.c 2020-11-24 14:54:43.564000000 +0800
|
||||||
|
+++ b/gcc/tree-vrp.c 2020-11-24 14:49:12.792000000 +0800
|
||||||
|
@@ -6384,6 +6384,7 @@ vrp_prop::visit_phi (gphi *phi)
|
||||||
|
class vrp_folder : public substitute_and_fold_engine
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
+ vrp_folder () : substitute_and_fold_engine (/* Fold all stmts. */ true) { }
|
||||||
|
tree get_value (tree) FINAL OVERRIDE;
|
||||||
|
bool fold_stmt (gimple_stmt_iterator *) FINAL OVERRIDE;
|
||||||
|
bool fold_predicate_in (gimple_stmt_iterator *);
|
||||||
108
fix-issue604-ldist-dependency-fixup.patch
Normal file
108
fix-issue604-ldist-dependency-fixup.patch
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
commit f6e1a4cd83190746b6544917f7526fa480ca5f18
|
||||||
|
Author: Bin Cheng <bin.cheng@linux.alibaba.com>
|
||||||
|
Date: Wed May 13 11:37:47 2020 +0800
|
||||||
|
|
||||||
|
Add missing unit dependence vector in data dependence analysis
|
||||||
|
|
||||||
|
Current data dependence analysis misses unit distance vector if DRs in
|
||||||
|
DDR have the same invariant access functions. This adds the vector as
|
||||||
|
the constant access function case.
|
||||||
|
|
||||||
|
2020-05-13 Bin Cheng <bin.cheng@linux.alibaba.com>
|
||||||
|
PR tree-optimization/94969
|
||||||
|
|
||||||
|
gcc/
|
||||||
|
* tree-data-ref.c (constant_access_functions): Rename to...
|
||||||
|
(invariant_access_functions): ...this. Add parameter. Check for
|
||||||
|
invariant access function, rather than constant.
|
||||||
|
(build_classic_dist_vector): Call above function.
|
||||||
|
* tree-loop-distribution.c (pg_add_dependence_edges): Add comment.
|
||||||
|
|
||||||
|
gcc/testsuite/
|
||||||
|
* gcc.dg/tree-ssa/pr94969.c: New test.
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr94969.c b/gcc/testsuite/gcc.dg/tree-ssa/pr94969.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..056b015f97c
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr94969.c
|
||||||
|
@@ -0,0 +1,28 @@
|
||||||
|
+/* PR tree-optimization/94969 */
|
||||||
|
+/* { dg-do run } */
|
||||||
|
+/* { dg-options "-O3 -fdump-tree-ldist-details" } */
|
||||||
|
+
|
||||||
|
+int a = 0, b = 0, c = 0;
|
||||||
|
+struct S {
|
||||||
|
+ signed m : 7;
|
||||||
|
+ signed e : 2;
|
||||||
|
+};
|
||||||
|
+struct S f[2] = {{0, 0}, {0, 0}};
|
||||||
|
+struct S g = {0, 0};
|
||||||
|
+
|
||||||
|
+void __attribute__((noinline))
|
||||||
|
+k()
|
||||||
|
+{
|
||||||
|
+ for (; c <= 1; c++) {
|
||||||
|
+ f[b] = g;
|
||||||
|
+ f[b].e ^= 1;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+int main()
|
||||||
|
+{
|
||||||
|
+ k();
|
||||||
|
+ if (f[b].e != 1)
|
||||||
|
+ __builtin_abort ();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-tree-dump-not "Loop 1 distributed: split to 3 loops" "ldist" } } */
|
||||||
|
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
|
||||||
|
index 851225e1171..5505ba46778 100644
|
||||||
|
--- a/gcc/tree-data-ref.c
|
||||||
|
+++ b/gcc/tree-data-ref.c
|
||||||
|
@@ -4821,17 +4821,19 @@ build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Return true when the DDR contains only constant access functions. */
|
||||||
|
+/* Return true when the DDR contains only invariant access functions wrto. loop
|
||||||
|
+ number LNUM. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
-constant_access_functions (const struct data_dependence_relation *ddr)
|
||||||
|
+invariant_access_functions (const struct data_dependence_relation *ddr,
|
||||||
|
+ int lnum)
|
||||||
|
{
|
||||||
|
unsigned i;
|
||||||
|
subscript *sub;
|
||||||
|
|
||||||
|
FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
|
||||||
|
- if (!evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 0))
|
||||||
|
- || !evolution_function_is_constant_p (SUB_ACCESS_FN (sub, 1)))
|
||||||
|
+ if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
|
||||||
|
+ || !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
@@ -5030,7 +5032,7 @@ build_classic_dist_vector (struct data_dependence_relation *ddr,
|
||||||
|
dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
|
||||||
|
save_dist_v (ddr, dist_v);
|
||||||
|
|
||||||
|
- if (constant_access_functions (ddr))
|
||||||
|
+ if (invariant_access_functions (ddr, loop_nest->num))
|
||||||
|
add_distance_for_zero_overlaps (ddr);
|
||||||
|
|
||||||
|
if (DDR_NB_LOOPS (ddr) > 1)
|
||||||
|
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
|
||||||
|
index 44423215332..b122c3964a0 100644
|
||||||
|
--- a/gcc/tree-loop-distribution.c
|
||||||
|
+++ b/gcc/tree-loop-distribution.c
|
||||||
|
@@ -2080,7 +2080,8 @@ loop_distribution::pg_add_dependence_edges (struct graph *rdg, int dir,
|
||||||
|
this_dir = -this_dir;
|
||||||
|
|
||||||
|
/* Known dependences can still be unordered througout the
|
||||||
|
- iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
|
||||||
|
+ iteration space, see gcc.dg/tree-ssa/ldist-16.c and
|
||||||
|
+ gcc.dg/tree-ssa/pr94969.c. */
|
||||||
|
if (DDR_NUM_DIST_VECTS (ddr) != 1)
|
||||||
|
this_dir = 2;
|
||||||
|
/* If the overlap is exact preserve stmt order. */
|
||||||
@ -1,23 +0,0 @@
|
|||||||
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
|
|
||||||
index 36639b697f1..88f14e73d65 100644
|
|
||||||
--- a/gcc/tree-vect-data-refs.c
|
|
||||||
+++ b/gcc/tree-vect-data-refs.c
|
|
||||||
@@ -938,6 +938,18 @@ vect_compute_data_ref_alignment (dr_vec_info *dr_info)
|
|
||||||
= exact_div (vect_calculate_target_alignment (dr_info), BITS_PER_UNIT);
|
|
||||||
DR_TARGET_ALIGNMENT (dr_info) = vector_alignment;
|
|
||||||
|
|
||||||
+ /* If the main loop has peeled for alignment we have no way of knowing
|
|
||||||
+ whether the data accesses in the epilogues are aligned. We can't at
|
|
||||||
+ compile time answer the question whether we have entered the main loop or
|
|
||||||
+ not. Fixes PR 92351. */
|
|
||||||
+ if (loop_vinfo)
|
|
||||||
+ {
|
|
||||||
+ loop_vec_info orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
|
|
||||||
+ if (orig_loop_vinfo
|
|
||||||
+ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo) != 0)
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
unsigned HOST_WIDE_INT vect_align_c;
|
|
||||||
if (!vector_alignment.is_constant (&vect_align_c))
|
|
||||||
return;
|
|
||||||
181
gcc.spec
181
gcc.spec
@ -1,4 +1,4 @@
|
|||||||
%global DATE 20200922
|
%global DATE 20201229
|
||||||
|
|
||||||
%global gcc_version 9.3.1
|
%global gcc_version 9.3.1
|
||||||
%global gcc_major 9.3.1
|
%global gcc_major 9.3.1
|
||||||
@ -59,7 +59,7 @@
|
|||||||
Summary: Various compilers (C, C++, Objective-C, ...)
|
Summary: Various compilers (C, C++, Objective-C, ...)
|
||||||
Name: gcc
|
Name: gcc
|
||||||
Version: %{gcc_version}
|
Version: %{gcc_version}
|
||||||
Release: %{DATE}.12
|
Release: %{DATE}.13
|
||||||
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
|
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
|
||||||
URL: https://gcc.gnu.org
|
URL: https://gcc.gnu.org
|
||||||
|
|
||||||
@ -114,26 +114,26 @@ Provides: bundled(libiberty)
|
|||||||
Provides: gcc(major) = %{gcc_major}
|
Provides: gcc(major) = %{gcc_major}
|
||||||
|
|
||||||
Patch0: enable-aarch64-libquadmath.patch
|
Patch0: enable-aarch64-libquadmath.patch
|
||||||
Patch1: medium-code-mode.patch
|
Patch1: generate-csel.patch
|
||||||
Patch2: generate-csel.patch
|
Patch2: delete-incorrect-smw.patch
|
||||||
Patch3: delete-incorrect-smw.patch
|
Patch3: remove-array-index-inliner-hint.patch
|
||||||
Patch4: remove-array-index-inliner-hint.patch
|
Patch4: ivopts-1.patch
|
||||||
Patch5: ivopts-1.patch
|
Patch5: ivopts-2.patch
|
||||||
Patch6: ivopts-2.patch
|
Patch6: dont-generate-IF_THEN_ELSE.patch
|
||||||
Patch7: dont-generate-IF_THEN_ELSE.patch
|
Patch7: fix-cost-of-plus.patch
|
||||||
Patch8: fix-cost-of-plus.patch
|
Patch8: div-opti.patch
|
||||||
Patch9: div-opti.patch
|
Patch9: fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch
|
||||||
Patch10: fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch
|
Patch10: fix-ICE-during-pass-ccp.patch
|
||||||
Patch11: fix-ICE-during-pass-ccp.patch
|
Patch11: loop-split.patch
|
||||||
Patch12: loop-split.patch
|
Patch12: loop-finite.patch
|
||||||
Patch13: loop-finite.patch
|
Patch13: loop-finite-bugfix.patch
|
||||||
Patch14: loop-finite-bugfix.patch
|
Patch14: fix-regno-out-of-range.patch
|
||||||
Patch15: fix-regno-out-of-range.patch
|
Patch15: fix-ICE-in-vectorizable-load.patch
|
||||||
Patch16: fix-ICE-in-vectorizable-load.patch
|
Patch16: address-calculation-optimization-within-loop.patch
|
||||||
Patch17: address-calculation-optimization-within-loop.patch
|
Patch17: skip-debug-insns-when-computing-inline-costs.patch
|
||||||
Patch18: skip-debug-insns-when-computing-inline-costs.patch
|
Patch18: ipa-const-prop.patch
|
||||||
Patch19: ipa-const-prop.patch
|
Patch19: ipa-const-prop-self-recursion-bugfix.patch
|
||||||
Patch20: ipa-const-prop-self-recursion-bugfix.patch
|
Patch20: ipa-const-prop-null-point-check-bugfix.patch
|
||||||
Patch21: change-gcc-BASE-VER.patch
|
Patch21: change-gcc-BASE-VER.patch
|
||||||
Patch22: add-option-fallow-store-data-races.patch
|
Patch22: add-option-fallow-store-data-races.patch
|
||||||
Patch23: tighten-range-for-generating-csel.patch
|
Patch23: tighten-range-for-generating-csel.patch
|
||||||
@ -177,16 +177,49 @@ Patch60: fix-load-eliding-in-SM.patch
|
|||||||
Patch61: fix-SSA-update-for-vectorizer-epilogue.patch
|
Patch61: fix-SSA-update-for-vectorizer-epilogue.patch
|
||||||
Patch62: fix-ICE-when-vectorizing-nested-cycles.patch
|
Patch62: fix-ICE-when-vectorizing-nested-cycles.patch
|
||||||
Patch63: fix-avoid-bogus-uninit-warning-with-store-motion.patch
|
Patch63: fix-avoid-bogus-uninit-warning-with-store-motion.patch
|
||||||
Patch64: ipa-const-prop-null-point-check-bugfix.patch
|
Patch64: avoid-cycling-on-vertain-subreg-reloads.patch
|
||||||
Patch65: avoid-cycling-on-vertain-subreg-reloads.patch
|
Patch65: fix-ICE-in-verify_target_availability.patch
|
||||||
Patch66: fix-ICE-in-verify_target_availability.patch
|
Patch66: fix-ICE-vect_slp_analyze_node_operations.patch
|
||||||
Patch67: fix-ICE-vect_slp_analyze_node_operations.patch
|
Patch67: fix-ICE-in-extract_constrain_insn.patch
|
||||||
Patch68: fix-ICE-in-extract_constrain_insn.patch
|
Patch68: fix-ICE-during-GIMPLE-pass-dse.patch
|
||||||
Patch69: fix-ICE-during-GIMPLE-pass-dse.patch
|
Patch69: ipa-const-prop-buffer-overflow-bugfix.patch
|
||||||
Patch70: ipa-const-prop-buffer-overflow-bugfix.patch
|
Patch70: fix-ICE-in-eliminate_stmt.patch
|
||||||
Patch71: fix-ICE-in-eliminate_stmt.patch
|
Patch71: fix-make-ifcvt-clean-up-dead-comparisons.patch
|
||||||
Patch72: fix-make-ifcvt-clean-up-dead-comparisons.patch
|
Patch72: fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch
|
||||||
Patch73: fix-when-peeling-for-alignment.patch
|
Patch73: fix-ICE-in-vect_update_misalignment_for_peel.patch
|
||||||
|
Patch74: redundant-loop-elimination.patch
|
||||||
|
Patch75: bf16-and-matrix-characteristic.patch
|
||||||
|
Patch76: medium-code-mode.patch
|
||||||
|
Patch77: tree-optimization-96920-another-ICE-when-vectorizing.patch
|
||||||
|
Patch78: reduction-paths-with-unhandled-live-stmt.patch
|
||||||
|
Patch79: aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch
|
||||||
|
Patch80: tree-optimization-97812-fix-range-query-in-VRP-asser.patch
|
||||||
|
Patch81: aarch64-Fix-bf16-and-matrix-g++-gfortran.patch
|
||||||
|
Patch82: IRA-Handle-fully-tied-destinations.patch
|
||||||
|
Patch83: fix-ICE-in-pass-vect.patch
|
||||||
|
Patch84: SLP-VECT-Add-check-to-fix-96837.patch
|
||||||
|
Patch85: adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch
|
||||||
|
Patch86: fix-issue499-add-nop-convert.patch
|
||||||
|
Patch87: aarch64-fix-sve-acle-error.patch
|
||||||
|
Patch88: fix-ICE-IPA-compare-VRP-types.patch
|
||||||
|
Patch89: vectorizable-comparison-Swap-operands-only-once.patch
|
||||||
|
Patch90: sccvn-Improve-handling-of-load-masked-with-integer.patch
|
||||||
|
Patch91: speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch
|
||||||
|
Patch92: x86-Fix-bf16-and-matrix.patch
|
||||||
|
Patch93: Fix-up-push_partial_def-little-endian-bitfield.patch
|
||||||
|
Patch94: modulo-sched-Carefully-process-loop-counter-initiali.patch
|
||||||
|
Patch95: fix-ICE-in-affine-combination.patch
|
||||||
|
Patch96: aarch64-Fix-mismatched-SVE-predicate-modes.patch
|
||||||
|
Patch97: Fix-EXTRACT_LAST_REDUCTION-segfault.patch
|
||||||
|
Patch98: fix-PR-92351-When-peeling-for-alignment.patch
|
||||||
|
Patch99: fix-addlosymdi-ICE-in-pass-reload.patch
|
||||||
|
Patch100: store-merging-Consider-also-overlapping-stores-earlier.patch
|
||||||
|
Patch101: AArch64-Fix-constraints-for-CPY-M.patch
|
||||||
|
Patch102: Fix-zero-masking-for-vcvtps2ph.patch
|
||||||
|
Patch103: re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch
|
||||||
|
Patch104: fix-avx512vl-vcvttpd2dq-2-fail.patch
|
||||||
|
Patch105: fix-issue604-ldist-dependency-fixup.patch
|
||||||
|
Patch106: Apply-maximum-nunits-for-BB-SLP.patch
|
||||||
|
|
||||||
|
|
||||||
%global gcc_target_platform %{_arch}-linux-gnu
|
%global gcc_target_platform %{_arch}-linux-gnu
|
||||||
@ -703,6 +736,39 @@ not stable, so plugins must be rebuilt any time GCC is updated.
|
|||||||
%patch71 -p1
|
%patch71 -p1
|
||||||
%patch72 -p1
|
%patch72 -p1
|
||||||
%patch73 -p1
|
%patch73 -p1
|
||||||
|
%patch74 -p1
|
||||||
|
%patch75 -p1
|
||||||
|
%patch76 -p1
|
||||||
|
%patch77 -p1
|
||||||
|
%patch78 -p1
|
||||||
|
%patch79 -p1
|
||||||
|
%patch80 -p1
|
||||||
|
%patch81 -p1
|
||||||
|
%patch82 -p1
|
||||||
|
%patch83 -p1
|
||||||
|
%patch84 -p1
|
||||||
|
%patch85 -p1
|
||||||
|
%patch86 -p1
|
||||||
|
%patch87 -p1
|
||||||
|
%patch88 -p1
|
||||||
|
%patch89 -p1
|
||||||
|
%patch90 -p1
|
||||||
|
%patch91 -p1
|
||||||
|
%patch92 -p1
|
||||||
|
%patch93 -p1
|
||||||
|
%patch94 -p1
|
||||||
|
%patch95 -p1
|
||||||
|
%patch96 -p1
|
||||||
|
%patch97 -p1
|
||||||
|
%patch98 -p1
|
||||||
|
%patch99 -p1
|
||||||
|
%patch100 -p1
|
||||||
|
%patch101 -p1
|
||||||
|
%patch102 -p1
|
||||||
|
%patch103 -p1
|
||||||
|
%patch104 -p1
|
||||||
|
%patch105 -p1
|
||||||
|
%patch106 -p1
|
||||||
|
|
||||||
|
|
||||||
%build
|
%build
|
||||||
@ -2631,6 +2697,57 @@ end
|
|||||||
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Tue Dec 29 2020 eastb233 <xiezhiheng@huawei.com> - 9.3.1-20201229.13
|
||||||
|
- avoid-cycling-on-vertain-subreg-reloads.patch: Add patch source comment
|
||||||
|
- change-gcc-BASE-VER.patch: Likewise
|
||||||
|
- dont-generate-IF_THEN_ELSE.patch: Likewise
|
||||||
|
- fix-ICE-in-compute_live_loop_exits.patch: Likewise
|
||||||
|
- fix-ICE-in-eliminate_stmt.patch: Likewise
|
||||||
|
- fix-ICE-in-vect_create_epilog_for_reduction.patch: Likewise
|
||||||
|
- fix-ICE-in-vect_stmt_to_vectorize.patch: Likewise
|
||||||
|
- fix-ICE-in-verify_ssa.patch: Likewise
|
||||||
|
- fix-ICE-when-vectorizing-nested-cycles.patch: Likewise
|
||||||
|
- fix-cost-of-plus.patch: Likewise
|
||||||
|
- ipa-const-prop-self-recursion-bugfix.patch: Likewise
|
||||||
|
- simplify-removing-subregs.patch: Likewise
|
||||||
|
- medium-code-mode.patch: Bugfix
|
||||||
|
- fix-when-peeling-for-alignment.patch: Move to ...
|
||||||
|
- fix-PR-92351-When-peeling-for-alignment.patch: ... this
|
||||||
|
- AArch64-Fix-constraints-for-CPY-M.patch: New file
|
||||||
|
- Apply-maximum-nunits-for-BB-SLP.patch: New file
|
||||||
|
- Fix-EXTRACT_LAST_REDUCTION-segfault.patch: New file
|
||||||
|
- Fix-up-push_partial_def-little-endian-bitfield.patch: New file
|
||||||
|
- Fix-zero-masking-for-vcvtps2ph.patch: New file
|
||||||
|
- IRA-Handle-fully-tied-destinations.patch: New file
|
||||||
|
- SLP-VECT-Add-check-to-fix-96837.patch: New file
|
||||||
|
- aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch: New file
|
||||||
|
- aarch64-Fix-bf16-and-matrix-g++-gfortran.patch: New file
|
||||||
|
- aarch64-Fix-mismatched-SVE-predicate-modes.patch: New file
|
||||||
|
- aarch64-fix-sve-acle-error.patch: New file
|
||||||
|
- adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch: New file
|
||||||
|
- bf16-and-matrix-characteristic.patch: New file
|
||||||
|
- fix-ICE-IPA-compare-VRP-types.patch: New file
|
||||||
|
- fix-ICE-in-affine-combination.patch: New file
|
||||||
|
- fix-ICE-in-pass-vect.patch: New file
|
||||||
|
- fix-ICE-in-vect_update_misalignment_for_peel.patch: New file
|
||||||
|
- fix-addlosymdi-ICE-in-pass-reload.patch: New file
|
||||||
|
- fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch: New file
|
||||||
|
- fix-avx512vl-vcvttpd2dq-2-fail.patch: New file
|
||||||
|
- fix-issue499-add-nop-convert.patch: New file
|
||||||
|
- fix-issue604-ldist-dependency-fixup.patch: New file
|
||||||
|
- modulo-sched-Carefully-process-loop-counter-initiali.patch: New file
|
||||||
|
- re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch: New file
|
||||||
|
- reduction-paths-with-unhandled-live-stmt.patch: New file
|
||||||
|
- redundant-loop-elimination.patch: New file
|
||||||
|
- sccvn-Improve-handling-of-load-masked-with-integer.patch: New file
|
||||||
|
- speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch: New file
|
||||||
|
- store-merging-Consider-also-overlapping-stores-earlier.patch: New file
|
||||||
|
- tree-optimization-96920-another-ICE-when-vectorizing.patch: New file
|
||||||
|
- tree-optimization-97812-fix-range-query-in-VRP-asser.patch: New file
|
||||||
|
- vectorizable-comparison-Swap-operands-only-once.patch: New file
|
||||||
|
- x86-Fix-bf16-and-matrix.patch: New file
|
||||||
|
- gcc.spec: Add uploaded patch
|
||||||
|
|
||||||
* Tue Sep 22 2020 eastb233 <xiezhiheng@huawei.com> - 9.3.1-20200922.12
|
* Tue Sep 22 2020 eastb233 <xiezhiheng@huawei.com> - 9.3.1-20200922.12
|
||||||
- fix-when-peeling-for-alignment.patch: New file
|
- fix-when-peeling-for-alignment.patch: New file
|
||||||
|
|
||||||
|
|||||||
@ -1,14 +1,11 @@
|
|||||||
This patch is backport from gcc-trunk. It is a combined patch from
|
This backport contains 2 patches from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
Find matched aggregate lattice for self-recursive CP (PR ipa/93084)
|
0001-Find-matched-aggregate-lattice-for-self-recursive-CP.patch
|
||||||
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=42d73fa9d575e3c8c21e88bd7f65922e17b052f1
|
709d7838e753bbb6f16e2ed88a118ed81c367040
|
||||||
|
|
||||||
and
|
0002-Do-not-propagate-self-dependent-value-PR-ipa-93763.patch
|
||||||
|
47772af10c00f7e1e95cd52557fc893dc602a420
|
||||||
Do not propagate self-dependent value (PR ipa/93763)
|
|
||||||
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=47772af10c00f7e1e95cd52557fc893dc602a420
|
|
||||||
|
|
||||||
adapted the using of parameter to gcc9 style.
|
|
||||||
|
|
||||||
diff -Nurp a/gcc/ipa-cp.c b/gcc/ipa-cp.c
|
diff -Nurp a/gcc/ipa-cp.c b/gcc/ipa-cp.c
|
||||||
--- a/gcc/ipa-cp.c 2020-05-23 16:16:58.032000000 +0800
|
--- a/gcc/ipa-cp.c 2020-05-23 16:16:58.032000000 +0800
|
||||||
|
|||||||
@ -194,8 +194,8 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|||||||
case AARCH64_CMODEL_SMALL:
|
case AARCH64_CMODEL_SMALL:
|
||||||
+ AARCH64_SMALL_ROUTINE:
|
+ AARCH64_SMALL_ROUTINE:
|
||||||
/* Same reasoning as the tiny code model, but the offset cap here is
|
/* Same reasoning as the tiny code model, but the offset cap here is
|
||||||
4G. */
|
1MB, allowing +/-3.9GB for the offset to the symbol. */
|
||||||
if ((SYMBOL_REF_WEAK (x)
|
|
||||||
@@ -13121,7 +13225,48 @@ aarch64_classify_symbol (rtx x, HOST_WID
|
@@ -13121,7 +13225,48 @@ aarch64_classify_symbol (rtx x, HOST_WID
|
||||||
? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
|
? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
|
||||||
return SYMBOL_SMALL_ABSOLUTE;
|
return SYMBOL_SMALL_ABSOLUTE;
|
||||||
@ -300,7 +300,7 @@ diff -Nurp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
|||||||
+ UNSPEC_LOAD_SYMBOL_MEDIUM
|
+ UNSPEC_LOAD_SYMBOL_MEDIUM
|
||||||
UNSPEC_LD1_SVE
|
UNSPEC_LD1_SVE
|
||||||
UNSPEC_ST1_SVE
|
UNSPEC_ST1_SVE
|
||||||
UNSPEC_LD1RQ
|
UNSPEC_LDNT1_SVE
|
||||||
@@ -6548,6 +6553,39 @@
|
@@ -6548,6 +6553,39 @@
|
||||||
[(set_attr "type" "load_4")]
|
[(set_attr "type" "load_4")]
|
||||||
)
|
)
|
||||||
|
|||||||
251
modulo-sched-Carefully-process-loop-counter-initiali.patch
Normal file
251
modulo-sched-Carefully-process-loop-counter-initiali.patch
Normal file
@ -0,0 +1,251 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit ID of this patch is listed below.
|
||||||
|
|
||||||
|
0001-modulo-sched-Carefully-process-loop-counter-initiali.patch
|
||||||
|
4eb8f93d026eaa1de9b4820337069f3ce3465cd0
|
||||||
|
|
||||||
|
diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
|
||||||
|
index 6f699a874e3..4568674aa6c 100644
|
||||||
|
--- a/gcc/modulo-sched.c
|
||||||
|
+++ b/gcc/modulo-sched.c
|
||||||
|
@@ -210,8 +210,6 @@ static int sms_order_nodes (ddg_ptr, int, int *, int *);
|
||||||
|
static void set_node_sched_params (ddg_ptr);
|
||||||
|
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
|
||||||
|
static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
|
||||||
|
-static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
|
||||||
|
- rtx, rtx);
|
||||||
|
static int calculate_stage_count (partial_schedule_ptr, int);
|
||||||
|
static void calculate_must_precede_follow (ddg_node_ptr, int, int,
|
||||||
|
int, int, sbitmap, sbitmap, sbitmap);
|
||||||
|
@@ -391,30 +389,40 @@ doloop_register_get (rtx_insn *head, rtx_insn *tail)
|
||||||
|
this constant. Otherwise return 0. */
|
||||||
|
static rtx_insn *
|
||||||
|
const_iteration_count (rtx count_reg, basic_block pre_header,
|
||||||
|
- int64_t * count)
|
||||||
|
+ int64_t *count, bool* adjust_inplace)
|
||||||
|
{
|
||||||
|
rtx_insn *insn;
|
||||||
|
rtx_insn *head, *tail;
|
||||||
|
|
||||||
|
+ *adjust_inplace = false;
|
||||||
|
+ bool read_after = false;
|
||||||
|
+
|
||||||
|
if (! pre_header)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
get_ebb_head_tail (pre_header, pre_header, &head, &tail);
|
||||||
|
|
||||||
|
for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
|
||||||
|
- if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
|
||||||
|
- rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
|
||||||
|
+ if (single_set (insn) && rtx_equal_p (count_reg,
|
||||||
|
+ SET_DEST (single_set (insn))))
|
||||||
|
{
|
||||||
|
rtx pat = single_set (insn);
|
||||||
|
|
||||||
|
if (CONST_INT_P (SET_SRC (pat)))
|
||||||
|
{
|
||||||
|
*count = INTVAL (SET_SRC (pat));
|
||||||
|
+ *adjust_inplace = !read_after;
|
||||||
|
return insn;
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
+ else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn))
|
||||||
|
+ {
|
||||||
|
+ read_after = true;
|
||||||
|
+ if (reg_set_p (count_reg, insn))
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
@@ -1126,7 +1134,7 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
|
||||||
|
/* Generate the instructions (including reg_moves) for prolog & epilog. */
|
||||||
|
static void
|
||||||
|
generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
|
||||||
|
- rtx count_reg, rtx count_init)
|
||||||
|
+ rtx count_reg, bool adjust_init)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
int last_stage = PS_STAGE_COUNT (ps) - 1;
|
||||||
|
@@ -1135,12 +1143,12 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
|
||||||
|
/* Generate the prolog, inserting its insns on the loop-entry edge. */
|
||||||
|
start_sequence ();
|
||||||
|
|
||||||
|
- if (!count_init)
|
||||||
|
+ if (adjust_init)
|
||||||
|
{
|
||||||
|
/* Generate instructions at the beginning of the prolog to
|
||||||
|
- adjust the loop count by STAGE_COUNT. If loop count is constant
|
||||||
|
- (count_init), this constant is adjusted by STAGE_COUNT in
|
||||||
|
- generate_prolog_epilog function. */
|
||||||
|
+ adjust the loop count by STAGE_COUNT. If loop count is constant
|
||||||
|
+ and it is not used anywhere in prologue, this constant is adjusted by
|
||||||
|
+ STAGE_COUNT outside of generate_prolog_epilog function. */
|
||||||
|
rtx sub_reg = NULL_RTX;
|
||||||
|
|
||||||
|
sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
|
||||||
|
@@ -1528,7 +1536,8 @@ sms_schedule (void)
|
||||||
|
rtx_insn *count_init;
|
||||||
|
int mii, rec_mii, stage_count, min_cycle;
|
||||||
|
int64_t loop_count = 0;
|
||||||
|
- bool opt_sc_p;
|
||||||
|
+ bool opt_sc_p, adjust_inplace = false;
|
||||||
|
+ basic_block pre_header;
|
||||||
|
|
||||||
|
if (! (g = g_arr[loop->num]))
|
||||||
|
continue;
|
||||||
|
@@ -1569,19 +1578,13 @@ sms_schedule (void)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
- /* In case of th loop have doloop register it gets special
|
||||||
|
- handling. */
|
||||||
|
- count_init = NULL;
|
||||||
|
- if ((count_reg = doloop_register_get (head, tail)))
|
||||||
|
- {
|
||||||
|
- basic_block pre_header;
|
||||||
|
-
|
||||||
|
- pre_header = loop_preheader_edge (loop)->src;
|
||||||
|
- count_init = const_iteration_count (count_reg, pre_header,
|
||||||
|
- &loop_count);
|
||||||
|
- }
|
||||||
|
+ count_reg = doloop_register_get (head, tail);
|
||||||
|
gcc_assert (count_reg);
|
||||||
|
|
||||||
|
+ pre_header = loop_preheader_edge (loop)->src;
|
||||||
|
+ count_init = const_iteration_count (count_reg, pre_header, &loop_count,
|
||||||
|
+ &adjust_inplace);
|
||||||
|
+
|
||||||
|
if (dump_file && count_init)
|
||||||
|
{
|
||||||
|
fprintf (dump_file, "SMS const-doloop ");
|
||||||
|
@@ -1701,9 +1704,20 @@ sms_schedule (void)
|
||||||
|
print_partial_schedule (ps, dump_file);
|
||||||
|
}
|
||||||
|
|
||||||
|
- /* case the BCT count is not known , Do loop-versioning */
|
||||||
|
- if (count_reg && ! count_init)
|
||||||
|
+ if (count_init)
|
||||||
|
+ {
|
||||||
|
+ if (adjust_inplace)
|
||||||
|
+ {
|
||||||
|
+ /* When possible, set new iteration count of loop kernel in
|
||||||
|
+ place. Otherwise, generate_prolog_epilog creates an insn
|
||||||
|
+ to adjust. */
|
||||||
|
+ SET_SRC (single_set (count_init)) = GEN_INT (loop_count
|
||||||
|
+ - stage_count + 1);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
{
|
||||||
|
+ /* case the BCT count is not known , Do loop-versioning */
|
||||||
|
rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
|
||||||
|
gen_int_mode (stage_count,
|
||||||
|
GET_MODE (count_reg)));
|
||||||
|
@@ -1713,12 +1727,7 @@ sms_schedule (void)
|
||||||
|
loop_version (loop, comp_rtx, &condition_bb,
|
||||||
|
prob, prob.invert (),
|
||||||
|
prob, prob.invert (), true);
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- /* Set new iteration count of loop kernel. */
|
||||||
|
- if (count_reg && count_init)
|
||||||
|
- SET_SRC (single_set (count_init)) = GEN_INT (loop_count
|
||||||
|
- - stage_count + 1);
|
||||||
|
+ }
|
||||||
|
|
||||||
|
/* Now apply the scheduled kernel to the RTL of the loop. */
|
||||||
|
permute_partial_schedule (ps, g->closing_branch->first_note);
|
||||||
|
@@ -1735,7 +1744,7 @@ sms_schedule (void)
|
||||||
|
if (dump_file)
|
||||||
|
print_node_sched_params (dump_file, g->num_nodes, ps);
|
||||||
|
/* Generate prolog and epilog. */
|
||||||
|
- generate_prolog_epilog (ps, loop, count_reg, count_init);
|
||||||
|
+ generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..e32fb129f18
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c
|
||||||
|
@@ -0,0 +1,23 @@
|
||||||
|
+/* PR rtl-optimization/97421 */
|
||||||
|
+/* { dg-additional-options "-fmodulo-sched" } */
|
||||||
|
+
|
||||||
|
+int a, b, d, e;
|
||||||
|
+int *volatile c = &a;
|
||||||
|
+
|
||||||
|
+__attribute__((noinline))
|
||||||
|
+void f(void)
|
||||||
|
+{
|
||||||
|
+ for (int g = 2; g >= 0; g--) {
|
||||||
|
+ d = 0;
|
||||||
|
+ for (b = 0; b <= 2; b++)
|
||||||
|
+ ;
|
||||||
|
+ e = *c;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int main(void)
|
||||||
|
+{
|
||||||
|
+ f();
|
||||||
|
+ if (b != 3)
|
||||||
|
+ __builtin_abort();
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..142bcbcee91
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c
|
||||||
|
@@ -0,0 +1,18 @@
|
||||||
|
+/* PR rtl-optimization/97421 */
|
||||||
|
+/* { dg-additional-options "-fmodulo-sched -fno-dce -fno-strict-aliasing" } */
|
||||||
|
+
|
||||||
|
+static int a, b, c;
|
||||||
|
+int *d = &c;
|
||||||
|
+int **e = &d;
|
||||||
|
+int ***f = &e;
|
||||||
|
+int main()
|
||||||
|
+{
|
||||||
|
+ int h;
|
||||||
|
+ for (a = 2; a; a--)
|
||||||
|
+ for (h = 0; h <= 2; h++)
|
||||||
|
+ for (b = 0; b <= 2; b++)
|
||||||
|
+ ***f = 6;
|
||||||
|
+
|
||||||
|
+ if (b != 3)
|
||||||
|
+ __builtin_abort();
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..3f1485a4a3d
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c
|
||||||
|
@@ -0,0 +1,22 @@
|
||||||
|
+/* PR rtl-optimization/97421 */
|
||||||
|
+/* { dg-additional-options "-fmodulo-sched" } */
|
||||||
|
+
|
||||||
|
+int a, b, c;
|
||||||
|
+short d;
|
||||||
|
+void e(void) {
|
||||||
|
+ unsigned f = 0;
|
||||||
|
+ for (; f <= 2; f++) {
|
||||||
|
+ int g[1];
|
||||||
|
+ int h = (long)g;
|
||||||
|
+ c = 0;
|
||||||
|
+ for (; c < 10; c++)
|
||||||
|
+ g[0] = a = 0;
|
||||||
|
+ for (; a <= 2; a++)
|
||||||
|
+ b = d;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+int main(void) {
|
||||||
|
+ e();
|
||||||
|
+ if (a != 3)
|
||||||
|
+ __builtin_abort();
|
||||||
|
+}
|
||||||
215
re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch
Normal file
215
re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch
Normal file
@ -0,0 +1,215 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of the patches are listed below in chronological order.
|
||||||
|
|
||||||
|
491b0b4015a70071a05e0faa5c2082c43a51a0d3
|
||||||
|
0001-re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch
|
||||||
|
|
||||||
|
diff -urpN a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
|
||||||
|
--- a/gcc/config/i386/i386-builtin.def 2020-03-12 07:07:21.000000000 -0400
|
||||||
|
+++ b/gcc/config/i386/i386-builtin.def 2020-12-17 20:46:53.868000000 -0500
|
||||||
|
@@ -2516,60 +2516,60 @@ BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPT
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshld_v2di_mask, "__builtin_ia32_vpshld_v2di_mask", IX86_BUILTIN_VPSHLDV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_INT)
|
||||||
|
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v32hi, "__builtin_ia32_vpshrdv_v32hi", IX86_BUILTIN_VPSHRDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_mask, "__builtin_ia32_vpshrdv_v32hi_mask", IX86_BUILTIN_VPSHRDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshrdv_v32hi_maskz, "__builtin_ia32_vpshrdv_v32hi_maskz", IX86_BUILTIN_VPSHRDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi, "__builtin_ia32_vpshrdv_v16hi", IX86_BUILTIN_VPSHRDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_mask, "__builtin_ia32_vpshrdv_v16hi_mask", IX86_BUILTIN_VPSHRDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_maskz, "__builtin_ia32_vpshrdv_v16hi_maskz", IX86_BUILTIN_VPSHRDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_mask, "__builtin_ia32_vpshrdv_v16hi_mask", IX86_BUILTIN_VPSHRDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v16hi_maskz, "__builtin_ia32_vpshrdv_v16hi_maskz", IX86_BUILTIN_VPSHRDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi, "__builtin_ia32_vpshrdv_v8hi", IX86_BUILTIN_VPSHRDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_mask, "__builtin_ia32_vpshrdv_v8hi_mask", IX86_BUILTIN_VPSHRDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_maskz, "__builtin_ia32_vpshrdv_v8hi_maskz", IX86_BUILTIN_VPSHRDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_mask, "__builtin_ia32_vpshrdv_v8hi_mask", IX86_BUILTIN_VPSHRDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8hi_maskz, "__builtin_ia32_vpshrdv_v8hi_maskz", IX86_BUILTIN_VPSHRDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si, "__builtin_ia32_vpshrdv_v16si", IX86_BUILTIN_VPSHRDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_mask, "__builtin_ia32_vpshrdv_v16si_mask", IX86_BUILTIN_VPSHRDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v16si_maskz, "__builtin_ia32_vpshrdv_v16si_maskz", IX86_BUILTIN_VPSHRDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si, "__builtin_ia32_vpshrdv_v8si", IX86_BUILTIN_VPSHRDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_mask, "__builtin_ia32_vpshrdv_v8si_mask", IX86_BUILTIN_VPSHRDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_maskz, "__builtin_ia32_vpshrdv_v8si_maskz", IX86_BUILTIN_VPSHRDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_mask, "__builtin_ia32_vpshrdv_v8si_mask", IX86_BUILTIN_VPSHRDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v8si_maskz, "__builtin_ia32_vpshrdv_v8si_maskz", IX86_BUILTIN_VPSHRDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si, "__builtin_ia32_vpshrdv_v4si", IX86_BUILTIN_VPSHRDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_mask, "__builtin_ia32_vpshrdv_v4si_mask", IX86_BUILTIN_VPSHRDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_maskz, "__builtin_ia32_vpshrdv_v4si_maskz", IX86_BUILTIN_VPSHRDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_mask, "__builtin_ia32_vpshrdv_v4si_mask", IX86_BUILTIN_VPSHRDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4si_maskz, "__builtin_ia32_vpshrdv_v4si_maskz", IX86_BUILTIN_VPSHRDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di, "__builtin_ia32_vpshrdv_v8di", IX86_BUILTIN_VPSHRDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_mask, "__builtin_ia32_vpshrdv_v8di_mask", IX86_BUILTIN_VPSHRDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshrdv_v8di_maskz, "__builtin_ia32_vpshrdv_v8di_maskz", IX86_BUILTIN_VPSHRDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di, "__builtin_ia32_vpshrdv_v4di", IX86_BUILTIN_VPSHRDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_mask, "__builtin_ia32_vpshrdv_v4di_mask", IX86_BUILTIN_VPSHRDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_maskz, "__builtin_ia32_vpshrdv_v4di_maskz", IX86_BUILTIN_VPSHRDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_mask, "__builtin_ia32_vpshrdv_v4di_mask", IX86_BUILTIN_VPSHRDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v4di_maskz, "__builtin_ia32_vpshrdv_v4di_maskz", IX86_BUILTIN_VPSHRDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di, "__builtin_ia32_vpshrdv_v2di", IX86_BUILTIN_VPSHRDVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_mask, "__builtin_ia32_vpshrdv_v2di_mask", IX86_BUILTIN_VPSHRDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_maskz, "__builtin_ia32_vpshrdv_v2di_maskz", IX86_BUILTIN_VPSHRDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_mask, "__builtin_ia32_vpshrdv_v2di_mask", IX86_BUILTIN_VPSHRDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshrdv_v2di_maskz, "__builtin_ia32_vpshrdv_v2di_maskz", IX86_BUILTIN_VPSHRDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
|
||||||
|
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v32hi, "__builtin_ia32_vpshldv_v32hi", IX86_BUILTIN_VPSHLDVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_mask, "__builtin_ia32_vpshldv_v32hi_mask", IX86_BUILTIN_VPSHLDVV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_vpshldv_v32hi_maskz, "__builtin_ia32_vpshldv_v32hi_maskz", IX86_BUILTIN_VPSHLDVV32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_USI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi, "__builtin_ia32_vpshldv_v16hi", IX86_BUILTIN_VPSHLDVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_mask, "__builtin_ia32_vpshldv_v16hi_mask", IX86_BUILTIN_VPSHLDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_maskz, "__builtin_ia32_vpshldv_v16hi_maskz", IX86_BUILTIN_VPSHLDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_mask, "__builtin_ia32_vpshldv_v16hi_mask", IX86_BUILTIN_VPSHLDVV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v16hi_maskz, "__builtin_ia32_vpshldv_v16hi_maskz", IX86_BUILTIN_VPSHLDVV16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_UHI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi, "__builtin_ia32_vpshldv_v8hi", IX86_BUILTIN_VPSHLDVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_mask, "__builtin_ia32_vpshldv_v8hi_mask", IX86_BUILTIN_VPSHLDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_maskz, "__builtin_ia32_vpshldv_v8hi_maskz", IX86_BUILTIN_VPSHLDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_mask, "__builtin_ia32_vpshldv_v8hi_mask", IX86_BUILTIN_VPSHLDVV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8hi_maskz, "__builtin_ia32_vpshldv_v8hi_maskz", IX86_BUILTIN_VPSHLDVV8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si, "__builtin_ia32_vpshldv_v16si", IX86_BUILTIN_VPSHLDVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_mask, "__builtin_ia32_vpshldv_v16si_mask", IX86_BUILTIN_VPSHLDVV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v16si_maskz, "__builtin_ia32_vpshldv_v16si_maskz", IX86_BUILTIN_VPSHLDVV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si, "__builtin_ia32_vpshldv_v8si", IX86_BUILTIN_VPSHLDVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_mask, "__builtin_ia32_vpshldv_v8si_mask", IX86_BUILTIN_VPSHLDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_maskz, "__builtin_ia32_vpshldv_v8si_maskz", IX86_BUILTIN_VPSHLDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_mask, "__builtin_ia32_vpshldv_v8si_mask", IX86_BUILTIN_VPSHLDVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v8si_maskz, "__builtin_ia32_vpshldv_v8si_maskz", IX86_BUILTIN_VPSHLDVV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si, "__builtin_ia32_vpshldv_v4si", IX86_BUILTIN_VPSHLDVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_mask, "__builtin_ia32_vpshldv_v4si_mask", IX86_BUILTIN_VPSHLDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_maskz, "__builtin_ia32_vpshldv_v4si_maskz", IX86_BUILTIN_VPSHLDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_mask, "__builtin_ia32_vpshldv_v4si_mask", IX86_BUILTIN_VPSHLDVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4si_maskz, "__builtin_ia32_vpshldv_v4si_maskz", IX86_BUILTIN_VPSHLDVV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di, "__builtin_ia32_vpshldv_v8di", IX86_BUILTIN_VPSHLDVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_mask, "__builtin_ia32_vpshldv_v8di_mask", IX86_BUILTIN_VPSHLDVV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2, 0, CODE_FOR_vpshldv_v8di_maskz, "__builtin_ia32_vpshldv_v8di_maskz", IX86_BUILTIN_VPSHLDVV8DI_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di, "__builtin_ia32_vpshldv_v4di", IX86_BUILTIN_VPSHLDVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_mask, "__builtin_ia32_vpshldv_v4di_mask", IX86_BUILTIN_VPSHLDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_maskz, "__builtin_ia32_vpshldv_v4di_maskz", IX86_BUILTIN_VPSHLDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_mask, "__builtin_ia32_vpshldv_v4di_mask", IX86_BUILTIN_VPSHLDVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v4di_maskz, "__builtin_ia32_vpshldv_v4di_maskz", IX86_BUILTIN_VPSHLDVV4DI_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di, "__builtin_ia32_vpshldv_v2di", IX86_BUILTIN_VPSHLDVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_mask, "__builtin_ia32_vpshldv_v2di_mask", IX86_BUILTIN_VPSHLDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_maskz, "__builtin_ia32_vpshldv_v2di_maskz", IX86_BUILTIN_VPSHLDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_mask, "__builtin_ia32_vpshldv_v2di_mask", IX86_BUILTIN_VPSHLDVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpshldv_v2di_maskz, "__builtin_ia32_vpshldv_v2di_maskz", IX86_BUILTIN_VPSHLDVV2DI_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI)
|
||||||
|
|
||||||
|
/* GFNI */
|
||||||
|
BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_vgf2p8affineinvqb_v64qi, "__builtin_ia32_vgf2p8affineinvqb_v64qi", IX86_BUILTIN_VGF2P8AFFINEINVQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_INT)
|
||||||
|
@@ -2594,44 +2594,44 @@ BDESC (OPTION_MASK_ISA_GFNI | OPTION_MAS
|
||||||
|
/* VNNI */
|
||||||
|
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si, "__builtin_ia32_vpdpbusd_v8si", IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_mask, "__builtin_ia32_vpdpbusd_v8si_mask", IX86_BUILTIN_VPDPBUSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_maskz, "__builtin_ia32_vpdpbusd_v8si_maskz", IX86_BUILTIN_VPDPBUSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_mask, "__builtin_ia32_vpdpbusd_v8si_mask", IX86_BUILTIN_VPDPBUSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_maskz, "__builtin_ia32_vpdpbusd_v8si_maskz", IX86_BUILTIN_VPDPBUSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si, "__builtin_ia32_vpdpbusd_v4si", IX86_BUILTIN_VPDPBUSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_mask, "__builtin_ia32_vpdpbusd_v4si_mask", IX86_BUILTIN_VPDPBUSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_maskz, "__builtin_ia32_vpdpbusd_v4si_maskz", IX86_BUILTIN_VPDPBUSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_mask, "__builtin_ia32_vpdpbusd_v4si_mask", IX86_BUILTIN_VPDPBUSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_maskz, "__builtin_ia32_vpdpbusd_v4si_maskz", IX86_BUILTIN_VPDPBUSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
|
||||||
|
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si, "__builtin_ia32_vpdpbusds_v8si", IX86_BUILTIN_VPDPBUSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_mask, "__builtin_ia32_vpdpbusds_v8si_mask", IX86_BUILTIN_VPDPBUSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_maskz, "__builtin_ia32_vpdpbusds_v8si_maskz", IX86_BUILTIN_VPDPBUSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_mask, "__builtin_ia32_vpdpbusds_v8si_mask", IX86_BUILTIN_VPDPBUSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_maskz, "__builtin_ia32_vpdpbusds_v8si_maskz", IX86_BUILTIN_VPDPBUSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si, "__builtin_ia32_vpdpbusds_v4si", IX86_BUILTIN_VPDPBUSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_mask, "__builtin_ia32_vpdpbusds_v4si_mask", IX86_BUILTIN_VPDPBUSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_maskz, "__builtin_ia32_vpdpbusds_v4si_maskz", IX86_BUILTIN_VPDPBUSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_mask, "__builtin_ia32_vpdpbusds_v4si_mask", IX86_BUILTIN_VPDPBUSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_maskz, "__builtin_ia32_vpdpbusds_v4si_maskz", IX86_BUILTIN_VPDPBUSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
|
||||||
|
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si, "__builtin_ia32_vpdpwssd_v8si", IX86_BUILTIN_VPDPWSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_mask, "__builtin_ia32_vpdpwssd_v8si_mask", IX86_BUILTIN_VPDPWSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_maskz, "__builtin_ia32_vpdpwssd_v8si_maskz", IX86_BUILTIN_VPDPWSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_mask, "__builtin_ia32_vpdpwssd_v8si_mask", IX86_BUILTIN_VPDPWSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_maskz, "__builtin_ia32_vpdpwssd_v8si_maskz", IX86_BUILTIN_VPDPWSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si, "__builtin_ia32_vpdpwssd_v4si", IX86_BUILTIN_VPDPWSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_mask, "__builtin_ia32_vpdpwssd_v4si_mask", IX86_BUILTIN_VPDPWSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_maskz, "__builtin_ia32_vpdpwssd_v4si_maskz", IX86_BUILTIN_VPDPWSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_mask, "__builtin_ia32_vpdpwssd_v4si_mask", IX86_BUILTIN_VPDPWSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_maskz, "__builtin_ia32_vpdpwssd_v4si_maskz", IX86_BUILTIN_VPDPWSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
|
||||||
|
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si, "__builtin_ia32_vpdpwssds_v8si", IX86_BUILTIN_VPDPWSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_mask, "__builtin_ia32_vpdpwssds_v8si_mask", IX86_BUILTIN_VPDPWSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_maskz, "__builtin_ia32_vpdpwssds_v8si_maskz", IX86_BUILTIN_VPDPWSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_mask, "__builtin_ia32_vpdpwssds_v8si_mask", IX86_BUILTIN_VPDPWSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_maskz, "__builtin_ia32_vpdpwssds_v8si_maskz", IX86_BUILTIN_VPDPWSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI)
|
||||||
|
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si, "__builtin_ia32_vpdpwssds_v4si", IX86_BUILTIN_VPDPWSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_mask, "__builtin_ia32_vpdpwssds_v4si_mask", IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
|
||||||
|
-BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_mask, "__builtin_ia32_vpdpwssds_v4si_mask", IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
|
||||||
|
+BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
|
||||||
|
|
||||||
|
/* VPCLMULQDQ */
|
||||||
|
BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT)
|
||||||
|
diff -urpN a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
|
||||||
|
--- a/gcc/config/i386/i386-builtin-types.def 2020-03-12 07:07:21.000000000 -0400
|
||||||
|
+++ b/gcc/config/i386/i386-builtin-types.def 2020-12-17 20:46:53.868000000 -0500
|
||||||
|
@@ -1246,17 +1246,8 @@ DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, INT
|
||||||
|
DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, INT, V4SI, INT)
|
||||||
|
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, INT, V2DI, INT)
|
||||||
|
DEF_FUNCTION_TYPE (V32HI, V32HI, V32HI, V32HI)
|
||||||
|
-DEF_FUNCTION_TYPE (V32HI, V32HI, V32HI, V32HI, INT)
|
||||||
|
-DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, V16HI, INT)
|
||||||
|
-DEF_FUNCTION_TYPE (V8HI, V8HI, V8HI, V8HI, INT)
|
||||||
|
-DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, V8SI, INT)
|
||||||
|
-DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, V4SI, INT)
|
||||||
|
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI)
|
||||||
|
-DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, V8DI, INT)
|
||||||
|
-DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI, INT)
|
||||||
|
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI)
|
||||||
|
-DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, INT)
|
||||||
|
-DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI, INT)
|
||||||
|
|
||||||
|
# BITALG builtins
|
||||||
|
DEF_FUNCTION_TYPE (V4DI, V4DI)
|
||||||
|
diff -urpN a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
|
||||||
|
--- a/gcc/config/i386/i386-expand.c 2020-12-17 20:44:55.508000000 -0500
|
||||||
|
+++ b/gcc/config/i386/i386-expand.c 2020-12-17 20:46:53.872000000 -0500
|
||||||
|
@@ -9437,15 +9437,6 @@ ix86_expand_args_builtin (const struct b
|
||||||
|
case USI_FTYPE_V32HI_V32HI_INT_USI:
|
||||||
|
case UHI_FTYPE_V16HI_V16HI_INT_UHI:
|
||||||
|
case UQI_FTYPE_V8HI_V8HI_INT_UQI:
|
||||||
|
- case V32HI_FTYPE_V32HI_V32HI_V32HI_INT:
|
||||||
|
- case V16HI_FTYPE_V16HI_V16HI_V16HI_INT:
|
||||||
|
- case V8HI_FTYPE_V8HI_V8HI_V8HI_INT:
|
||||||
|
- case V8SI_FTYPE_V8SI_V8SI_V8SI_INT:
|
||||||
|
- case V4DI_FTYPE_V4DI_V4DI_V4DI_INT:
|
||||||
|
- case V8DI_FTYPE_V8DI_V8DI_V8DI_INT:
|
||||||
|
- case V16SI_FTYPE_V16SI_V16SI_V16SI_INT:
|
||||||
|
- case V2DI_FTYPE_V2DI_V2DI_V2DI_INT:
|
||||||
|
- case V4SI_FTYPE_V4SI_V4SI_V4SI_INT:
|
||||||
|
nargs = 4;
|
||||||
|
mask_pos = 1;
|
||||||
|
nargs_constant = 1;
|
||||||
64
reduction-paths-with-unhandled-live-stmt.patch
Normal file
64
reduction-paths-with-unhandled-live-stmt.patch
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of the patches are listed below in chronological order.
|
||||||
|
|
||||||
|
2686de5617bfb572343933be2883e8274c9735b5
|
||||||
|
0001-tree-optimization-97760-reduction-paths-with-unhandl.patch
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/pr97760.c b/gcc/testsuite/gcc.dg/vect/pr97760.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..da5ac937a43
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/pr97760.c
|
||||||
|
@@ -0,0 +1,26 @@
|
||||||
|
+#include "tree-vect.h"
|
||||||
|
+
|
||||||
|
+int b=1;
|
||||||
|
+static int *g = &b;
|
||||||
|
+
|
||||||
|
+void __attribute__((noipa))
|
||||||
|
+h (unsigned int n)
|
||||||
|
+{
|
||||||
|
+ int i = 3;
|
||||||
|
+ int f = 3;
|
||||||
|
+ for (; f <= 50; f += 4) {
|
||||||
|
+ i += 4;
|
||||||
|
+ *g = i;
|
||||||
|
+ i += n;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int main ()
|
||||||
|
+{
|
||||||
|
+ check_vect ();
|
||||||
|
+
|
||||||
|
+ h (9);
|
||||||
|
+ if (*g != 150 || b != 150)
|
||||||
|
+ __builtin_abort ();
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
|
||||||
|
index 977633a3ce3..39b7319e825 100644
|
||||||
|
--- a/gcc/tree-vect-loop.c
|
||||||
|
+++ b/gcc/tree-vect-loop.c
|
||||||
|
@@ -3326,14 +3326,17 @@ pop:
|
||||||
|
fail = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
- /* Check there's only a single stmt the op is used on inside
|
||||||
|
- of the loop. */
|
||||||
|
+ /* Check there's only a single stmt the op is used on. For the
|
||||||
|
+ not value-changing tail and the last stmt allow out-of-loop uses.
|
||||||
|
+ ??? We could relax this and handle arbitrary live stmts by
|
||||||
|
+ forcing a scalar epilogue for example. */
|
||||||
|
imm_use_iterator imm_iter;
|
||||||
|
gimple *op_use_stmt;
|
||||||
|
unsigned cnt = 0;
|
||||||
|
FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op)
|
||||||
|
if (!is_gimple_debug (op_use_stmt)
|
||||||
|
- && flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt)))
|
||||||
|
+ && (*code != ERROR_MARK
|
||||||
|
+ || flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt))))
|
||||||
|
{
|
||||||
|
/* We want to allow x + x but not x < 1 ? x : 2. */
|
||||||
|
if (is_gimple_assign (op_use_stmt)
|
||||||
|
|
||||||
486
redundant-loop-elimination.patch
Normal file
486
redundant-loop-elimination.patch
Normal file
@ -0,0 +1,486 @@
|
|||||||
|
diff -Nurp a/gcc/common.opt b/gcc/common.opt
|
||||||
|
--- a/gcc/common.opt 2020-11-23 03:24:54.760000000 -0500
|
||||||
|
+++ b/gcc/common.opt 2020-11-23 03:23:59.716000000 -0500
|
||||||
|
@@ -1150,6 +1150,10 @@ fcompare-elim
|
||||||
|
Common Report Var(flag_compare_elim_after_reload) Optimization
|
||||||
|
Perform comparison elimination after register allocation has finished.
|
||||||
|
|
||||||
|
+floop-elim
|
||||||
|
+Common Report Var(flag_loop_elim) Init(0) Optimization
|
||||||
|
+Perform redundant loop elimination.
|
||||||
|
+
|
||||||
|
fconserve-stack
|
||||||
|
Common Var(flag_conserve_stack) Optimization
|
||||||
|
Do not perform optimizations increasing noticeably stack usage.
|
||||||
|
diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||||
|
--- a/gcc/tree-ssa-phiopt.c 2020-11-23 03:24:54.760000000 -0500
|
||||||
|
+++ b/gcc/tree-ssa-phiopt.c 2020-11-23 03:27:42.824000000 -0500
|
||||||
|
@@ -71,6 +71,7 @@ static hash_set<tree> * get_non_trapping
|
||||||
|
static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree);
|
||||||
|
static void hoist_adjacent_loads (basic_block, basic_block,
|
||||||
|
basic_block, basic_block);
|
||||||
|
+static bool do_phiopt_pattern (basic_block, basic_block, basic_block);
|
||||||
|
static bool gate_hoist_loads (void);
|
||||||
|
|
||||||
|
/* This pass tries to transform conditional stores into unconditional
|
||||||
|
@@ -259,6 +260,10 @@ tree_ssa_phiopt_worker (bool do_store_el
|
||||||
|
hoist_adjacent_loads (bb, bb1, bb2, bb3);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
+ else if (flag_loop_elim && do_phiopt_pattern (bb, bb1, bb2))
|
||||||
|
+ {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
else
|
||||||
|
continue;
|
||||||
|
|
||||||
|
@@ -2899,6 +2904,449 @@ hoist_adjacent_loads (basic_block bb0, b
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+static bool check_uses (tree, hash_set<tree> *);
|
||||||
|
+
|
||||||
|
+/* Check SSA_NAME is used in
|
||||||
|
+ if (SSA_NAME == 0)
|
||||||
|
+ ...
|
||||||
|
+ or
|
||||||
|
+ if (SSA_NAME != 0)
|
||||||
|
+ ...
|
||||||
|
+*/
|
||||||
|
+static bool
|
||||||
|
+check_uses_cond (tree ssa_name, gimple *stmt,
|
||||||
|
+ hash_set<tree> *hset ATTRIBUTE_UNUSED)
|
||||||
|
+{
|
||||||
|
+ tree_code code = gimple_cond_code (stmt);
|
||||||
|
+ if (code != EQ_EXPR && code != NE_EXPR)
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ tree lhs = gimple_cond_lhs (stmt);
|
||||||
|
+ tree rhs = gimple_cond_rhs (stmt);
|
||||||
|
+ if ((lhs == ssa_name && integer_zerop (rhs))
|
||||||
|
+ || (rhs == ssa_name && integer_zerop (lhs)))
|
||||||
|
+ {
|
||||||
|
+ return true;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return false;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Check SSA_NAME is used in
|
||||||
|
+ _tmp = SSA_NAME == 0;
|
||||||
|
+ or
|
||||||
|
+ _tmp = SSA_NAME != 0;
|
||||||
|
+ or
|
||||||
|
+ _tmp = SSA_NAME | _tmp2;
|
||||||
|
+*/
|
||||||
|
+static bool
|
||||||
|
+check_uses_assign (tree ssa_name, gimple *stmt, hash_set<tree> *hset)
|
||||||
|
+{
|
||||||
|
+ tree_code code = gimple_assign_rhs_code (stmt);
|
||||||
|
+ tree lhs, rhs1, rhs2;
|
||||||
|
+
|
||||||
|
+ switch (code)
|
||||||
|
+ {
|
||||||
|
+ case EQ_EXPR:
|
||||||
|
+ case NE_EXPR:
|
||||||
|
+ rhs1 = gimple_assign_rhs1 (stmt);
|
||||||
|
+ rhs2 = gimple_assign_rhs2 (stmt);
|
||||||
|
+ if ((rhs1 == ssa_name && integer_zerop (rhs2))
|
||||||
|
+ || (rhs2 == ssa_name && integer_zerop (rhs1)))
|
||||||
|
+ {
|
||||||
|
+ return true;
|
||||||
|
+ }
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ case BIT_IOR_EXPR:
|
||||||
|
+ lhs = gimple_assign_lhs (stmt);
|
||||||
|
+ if (hset->contains (lhs))
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+ /* We should check the use of _tmp further. */
|
||||||
|
+ return check_uses (lhs, hset);
|
||||||
|
+
|
||||||
|
+ default:
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ return false;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Check SSA_NAME is used in
|
||||||
|
+ # result = PHI <SSA_NAME (bb1), 0 (bb2), 0 (bb3)>
|
||||||
|
+*/
|
||||||
|
+static bool
|
||||||
|
+check_uses_phi (tree ssa_name, gimple *stmt, hash_set<tree> *hset)
|
||||||
|
+{
|
||||||
|
+ for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++)
|
||||||
|
+ {
|
||||||
|
+ tree arg = gimple_phi_arg_def (stmt, i);
|
||||||
|
+ if (!integer_zerop (arg) && arg != ssa_name)
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ tree result = gimple_phi_result (stmt);
|
||||||
|
+
|
||||||
|
+ /* It is used to avoid infinite recursion,
|
||||||
|
+ <bb 1>
|
||||||
|
+ if (cond)
|
||||||
|
+ goto <bb 2>
|
||||||
|
+ else
|
||||||
|
+ goto <bb 3>
|
||||||
|
+
|
||||||
|
+ <bb 2>
|
||||||
|
+ # _tmp2 = PHI <0 (bb 1), _tmp3 (bb 3)>
|
||||||
|
+ {BODY}
|
||||||
|
+ if (cond)
|
||||||
|
+ goto <bb 3>
|
||||||
|
+ else
|
||||||
|
+ goto <bb 4>
|
||||||
|
+
|
||||||
|
+ <bb 3>
|
||||||
|
+ # _tmp3 = PHI <0 (bb 1), _tmp2 (bb 2)>
|
||||||
|
+ {BODY}
|
||||||
|
+ if (cond)
|
||||||
|
+ goto <bb 2>
|
||||||
|
+ else
|
||||||
|
+ goto <bb 4>
|
||||||
|
+
|
||||||
|
+ <bb 4>
|
||||||
|
+ ...
|
||||||
|
+ */
|
||||||
|
+ if (hset->contains (result))
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return check_uses (result, hset);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Check the use of SSA_NAME, it should only be used in comparison
|
||||||
|
+ operation and PHI node. HSET is used to record the ssa_names
|
||||||
|
+ that have been already checked. */
|
||||||
|
+static bool
|
||||||
|
+check_uses (tree ssa_name, hash_set<tree> *hset)
|
||||||
|
+{
|
||||||
|
+ imm_use_iterator imm_iter;
|
||||||
|
+ use_operand_p use_p;
|
||||||
|
+
|
||||||
|
+ if (TREE_CODE (ssa_name) != SSA_NAME)
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (SSA_NAME_VAR (ssa_name)
|
||||||
|
+ && is_global_var (SSA_NAME_VAR (ssa_name)))
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ hset->add (ssa_name);
|
||||||
|
+
|
||||||
|
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name)
|
||||||
|
+ {
|
||||||
|
+ gimple *stmt = USE_STMT (use_p);
|
||||||
|
+
|
||||||
|
+ /* Ignore debug gimple statements. */
|
||||||
|
+ if (is_gimple_debug (stmt))
|
||||||
|
+ {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ switch (gimple_code (stmt))
|
||||||
|
+ {
|
||||||
|
+ case GIMPLE_COND:
|
||||||
|
+ if (!check_uses_cond (ssa_name, stmt, hset))
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ case GIMPLE_ASSIGN:
|
||||||
|
+ if (!check_uses_assign (ssa_name, stmt, hset))
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ case GIMPLE_PHI:
|
||||||
|
+ if (!check_uses_phi (ssa_name, stmt, hset))
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ default:
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static bool
|
||||||
|
+check_def_gimple (gimple *def1, gimple *def2, tree result)
|
||||||
|
+{
|
||||||
|
+ /* def1 and def2 should be POINTER_PLUS_EXPR. */
|
||||||
|
+ if (!is_gimple_assign (def1) || !is_gimple_assign (def2)
|
||||||
|
+ || gimple_assign_rhs_code (def1) != POINTER_PLUS_EXPR
|
||||||
|
+ || gimple_assign_rhs_code (def2) != POINTER_PLUS_EXPR)
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ tree rhs12 = gimple_assign_rhs2 (def1);
|
||||||
|
+
|
||||||
|
+ tree rhs21 = gimple_assign_rhs1 (def2);
|
||||||
|
+ tree rhs22 = gimple_assign_rhs2 (def2);
|
||||||
|
+
|
||||||
|
+ if (rhs21 != result)
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* We should have a positive pointer-plus constant to ensure
|
||||||
|
+ that the pointer value is continuously increasing. */
|
||||||
|
+ if (TREE_CODE (rhs12) != INTEGER_CST || TREE_CODE (rhs22) != INTEGER_CST
|
||||||
|
+ || compare_tree_int (rhs12, 0) <= 0 || compare_tree_int (rhs22, 0) <= 0)
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static bool
|
||||||
|
+check_loop_body (basic_block bb0, basic_block bb2, tree result)
|
||||||
|
+{
|
||||||
|
+ gimple *g01 = first_stmt (bb0);
|
||||||
|
+ if (!g01 || !is_gimple_assign (g01)
|
||||||
|
+ || gimple_assign_rhs_code (g01) != MEM_REF
|
||||||
|
+ || TREE_OPERAND (gimple_assign_rhs1 (g01), 0) != result)
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ gimple *g02 = g01->next;
|
||||||
|
+ /* GIMPLE_COND would be the last gimple in a basic block,
|
||||||
|
+ and have no other side effects on RESULT. */
|
||||||
|
+ if (!g02 || gimple_code (g02) != GIMPLE_COND)
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (first_stmt (bb2) != last_stmt (bb2))
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Pattern is like
|
||||||
|
+ <pre bb>
|
||||||
|
+ arg1 = base (rhs11) + cst (rhs12); [def1]
|
||||||
|
+ goto <bb 0>
|
||||||
|
+
|
||||||
|
+ <bb 2>
|
||||||
|
+ arg2 = result (rhs21) + cst (rhs22); [def2]
|
||||||
|
+
|
||||||
|
+ <bb 0>
|
||||||
|
+ # result = PHI <arg1 (pre bb), arg2 (bb 2)>
|
||||||
|
+ _v = *result; [g01]
|
||||||
|
+ if (_v == 0) [g02]
|
||||||
|
+ goto <bb 1>
|
||||||
|
+ else
|
||||||
|
+ goto <bb 2>
|
||||||
|
+
|
||||||
|
+ <bb 1>
|
||||||
|
+ _1 = result - base; [g1]
|
||||||
|
+ _2 = _1 /[ex] cst; [g2]
|
||||||
|
+ _3 = (unsigned int) _2; [g3]
|
||||||
|
+ if (_3 == 0)
|
||||||
|
+ ...
|
||||||
|
+*/
|
||||||
|
+static bool
|
||||||
|
+check_bb_order (basic_block bb0, basic_block &bb1, basic_block &bb2,
|
||||||
|
+ gphi *phi_stmt, gimple *&output)
|
||||||
|
+{
|
||||||
|
+ /* Start check from PHI node in BB0. */
|
||||||
|
+ if (gimple_phi_num_args (phi_stmt) != 2
|
||||||
|
+ || virtual_operand_p (gimple_phi_result (phi_stmt)))
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ tree result = gimple_phi_result (phi_stmt);
|
||||||
|
+ tree arg1 = gimple_phi_arg_def (phi_stmt, 0);
|
||||||
|
+ tree arg2 = gimple_phi_arg_def (phi_stmt, 1);
|
||||||
|
+
|
||||||
|
+ if (TREE_CODE (arg1) != SSA_NAME
|
||||||
|
+ || TREE_CODE (arg2) != SSA_NAME
|
||||||
|
+ || SSA_NAME_IS_DEFAULT_DEF (arg1)
|
||||||
|
+ || SSA_NAME_IS_DEFAULT_DEF (arg2))
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ gimple *def1 = SSA_NAME_DEF_STMT (arg1);
|
||||||
|
+ gimple *def2 = SSA_NAME_DEF_STMT (arg2);
|
||||||
|
+
|
||||||
|
+ /* Swap bb1 and bb2 if pattern is like
|
||||||
|
+ if (_v != 0)
|
||||||
|
+ goto <bb 2>
|
||||||
|
+ else
|
||||||
|
+ goto <bb 1>
|
||||||
|
+ */
|
||||||
|
+ if (gimple_bb (def2) == bb1 && EDGE_SUCC (bb1, 0)->dest == bb0)
|
||||||
|
+ {
|
||||||
|
+ std::swap (bb1, bb2);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* prebb[def1] --> bb0 <-- bb2[def2] */
|
||||||
|
+ if (!gimple_bb (def1)
|
||||||
|
+ || EDGE_SUCC (gimple_bb (def1), 0)->dest != bb0
|
||||||
|
+ || gimple_bb (def2) != bb2 || EDGE_SUCC (bb2, 0)->dest != bb0)
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Check whether define gimple meets the pattern requirements. */
|
||||||
|
+ if (!check_def_gimple (def1, def2, result))
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (!check_loop_body (bb0, bb2, result))
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ output = def1;
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Check pattern
|
||||||
|
+ <bb 1>
|
||||||
|
+ _1 = result - base; [g1]
|
||||||
|
+ _2 = _1 /[ex] cst; [g2]
|
||||||
|
+ _3 = (unsigned int) _2; [g3]
|
||||||
|
+ if (_3 == 0)
|
||||||
|
+ ...
|
||||||
|
+*/
|
||||||
|
+static bool
|
||||||
|
+check_gimple_order (basic_block bb1, tree base, tree cst, tree result,
|
||||||
|
+ gimple *&output)
|
||||||
|
+{
|
||||||
|
+ gimple *g1 = first_stmt (bb1);
|
||||||
|
+ if (!g1 || !is_gimple_assign (g1)
|
||||||
|
+ || gimple_assign_rhs_code (g1) != POINTER_DIFF_EXPR
|
||||||
|
+ || gimple_assign_rhs1 (g1) != result
|
||||||
|
+ || gimple_assign_rhs2 (g1) != base)
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ gimple *g2 = g1->next;
|
||||||
|
+ if (!g2 || !is_gimple_assign (g2)
|
||||||
|
+ || gimple_assign_rhs_code (g2) != EXACT_DIV_EXPR
|
||||||
|
+ || gimple_assign_lhs (g1) != gimple_assign_rhs1 (g2)
|
||||||
|
+ || TREE_CODE (gimple_assign_rhs2 (g2)) != INTEGER_CST)
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* INTEGER_CST cst in gimple def1. */
|
||||||
|
+ HOST_WIDE_INT num1 = TREE_INT_CST_LOW (cst);
|
||||||
|
+ /* INTEGER_CST cst in gimple g2. */
|
||||||
|
+ HOST_WIDE_INT num2 = TREE_INT_CST_LOW (gimple_assign_rhs2 (g2));
|
||||||
|
+ /* _2 must be at least a positive number. */
|
||||||
|
+ if (num2 == 0 || num1 / num2 <= 0)
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ gimple *g3 = g2->next;
|
||||||
|
+ if (!g3 || !is_gimple_assign (g3)
|
||||||
|
+ || gimple_assign_rhs_code (g3) != NOP_EXPR
|
||||||
|
+ || gimple_assign_lhs (g2) != gimple_assign_rhs1 (g3)
|
||||||
|
+ || TREE_CODE (gimple_assign_lhs (g3)) != SSA_NAME)
|
||||||
|
+ {
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* _3 should only be used in comparison operation or PHI node. */
|
||||||
|
+ hash_set<tree> *hset = new hash_set<tree>;
|
||||||
|
+ if (!check_uses (gimple_assign_lhs (g3), hset))
|
||||||
|
+ {
|
||||||
|
+ delete hset;
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+ delete hset;
|
||||||
|
+
|
||||||
|
+ output = g3;
|
||||||
|
+ return true;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static bool
|
||||||
|
+do_phiopt_pattern (basic_block bb0, basic_block bb1, basic_block bb2)
|
||||||
|
+{
|
||||||
|
+ gphi_iterator gsi;
|
||||||
|
+
|
||||||
|
+ for (gsi = gsi_start_phis (bb0); !gsi_end_p (gsi); gsi_next (&gsi))
|
||||||
|
+ {
|
||||||
|
+ gphi *phi_stmt = gsi.phi ();
|
||||||
|
+ gimple *def1 = NULL;
|
||||||
|
+ tree base, cst, result;
|
||||||
|
+
|
||||||
|
+ if (!check_bb_order (bb0, bb1, bb2, phi_stmt, def1))
|
||||||
|
+ {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ base = gimple_assign_rhs1 (def1);
|
||||||
|
+ cst = gimple_assign_rhs2 (def1);
|
||||||
|
+ result = gimple_phi_result (phi_stmt);
|
||||||
|
+
|
||||||
|
+ gimple *stmt = NULL;
|
||||||
|
+ if (!check_gimple_order (bb1, base, cst, result, stmt))
|
||||||
|
+ {
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ gcc_assert (stmt);
|
||||||
|
+
|
||||||
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||||
|
+ {
|
||||||
|
+ fprintf (dump_file, "PHIOPT pattern optimization (1) - Rewrite:\n");
|
||||||
|
+ print_gimple_stmt (dump_file, stmt, 0);
|
||||||
|
+ fprintf (dump_file, "to\n");
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Rewrite statement
|
||||||
|
+ _3 = (unsigned int) _2;
|
||||||
|
+ to
|
||||||
|
+ _3 = (unsigned int) 1;
|
||||||
|
+ */
|
||||||
|
+ tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
|
||||||
|
+ gimple_assign_set_rhs1 (stmt, build_int_cst (type, 1));
|
||||||
|
+ update_stmt (stmt);
|
||||||
|
+
|
||||||
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||||
|
+ {
|
||||||
|
+ print_gimple_stmt (dump_file, stmt, 0);
|
||||||
|
+ fprintf (dump_file, "\n");
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return true;
|
||||||
|
+ }
|
||||||
|
+ return false;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Determine whether we should attempt to hoist adjacent loads out of
|
||||||
|
diamond patterns in pass_phiopt. Always hoist loads if
|
||||||
|
-fhoist-adjacent-loads is specified and the target machine has
|
||||||
2397
sccvn-Improve-handling-of-load-masked-with-integer.patch
Normal file
2397
sccvn-Improve-handling-of-load-masked-with-integer.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,9 @@
|
|||||||
|
This backport contains 1 patch from gcc main stream tree.
|
||||||
|
The commit ids of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-expand-Simplify-removing-subregs-when-expanding-a-co.patch
|
||||||
|
9a182ef9ee011935d827ab5c6c9a7cd8e22257d8
|
||||||
|
|
||||||
diff -Nurp a/gcc/expr.c b/gcc/expr.c
|
diff -Nurp a/gcc/expr.c b/gcc/expr.c
|
||||||
--- a/gcc/expr.c 2020-08-05 20:33:04.068000000 +0800
|
--- a/gcc/expr.c 2020-08-05 20:33:04.068000000 +0800
|
||||||
+++ b/gcc/expr.c 2020-08-05 20:33:21.420000000 +0800
|
+++ b/gcc/expr.c 2020-08-05 20:33:21.420000000 +0800
|
||||||
|
|||||||
718
speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch
Normal file
718
speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch
Normal file
@ -0,0 +1,718 @@
|
|||||||
|
This backport contains 2 patches from gcc mainstream tree.
|
||||||
|
The commit ids of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
728c2e5eeaa91cf708f2b1b1f996653a7eebae59
|
||||||
|
0001-modulo-sched-speed-up-DDG-analysis-PR90001.patch
|
||||||
|
|
||||||
|
06d5d63d9944691bb4286e5f6b2422cc97148336
|
||||||
|
0001-modulo-sched-fix-bootstrap-compare-debug-issue.patch
|
||||||
|
|
||||||
|
diff -Nurp a/gcc/ddg.c b/gcc/ddg.c
|
||||||
|
--- a/gcc/ddg.c 2020-11-28 18:40:12.371633230 +0800
|
||||||
|
+++ b/gcc/ddg.c 2020-11-28 18:38:33.835633230 +0800
|
||||||
|
@@ -32,9 +32,6 @@ along with GCC; see the file COPYING3.
|
||||||
|
|
||||||
|
#ifdef INSN_SCHEDULING
|
||||||
|
|
||||||
|
-/* A flag indicating that a ddg edge belongs to an SCC or not. */
|
||||||
|
-enum edge_flag {NOT_IN_SCC = 0, IN_SCC};
|
||||||
|
-
|
||||||
|
/* Forward declarations. */
|
||||||
|
static void add_backarc_to_ddg (ddg_ptr, ddg_edge_ptr);
|
||||||
|
static void add_backarc_to_scc (ddg_scc_ptr, ddg_edge_ptr);
|
||||||
|
@@ -188,9 +185,6 @@ create_ddg_dep_from_intra_loop_link (ddg
|
||||||
|
else if (DEP_TYPE (link) == REG_DEP_OUTPUT)
|
||||||
|
t = OUTPUT_DEP;
|
||||||
|
|
||||||
|
- gcc_assert (!DEBUG_INSN_P (dest_node->insn) || t == ANTI_DEP);
|
||||||
|
- gcc_assert (!DEBUG_INSN_P (src_node->insn) || t == ANTI_DEP);
|
||||||
|
-
|
||||||
|
/* We currently choose not to create certain anti-deps edges and
|
||||||
|
compensate for that by generating reg-moves based on the life-range
|
||||||
|
analysis. The anti-deps that will be deleted are the ones which
|
||||||
|
@@ -225,9 +219,9 @@ create_ddg_dep_from_intra_loop_link (ddg
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- latency = dep_cost (link);
|
||||||
|
- e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
|
||||||
|
- add_edge_to_ddg (g, e);
|
||||||
|
+ latency = dep_cost (link);
|
||||||
|
+ e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance);
|
||||||
|
+ add_edge_to_ddg (g, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The same as the above function, but it doesn't require a link parameter. */
|
||||||
|
@@ -240,9 +234,6 @@ create_ddg_dep_no_link (ddg_ptr g, ddg_n
|
||||||
|
enum reg_note dep_kind;
|
||||||
|
struct _dep _dep, *dep = &_dep;
|
||||||
|
|
||||||
|
- gcc_assert (!DEBUG_INSN_P (to->insn) || d_t == ANTI_DEP);
|
||||||
|
- gcc_assert (!DEBUG_INSN_P (from->insn) || d_t == ANTI_DEP);
|
||||||
|
-
|
||||||
|
if (d_t == ANTI_DEP)
|
||||||
|
dep_kind = REG_DEP_ANTI;
|
||||||
|
else if (d_t == OUTPUT_DEP)
|
||||||
|
@@ -275,16 +266,15 @@ create_ddg_dep_no_link (ddg_ptr g, ddg_n
|
||||||
|
static void
|
||||||
|
add_cross_iteration_register_deps (ddg_ptr g, df_ref last_def)
|
||||||
|
{
|
||||||
|
- int regno = DF_REF_REGNO (last_def);
|
||||||
|
struct df_link *r_use;
|
||||||
|
int has_use_in_bb_p = false;
|
||||||
|
- rtx_insn *def_insn = DF_REF_INSN (last_def);
|
||||||
|
- ddg_node_ptr last_def_node = get_node_of_insn (g, def_insn);
|
||||||
|
- ddg_node_ptr use_node;
|
||||||
|
+ int regno = DF_REF_REGNO (last_def);
|
||||||
|
+ ddg_node_ptr last_def_node = get_node_of_insn (g, DF_REF_INSN (last_def));
|
||||||
|
df_ref first_def = df_bb_regno_first_def_find (g->bb, regno);
|
||||||
|
+ ddg_node_ptr first_def_node = get_node_of_insn (g, DF_REF_INSN (first_def));
|
||||||
|
+ ddg_node_ptr use_node;
|
||||||
|
|
||||||
|
- gcc_assert (last_def_node);
|
||||||
|
- gcc_assert (first_def);
|
||||||
|
+ gcc_assert (last_def_node && first_def && first_def_node);
|
||||||
|
|
||||||
|
if (flag_checking && DF_REF_ID (last_def) != DF_REF_ID (first_def))
|
||||||
|
{
|
||||||
|
@@ -303,6 +293,9 @@ add_cross_iteration_register_deps (ddg_p
|
||||||
|
|
||||||
|
rtx_insn *use_insn = DF_REF_INSN (r_use->ref);
|
||||||
|
|
||||||
|
+ if (DEBUG_INSN_P (use_insn))
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
/* ??? Do not handle uses with DF_REF_IN_NOTE notes. */
|
||||||
|
use_node = get_node_of_insn (g, use_insn);
|
||||||
|
gcc_assert (use_node);
|
||||||
|
@@ -313,35 +306,28 @@ add_cross_iteration_register_deps (ddg_p
|
||||||
|
iteration. Any such upwards exposed use appears before
|
||||||
|
the last_def def. */
|
||||||
|
create_ddg_dep_no_link (g, last_def_node, use_node,
|
||||||
|
- DEBUG_INSN_P (use_insn) ? ANTI_DEP : TRUE_DEP,
|
||||||
|
- REG_DEP, 1);
|
||||||
|
+ TRUE_DEP, REG_DEP, 1);
|
||||||
|
}
|
||||||
|
- else if (!DEBUG_INSN_P (use_insn))
|
||||||
|
+ else
|
||||||
|
{
|
||||||
|
/* Add anti deps from last_def's uses in the current iteration
|
||||||
|
to the first def in the next iteration. We do not add ANTI
|
||||||
|
dep when there is an intra-loop TRUE dep in the opposite
|
||||||
|
direction, but use regmoves to fix such disregarded ANTI
|
||||||
|
deps when broken. If the first_def reaches the USE then
|
||||||
|
- there is such a dep. */
|
||||||
|
- ddg_node_ptr first_def_node = get_node_of_insn (g,
|
||||||
|
- DF_REF_INSN (first_def));
|
||||||
|
-
|
||||||
|
- gcc_assert (first_def_node);
|
||||||
|
-
|
||||||
|
- /* Always create the edge if the use node is a branch in
|
||||||
|
- order to prevent the creation of reg-moves.
|
||||||
|
- If the address that is being auto-inc or auto-dec in LAST_DEF
|
||||||
|
- is used in USE_INSN then do not remove the edge to make sure
|
||||||
|
- reg-moves will not be created for that address. */
|
||||||
|
- if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
|
||||||
|
- || !flag_modulo_sched_allow_regmoves
|
||||||
|
+ there is such a dep.
|
||||||
|
+ Always create the edge if the use node is a branch in
|
||||||
|
+ order to prevent the creation of reg-moves.
|
||||||
|
+ If the address that is being auto-inc or auto-dec in LAST_DEF
|
||||||
|
+ is used in USE_INSN then do not remove the edge to make sure
|
||||||
|
+ reg-moves will not be created for that address. */
|
||||||
|
+ if (DF_REF_ID (last_def) != DF_REF_ID (first_def)
|
||||||
|
+ || !flag_modulo_sched_allow_regmoves
|
||||||
|
|| JUMP_P (use_node->insn)
|
||||||
|
- || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)
|
||||||
|
+ || autoinc_var_is_used_p (DF_REF_INSN (last_def), use_insn)
|
||||||
|
|| def_has_ccmode_p (DF_REF_INSN (last_def)))
|
||||||
|
- create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
|
||||||
|
- REG_DEP, 1);
|
||||||
|
-
|
||||||
|
+ create_ddg_dep_no_link (g, use_node, first_def_node, ANTI_DEP,
|
||||||
|
+ REG_DEP, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Create an inter-loop output dependence between LAST_DEF (which is the
|
||||||
|
@@ -351,19 +337,11 @@ add_cross_iteration_register_deps (ddg_p
|
||||||
|
defs starting with a true dependence to a use which can be in the
|
||||||
|
next iteration; followed by an anti dependence of that use to the
|
||||||
|
first def (i.e. if there is a use between the two defs.) */
|
||||||
|
- if (!has_use_in_bb_p)
|
||||||
|
- {
|
||||||
|
- ddg_node_ptr dest_node;
|
||||||
|
-
|
||||||
|
- if (DF_REF_ID (last_def) == DF_REF_ID (first_def))
|
||||||
|
- return;
|
||||||
|
-
|
||||||
|
- dest_node = get_node_of_insn (g, DF_REF_INSN (first_def));
|
||||||
|
- gcc_assert (dest_node);
|
||||||
|
- create_ddg_dep_no_link (g, last_def_node, dest_node,
|
||||||
|
- OUTPUT_DEP, REG_DEP, 1);
|
||||||
|
- }
|
||||||
|
+ if (!has_use_in_bb_p && DF_REF_ID (last_def) != DF_REF_ID (first_def))
|
||||||
|
+ create_ddg_dep_no_link (g, last_def_node, first_def_node,
|
||||||
|
+ OUTPUT_DEP, REG_DEP, 1);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
/* Build inter-loop dependencies, by looking at DF analysis backwards. */
|
||||||
|
static void
|
||||||
|
build_inter_loop_deps (ddg_ptr g)
|
||||||
|
@@ -420,13 +398,9 @@ add_intra_loop_mem_dep (ddg_ptr g, ddg_n
|
||||||
|
if (mem_write_insn_p (from->insn))
|
||||||
|
{
|
||||||
|
if (mem_read_insn_p (to->insn))
|
||||||
|
- create_ddg_dep_no_link (g, from, to,
|
||||||
|
- DEBUG_INSN_P (to->insn)
|
||||||
|
- ? ANTI_DEP : TRUE_DEP, MEM_DEP, 0);
|
||||||
|
+ create_ddg_dep_no_link (g, from, to, TRUE_DEP, MEM_DEP, 0);
|
||||||
|
else
|
||||||
|
- create_ddg_dep_no_link (g, from, to,
|
||||||
|
- DEBUG_INSN_P (to->insn)
|
||||||
|
- ? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 0);
|
||||||
|
+ create_ddg_dep_no_link (g, from, to, OUTPUT_DEP, MEM_DEP, 0);
|
||||||
|
}
|
||||||
|
else if (!mem_read_insn_p (to->insn))
|
||||||
|
create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 0);
|
||||||
|
@@ -444,13 +418,9 @@ add_inter_loop_mem_dep (ddg_ptr g, ddg_n
|
||||||
|
if (mem_write_insn_p (from->insn))
|
||||||
|
{
|
||||||
|
if (mem_read_insn_p (to->insn))
|
||||||
|
- create_ddg_dep_no_link (g, from, to,
|
||||||
|
- DEBUG_INSN_P (to->insn)
|
||||||
|
- ? ANTI_DEP : TRUE_DEP, MEM_DEP, 1);
|
||||||
|
+ create_ddg_dep_no_link (g, from, to, TRUE_DEP, MEM_DEP, 1);
|
||||||
|
else if (from->cuid != to->cuid)
|
||||||
|
- create_ddg_dep_no_link (g, from, to,
|
||||||
|
- DEBUG_INSN_P (to->insn)
|
||||||
|
- ? ANTI_DEP : OUTPUT_DEP, MEM_DEP, 1);
|
||||||
|
+ create_ddg_dep_no_link (g, from, to, OUTPUT_DEP, MEM_DEP, 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
@@ -459,13 +429,9 @@ add_inter_loop_mem_dep (ddg_ptr g, ddg_n
|
||||||
|
else if (from->cuid != to->cuid)
|
||||||
|
{
|
||||||
|
create_ddg_dep_no_link (g, from, to, ANTI_DEP, MEM_DEP, 1);
|
||||||
|
- if (DEBUG_INSN_P (from->insn) || DEBUG_INSN_P (to->insn))
|
||||||
|
- create_ddg_dep_no_link (g, to, from, ANTI_DEP, MEM_DEP, 1);
|
||||||
|
- else
|
||||||
|
- create_ddg_dep_no_link (g, to, from, TRUE_DEP, MEM_DEP, 1);
|
||||||
|
+ create_ddg_dep_no_link (g, to, from, TRUE_DEP, MEM_DEP, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
-
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Perform intra-block Data Dependency analysis and connect the nodes in
|
||||||
|
@@ -494,20 +460,10 @@ build_intra_loop_deps (ddg_ptr g)
|
||||||
|
sd_iterator_def sd_it;
|
||||||
|
dep_t dep;
|
||||||
|
|
||||||
|
- if (! INSN_P (dest_node->insn))
|
||||||
|
- continue;
|
||||||
|
-
|
||||||
|
FOR_EACH_DEP (dest_node->insn, SD_LIST_BACK, sd_it, dep)
|
||||||
|
{
|
||||||
|
rtx_insn *src_insn = DEP_PRO (dep);
|
||||||
|
- ddg_node_ptr src_node;
|
||||||
|
-
|
||||||
|
- /* Don't add dependencies on debug insns to non-debug insns
|
||||||
|
- to avoid codegen differences between -g and -g0. */
|
||||||
|
- if (DEBUG_INSN_P (src_insn) && !DEBUG_INSN_P (dest_node->insn))
|
||||||
|
- continue;
|
||||||
|
-
|
||||||
|
- src_node = get_node_of_insn (g, src_insn);
|
||||||
|
+ ddg_node_ptr src_node = get_node_of_insn (g, src_insn);
|
||||||
|
|
||||||
|
if (!src_node)
|
||||||
|
continue;
|
||||||
|
@@ -524,8 +480,7 @@ build_intra_loop_deps (ddg_ptr g)
|
||||||
|
for (j = 0; j <= i; j++)
|
||||||
|
{
|
||||||
|
ddg_node_ptr j_node = &g->nodes[j];
|
||||||
|
- if (DEBUG_INSN_P (j_node->insn))
|
||||||
|
- continue;
|
||||||
|
+
|
||||||
|
if (mem_access_insn_p (j_node->insn))
|
||||||
|
{
|
||||||
|
/* Don't bother calculating inter-loop dep if an intra-loop dep
|
||||||
|
@@ -564,7 +519,7 @@ create_ddg (basic_block bb, int closing_
|
||||||
|
{
|
||||||
|
ddg_ptr g;
|
||||||
|
rtx_insn *insn, *first_note;
|
||||||
|
- int i;
|
||||||
|
+ int i, j;
|
||||||
|
int num_nodes = 0;
|
||||||
|
|
||||||
|
g = (ddg_ptr) xcalloc (1, sizeof (struct ddg));
|
||||||
|
@@ -576,23 +531,21 @@ create_ddg (basic_block bb, int closing_
|
||||||
|
for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
|
||||||
|
insn = NEXT_INSN (insn))
|
||||||
|
{
|
||||||
|
- if (! INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
|
||||||
|
+ if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
- if (DEBUG_INSN_P (insn))
|
||||||
|
- g->num_debug++;
|
||||||
|
- else
|
||||||
|
+ if (NONDEBUG_INSN_P (insn))
|
||||||
|
{
|
||||||
|
if (mem_read_insn_p (insn))
|
||||||
|
g->num_loads++;
|
||||||
|
if (mem_write_insn_p (insn))
|
||||||
|
g->num_stores++;
|
||||||
|
+ num_nodes++;
|
||||||
|
}
|
||||||
|
- num_nodes++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* There is nothing to do for this BB. */
|
||||||
|
- if ((num_nodes - g->num_debug) <= 1)
|
||||||
|
+ if (num_nodes <= 1)
|
||||||
|
{
|
||||||
|
free (g);
|
||||||
|
return NULL;
|
||||||
|
@@ -607,32 +560,39 @@ create_ddg (basic_block bb, int closing_
|
||||||
|
for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
|
||||||
|
insn = NEXT_INSN (insn))
|
||||||
|
{
|
||||||
|
- if (! INSN_P (insn))
|
||||||
|
- {
|
||||||
|
- if (! first_note && NOTE_P (insn)
|
||||||
|
- && NOTE_KIND (insn) != NOTE_INSN_BASIC_BLOCK)
|
||||||
|
- first_note = insn;
|
||||||
|
- continue;
|
||||||
|
- }
|
||||||
|
+ if (LABEL_P (insn) || NOTE_INSN_BASIC_BLOCK_P (insn))
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ if (!first_note && (INSN_P (insn) || NOTE_P (insn)))
|
||||||
|
+ first_note = insn;
|
||||||
|
+
|
||||||
|
+ if (!INSN_P (insn) || GET_CODE (PATTERN (insn)) == USE)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
if (JUMP_P (insn))
|
||||||
|
{
|
||||||
|
gcc_assert (!g->closing_branch);
|
||||||
|
g->closing_branch = &g->nodes[i];
|
||||||
|
}
|
||||||
|
- else if (GET_CODE (PATTERN (insn)) == USE)
|
||||||
|
+
|
||||||
|
+ if (NONDEBUG_INSN_P (insn))
|
||||||
|
{
|
||||||
|
- if (! first_note)
|
||||||
|
- first_note = insn;
|
||||||
|
- continue;
|
||||||
|
- }
|
||||||
|
+ g->nodes[i].cuid = i;
|
||||||
|
+ g->nodes[i].successors = sbitmap_alloc (num_nodes);
|
||||||
|
+ bitmap_clear (g->nodes[i].successors);
|
||||||
|
+ g->nodes[i].predecessors = sbitmap_alloc (num_nodes);
|
||||||
|
+ bitmap_clear (g->nodes[i].predecessors);
|
||||||
|
+
|
||||||
|
+ gcc_checking_assert (first_note);
|
||||||
|
+ g->nodes[i].first_note = first_note;
|
||||||
|
+
|
||||||
|
+ g->nodes[i].aux.count = -1;
|
||||||
|
+ g->nodes[i].max_dist = XCNEWVEC (int, num_nodes);
|
||||||
|
+ for (j = 0; j < num_nodes; j++)
|
||||||
|
+ g->nodes[i].max_dist[j] = -1;
|
||||||
|
|
||||||
|
- g->nodes[i].cuid = i;
|
||||||
|
- g->nodes[i].successors = sbitmap_alloc (num_nodes);
|
||||||
|
- bitmap_clear (g->nodes[i].successors);
|
||||||
|
- g->nodes[i].predecessors = sbitmap_alloc (num_nodes);
|
||||||
|
- bitmap_clear (g->nodes[i].predecessors);
|
||||||
|
- g->nodes[i].first_note = (first_note ? first_note : insn);
|
||||||
|
- g->nodes[i++].insn = insn;
|
||||||
|
+ g->nodes[i++].insn = insn;
|
||||||
|
+ }
|
||||||
|
first_note = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -668,6 +628,7 @@ free_ddg (ddg_ptr g)
|
||||||
|
}
|
||||||
|
sbitmap_free (g->nodes[i].successors);
|
||||||
|
sbitmap_free (g->nodes[i].predecessors);
|
||||||
|
+ free (g->nodes[i].max_dist);
|
||||||
|
}
|
||||||
|
if (g->num_backarcs > 0)
|
||||||
|
free (g->backarcs);
|
||||||
|
@@ -792,7 +753,7 @@ create_ddg_edge (ddg_node_ptr src, ddg_n
|
||||||
|
e->latency = l;
|
||||||
|
e->distance = d;
|
||||||
|
e->next_in = e->next_out = NULL;
|
||||||
|
- e->aux.info = 0;
|
||||||
|
+ e->in_scc = false;
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -820,7 +781,7 @@ add_edge_to_ddg (ddg_ptr g ATTRIBUTE_UNU
|
||||||
|
for now that cycles in the data dependence graph contain a single backarc.
|
||||||
|
This simplifies the algorithm, and can be generalized later. */
|
||||||
|
static void
|
||||||
|
-set_recurrence_length (ddg_scc_ptr scc, ddg_ptr g)
|
||||||
|
+set_recurrence_length (ddg_scc_ptr scc)
|
||||||
|
{
|
||||||
|
int j;
|
||||||
|
int result = -1;
|
||||||
|
@@ -828,17 +789,14 @@ set_recurrence_length (ddg_scc_ptr scc,
|
||||||
|
for (j = 0; j < scc->num_backarcs; j++)
|
||||||
|
{
|
||||||
|
ddg_edge_ptr backarc = scc->backarcs[j];
|
||||||
|
- int length;
|
||||||
|
int distance = backarc->distance;
|
||||||
|
ddg_node_ptr src = backarc->dest;
|
||||||
|
ddg_node_ptr dest = backarc->src;
|
||||||
|
+ int length = src->max_dist[dest->cuid];
|
||||||
|
+
|
||||||
|
+ if (length < 0)
|
||||||
|
+ continue;
|
||||||
|
|
||||||
|
- length = longest_simple_path (g, src->cuid, dest->cuid, scc->nodes);
|
||||||
|
- if (length < 0 )
|
||||||
|
- {
|
||||||
|
- /* fprintf (stderr, "Backarc not on simple cycle in SCC.\n"); */
|
||||||
|
- continue;
|
||||||
|
- }
|
||||||
|
length += backarc->latency;
|
||||||
|
result = MAX (result, (length / distance));
|
||||||
|
}
|
||||||
|
@@ -846,9 +804,9 @@ set_recurrence_length (ddg_scc_ptr scc,
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create a new SCC given the set of its nodes. Compute its recurrence_length
|
||||||
|
- and mark edges that belong to this scc as IN_SCC. */
|
||||||
|
+ and mark edges that belong to this scc. */
|
||||||
|
static ddg_scc_ptr
|
||||||
|
-create_scc (ddg_ptr g, sbitmap nodes)
|
||||||
|
+create_scc (ddg_ptr g, sbitmap nodes, int id)
|
||||||
|
{
|
||||||
|
ddg_scc_ptr scc;
|
||||||
|
unsigned int u = 0;
|
||||||
|
@@ -866,16 +824,18 @@ create_scc (ddg_ptr g, sbitmap nodes)
|
||||||
|
ddg_edge_ptr e;
|
||||||
|
ddg_node_ptr n = &g->nodes[u];
|
||||||
|
|
||||||
|
+ gcc_assert (n->aux.count == -1);
|
||||||
|
+ n->aux.count = id;
|
||||||
|
+
|
||||||
|
for (e = n->out; e; e = e->next_out)
|
||||||
|
if (bitmap_bit_p (nodes, e->dest->cuid))
|
||||||
|
{
|
||||||
|
- e->aux.count = IN_SCC;
|
||||||
|
+ e->in_scc = true;
|
||||||
|
if (e->distance > 0)
|
||||||
|
add_backarc_to_scc (scc, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- set_recurrence_length (scc, g);
|
||||||
|
return scc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -1018,7 +978,7 @@ check_sccs (ddg_all_sccs_ptr sccs, int n
|
||||||
|
ddg_all_sccs_ptr
|
||||||
|
create_ddg_all_sccs (ddg_ptr g)
|
||||||
|
{
|
||||||
|
- int i;
|
||||||
|
+ int i, j, k, scc, way;
|
||||||
|
int num_nodes = g->num_nodes;
|
||||||
|
auto_sbitmap from (num_nodes);
|
||||||
|
auto_sbitmap to (num_nodes);
|
||||||
|
@@ -1038,7 +998,7 @@ create_ddg_all_sccs (ddg_ptr g)
|
||||||
|
ddg_node_ptr dest = backarc->dest;
|
||||||
|
|
||||||
|
/* If the backarc already belongs to an SCC, continue. */
|
||||||
|
- if (backarc->aux.count == IN_SCC)
|
||||||
|
+ if (backarc->in_scc)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
bitmap_clear (scc_nodes);
|
||||||
|
@@ -1049,10 +1009,52 @@ create_ddg_all_sccs (ddg_ptr g)
|
||||||
|
|
||||||
|
if (find_nodes_on_paths (scc_nodes, g, from, to))
|
||||||
|
{
|
||||||
|
- scc = create_scc (g, scc_nodes);
|
||||||
|
+ scc = create_scc (g, scc_nodes, sccs->num_sccs);
|
||||||
|
add_scc_to_ddg (sccs, scc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ /* Init max_dist arrays for Floyd-Warshall-like
|
||||||
|
+ longest path calculation algorithm. */
|
||||||
|
+ for (k = 0; k < num_nodes; k++)
|
||||||
|
+ {
|
||||||
|
+ ddg_edge_ptr e;
|
||||||
|
+ ddg_node_ptr n = &g->nodes[k];
|
||||||
|
+
|
||||||
|
+ if (n->aux.count == -1)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ n->max_dist[k] = 0;
|
||||||
|
+ for (e = n->out; e; e = e->next_out)
|
||||||
|
+ if (e->distance == 0 && g->nodes[e->dest->cuid].aux.count == n->aux.count)
|
||||||
|
+ n->max_dist[e->dest->cuid] = e->latency;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Run main Floyd-Warshall loop. We use only non-backarc edges
|
||||||
|
+ inside each scc. */
|
||||||
|
+ for (k = 0; k < num_nodes; k++)
|
||||||
|
+ {
|
||||||
|
+ scc = g->nodes[k].aux.count;
|
||||||
|
+ if (scc != -1)
|
||||||
|
+ {
|
||||||
|
+ for (i = 0; i < num_nodes; i++)
|
||||||
|
+ if (g->nodes[i].aux.count == scc)
|
||||||
|
+ for (j = 0; j < num_nodes; j++)
|
||||||
|
+ if (g->nodes[j].aux.count == scc
|
||||||
|
+ && g->nodes[i].max_dist[k] >= 0
|
||||||
|
+ && g->nodes[k].max_dist[j] >= 0)
|
||||||
|
+ {
|
||||||
|
+ way = g->nodes[i].max_dist[k] + g->nodes[k].max_dist[j];
|
||||||
|
+ if (g->nodes[i].max_dist[j] < way)
|
||||||
|
+ g->nodes[i].max_dist[j] = way;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Calculate recurrence_length using max_dist info. */
|
||||||
|
+ for (i = 0; i < sccs->num_sccs; i++)
|
||||||
|
+ set_recurrence_length (sccs->sccs[i]);
|
||||||
|
+
|
||||||
|
order_sccs (sccs);
|
||||||
|
|
||||||
|
if (flag_checking)
|
||||||
|
@@ -1155,72 +1157,4 @@ find_nodes_on_paths (sbitmap result, ddg
|
||||||
|
return bitmap_and (result, reachable_from, reach_to);
|
||||||
|
}
|
||||||
|
|
||||||
|
-
|
||||||
|
-/* Updates the counts of U_NODE's successors (that belong to NODES) to be
|
||||||
|
- at-least as large as the count of U_NODE plus the latency between them.
|
||||||
|
- Sets a bit in TMP for each successor whose count was changed (increased).
|
||||||
|
- Returns nonzero if any count was changed. */
|
||||||
|
-static int
|
||||||
|
-update_dist_to_successors (ddg_node_ptr u_node, sbitmap nodes, sbitmap tmp)
|
||||||
|
-{
|
||||||
|
- ddg_edge_ptr e;
|
||||||
|
- int result = 0;
|
||||||
|
-
|
||||||
|
- for (e = u_node->out; e; e = e->next_out)
|
||||||
|
- {
|
||||||
|
- ddg_node_ptr v_node = e->dest;
|
||||||
|
- int v = v_node->cuid;
|
||||||
|
-
|
||||||
|
- if (bitmap_bit_p (nodes, v)
|
||||||
|
- && (e->distance == 0)
|
||||||
|
- && (v_node->aux.count < u_node->aux.count + e->latency))
|
||||||
|
- {
|
||||||
|
- v_node->aux.count = u_node->aux.count + e->latency;
|
||||||
|
- bitmap_set_bit (tmp, v);
|
||||||
|
- result = 1;
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
- return result;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-/* Find the length of a longest path from SRC to DEST in G,
|
||||||
|
- going only through NODES, and disregarding backarcs. */
|
||||||
|
-int
|
||||||
|
-longest_simple_path (struct ddg * g, int src, int dest, sbitmap nodes)
|
||||||
|
-{
|
||||||
|
- int i;
|
||||||
|
- unsigned int u = 0;
|
||||||
|
- int change = 1;
|
||||||
|
- int num_nodes = g->num_nodes;
|
||||||
|
- auto_sbitmap workset (num_nodes);
|
||||||
|
- auto_sbitmap tmp (num_nodes);
|
||||||
|
-
|
||||||
|
-
|
||||||
|
- /* Data will hold the distance of the longest path found so far from
|
||||||
|
- src to each node. Initialize to -1 = less than minimum. */
|
||||||
|
- for (i = 0; i < g->num_nodes; i++)
|
||||||
|
- g->nodes[i].aux.count = -1;
|
||||||
|
- g->nodes[src].aux.count = 0;
|
||||||
|
-
|
||||||
|
- bitmap_clear (tmp);
|
||||||
|
- bitmap_set_bit (tmp, src);
|
||||||
|
-
|
||||||
|
- while (change)
|
||||||
|
- {
|
||||||
|
- sbitmap_iterator sbi;
|
||||||
|
-
|
||||||
|
- change = 0;
|
||||||
|
- bitmap_copy (workset, tmp);
|
||||||
|
- bitmap_clear (tmp);
|
||||||
|
- EXECUTE_IF_SET_IN_BITMAP (workset, 0, u, sbi)
|
||||||
|
- {
|
||||||
|
- ddg_node_ptr u_node = &g->nodes[u];
|
||||||
|
-
|
||||||
|
- change |= update_dist_to_successors (u_node, nodes, tmp);
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
- return g->nodes[dest].aux.count;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
#endif /* INSN_SCHEDULING */
|
||||||
|
diff -Nurp a/gcc/ddg.h b/gcc/ddg.h
|
||||||
|
--- a/gcc/ddg.h 2020-03-12 19:07:21.000000000 +0800
|
||||||
|
+++ b/gcc/ddg.h 2020-11-28 18:38:33.835633230 +0800
|
||||||
|
@@ -64,6 +64,10 @@ struct ddg_node
|
||||||
|
sbitmap successors;
|
||||||
|
sbitmap predecessors;
|
||||||
|
|
||||||
|
+ /* Temporary array used for Floyd-Warshall algorithm to find
|
||||||
|
+ scc recurrence length. */
|
||||||
|
+ int *max_dist;
|
||||||
|
+
|
||||||
|
/* For general use by algorithms manipulating the ddg. */
|
||||||
|
union {
|
||||||
|
int count;
|
||||||
|
@@ -95,11 +99,8 @@ struct ddg_edge
|
||||||
|
ddg_edge_ptr next_in;
|
||||||
|
ddg_edge_ptr next_out;
|
||||||
|
|
||||||
|
- /* For general use by algorithms manipulating the ddg. */
|
||||||
|
- union {
|
||||||
|
- int count;
|
||||||
|
- void *info;
|
||||||
|
- } aux;
|
||||||
|
+ /* Is true when edge is already in scc. */
|
||||||
|
+ bool in_scc;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* This structure holds the Data Dependence Graph for a basic block. */
|
||||||
|
@@ -115,9 +116,6 @@ struct ddg
|
||||||
|
int num_loads;
|
||||||
|
int num_stores;
|
||||||
|
|
||||||
|
- /* Number of debug instructions in the BB. */
|
||||||
|
- int num_debug;
|
||||||
|
-
|
||||||
|
/* This array holds the nodes in the graph; it is indexed by the node
|
||||||
|
cuid, which follows the order of the instructions in the BB. */
|
||||||
|
ddg_node_ptr nodes;
|
||||||
|
@@ -178,7 +176,6 @@ ddg_all_sccs_ptr create_ddg_all_sccs (dd
|
||||||
|
void free_ddg_all_sccs (ddg_all_sccs_ptr);
|
||||||
|
|
||||||
|
int find_nodes_on_paths (sbitmap result, ddg_ptr, sbitmap from, sbitmap to);
|
||||||
|
-int longest_simple_path (ddg_ptr, int from, int to, sbitmap via);
|
||||||
|
|
||||||
|
bool autoinc_var_is_used_p (rtx_insn *, rtx_insn *);
|
||||||
|
|
||||||
|
diff -Nurp a/gcc/modulo-sched.c b/gcc/modulo-sched.c
|
||||||
|
--- a/gcc/modulo-sched.c 2020-03-12 19:07:21.000000000 +0800
|
||||||
|
+++ b/gcc/modulo-sched.c 2020-11-28 18:38:33.835633230 +0800
|
||||||
|
@@ -370,7 +370,7 @@ doloop_register_get (rtx_insn *head, rtx
|
||||||
|
: prev_nondebug_insn (tail));
|
||||||
|
|
||||||
|
for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
|
||||||
|
- if (!DEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
|
||||||
|
+ if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (reg, insn))
|
||||||
|
{
|
||||||
|
if (dump_file)
|
||||||
|
{
|
||||||
|
@@ -429,7 +429,7 @@ res_MII (ddg_ptr g)
|
||||||
|
if (targetm.sched.sms_res_mii)
|
||||||
|
return targetm.sched.sms_res_mii (g);
|
||||||
|
|
||||||
|
- return ((g->num_nodes - g->num_debug) / issue_rate);
|
||||||
|
+ return g->num_nodes / issue_rate;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -2156,11 +2156,7 @@ sms_schedule_by_order (ddg_ptr g, int mi
|
||||||
|
ddg_node_ptr u_node = &ps->g->nodes[u];
|
||||||
|
rtx_insn *insn = u_node->insn;
|
||||||
|
|
||||||
|
- if (!NONDEBUG_INSN_P (insn))
|
||||||
|
- {
|
||||||
|
- bitmap_clear_bit (tobe_scheduled, u);
|
||||||
|
- continue;
|
||||||
|
- }
|
||||||
|
+ gcc_checking_assert (NONDEBUG_INSN_P (insn));
|
||||||
|
|
||||||
|
if (bitmap_bit_p (sched_nodes, u))
|
||||||
|
continue;
|
||||||
|
@@ -3162,9 +3158,6 @@ ps_has_conflicts (partial_schedule_ptr p
|
||||||
|
{
|
||||||
|
rtx_insn *insn = ps_rtl_insn (ps, crr_insn->id);
|
||||||
|
|
||||||
|
- if (!NONDEBUG_INSN_P (insn))
|
||||||
|
- continue;
|
||||||
|
-
|
||||||
|
/* Check if there is room for the current insn. */
|
||||||
|
if (!can_issue_more || state_dead_lock_p (curr_state))
|
||||||
|
return true;
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c b/gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c
|
||||||
|
--- a/gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.c-torture/execute/pr70127-debug-sms.c 2020-11-28 18:38:33.835633230 +0800
|
||||||
|
@@ -0,0 +1,23 @@
|
||||||
|
+/* { dg-additional-options "-fcompare-debug -fmodulo-sched" } */
|
||||||
|
+
|
||||||
|
+struct S { int f; signed int g : 2; } a[1], c = {5, 1}, d;
|
||||||
|
+short b;
|
||||||
|
+
|
||||||
|
+__attribute__((noinline, noclone)) void
|
||||||
|
+foo (int x)
|
||||||
|
+{
|
||||||
|
+ if (x != 1)
|
||||||
|
+ __builtin_abort ();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+main ()
|
||||||
|
+{
|
||||||
|
+ while (b++ <= 0)
|
||||||
|
+ {
|
||||||
|
+ struct S e = {1, 1};
|
||||||
|
+ d = e = a[0] = c;
|
||||||
|
+ }
|
||||||
|
+ foo (a[0].g);
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c b/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/torture/pr87197-debug-sms.c 2020-11-28 18:38:33.835633230 +0800
|
||||||
|
@@ -0,0 +1,36 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-additional-options "-fcompare-debug -fmodulo-sched --param sms-min-sc=1" } */
|
||||||
|
+
|
||||||
|
+int a, c, e, f, g;
|
||||||
|
+void
|
||||||
|
+h (int i)
|
||||||
|
+{
|
||||||
|
+ a = i;
|
||||||
|
+}
|
||||||
|
+void
|
||||||
|
+j (char *i, long k)
|
||||||
|
+{
|
||||||
|
+ while (k--)
|
||||||
|
+ c = *i++;
|
||||||
|
+}
|
||||||
|
+void
|
||||||
|
+l (unsigned char *i, long k)
|
||||||
|
+{
|
||||||
|
+ unsigned char *b = i + k;
|
||||||
|
+ while (i < b)
|
||||||
|
+ {
|
||||||
|
+ h (*i);
|
||||||
|
+ i++;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+void
|
||||||
|
+m ()
|
||||||
|
+{
|
||||||
|
+ while (e)
|
||||||
|
+ {
|
||||||
|
+ float d = g;
|
||||||
|
+ l ((char *) &d, sizeof (g));
|
||||||
|
+ if (f)
|
||||||
|
+ j ((char *) &d, sizeof (g));
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
359
store-merging-Consider-also-overlapping-stores-earlier.patch
Normal file
359
store-merging-Consider-also-overlapping-stores-earlier.patch
Normal file
@ -0,0 +1,359 @@
|
|||||||
|
This backport contains 1 patch from gcc main stream tree.
|
||||||
|
The commit ids of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-store-merging-Consider-also-overlapping-stores-earli.patch
|
||||||
|
bd909071ac04e94f4b6f0baab64d0687ec55681d
|
||||||
|
|
||||||
|
diff -uprN a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c
|
||||||
|
--- a/gcc/gimple-ssa-store-merging.c 2020-12-16 17:03:16.155633230 +0800
|
||||||
|
+++ b/gcc/gimple-ssa-store-merging.c 2020-12-16 11:15:58.575633230 +0800
|
||||||
|
@@ -2021,7 +2021,8 @@ struct imm_store_chain_info
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bool terminate_and_process_chain ();
|
||||||
|
- bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
|
||||||
|
+ bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
|
||||||
|
+ unsigned int);
|
||||||
|
bool coalesce_immediate_stores ();
|
||||||
|
bool output_merged_store (merged_store_group *);
|
||||||
|
bool output_merged_stores ();
|
||||||
|
@@ -2342,14 +2343,39 @@ gather_bswap_load_refs (vec<tree> *refs,
|
||||||
|
into the group. That way it will be its own store group and will
|
||||||
|
not be touched. If ALL_INTEGER_CST_P and there are overlapping
|
||||||
|
INTEGER_CST stores, those are mergeable using merge_overlapping,
|
||||||
|
- so don't return false for those. */
|
||||||
|
+ so don't return false for those.
|
||||||
|
+
|
||||||
|
+ Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
|
||||||
|
+ (exclusive), whether they don't overlap the bitrange START to END
|
||||||
|
+ and have order in between FIRST_ORDER and LAST_ORDER. This is to
|
||||||
|
+ prevent merging in cases like:
|
||||||
|
+ MEM <char[12]> [&b + 8B] = {};
|
||||||
|
+ MEM[(short *) &b] = 5;
|
||||||
|
+ _5 = *x_4(D);
|
||||||
|
+ MEM <long long unsigned int> [&b + 2B] = _5;
|
||||||
|
+ MEM[(char *)&b + 16B] = 88;
|
||||||
|
+ MEM[(int *)&b + 20B] = 1;
|
||||||
|
+ The = {} store comes in sort_by_bitpos before the = 88 store, and can't
|
||||||
|
+ be merged with it, because the = _5 store overlaps these and is in between
|
||||||
|
+ them in sort_by_order ordering. If it was merged, the merged store would
|
||||||
|
+ go after the = _5 store and thus change behavior. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
|
||||||
|
- bool all_integer_cst_p, unsigned int last_order,
|
||||||
|
- unsigned HOST_WIDE_INT end)
|
||||||
|
+ bool all_integer_cst_p, unsigned int first_order,
|
||||||
|
+ unsigned int last_order, unsigned HOST_WIDE_INT start,
|
||||||
|
+ unsigned HOST_WIDE_INT end, unsigned int first_earlier,
|
||||||
|
+ unsigned end_earlier)
|
||||||
|
{
|
||||||
|
unsigned int len = m_store_info.length ();
|
||||||
|
+ for (unsigned int j = first_earlier; j < end_earlier; j++)
|
||||||
|
+ {
|
||||||
|
+ store_immediate_info *info = m_store_info[j];
|
||||||
|
+ if (info->order > first_order
|
||||||
|
+ && info->order < last_order
|
||||||
|
+ && info->bitpos + info->bitsize > start)
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
for (++i; i < len; ++i)
|
||||||
|
{
|
||||||
|
store_immediate_info *info = m_store_info[i];
|
||||||
|
@@ -2370,7 +2396,8 @@ check_no_overlap (vec<store_immediate_in
|
||||||
|
bool
|
||||||
|
imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
|
||||||
|
unsigned int first,
|
||||||
|
- unsigned int try_size)
|
||||||
|
+ unsigned int try_size,
|
||||||
|
+ unsigned int first_earlier)
|
||||||
|
{
|
||||||
|
unsigned int len = m_store_info.length (), last = first;
|
||||||
|
unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
|
||||||
|
@@ -2509,7 +2536,8 @@ imm_store_chain_info::try_coalesce_bswap
|
||||||
|
if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
- if (!check_no_overlap (m_store_info, last, false, last_order, end))
|
||||||
|
+ if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
|
||||||
|
+ merged_store->start, end, first_earlier, first))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Don't handle memory copy this way if normal non-bswap processing
|
||||||
|
@@ -2601,6 +2629,8 @@ imm_store_chain_info::coalesce_immediate
|
||||||
|
|
||||||
|
store_immediate_info *info;
|
||||||
|
unsigned int i, ignore = 0;
|
||||||
|
+ unsigned int first_earlier = 0;
|
||||||
|
+ unsigned int end_earlier = 0;
|
||||||
|
|
||||||
|
/* Order the stores by the bitposition they write to. */
|
||||||
|
m_store_info.qsort (sort_by_bitpos);
|
||||||
|
@@ -2615,6 +2645,12 @@ imm_store_chain_info::coalesce_immediate
|
||||||
|
if (i <= ignore)
|
||||||
|
goto done;
|
||||||
|
|
||||||
|
+ while (first_earlier < end_earlier
|
||||||
|
+ && (m_store_info[first_earlier]->bitpos
|
||||||
|
+ + m_store_info[first_earlier]->bitsize
|
||||||
|
+ <= merged_store->start))
|
||||||
|
+ first_earlier++;
|
||||||
|
+
|
||||||
|
/* First try to handle group of stores like:
|
||||||
|
p[0] = data >> 24;
|
||||||
|
p[1] = data >> 16;
|
||||||
|
@@ -2628,7 +2664,8 @@ imm_store_chain_info::coalesce_immediate
|
||||||
|
{
|
||||||
|
unsigned int try_size;
|
||||||
|
for (try_size = 64; try_size >= 16; try_size >>= 1)
|
||||||
|
- if (try_coalesce_bswap (merged_store, i - 1, try_size))
|
||||||
|
+ if (try_coalesce_bswap (merged_store, i - 1, try_size,
|
||||||
|
+ first_earlier))
|
||||||
|
break;
|
||||||
|
|
||||||
|
if (try_size >= 16)
|
||||||
|
@@ -2636,7 +2673,10 @@ imm_store_chain_info::coalesce_immediate
|
||||||
|
ignore = i + merged_store->stores.length () - 1;
|
||||||
|
m_merged_store_groups.safe_push (merged_store);
|
||||||
|
if (ignore < m_store_info.length ())
|
||||||
|
- merged_store = new merged_store_group (m_store_info[ignore]);
|
||||||
|
+ {
|
||||||
|
+ merged_store = new merged_store_group (m_store_info[ignore]);
|
||||||
|
+ end_earlier = ignore;
|
||||||
|
+ }
|
||||||
|
else
|
||||||
|
merged_store = NULL;
|
||||||
|
goto done;
|
||||||
|
@@ -2662,12 +2702,16 @@ imm_store_chain_info::coalesce_immediate
|
||||||
|
/* Only allow overlapping stores of constants. */
|
||||||
|
if (info->rhs_code == INTEGER_CST && merged_store->only_constants)
|
||||||
|
{
|
||||||
|
+ unsigned int first_order
|
||||||
|
+ = MIN (merged_store->first_order, info->order);
|
||||||
|
unsigned int last_order
|
||||||
|
= MAX (merged_store->last_order, info->order);
|
||||||
|
unsigned HOST_WIDE_INT end
|
||||||
|
= MAX (merged_store->start + merged_store->width,
|
||||||
|
info->bitpos + info->bitsize);
|
||||||
|
- if (check_no_overlap (m_store_info, i, true, last_order, end))
|
||||||
|
+ if (check_no_overlap (m_store_info, i, true, first_order,
|
||||||
|
+ last_order, merged_store->start, end,
|
||||||
|
+ first_earlier, end_earlier))
|
||||||
|
{
|
||||||
|
/* check_no_overlap call above made sure there are no
|
||||||
|
overlapping stores with non-INTEGER_CST rhs_code
|
||||||
|
@@ -2696,6 +2740,7 @@ imm_store_chain_info::coalesce_immediate
|
||||||
|
do
|
||||||
|
{
|
||||||
|
unsigned int max_order = 0;
|
||||||
|
+ unsigned int min_order = first_order;
|
||||||
|
unsigned first_nonmergeable_int_order = ~0U;
|
||||||
|
unsigned HOST_WIDE_INT this_end = end;
|
||||||
|
k = i;
|
||||||
|
@@ -2721,6 +2766,7 @@ imm_store_chain_info::coalesce_immediate
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
k = j;
|
||||||
|
+ min_order = MIN (min_order, info2->order);
|
||||||
|
this_end = MAX (this_end,
|
||||||
|
info2->bitpos + info2->bitsize);
|
||||||
|
}
|
||||||
|
@@ -2736,6 +2782,12 @@ imm_store_chain_info::coalesce_immediate
|
||||||
|
first_nonmergeable_order
|
||||||
|
= MIN (first_nonmergeable_order, info2->order);
|
||||||
|
}
|
||||||
|
+ if (k > i
|
||||||
|
+ && !check_no_overlap (m_store_info, len - 1, true,
|
||||||
|
+ min_order, try_order,
|
||||||
|
+ merged_store->start, this_end,
|
||||||
|
+ first_earlier, end_earlier))
|
||||||
|
+ k = 0;
|
||||||
|
if (k == 0)
|
||||||
|
{
|
||||||
|
if (last_order == try_order)
|
||||||
|
@@ -2821,9 +2873,12 @@ imm_store_chain_info::coalesce_immediate
|
||||||
|
info->ops_swapped_p = true;
|
||||||
|
}
|
||||||
|
if (check_no_overlap (m_store_info, i, false,
|
||||||
|
+ MIN (merged_store->first_order, info->order),
|
||||||
|
MAX (merged_store->last_order, info->order),
|
||||||
|
+ merged_store->start,
|
||||||
|
MAX (merged_store->start + merged_store->width,
|
||||||
|
- info->bitpos + info->bitsize)))
|
||||||
|
+ info->bitpos + info->bitsize),
|
||||||
|
+ first_earlier, end_earlier))
|
||||||
|
{
|
||||||
|
/* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
|
||||||
|
if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
|
||||||
|
@@ -2868,6 +2923,7 @@ imm_store_chain_info::coalesce_immediate
|
||||||
|
delete merged_store;
|
||||||
|
|
||||||
|
merged_store = new merged_store_group (info);
|
||||||
|
+ end_earlier = i;
|
||||||
|
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||||
|
fputs ("New store group\n", dump_file);
|
||||||
|
|
||||||
|
diff -uprN a/gcc/testsuite/gcc.dg/store_merging_31.c b/gcc/testsuite/gcc.dg/store_merging_31.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/store_merging_31.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/store_merging_31.c 2020-12-16 11:15:58.575633230 +0800
|
||||||
|
@@ -0,0 +1,27 @@
|
||||||
|
+/* PR tree-optimization/97053 */
|
||||||
|
+/* { dg-do run } */
|
||||||
|
+/* { dg-options "-O2" } */
|
||||||
|
+
|
||||||
|
+struct S { short a; char b[9]; int c; char d; int e; };
|
||||||
|
+
|
||||||
|
+__attribute__((noipa)) void
|
||||||
|
+foo (char *x, char *y)
|
||||||
|
+{
|
||||||
|
+ if (__builtin_strcmp (x, "ABCDXXXX") != 0
|
||||||
|
+ || __builtin_strcmp (y, "ABCDXXXX") != 0)
|
||||||
|
+ __builtin_abort ();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+main ()
|
||||||
|
+{
|
||||||
|
+ char a[9] = "XXXXXXXX";
|
||||||
|
+ struct S b = {};
|
||||||
|
+ __builtin_memcpy (a, "ABCD", 4);
|
||||||
|
+ b.a = 5;
|
||||||
|
+ __builtin_memcpy (b.b, a, 8);
|
||||||
|
+ b.d = 'X';
|
||||||
|
+ b.e = 1;
|
||||||
|
+ foo (a, b.b);
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
diff -uprN a/gcc/testsuite/gcc.dg/store_merging_32.c b/gcc/testsuite/gcc.dg/store_merging_32.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/store_merging_32.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/store_merging_32.c 2020-12-16 11:15:58.575633230 +0800
|
||||||
|
@@ -0,0 +1,129 @@
|
||||||
|
+/* PR tree-optimization/97053 */
|
||||||
|
+/* { dg-do run } */
|
||||||
|
+/* { dg-options "-O2 -fno-tree-dse" } */
|
||||||
|
+
|
||||||
|
+struct __attribute__((packed, may_alias)) S { long long s; };
|
||||||
|
+struct __attribute__((packed, may_alias)) T { short t; };
|
||||||
|
+
|
||||||
|
+__attribute__((noipa)) void
|
||||||
|
+test (char *p, char *q, int s)
|
||||||
|
+{
|
||||||
|
+ if ((s & 1) == 0)
|
||||||
|
+ {
|
||||||
|
+ if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
|
||||||
|
+ != *(short __attribute__((may_alias)) *) &q[sizeof (short)]
|
||||||
|
+ || (((struct S __attribute__((may_alias)) *) &p[1])->s
|
||||||
|
+ != ((struct S __attribute__((may_alias)) *) &q[1])->s)
|
||||||
|
+ || (*(short __attribute__((may_alias)) *) &p[2 * sizeof (short)]
|
||||||
|
+ != *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)]))
|
||||||
|
+ __builtin_abort ();
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ if (*(short __attribute__((may_alias)) *) &p[sizeof (short)]
|
||||||
|
+ != *(short __attribute__((may_alias)) *) &q[sizeof (short)]
|
||||||
|
+ || (((struct S __attribute__((may_alias)) *) &p[1])->s
|
||||||
|
+ != ((struct S __attribute__((may_alias)) *) &q[1])->s)
|
||||||
|
+ || (((struct T __attribute__((may_alias)) *) &p[2 * sizeof (short) - 1])->t
|
||||||
|
+ != ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t)
|
||||||
|
+ || p[3 * sizeof (short) - 2] != q[3 * sizeof (short) - 2])
|
||||||
|
+ __builtin_abort ();
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+__attribute__((noipa)) void
|
||||||
|
+foo (long long *p, char *q, char *r, char *s)
|
||||||
|
+{
|
||||||
|
+ char a[64] __attribute__((aligned (__alignof (short))));
|
||||||
|
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
|
||||||
|
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||||||
|
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
|
||||||
|
+ *(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
|
||||||
|
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||||||
|
+ *(short __attribute__((may_alias)) *) &s[2 * sizeof (short)] = 2;
|
||||||
|
+ test (a, q, 0);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+__attribute__((noipa)) void
|
||||||
|
+bar (long long *p, char *q, char *r, char *s, char *t)
|
||||||
|
+{
|
||||||
|
+ char a[64] __attribute__((aligned (__alignof (short))));
|
||||||
|
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
|
||||||
|
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||||||
|
+ ((struct T __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1])->t = 2;
|
||||||
|
+ a[3 * sizeof (short) - 2] = 3;
|
||||||
|
+ *(short __attribute__((may_alias)) *) &q[sizeof (short)] = 1;
|
||||||
|
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||||||
|
+ ((struct T __attribute__((may_alias)) *) &s[2 * sizeof (short) - 1])->t = 2;
|
||||||
|
+ t[3 * sizeof (short) - 2] = 3;
|
||||||
|
+ test (a, q, 1);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+__attribute__((noipa)) void
|
||||||
|
+baz (long long *p, char *q, char *r, char *s)
|
||||||
|
+{
|
||||||
|
+ char a[64] __attribute__((aligned (__alignof (short))));
|
||||||
|
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = 2;
|
||||||
|
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||||||
|
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
|
||||||
|
+ *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = 2;
|
||||||
|
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||||||
|
+ *(short __attribute__((may_alias)) *) &s[sizeof (short)] = 1;
|
||||||
|
+ test (a, q, 2);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+__attribute__((noipa)) void
|
||||||
|
+qux (long long *p, char *q, char *r, char *s, char *t)
|
||||||
|
+{
|
||||||
|
+ char a[64] __attribute__((aligned (__alignof (short))));
|
||||||
|
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short) - 1] = 2;
|
||||||
|
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||||||
|
+ a[3 * sizeof (short) - 2] = 3;
|
||||||
|
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = 1;
|
||||||
|
+ ((struct T __attribute__((may_alias)) *) &q[2 * sizeof (short) - 1])->t = 2;
|
||||||
|
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||||||
|
+ s[3 * sizeof (short) - 2] = 3;
|
||||||
|
+ ((struct T __attribute__((may_alias)) *) &t[sizeof (short)])->t = 1;
|
||||||
|
+ test (a, q, 3);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+__attribute__((noipa)) void
|
||||||
|
+corge (long long *p, char *q, char *r, char *s, short u[3])
|
||||||
|
+{
|
||||||
|
+ char a[64] __attribute__((aligned (__alignof (short))));
|
||||||
|
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
|
||||||
|
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||||||
|
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
|
||||||
|
+ *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
|
||||||
|
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||||||
|
+ *(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
|
||||||
|
+ test (a, q, 4);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+__attribute__((noipa)) void
|
||||||
|
+garply (long long *p, char *q, char *r, char *s, short u[3])
|
||||||
|
+{
|
||||||
|
+ char a[64] __attribute__((aligned (__alignof (short))));
|
||||||
|
+ *(short __attribute__((may_alias)) *) &a[sizeof (short)] = u[1];
|
||||||
|
+ ((struct S __attribute__((may_alias)) *) &a[1])->s = p[0];
|
||||||
|
+ *(short __attribute__((may_alias)) *) &a[2 * sizeof (short)] = u[2];
|
||||||
|
+ *(short __attribute__((may_alias)) *) &s[sizeof (short)] = u[1];
|
||||||
|
+ ((struct S __attribute__((may_alias)) *) &r[1])->s = p[0];
|
||||||
|
+ *(short __attribute__((may_alias)) *) &q[2 * sizeof (short)] = u[2];
|
||||||
|
+ test (a, q, 6);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+main ()
|
||||||
|
+{
|
||||||
|
+ char a[64] __attribute__((aligned (__alignof (short))));
|
||||||
|
+ long long p = -1LL;
|
||||||
|
+ short u[] = { 1, 2, 3 };
|
||||||
|
+ foo (&p, &a[0], &a[0], &a[0]);
|
||||||
|
+ bar (&p, &a[0], &a[0], &a[0], &a[0]);
|
||||||
|
+ baz (&p, &a[0], &a[0], &a[0]);
|
||||||
|
+ qux (&p, &a[0], &a[0], &a[0], &a[0]);
|
||||||
|
+ corge (&p, &a[0], &a[0], &a[0], u);
|
||||||
|
+ garply (&p, &a[0], &a[0], &a[0], u);
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
316
tree-optimization-96920-another-ICE-when-vectorizing.patch
Normal file
316
tree-optimization-96920-another-ICE-when-vectorizing.patch
Normal file
@ -0,0 +1,316 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of the patches are listed below in chronological order.
|
||||||
|
|
||||||
|
46a58c779af3055a4b10b285a1f4be28abe4351c
|
||||||
|
0001-tree-optimization-96920-another-ICE-when-vectorizing.patch
|
||||||
|
|
||||||
|
diff -uprN a/gcc/testsuite/gcc.dg/vect/pr96920.c b/gcc/testsuite/gcc.dg/vect/pr96920.c
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/pr96920.c 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/pr96920.c 2020-10-26 21:46:25.316000000 +0800
|
||||||
|
@@ -0,0 +1,20 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+
|
||||||
|
+int a[1024];
|
||||||
|
+int b[2048];
|
||||||
|
+
|
||||||
|
+void foo (int x, int y)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 1024; ++i)
|
||||||
|
+ {
|
||||||
|
+ int tem0 = b[2*i];
|
||||||
|
+ int tem1 = b[2*i+1];
|
||||||
|
+ for (int j = 0; j < 32; ++j)
|
||||||
|
+ {
|
||||||
|
+ int tem = tem0;
|
||||||
|
+ tem0 = tem1;
|
||||||
|
+ tem1 = tem;
|
||||||
|
+ a[i] += tem0;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
diff -uprN a/gcc/testsuite/gfortran.dg/vect/pr96920.f90 b/gcc/testsuite/gfortran.dg/vect/pr96920.f90
|
||||||
|
--- a/gcc/testsuite/gfortran.dg/vect/pr96920.f90 1970-01-01 08:00:00.000000000 +0800
|
||||||
|
+++ b/gcc/testsuite/gfortran.dg/vect/pr96920.f90 2020-10-26 21:46:25.316000000 +0800
|
||||||
|
@@ -0,0 +1,37 @@
|
||||||
|
+! { dg-do compile }
|
||||||
|
+ subroutine ice(npoint, nterm, x, g)
|
||||||
|
+ implicit none
|
||||||
|
+ integer norder
|
||||||
|
+ parameter (norder=10)
|
||||||
|
+ integer j
|
||||||
|
+ integer k
|
||||||
|
+ integer ii
|
||||||
|
+ integer nterm
|
||||||
|
+ integer npoint
|
||||||
|
+ real b(norder)
|
||||||
|
+ real c(norder)
|
||||||
|
+ real d(norder)
|
||||||
|
+ real x(npoint)
|
||||||
|
+ real g(npoint)
|
||||||
|
+ real gg
|
||||||
|
+ real prev
|
||||||
|
+ real prev2
|
||||||
|
+
|
||||||
|
+ j = 1
|
||||||
|
+ 100 continue
|
||||||
|
+ j = j+1
|
||||||
|
+ if (nterm == j) then
|
||||||
|
+ do ii=1,npoint
|
||||||
|
+ k = nterm
|
||||||
|
+ gg= d(k)
|
||||||
|
+ prev= 0.0
|
||||||
|
+ do k=k-1,1,-1
|
||||||
|
+ prev2= prev
|
||||||
|
+ prev= gg
|
||||||
|
+ gg = d(k)+(x(ii)-b(k))*prev-c(k+1)*prev2
|
||||||
|
+ enddo
|
||||||
|
+ g(ii) = gg
|
||||||
|
+ enddo
|
||||||
|
+ endif
|
||||||
|
+ go to 100
|
||||||
|
+ end
|
||||||
|
diff -uprN a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
|
||||||
|
--- a/gcc/tree-vect-loop.c 2020-10-26 21:45:23.056000000 +0800
|
||||||
|
+++ b/gcc/tree-vect-loop.c 2020-10-26 21:49:02.884000000 +0800
|
||||||
|
@@ -8166,6 +8166,47 @@ scale_profile_for_vect_loop (struct loop
|
||||||
|
scale_bbs_frequencies (&loop->latch, 1, exit_l->probability / prob);
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* For a vectorized stmt DEF_STMT_INFO adjust all vectorized PHI
|
||||||
|
+ latch edge values originally defined by it. */
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+maybe_set_vectorized_backedge_value (loop_vec_info loop_vinfo,
|
||||||
|
+ stmt_vec_info def_stmt_info)
|
||||||
|
+{
|
||||||
|
+ tree def = gimple_get_lhs (vect_orig_stmt (def_stmt_info)->stmt);
|
||||||
|
+ if (!def || TREE_CODE (def) != SSA_NAME)
|
||||||
|
+ return;
|
||||||
|
+ stmt_vec_info phi_info;
|
||||||
|
+ imm_use_iterator iter;
|
||||||
|
+ use_operand_p use_p;
|
||||||
|
+ FOR_EACH_IMM_USE_FAST (use_p, iter, def)
|
||||||
|
+ if (gphi *phi = dyn_cast <gphi *> (USE_STMT (use_p)))
|
||||||
|
+ if (gimple_bb (phi)->loop_father->header == gimple_bb (phi)
|
||||||
|
+ && (phi_info = loop_vinfo->lookup_stmt (phi))
|
||||||
|
+ && VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (phi_info))
|
||||||
|
+ && STMT_VINFO_REDUC_TYPE (phi_info) != FOLD_LEFT_REDUCTION
|
||||||
|
+ && STMT_VINFO_REDUC_TYPE (phi_info) != EXTRACT_LAST_REDUCTION)
|
||||||
|
+ {
|
||||||
|
+ loop_p loop = gimple_bb (phi)->loop_father;
|
||||||
|
+ edge e = loop_latch_edge (loop);
|
||||||
|
+ if (PHI_ARG_DEF_FROM_EDGE (phi, e) == def)
|
||||||
|
+ {
|
||||||
|
+ stmt_vec_info phi_defs = STMT_VINFO_VEC_STMT (phi_info);
|
||||||
|
+ stmt_vec_info latch_defs = STMT_VINFO_VEC_STMT (def_stmt_info);
|
||||||
|
+ while (phi_defs && latch_defs)
|
||||||
|
+ {
|
||||||
|
+ add_phi_arg (as_a <gphi *> (phi_defs->stmt),
|
||||||
|
+ gimple_get_lhs (latch_defs->stmt), e,
|
||||||
|
+ gimple_phi_arg_location (phi, e->dest_idx));
|
||||||
|
+ phi_defs = STMT_VINFO_RELATED_STMT (phi_defs);
|
||||||
|
+ latch_defs = STMT_VINFO_RELATED_STMT (latch_defs);
|
||||||
|
+ }
|
||||||
|
+ gcc_assert (!latch_defs);
|
||||||
|
+ gcc_assert (!phi_defs);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI.
|
||||||
|
When vectorizing STMT_INFO as a store, set *SEEN_STORE to its
|
||||||
|
stmt_vec_info. */
|
||||||
|
@@ -8533,7 +8574,7 @@ vect_transform_loop (loop_vec_info loop_
|
||||||
|
|
||||||
|
for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
|
||||||
|
gsi_next (&si))
|
||||||
|
- {
|
||||||
|
+ {
|
||||||
|
gphi *phi = si.phi ();
|
||||||
|
if (dump_enabled_p ())
|
||||||
|
dump_printf_loc (MSG_NOTE, vect_location,
|
||||||
|
@@ -8568,6 +8609,27 @@ vect_transform_loop (loop_vec_info loop_
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+ for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
|
||||||
|
+ gsi_next (&si))
|
||||||
|
+ {
|
||||||
|
+ gphi *phi = si.phi ();
|
||||||
|
+ stmt_info = loop_vinfo->lookup_stmt (phi);
|
||||||
|
+ if (!stmt_info)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ if (!STMT_VINFO_RELEVANT_P (stmt_info)
|
||||||
|
+ && !STMT_VINFO_LIVE_P (stmt_info))
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
|
||||||
|
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
|
||||||
|
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def
|
||||||
|
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
|
||||||
|
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def)
|
||||||
|
+ && ! PURE_SLP_STMT (stmt_info))
|
||||||
|
+ maybe_set_vectorized_backedge_value (loop_vinfo, stmt_info);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
for (gimple_stmt_iterator si = gsi_start_bb (bb);
|
||||||
|
!gsi_end_p (si);)
|
||||||
|
{
|
||||||
|
@@ -8604,9 +8666,16 @@ vect_transform_loop (loop_vec_info loop_
|
||||||
|
= STMT_VINFO_RELATED_STMT (stmt_info);
|
||||||
|
vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, &si,
|
||||||
|
&seen_store);
|
||||||
|
+ maybe_set_vectorized_backedge_value (loop_vinfo,
|
||||||
|
+ pat_stmt_info);
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
|
||||||
|
+ &seen_store);
|
||||||
|
+ maybe_set_vectorized_backedge_value (loop_vinfo,
|
||||||
|
+ stmt_info);
|
||||||
|
}
|
||||||
|
- vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
|
||||||
|
- &seen_store);
|
||||||
|
}
|
||||||
|
gsi_next (&si);
|
||||||
|
if (seen_store)
|
||||||
|
@@ -8623,43 +8692,6 @@ vect_transform_loop (loop_vec_info loop_
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- /* Fill in backedge defs of reductions. */
|
||||||
|
- for (unsigned i = 0; i < loop_vinfo->reduc_latch_defs.length (); ++i)
|
||||||
|
- {
|
||||||
|
- stmt_vec_info stmt_info = loop_vinfo->reduc_latch_defs[i];
|
||||||
|
- stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
|
||||||
|
- stmt_vec_info phi_info
|
||||||
|
- = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
|
||||||
|
- stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
|
||||||
|
- gphi *phi
|
||||||
|
- = dyn_cast <gphi *> (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt);
|
||||||
|
- edge e = loop_latch_edge (gimple_bb (phi_info->stmt)->loop_father);
|
||||||
|
- do
|
||||||
|
- {
|
||||||
|
- add_phi_arg (as_a <gphi *> (phi_info->stmt),
|
||||||
|
- gimple_get_lhs (vec_stmt->stmt), e,
|
||||||
|
- gimple_phi_arg_location (phi, e->dest_idx));
|
||||||
|
- phi_info = STMT_VINFO_RELATED_STMT (phi_info);
|
||||||
|
- vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
|
||||||
|
- }
|
||||||
|
- while (phi_info);
|
||||||
|
- gcc_assert (!vec_stmt);
|
||||||
|
- }
|
||||||
|
- for (unsigned i = 0; i < loop_vinfo->reduc_latch_slp_defs.length (); ++i)
|
||||||
|
- {
|
||||||
|
- slp_tree slp_node = loop_vinfo->reduc_latch_slp_defs[i].first;
|
||||||
|
- slp_tree phi_node = loop_vinfo->reduc_latch_slp_defs[i].second;
|
||||||
|
- gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
|
||||||
|
- e = loop_latch_edge (gimple_bb (phi)->loop_father);
|
||||||
|
- gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
|
||||||
|
- == SLP_TREE_VEC_STMTS (slp_node).length ());
|
||||||
|
- for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
|
||||||
|
- add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]->stmt),
|
||||||
|
- gimple_get_lhs
|
||||||
|
- (SLP_TREE_VEC_STMTS (slp_node)[j]->stmt),
|
||||||
|
- e, gimple_phi_arg_location (phi, e->dest_idx));
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
/* Stub out scalar statements that must not survive vectorization.
|
||||||
|
Doing this here helps with grouped statements, or statements that
|
||||||
|
are involved in patterns. */
|
||||||
|
diff -uprN a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
|
||||||
|
--- a/gcc/tree-vectorizer.h 2020-10-26 21:45:23.052000000 +0800
|
||||||
|
+++ b/gcc/tree-vectorizer.h 2020-10-26 21:46:25.316000000 +0800
|
||||||
|
@@ -575,11 +575,6 @@ typedef struct _loop_vec_info : public v
|
||||||
|
stmt in the chain. */
|
||||||
|
auto_vec<stmt_vec_info> reduction_chains;
|
||||||
|
|
||||||
|
- /* The vectorized stmts defining the latch values of the reduction
|
||||||
|
- they are involved with. */
|
||||||
|
- auto_vec<stmt_vec_info> reduc_latch_defs;
|
||||||
|
- auto_vec<std::pair<slp_tree, slp_tree> > reduc_latch_slp_defs;
|
||||||
|
-
|
||||||
|
/* Cost vector for a single scalar iteration. */
|
||||||
|
auto_vec<stmt_info_for_cost> scalar_cost_vec;
|
||||||
|
|
||||||
|
diff -uprN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
|
||||||
|
--- a/gcc/tree-vect-slp.c 2020-10-26 21:45:23.052000000 +0800
|
||||||
|
+++ b/gcc/tree-vect-slp.c 2020-10-26 21:46:25.320000000 +0800
|
||||||
|
@@ -2189,6 +2189,7 @@ vect_analyze_slp_instance (vec_info *vin
|
||||||
|
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
|
||||||
|
SLP_INSTANCE_LOADS (new_instance) = vNULL;
|
||||||
|
SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
|
||||||
|
+ new_instance->reduc_phis = NULL;
|
||||||
|
|
||||||
|
vect_gather_slp_loads (new_instance, node);
|
||||||
|
if (dump_enabled_p ())
|
||||||
|
@@ -4282,6 +4283,26 @@ vect_schedule_slp (vec_info *vinfo)
|
||||||
|
stmt_vec_info store_info;
|
||||||
|
unsigned int j;
|
||||||
|
|
||||||
|
+ /* For reductions set the latch values of the vectorized PHIs. */
|
||||||
|
+ if (instance->reduc_phis
|
||||||
|
+ && STMT_VINFO_REDUC_TYPE (SLP_TREE_SCALAR_STMTS
|
||||||
|
+ (instance->reduc_phis)[0]) != FOLD_LEFT_REDUCTION
|
||||||
|
+ && STMT_VINFO_REDUC_TYPE (SLP_TREE_SCALAR_STMTS
|
||||||
|
+ (instance->reduc_phis)[0]) != EXTRACT_LAST_REDUCTION)
|
||||||
|
+ {
|
||||||
|
+ slp_tree slp_node = root;
|
||||||
|
+ slp_tree phi_node = instance->reduc_phis;
|
||||||
|
+ gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
|
||||||
|
+ edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
|
||||||
|
+ gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
|
||||||
|
+ == SLP_TREE_VEC_STMTS (slp_node).length ());
|
||||||
|
+ for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
|
||||||
|
+ add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]->stmt),
|
||||||
|
+ gimple_get_lhs
|
||||||
|
+ (SLP_TREE_VEC_STMTS (slp_node)[j]->stmt),
|
||||||
|
+ e, gimple_phi_arg_location (phi, e->dest_idx));
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
/* Remove scalar call stmts. Do not do this for basic-block
|
||||||
|
vectorization as not all uses may be vectorized.
|
||||||
|
??? Why should this be necessary? DCE should be able to
|
||||||
|
diff -uprN a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
|
||||||
|
--- a/gcc/tree-vect-stmts.c 2020-10-26 21:45:23.012000000 +0800
|
||||||
|
+++ b/gcc/tree-vect-stmts.c 2020-10-26 21:46:25.320000000 +0800
|
||||||
|
@@ -10229,37 +10229,6 @@ vect_transform_stmt (stmt_vec_info stmt_
|
||||||
|
if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
|
||||||
|
return is_store;
|
||||||
|
|
||||||
|
- /* If this stmt defines a value used on a backedge, record it so
|
||||||
|
- we can update the vectorized PHIs later. */
|
||||||
|
- stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
|
||||||
|
- stmt_vec_info reduc_info;
|
||||||
|
- if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
|
||||||
|
- && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
|
||||||
|
- && (reduc_info = info_for_reduction (orig_stmt_info))
|
||||||
|
- && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
|
||||||
|
- && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
|
||||||
|
- {
|
||||||
|
- gphi *phi;
|
||||||
|
- edge e;
|
||||||
|
- if (!slp_node
|
||||||
|
- && (phi = dyn_cast <gphi *>
|
||||||
|
- (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
|
||||||
|
- && dominated_by_p (CDI_DOMINATORS,
|
||||||
|
- gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
|
||||||
|
- && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
|
||||||
|
- && (PHI_ARG_DEF_FROM_EDGE (phi, e)
|
||||||
|
- == gimple_get_lhs (orig_stmt_info->stmt)))
|
||||||
|
- {
|
||||||
|
- as_a <loop_vec_info> (vinfo)->reduc_latch_defs.safe_push (stmt_info);
|
||||||
|
- }
|
||||||
|
- else if (slp_node
|
||||||
|
- && slp_node != slp_node_instance->reduc_phis)
|
||||||
|
- {
|
||||||
|
- as_a <loop_vec_info> (vinfo)->reduc_latch_slp_defs.safe_push
|
||||||
|
- (std::make_pair (slp_node, slp_node_instance->reduc_phis));
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
/* Handle stmts whose DEF is used outside the loop-nest that is
|
||||||
|
being vectorized. */
|
||||||
|
done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
|
||||||
48
tree-optimization-97812-fix-range-query-in-VRP-asser.patch
Normal file
48
tree-optimization-97812-fix-range-query-in-VRP-asser.patch
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of the patches are listed below in chronological order.
|
||||||
|
|
||||||
|
dcfd302a79a5e2ea3bb16fc4fc45a5ee31cc0eab
|
||||||
|
0001-tree-optimization-97812-fix-range-query-in-VRP-asser.patch
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/torture/pr97812.c b/gcc/testsuite/gcc.dg/torture/pr97812.c
|
||||||
|
new file mode 100644
|
||||||
|
index 00000000000..4d468adf8fa
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/torture/pr97812.c
|
||||||
|
@@ -0,0 +1,15 @@
|
||||||
|
+/* { dg-do run } */
|
||||||
|
+/* { dg-additional-options "-fdisable-tree-evrp" } */
|
||||||
|
+
|
||||||
|
+unsigned char c;
|
||||||
|
+
|
||||||
|
+int main() {
|
||||||
|
+volatile short b = 4066;
|
||||||
|
+ unsigned short bp = b;
|
||||||
|
+ unsigned d = bp & 2305;
|
||||||
|
+ signed char e = d;
|
||||||
|
+ c = e ? : e;
|
||||||
|
+ if (!d)
|
||||||
|
+ __builtin_abort ();
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
|
||||||
|
index 54ce017e8b2..d661866630e 100644
|
||||||
|
--- a/gcc/tree-vrp.c
|
||||||
|
+++ b/gcc/tree-vrp.c
|
||||||
|
@@ -1740,8 +1740,14 @@ register_edge_assert_for_2 (tree name, edge e,
|
||||||
|
&& ((TYPE_PRECISION (TREE_TYPE (name))
|
||||||
|
> TYPE_PRECISION (TREE_TYPE (rhs1)))
|
||||||
|
|| (get_range_info (rhs1, &rmin, &rmax) == VR_RANGE
|
||||||
|
- && wi::fits_to_tree_p (rmin, TREE_TYPE (name))
|
||||||
|
- && wi::fits_to_tree_p (rmax, TREE_TYPE (name)))))
|
||||||
|
+ && wi::fits_to_tree_p
|
||||||
|
+ (widest_int::from (rmin,
|
||||||
|
+ TYPE_SIGN (TREE_TYPE (rhs1))),
|
||||||
|
+ TREE_TYPE (name))
|
||||||
|
+ && wi::fits_to_tree_p
|
||||||
|
+ (widest_int::from (rmax,
|
||||||
|
+ TYPE_SIGN (TREE_TYPE (rhs1))),
|
||||||
|
+ TREE_TYPE (name)))))
|
||||||
|
add_assert_info (asserts, rhs1, rhs1,
|
||||||
|
comp_code, fold_convert (TREE_TYPE (rhs1), val));
|
||||||
|
}
|
||||||
19
vectorizable-comparison-Swap-operands-only-once.patch
Normal file
19
vectorizable-comparison-Swap-operands-only-once.patch
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
This backport contains 1 patch from the GCC mainstream tree.
|
||||||
|
The commit IDs of the patches are listed below in chronological order.
|
||||||
|
|
||||||
|
a0aeb7fb93da156b64fd08391c79ff35a69af7ba
|
||||||
|
0001-tree-vect-stmts.c-vectorizable_comparison-Swap-opera.patch
|
||||||
|
|
||||||
|
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
|
||||||
|
index e921225b5ec..601a6f55fbf 100644
|
||||||
|
--- a/gcc/tree-vect-stmts.c
|
||||||
|
+++ b/gcc/tree-vect-stmts.c
|
||||||
|
@@ -10369,7 +10369,7 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
|
||||||
|
|
||||||
|
if (!slp_node)
|
||||||
|
{
|
||||||
|
- if (swap_p)
|
||||||
|
+ if (swap_p && j == 0)
|
||||||
|
std::swap (vec_rhs1, vec_rhs2);
|
||||||
|
vec_oprnds0.quick_push (vec_rhs1);
|
||||||
|
vec_oprnds1.quick_push (vec_rhs2);
|
||||||
321
x86-Fix-bf16-and-matrix.patch
Normal file
321
x86-Fix-bf16-and-matrix.patch
Normal file
@ -0,0 +1,321 @@
|
|||||||
|
This backport contains 4 patches from the GCC mainstream tree.
|
||||||
|
The commit IDs of these patches are listed below in chronological order.
|
||||||
|
|
||||||
|
0001-re-PR-target-90424-memcpy-into-vector-builtin-not-op.patch
|
||||||
|
1bf2a0b90f2457f6d9301535560eb5e05978261b
|
||||||
|
|
||||||
|
0002-testsuite-aarch64-arm-Add-missing-quotes-to-expected.patch
|
||||||
|
0ec537f3500924f29505977aa89c2a1d4671c584
|
||||||
|
|
||||||
|
0003-x86-Tweak-testcases-for-PR82361.patch
|
||||||
|
ad4644f378fe2f731cd987a4aff14b935f530b88
|
||||||
|
|
||||||
|
0004-x86-Robustify-vzeroupper-handling-across-calls.patch
|
||||||
|
2a2e3a0dfcbe0861915f421d11b828f0c35023f0
|
||||||
|
|
||||||
|
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
|
||||||
|
index 9282a8fb6..ba72da1ec 100644
|
||||||
|
--- a/gcc/config/i386/i386.c
|
||||||
|
+++ b/gcc/config/i386/i386.c
|
||||||
|
@@ -95,6 +95,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#include "i386-builtins.h"
|
||||||
|
#include "i386-expand.h"
|
||||||
|
#include "i386-features.h"
|
||||||
|
+#include "function-abi.h"
|
||||||
|
|
||||||
|
/* This file should be included last. */
|
||||||
|
#include "target-def.h"
|
||||||
|
@@ -13529,6 +13530,15 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+ /* If the function is known to preserve some SSE registers,
|
||||||
|
+ RA and previous passes can legitimately rely on that for
|
||||||
|
+ modes wider than 256 bits. It's only safe to issue a
|
||||||
|
+ vzeroupper if all SSE registers are clobbered. */
|
||||||
|
+ const function_abi &abi = insn_callee_abi (insn);
|
||||||
|
+ if (!hard_reg_set_subset_p (reg_class_contents[ALL_SSE_REGS],
|
||||||
|
+ abi.mode_clobbers (V4DImode)))
|
||||||
|
+ return AVX_U128_ANY;
|
||||||
|
+
|
||||||
|
return AVX_U128_CLEAN;
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/g++.target/i386/pr90424-1.C b/gcc/testsuite/g++.target/i386/pr90424-1.C
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..9df8c089b
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/g++.target/i386/pr90424-1.C
|
||||||
|
@@ -0,0 +1,32 @@
|
||||||
|
+/* { dg-do compile { target c++11 } } */
|
||||||
|
+/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */
|
||||||
|
+
|
||||||
|
+template <class T>
|
||||||
|
+using V [[gnu::vector_size(16)]] = T;
|
||||||
|
+
|
||||||
|
+template <class T, unsigned M = sizeof(V<T>)>
|
||||||
|
+V<T> load(const void *p) {
|
||||||
|
+ using W = V<T>;
|
||||||
|
+ W r;
|
||||||
|
+ __builtin_memcpy(&r, p, M);
|
||||||
|
+ return r;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// movq or movsd
|
||||||
|
+template V<char> load<char, 8>(const void *); // bad
|
||||||
|
+template V<short> load<short, 8>(const void *); // bad
|
||||||
|
+template V<int> load<int, 8>(const void *); // bad
|
||||||
|
+template V<long> load<long, 8>(const void *); // good
|
||||||
|
+// the following is disabled because V2SF isn't a supported mode
|
||||||
|
+// template V<float> load<float, 8>(const void *); // bad
|
||||||
|
+template V<double> load<double, 8>(const void *); // good (movsd?)
|
||||||
|
+
|
||||||
|
+// movd or movss
|
||||||
|
+template V<char> load<char, 4>(const void *); // bad
|
||||||
|
+template V<short> load<short, 4>(const void *); // bad
|
||||||
|
+template V<int> load<int, 4>(const void *); // good
|
||||||
|
+template V<float> load<float, 4>(const void *); // good
|
||||||
|
+
|
||||||
|
+/* We should end up with one load and one insert for each function. */
|
||||||
|
+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 9 "optimized" } } */
|
||||||
|
+/* { dg-final { scan-tree-dump-times "MEM" 9 "optimized" } } */
|
||||||
|
diff --git a/gcc/testsuite/g++.target/i386/pr90424-2.C b/gcc/testsuite/g++.target/i386/pr90424-2.C
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..3abb65f45
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/g++.target/i386/pr90424-2.C
|
||||||
|
@@ -0,0 +1,31 @@
|
||||||
|
+/* { dg-do compile { target c++11 } } */
|
||||||
|
+/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */
|
||||||
|
+
|
||||||
|
+template <class T>
|
||||||
|
+using V [[gnu::vector_size(16)]] = T;
|
||||||
|
+
|
||||||
|
+template <class T, unsigned M = sizeof(V<T>)>
|
||||||
|
+V<T> load(const void *p) {
|
||||||
|
+ V<T> r = {};
|
||||||
|
+ __builtin_memcpy(&r, p, M);
|
||||||
|
+ return r;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+// movq or movsd
|
||||||
|
+template V<char> load<char, 8>(const void *); // bad
|
||||||
|
+template V<short> load<short, 8>(const void *); // bad
|
||||||
|
+template V<int> load<int, 8>(const void *); // bad
|
||||||
|
+template V<long> load<long, 8>(const void *); // good
|
||||||
|
+// the following is disabled because V2SF isn't a supported mode
|
||||||
|
+// template V<float> load<float, 8>(const void *); // bad
|
||||||
|
+template V<double> load<double, 8>(const void *); // good (movsd?)
|
||||||
|
+
|
||||||
|
+// movd or movss
|
||||||
|
+template V<char> load<char, 4>(const void *); // bad
|
||||||
|
+template V<short> load<short, 4>(const void *); // bad
|
||||||
|
+template V<int> load<int, 4>(const void *); // good
|
||||||
|
+template V<float> load<float, 4>(const void *); // good
|
||||||
|
+
|
||||||
|
+/* We should end up with one load and one insert for each function. */
|
||||||
|
+/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 9 "optimized" } } */
|
||||||
|
+/* { dg-final { scan-tree-dump-times "MEM" 9 "optimized" } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_10.c b/gcc/testsuite/gcc.target/aarch64/target_attr_10.c
|
||||||
|
index 184990471..d96a8733a 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/aarch64/target_attr_10.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/aarch64/target_attr_10.c
|
||||||
|
@@ -13,4 +13,4 @@ foo (uint8x16_t a, uint8x16_t b, uint8x16_t c)
|
||||||
|
return vbslq_u8 (a, b, c); /* { dg-message "called from here" } */
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* { dg-error "inlining failed in call to always_inline" "" { target *-*-* } 0 } */
|
||||||
|
+/* { dg-error "inlining failed in call to 'always_inline'" "" { target *-*-* } 0 } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c b/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c
|
||||||
|
index 05dc579f2..fb6e0b9cd 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/arm/attr-neon-builtin-fail.c
|
||||||
|
@@ -14,5 +14,5 @@ foo (uint8x16_t *p)
|
||||||
|
*p = vmovq_n_u8 (3); /* { dg-message "called from here" } */
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* { dg-error "inlining failed in call to always_inline" "" { target *-*-* } 0 } */
|
||||||
|
+/* { dg-error "inlining failed in call to 'always_inline'" "" { target *-*-* } 0 } */
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/i386/pr82361-1.c b/gcc/testsuite/gcc.target/i386/pr82361-1.c
|
||||||
|
index e7c356557..dec1792ae 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/i386/pr82361-1.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/i386/pr82361-1.c
|
||||||
|
@@ -4,50 +4,50 @@
|
||||||
|
/* We should be able to optimize all %eax to %rax zero extensions, because
|
||||||
|
div and idiv instructions with 32-bit operands zero-extend both results. */
|
||||||
|
/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
|
||||||
|
-/* FIXME: We are still not able to optimize the modulo in f1/f2, only manage
|
||||||
|
- one. */
|
||||||
|
+/* FIXME: The compiler does not merge zero-extension to the modulo part
|
||||||
|
+ of f1 and f2. */
|
||||||
|
/* { dg-final { scan-assembler-times "movl\t%edx" 2 } } */
|
||||||
|
|
||||||
|
void
|
||||||
|
f1 (unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
- unsigned long long c = a / b;
|
||||||
|
- unsigned long long d = a % b;
|
||||||
|
+ register unsigned long long c asm ("rax") = a / b;
|
||||||
|
+ register unsigned long long d asm ("rdx") = a % b;
|
||||||
|
asm volatile ("" : : "r" (c), "r" (d));
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
f2 (int a, int b)
|
||||||
|
{
|
||||||
|
- unsigned long long c = (unsigned int) (a / b);
|
||||||
|
- unsigned long long d = (unsigned int) (a % b);
|
||||||
|
+ register unsigned long long c asm ("rax") = (unsigned int) (a / b);
|
||||||
|
+ register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
|
||||||
|
asm volatile ("" : : "r" (c), "r" (d));
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
f3 (unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
- unsigned long long c = a / b;
|
||||||
|
+ register unsigned long long c asm ("rax") = a / b;
|
||||||
|
asm volatile ("" : : "r" (c));
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
f4 (int a, int b)
|
||||||
|
{
|
||||||
|
- unsigned long long c = (unsigned int) (a / b);
|
||||||
|
+ register unsigned long long c asm ("rax") = (unsigned int) (a / b);
|
||||||
|
asm volatile ("" : : "r" (c));
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
f5 (unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
- unsigned long long d = a % b;
|
||||||
|
+ register unsigned long long d asm ("rdx") = a % b;
|
||||||
|
asm volatile ("" : : "r" (d));
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
f6 (int a, int b)
|
||||||
|
{
|
||||||
|
- unsigned long long d = (unsigned int) (a % b);
|
||||||
|
+ register unsigned long long d asm ("rdx") = (unsigned int) (a % b);
|
||||||
|
asm volatile ("" : : "r" (d));
|
||||||
|
}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/i386/pr82361-2.c b/gcc/testsuite/gcc.target/i386/pr82361-2.c
|
||||||
|
index c1e484d6e..2d87de182 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/i386/pr82361-2.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/i386/pr82361-2.c
|
||||||
|
@@ -4,7 +4,8 @@
|
||||||
|
/* We should be able to optimize all %eax to %rax zero extensions, because
|
||||||
|
div and idiv instructions with 32-bit operands zero-extend both results. */
|
||||||
|
/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
|
||||||
|
-/* Ditto %edx to %rdx zero extensions. */
|
||||||
|
-/* { dg-final { scan-assembler-not "movl\t%edx, %edx" } } */
|
||||||
|
+/* FIXME: The compiler does not merge zero-extension to the modulo part
|
||||||
|
+ of f1 and f2. */
|
||||||
|
+/* { dg-final { scan-assembler-times "movl\t%edx" 4 } } */
|
||||||
|
|
||||||
|
#include "pr82361-1.c"
|
||||||
|
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
|
||||||
|
index 527deffe4..be47519bc 100644
|
||||||
|
--- a/gcc/tree-cfg.c
|
||||||
|
+++ b/gcc/tree-cfg.c
|
||||||
|
@@ -4297,8 +4297,17 @@ verify_gimple_assign_ternary (gassign *stmt)
|
||||||
|
}
|
||||||
|
if (! ((INTEGRAL_TYPE_P (rhs1_type)
|
||||||
|
&& INTEGRAL_TYPE_P (rhs2_type))
|
||||||
|
+ /* Vector element insert. */
|
||||||
|
|| (VECTOR_TYPE_P (rhs1_type)
|
||||||
|
- && types_compatible_p (TREE_TYPE (rhs1_type), rhs2_type))))
|
||||||
|
+ && types_compatible_p (TREE_TYPE (rhs1_type), rhs2_type))
|
||||||
|
+ /* Aligned sub-vector insert. */
|
||||||
|
+ || (VECTOR_TYPE_P (rhs1_type)
|
||||||
|
+ && VECTOR_TYPE_P (rhs2_type)
|
||||||
|
+ && types_compatible_p (TREE_TYPE (rhs1_type),
|
||||||
|
+ TREE_TYPE (rhs2_type))
|
||||||
|
+ && multiple_p (TYPE_VECTOR_SUBPARTS (rhs1_type),
|
||||||
|
+ TYPE_VECTOR_SUBPARTS (rhs2_type))
|
||||||
|
+ && multiple_of_p (bitsizetype, rhs3, TYPE_SIZE (rhs2_type)))))
|
||||||
|
{
|
||||||
|
error ("not allowed type combination in BIT_INSERT_EXPR");
|
||||||
|
debug_generic_expr (rhs1_type);
|
||||||
|
diff --git a/gcc/tree-ssa.c b/gcc/tree-ssa.c
|
||||||
|
index 1dc544b6d..a149f5e79 100644
|
||||||
|
--- a/gcc/tree-ssa.c
|
||||||
|
+++ b/gcc/tree-ssa.c
|
||||||
|
@@ -1522,8 +1522,6 @@ non_rewritable_lvalue_p (tree lhs)
|
||||||
|
if (DECL_P (decl)
|
||||||
|
&& VECTOR_TYPE_P (TREE_TYPE (decl))
|
||||||
|
&& TYPE_MODE (TREE_TYPE (decl)) != BLKmode
|
||||||
|
- && operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (lhs)),
|
||||||
|
- TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (decl))), 0)
|
||||||
|
&& known_ge (mem_ref_offset (lhs), 0)
|
||||||
|
&& known_gt (wi::to_poly_offset (TYPE_SIZE_UNIT (TREE_TYPE (decl))),
|
||||||
|
mem_ref_offset (lhs))
|
||||||
|
@@ -1531,7 +1529,24 @@ non_rewritable_lvalue_p (tree lhs)
|
||||||
|
TYPE_SIZE_UNIT (TREE_TYPE (lhs)))
|
||||||
|
&& known_ge (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (decl))),
|
||||||
|
wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (lhs)))))
|
||||||
|
- return false;
|
||||||
|
+ {
|
||||||
|
+ poly_uint64 lhs_bits, nelts;
|
||||||
|
+ if (poly_int_tree_p (TYPE_SIZE (TREE_TYPE (lhs)), &lhs_bits)
|
||||||
|
+ && multiple_p (lhs_bits,
|
||||||
|
+ tree_to_uhwi
|
||||||
|
+ (TYPE_SIZE (TREE_TYPE (TREE_TYPE (decl)))),
|
||||||
|
+ &nelts))
|
||||||
|
+ {
|
||||||
|
+ if (known_eq (nelts, 1u))
|
||||||
|
+ return false;
|
||||||
|
+ /* For sub-vector inserts the insert vector mode has to be
|
||||||
|
+ supported. */
|
||||||
|
+ tree vtype = build_vector_type (TREE_TYPE (TREE_TYPE (decl)),
|
||||||
|
+ nelts);
|
||||||
|
+ if (TYPE_MODE (vtype) != BLKmode)
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
/* A vector-insert using a BIT_FIELD_REF is rewritable using
|
||||||
|
@@ -1869,20 +1884,30 @@ execute_update_addresses_taken (void)
|
||||||
|
&& bitmap_bit_p (suitable_for_renaming, DECL_UID (sym))
|
||||||
|
&& VECTOR_TYPE_P (TREE_TYPE (sym))
|
||||||
|
&& TYPE_MODE (TREE_TYPE (sym)) != BLKmode
|
||||||
|
- && operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (lhs)),
|
||||||
|
- TYPE_SIZE_UNIT
|
||||||
|
- (TREE_TYPE (TREE_TYPE (sym))), 0)
|
||||||
|
- && tree_fits_uhwi_p (TREE_OPERAND (lhs, 1))
|
||||||
|
- && tree_int_cst_lt (TREE_OPERAND (lhs, 1),
|
||||||
|
- TYPE_SIZE_UNIT (TREE_TYPE (sym)))
|
||||||
|
- && (tree_to_uhwi (TREE_OPERAND (lhs, 1))
|
||||||
|
- % tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (lhs)))) == 0)
|
||||||
|
+ && known_ge (mem_ref_offset (lhs), 0)
|
||||||
|
+ && known_gt (wi::to_poly_offset
|
||||||
|
+ (TYPE_SIZE_UNIT (TREE_TYPE (sym))),
|
||||||
|
+ mem_ref_offset (lhs))
|
||||||
|
+ && multiple_of_p (sizetype,
|
||||||
|
+ TREE_OPERAND (lhs, 1),
|
||||||
|
+ TYPE_SIZE_UNIT (TREE_TYPE (lhs))))
|
||||||
|
{
|
||||||
|
tree val = gimple_assign_rhs1 (stmt);
|
||||||
|
if (! types_compatible_p (TREE_TYPE (val),
|
||||||
|
TREE_TYPE (TREE_TYPE (sym))))
|
||||||
|
{
|
||||||
|
- tree tem = make_ssa_name (TREE_TYPE (TREE_TYPE (sym)));
|
||||||
|
+ poly_uint64 lhs_bits, nelts;
|
||||||
|
+ tree temtype = TREE_TYPE (TREE_TYPE (sym));
|
||||||
|
+ if (poly_int_tree_p (TYPE_SIZE (TREE_TYPE (lhs)),
|
||||||
|
+ &lhs_bits)
|
||||||
|
+ && multiple_p (lhs_bits,
|
||||||
|
+ tree_to_uhwi
|
||||||
|
+ (TYPE_SIZE (TREE_TYPE
|
||||||
|
+ (TREE_TYPE (sym)))),
|
||||||
|
+ &nelts)
|
||||||
|
+ && maybe_ne (nelts, 1u))
|
||||||
|
+ temtype = build_vector_type (temtype, nelts);
|
||||||
|
+ tree tem = make_ssa_name (temtype);
|
||||||
|
gimple *pun
|
||||||
|
= gimple_build_assign (tem,
|
||||||
|
build1 (VIEW_CONVERT_EXPR,
|
||||||
Loading…
x
Reference in New Issue
Block a user