gcc/SLP-VECT-Add-check-to-fix-96837.patch

100 lines
3.6 KiB
Diff
Raw Normal View History

Upload GCC feature and bugfix patches. - avoid-cycling-on-vertain-subreg-reloads.patch: Add patch source comment - change-gcc-BASE-VER.patch: Likewise - dont-generate-IF_THEN_ELSE.patch: Likewise - fix-ICE-in-compute_live_loop_exits.patch: Likewise - fix-ICE-in-eliminate_stmt.patch: Likewise - fix-ICE-in-vect_create_epilog_for_reduction.patch: Likewise - fix-ICE-in-vect_stmt_to_vectorize.patch: Likewise - fix-ICE-in-verify_ssa.patch: Likewise - fix-ICE-when-vectorizing-nested-cycles.patch: Likewise - fix-cost-of-plus.patch: Likewise - ipa-const-prop-self-recursion-bugfix.patch: Likewise - simplify-removing-subregs.patch: Likewise - medium-code-mode.patch: Bugfix - fix-when-peeling-for-alignment.patch: Move to ... - fix-PR-92351-When-peeling-for-alignment.patch: ... this - AArch64-Fix-constraints-for-CPY-M.patch: New file - Apply-maximum-nunits-for-BB-SLP.patch: New file - Fix-EXTRACT_LAST_REDUCTION-segfault.patch: New file - Fix-up-push_partial_def-little-endian-bitfield.patch: New file - Fix-zero-masking-for-vcvtps2ph.patch: New file - IRA-Handle-fully-tied-destinations.patch: New file - SLP-VECT-Add-check-to-fix-96837.patch: New file - aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch: New file - aarch64-Fix-bf16-and-matrix-g++-gfortran.patch: New file - aarch64-Fix-mismatched-SVE-predicate-modes.patch: New file - aarch64-fix-sve-acle-error.patch: New file - adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch: New file - bf16-and-matrix-characteristic.patch: New file - fix-ICE-IPA-compare-VRP-types.patch: New file - fix-ICE-in-affine-combination.patch: New file - fix-ICE-in-pass-vect.patch: New file - fix-ICE-in-vect_update_misalignment_for_peel.patch: New file - fix-addlosymdi-ICE-in-pass-reload.patch: New file - fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch: New file - fix-avx512vl-vcvttpd2dq-2-fail.patch: New file - fix-issue499-add-nop-convert.patch: New file - fix-issue604-ldist-dependency-fixup.patch: New file - modulo-sched-Carefully-process-loop-counter-initiali.patch: New file - re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch: New file - reduction-paths-with-unhandled-live-stmt.patch: New file - redundant-loop-elimination.patch: New file - sccvn-Improve-handling-of-load-masked-with-integer.patch: New file - speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch: New file - store-merging-Consider-also-overlapping-stores-earlier.patch: New file - tree-optimization-96920-another-ICE-when-vectorizing.patch: New file - tree-optimization-97812-fix-range-query-in-VRP-asser.patch: New file - vectorizable-comparison-Swap-operands-only-once.patch: New file - x86-Fix-bf16-and-matrix.patch: New file
2020-12-30 09:54:10 +08:00
This backport contains 2 patchs from gcc main stream tree.
The commit id of these patchs list as following in the order of time.
97b798d80baf945ea28236eef3fa69f36626b579
0001-SLP-VECT-Add-check-to-fix-96837.patch
373b99dc40949efa697326f378e5022a02e0328b
0002-Add-a-testcase-for-PR-target-96827.patch
diff -uprN a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-49.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-49.c 2020-11-17 15:58:12.118126065 +0800
@@ -0,0 +1,28 @@
+/* This checks that vectorized constructors have the correct ordering. */
+/* { dg-require-effective-target vect_int } */
+
+typedef int V __attribute__((__vector_size__(16)));
+
+__attribute__((__noipa__)) void
+foo (unsigned int x, V *y)
+{
+ unsigned int a[4] = { x + 0, x + 2, x + 4, x + 6 };
+ for (unsigned int i = 0; i < 3; ++i)
+ if (a[i] == 1234)
+ a[i]--;
+ *y = (V) { a[3], a[2], a[1], a[0] };
+}
+
+int
+main ()
+{
+ V b;
+ foo (0, &b);
+ if (b[0] != 6 || b[1] != 4 || b[2] != 2 || b[3] != 0)
+ __builtin_abort ();
+ return 0;
+}
+
+/* See that we vectorize an SLP instance. */
+/* { dg-final { scan-tree-dump "Analyzing vectorizable constructor" "slp1" } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "slp1" } } */
diff -uprN a/gcc/testsuite/gcc.target/i386/pr96827.c b/gcc/testsuite/gcc.target/i386/pr96827.c
--- a/gcc/testsuite/gcc.target/i386/pr96827.c 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/testsuite/gcc.target/i386/pr96827.c 2020-11-17 15:58:15.182126065 +0800
@@ -0,0 +1,41 @@
+/* { dg-do run { target sse2_runtime } } */
+/* { dg-options "-O3 -msse2 -mfpmath=sse" } */
+
+typedef unsigned short int __uint16_t;
+typedef unsigned int __uint32_t;
+typedef __uint16_t uint16_t;
+typedef __uint32_t uint32_t;
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_store_si128 (__m128i *__P, __m128i __B)
+{
+ *__P = __B;
+}
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
+{
+ return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
+}
+typedef uint16_t u16;
+typedef uint32_t u32;
+extern int printf (const char *__restrict __format, ...);
+void do_the_thing(u32 idx, __m128i *dude)
+{
+ u32 dude_[4] = { idx+0, idx+2, idx+4, idx+6 };
+ for (u32 i = 0; i < 3; ++i)
+ if (dude_[i] == 1234)
+ dude_[i]--;
+ *dude = _mm_set_epi32(dude_[0], dude_[1], dude_[2], dude_[3]);
+}
+int main()
+{
+ __m128i dude;
+ u32 idx = 0;
+ do_the_thing(idx, &dude);
+ __attribute__((aligned(16))) u32 dude_[4];
+ _mm_store_si128((__m128i*)dude_, dude);
+ if (!(6 == dude_[0] && 4 == dude_[1] && 2 == dude_[2] && 0 == dude_[3]))
+ __builtin_abort ();
+ return 0;
+}
diff -uprN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
--- a/gcc/tree-vect-slp.c 2020-11-17 15:55:57.098126065 +0800
+++ b/gcc/tree-vect-slp.c 2020-11-17 15:59:25.862126065 +0800
@@ -1842,7 +1842,8 @@ vect_supported_load_permutation_p (slp_i
/* Reduction (there are no data-refs in the root).
In reduction chain the order of the loads is not important. */
if (!STMT_VINFO_DATA_REF (stmt_info)
- && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
+ && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)
+ && !SLP_INSTANCE_ROOT_STMT (slp_instn))
vect_attempt_slp_rearrange_stmts (slp_instn);
/* In basic block vectorization we allow any subchain of an interleaving