- avoid-cycling-on-vertain-subreg-reloads.patch: Add patch source comment - change-gcc-BASE-VER.patch: Likewise - dont-generate-IF_THEN_ELSE.patch: Likewise - fix-ICE-in-compute_live_loop_exits.patch: Likewise - fix-ICE-in-eliminate_stmt.patch: Likewise - fix-ICE-in-vect_create_epilog_for_reduction.patch: Likewise - fix-ICE-in-vect_stmt_to_vectorize.patch: Likewise - fix-ICE-in-verify_ssa.patch: Likewise - fix-ICE-when-vectorizing-nested-cycles.patch: Likewise - fix-cost-of-plus.patch: Likewise - ipa-const-prop-self-recursion-bugfix.patch: Likewise - simplify-removing-subregs.patch: Likewise - medium-code-mode.patch: Bugfix - fix-when-peeling-for-alignment.patch: Move to ... - fix-PR-92351-When-peeling-for-alignment.patch: ... this - AArch64-Fix-constraints-for-CPY-M.patch: New file - Apply-maximum-nunits-for-BB-SLP.patch: New file - Fix-EXTRACT_LAST_REDUCTION-segfault.patch: New file - Fix-up-push_partial_def-little-endian-bitfield.patch: New file - Fix-zero-masking-for-vcvtps2ph.patch: New file - IRA-Handle-fully-tied-destinations.patch: New file - SLP-VECT-Add-check-to-fix-96837.patch: New file - aarch64-Fix-ash-lr-lshr-mode-3-expanders.patch: New file - aarch64-Fix-bf16-and-matrix-g++-gfortran.patch: New file - aarch64-Fix-mismatched-SVE-predicate-modes.patch: New file - aarch64-fix-sve-acle-error.patch: New file - adjust-vector-cost-and-move-EXTRACT_LAST_REDUCTION-costing.patch: New file - bf16-and-matrix-characteristic.patch: New file - fix-ICE-IPA-compare-VRP-types.patch: New file - fix-ICE-in-affine-combination.patch: New file - fix-ICE-in-pass-vect.patch: New file - fix-ICE-in-vect_update_misalignment_for_peel.patch: New file - fix-addlosymdi-ICE-in-pass-reload.patch: New file - fix-an-ICE-in-vect_recog_mask_conversion_pattern.patch: New file - fix-avx512vl-vcvttpd2dq-2-fail.patch: New file - fix-issue499-add-nop-convert.patch: New file - fix-issue604-ldist-dependency-fixup.patch: New file - modulo-sched-Carefully-process-loop-counter-initiali.patch: New 
file - re-PR-target-91124-gcc.target-i386-avx512vl-vpshldvd.patch: New file - reduction-paths-with-unhandled-live-stmt.patch: New file - redundant-loop-elimination.patch: New file - sccvn-Improve-handling-of-load-masked-with-integer.patch: New file - speed-up-DDG-analysis-and-fix-bootstrap-compare-debug.patch: New file - store-merging-Consider-also-overlapping-stores-earlier.patch: New file - tree-optimization-96920-another-ICE-when-vectorizing.patch: New file - tree-optimization-97812-fix-range-query-in-VRP-asser.patch: New file - vectorizable-comparison-Swap-operands-only-once.patch: New file - x86-Fix-bf16-and-matrix.patch: New file
124 lines
4.2 KiB
Diff
124 lines
4.2 KiB
Diff
This backport contains 1 patch from the GCC mainstream tree.
|
|
The commit IDs of the patches are listed below in chronological order.
|
|
|
|
0001-expand-Simplify-removing-subregs-when-expanding-a-co.patch
|
|
9a182ef9ee011935d827ab5c6c9a7cd8e22257d8
|
|
|
|
diff -Nurp a/gcc/expr.c b/gcc/expr.c
|
|
--- a/gcc/expr.c 2020-08-05 20:33:04.068000000 +0800
|
|
+++ b/gcc/expr.c 2020-08-05 20:33:21.420000000 +0800
|
|
@@ -3770,6 +3770,78 @@ emit_move_insn (rtx x, rtx y)
|
|
gcc_assert (mode != BLKmode
|
|
&& (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode));
|
|
|
|
+ /* If we have a copy that looks like one of the following patterns:
|
|
+ (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...)))
|
|
+ (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR))
|
|
+ (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...)))
|
|
+ (set (subreg:M1 (reg:M2 ...)) (constant C))
|
|
+ where mode M1 is equal in size to M2, try to detect whether the
|
|
+ mode change involves an implicit round trip through memory.
|
|
+ If so, see if we can avoid that by removing the subregs and
|
|
+ doing the move in mode M2 instead. */
|
|
+
|
|
+ rtx x_inner = NULL_RTX;
|
|
+ rtx y_inner = NULL_RTX;
|
|
+
|
|
+#define CANDIDATE_SUBREG_P(subreg) \
|
|
+ (REG_P (SUBREG_REG (subreg)) \
|
|
+ && known_eq (GET_MODE_SIZE (GET_MODE (SUBREG_REG (subreg))), \
|
|
+ GET_MODE_SIZE (GET_MODE (subreg))) \
|
|
+ && optab_handler (mov_optab, GET_MODE (SUBREG_REG (subreg))) \
|
|
+ != CODE_FOR_nothing)
|
|
+
|
|
+#define CANDIDATE_MEM_P(innermode, mem) \
|
|
+ (!targetm.can_change_mode_class ((innermode), GET_MODE (mem), ALL_REGS) \
|
|
+ && !push_operand ((mem), GET_MODE (mem)) \
|
|
+ /* Not a candidate if innermode requires too much alignment. */ \
|
|
+ && (MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (innermode) \
|
|
+ || targetm.slow_unaligned_access (GET_MODE (mem), \
|
|
+ MEM_ALIGN (mem)) \
|
|
+ || !targetm.slow_unaligned_access ((innermode), \
|
|
+ MEM_ALIGN (mem))))
|
|
+
|
|
+ if (SUBREG_P (x) && CANDIDATE_SUBREG_P (x))
|
|
+ x_inner = SUBREG_REG (x);
|
|
+
|
|
+ if (SUBREG_P (y) && CANDIDATE_SUBREG_P (y))
|
|
+ y_inner = SUBREG_REG (y);
|
|
+
|
|
+ if (x_inner != NULL_RTX
|
|
+ && y_inner != NULL_RTX
|
|
+ && GET_MODE (x_inner) == GET_MODE (y_inner)
|
|
+ && !targetm.can_change_mode_class (GET_MODE (x_inner), mode, ALL_REGS))
|
|
+ {
|
|
+ x = x_inner;
|
|
+ y = y_inner;
|
|
+ mode = GET_MODE (x_inner);
|
|
+ }
|
|
+ else if (x_inner != NULL_RTX
|
|
+ && MEM_P (y)
|
|
+ && CANDIDATE_MEM_P (GET_MODE (x_inner), y))
|
|
+ {
|
|
+ x = x_inner;
|
|
+ y = adjust_address (y, GET_MODE (x_inner), 0);
|
|
+ mode = GET_MODE (x_inner);
|
|
+ }
|
|
+ else if (y_inner != NULL_RTX
|
|
+ && MEM_P (x)
|
|
+ && CANDIDATE_MEM_P (GET_MODE (y_inner), x))
|
|
+ {
|
|
+ x = adjust_address (x, GET_MODE (y_inner), 0);
|
|
+ y = y_inner;
|
|
+ mode = GET_MODE (y_inner);
|
|
+ }
|
|
+ else if (x_inner != NULL_RTX
|
|
+ && CONSTANT_P (y)
|
|
+ && !targetm.can_change_mode_class (GET_MODE (x_inner),
|
|
+ mode, ALL_REGS)
|
|
+ && (y_inner = simplify_subreg (GET_MODE (x_inner), y, mode, 0)))
|
|
+ {
|
|
+ x = x_inner;
|
|
+ y = y_inner;
|
|
+ mode = GET_MODE (x_inner);
|
|
+ }
|
|
+
|
|
if (CONSTANT_P (y))
|
|
{
|
|
if (optimize
|
|
diff -Nurp a/gcc/testsuite/gcc.target/aarch64/pr95254.c b/gcc/testsuite/gcc.target/aarch64/pr95254.c
|
|
--- a/gcc/testsuite/gcc.target/aarch64/pr95254.c 1970-01-01 08:00:00.000000000 +0800
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/pr95254.c 2020-08-05 20:33:21.424000000 +0800
|
|
@@ -0,0 +1,19 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-O2 -ftree-slp-vectorize -march=armv8.2-a+sve -msve-vector-bits=256" } */
|
|
+
|
|
+typedef short __attribute__((vector_size (8))) v4hi;
|
|
+
|
|
+typedef union U4HI { v4hi v; short a[4]; } u4hi;
|
|
+
|
|
+short b[4];
|
|
+
|
|
+void pass_v4hi (v4hi v)
|
|
+{
|
|
+ int i;
|
|
+ u4hi u;
|
|
+ u.v = v;
|
|
+ for (i = 0; i < 4; i++)
|
|
+ b[i] = u.a[i];
|
|
+};
|
|
+
|
|
+/* { dg-final { scan-assembler-not "ptrue" } } */
|
|
diff -Nurp a/gcc/testsuite/gcc.target/i386/pr67609.c b/gcc/testsuite/gcc.target/i386/pr67609.c
|
|
--- a/gcc/testsuite/gcc.target/i386/pr67609.c 2020-08-05 20:33:04.628000000 +0800
|
|
+++ b/gcc/testsuite/gcc.target/i386/pr67609.c 2020-08-05 20:33:21.424000000 +0800
|
|
@@ -1,7 +1,7 @@
|
|
/* { dg-do compile } */
|
|
/* { dg-options "-O2 -msse2" } */
|
|
/* { dg-require-effective-target lp64 } */
|
|
-/* { dg-final { scan-assembler "movdqa" } } */
|
|
+/* { dg-final { scan-assembler "movq\t%xmm0" } } */
|
|
|
|
#include <emmintrin.h>
|
|
__m128d reg;
|