208 lines
7.4 KiB
Diff
208 lines
7.4 KiB
Diff
This backport contains 2 patches from the upstream GCC tree.
|
|
The commit IDs of these patches are listed below in chronological order.
|
|
|
|
0001-AArch64-Improve-SVE-constant-moves.patch
|
|
4aeb1ba7f62c1d680c819ae3e137c3bad6f520ca
|
|
|
|
0002-aarch64-Add-vector-vector-vec_extract-patterns-PR928.patch
|
|
c15893df6eafc32efd6184379dd7f02c36da7d12
|
|
|
|
diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
|
--- a/gcc/config/aarch64/aarch64.c 2020-09-03 19:50:00.484000000 +0800
|
|
+++ b/gcc/config/aarch64/aarch64.c 2020-09-03 19:50:19.336943210 +0800
|
|
@@ -3632,7 +3632,7 @@ aarch64_maybe_expand_sve_subreg_move (rt
|
|
attributes. Unlike gen_lowpart, this doesn't care whether the
|
|
mode change is valid. */
|
|
|
|
-static rtx
|
|
+rtx
|
|
aarch64_replace_reg_mode (rtx x, machine_mode mode)
|
|
{
|
|
if (GET_MODE (x) == mode)
|
|
@@ -15016,6 +15016,36 @@ aarch64_simd_check_vect_par_cnst_half (r
|
|
return true;
|
|
}
|
|
|
|
+/* Return a PARALLEL containing NELTS elements, with element I equal
|
|
+ to BASE + I * STEP. */
|
|
+
|
|
+rtx
|
|
+aarch64_gen_stepped_int_parallel (unsigned int nelts, int base, int step)
|
|
+{
|
|
+ rtvec vec = rtvec_alloc (nelts);
|
|
+ for (unsigned int i = 0; i < nelts; ++i)
|
|
+ RTVEC_ELT (vec, i) = gen_int_mode (base + i * step, DImode);
|
|
+ return gen_rtx_PARALLEL (VOIDmode, vec);
|
|
+}
|
|
+
|
|
+/* Return true if OP is a PARALLEL of CONST_INTs that form a linear
|
|
+ series with step STEP. */
|
|
+
|
|
+bool
|
|
+aarch64_stepped_int_parallel_p (rtx op, int step)
|
|
+{
|
|
+ if (GET_CODE (op) != PARALLEL || !CONST_INT_P (XVECEXP (op, 0, 0)))
|
|
+ return false;
|
|
+
|
|
+ unsigned HOST_WIDE_INT base = UINTVAL (XVECEXP (op, 0, 0));
|
|
+ for (int i = 1; i < XVECLEN (op, 0); ++i)
|
|
+ if (!CONST_INT_P (XVECEXP (op, 0, i))
|
|
+ || UINTVAL (XVECEXP (op, 0, i)) != base + i * step)
|
|
+ return false;
|
|
+
|
|
+ return true;
|
|
+}
|
|
+
|
|
/* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
|
|
HIGH (exclusive). */
|
|
void
|
|
diff -Nurp a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
|
|
--- a/gcc/config/aarch64/aarch64-protos.h 2020-09-03 19:50:00.484000000 +0800
|
|
+++ b/gcc/config/aarch64/aarch64-protos.h 2020-09-03 19:50:29.137683100 +0800
|
|
@@ -501,6 +501,8 @@ bool aarch64_sve_ld1r_operand_p (rtx);
|
|
bool aarch64_sve_ldr_operand_p (rtx);
|
|
bool aarch64_sve_struct_memory_operand_p (rtx);
|
|
rtx aarch64_simd_vect_par_cnst_half (machine_mode, int, bool);
|
|
+rtx aarch64_gen_stepped_int_parallel (unsigned int, int, int);
|
|
+bool aarch64_stepped_int_parallel_p (rtx, int);
|
|
rtx aarch64_tls_get_addr (void);
|
|
tree aarch64_fold_builtin (tree, int, tree *, bool);
|
|
unsigned aarch64_dbx_register_number (unsigned);
|
|
@@ -516,6 +518,7 @@ void aarch64_expand_mov_immediate (rtx,
|
|
void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
|
|
void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode);
|
|
bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
|
|
+rtx aarch64_replace_reg_mode (rtx, machine_mode);
|
|
void aarch64_split_sve_subreg_move (rtx, rtx, rtx);
|
|
void aarch64_expand_prologue (void);
|
|
void aarch64_expand_vector_init (rtx, rtx);
|
|
diff -Nurp a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
|
|
--- a/gcc/config/aarch64/aarch64-simd.md 2020-09-03 19:50:00.484000000 +0800
|
|
+++ b/gcc/config/aarch64/aarch64-simd.md 2020-09-03 19:50:44.100673150 +0800
|
|
@@ -282,37 +282,51 @@
|
|
rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
|
|
rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
|
|
rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
|
|
-
|
|
- emit_insn
|
|
- (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
|
|
- emit_insn
|
|
- (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
|
|
+ emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
|
|
+ emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
|
|
}
|
|
DONE;
|
|
}
|
|
)
|
|
|
|
-(define_insn "aarch64_simd_mov_from_<mode>low"
|
|
- [(set (match_operand:<VHALF> 0 "register_operand" "=r")
|
|
+(define_expand "aarch64_get_half<mode>"
|
|
+ [(set (match_operand:<VHALF> 0 "register_operand")
|
|
(vec_select:<VHALF>
|
|
- (match_operand:VQ 1 "register_operand" "w")
|
|
- (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
|
|
- "TARGET_SIMD && reload_completed"
|
|
- "umov\t%0, %1.d[0]"
|
|
- [(set_attr "type" "neon_to_gp<q>")
|
|
- (set_attr "length" "4")
|
|
- ])
|
|
+ (match_operand:VQ 1 "register_operand")
|
|
+ (match_operand 2 "ascending_int_parallel")))]
|
|
+ "TARGET_SIMD"
|
|
+)
|
|
+
|
|
+(define_insn_and_split "aarch64_simd_mov_from_<mode>low"
|
|
+ [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
|
|
+ (vec_select:<VHALF>
|
|
+ (match_operand:VQ_NO2E 1 "register_operand" "w,w")
|
|
+ (match_operand:VQ_NO2E 2 "vect_par_cnst_lo_half" "")))]
|
|
+ "TARGET_SIMD"
|
|
+ "@
|
|
+ #
|
|
+ umov\t%0, %1.d[0]"
|
|
+ "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
|
|
+ [(set (match_dup 0) (match_dup 1))]
|
|
+ {
|
|
+ operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
|
|
+ }
|
|
+ [(set_attr "type" "mov_reg,neon_to_gp<q>")
|
|
+ (set_attr "length" "4")]
|
|
+)
|
|
|
|
(define_insn "aarch64_simd_mov_from_<mode>high"
|
|
- [(set (match_operand:<VHALF> 0 "register_operand" "=r")
|
|
+ [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
|
|
(vec_select:<VHALF>
|
|
- (match_operand:VQ 1 "register_operand" "w")
|
|
- (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
|
|
- "TARGET_SIMD && reload_completed"
|
|
- "umov\t%0, %1.d[1]"
|
|
- [(set_attr "type" "neon_to_gp<q>")
|
|
- (set_attr "length" "4")
|
|
- ])
|
|
+ (match_operand:VQ_NO2E 1 "register_operand" "w,w")
|
|
+ (match_operand:VQ_NO2E 2 "vect_par_cnst_hi_half" "")))]
|
|
+ "TARGET_SIMD"
|
|
+ "@
|
|
+ dup\\t%d0, %1.d[1]
|
|
+ umov\t%0, %1.d[1]"
|
|
+ [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
|
|
+ (set_attr "length" "4")]
|
|
+)
|
|
|
|
(define_insn "orn<mode>3"
|
|
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
|
@@ -6016,6 +6030,35 @@
|
|
DONE;
|
|
})
|
|
|
|
+;; Extract a 64-bit vector from one half of a 128-bit vector.
|
|
+(define_expand "vec_extract<mode><Vhalf>"
|
|
+ [(match_operand:<VHALF> 0 "register_operand")
|
|
+ (match_operand:VQ_NO2E 1 "register_operand")
|
|
+ (match_operand 2 "immediate_operand")]
|
|
+ "TARGET_SIMD"
|
|
+{
|
|
+ int start = INTVAL (operands[2]);
|
|
+ if (start != 0 && start != <nunits> / 2)
|
|
+ FAIL;
|
|
+ rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
|
|
+ emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
|
|
+ DONE;
|
|
+})
|
|
+
|
|
+;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
|
|
+(define_expand "vec_extractv2dfv1df"
|
|
+ [(match_operand:V1DF 0 "register_operand")
|
|
+ (match_operand:V2DF 1 "register_operand")
|
|
+ (match_operand 2 "immediate_operand")]
|
|
+ "TARGET_SIMD"
|
|
+{
|
|
+ /* V1DF is rarely used by other patterns, so it should be better to hide
|
|
+ it in a subreg destination of a normal DF op. */
|
|
+ rtx scalar0 = gen_lowpart (DFmode, operands[0]);
|
|
+ emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
|
|
+ DONE;
|
|
+})
|
|
+
|
|
;; aes
|
|
|
|
(define_insn "aarch64_crypto_aes<aes_op>v16qi"
|
|
diff -Nurp a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
|
|
--- a/gcc/config/aarch64/predicates.md 2020-09-03 19:50:00.484000000 +0800
|
|
+++ b/gcc/config/aarch64/predicates.md 2020-09-03 19:50:49.315344350 +0800
|
|
@@ -438,6 +438,12 @@
|
|
return aarch64_simd_check_vect_par_cnst_half (op, mode, false);
|
|
})
|
|
|
|
+(define_predicate "ascending_int_parallel"
|
|
+ (match_code "parallel")
|
|
+{
|
|
+ return aarch64_stepped_int_parallel_p (op, 1);
|
|
+})
|
|
+
|
|
(define_special_predicate "aarch64_simd_lshift_imm"
|
|
(match_code "const,const_vector")
|
|
{
|