package init as gcc-7.3.0 (commit 523d5a7c93)

==== Big-endian-union-bitfield-bugfix.patch (new file, 126 lines) ====
From 900ccfa89dda3ab5f7e44a0dd4d1e9d108b5dc8b Mon Sep 17 00:00:00 2001
From: rguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4>
Date: Tue, 26 Mar 2019 13:18:23 +0000
Subject: [PATCH] 2019-02-26  Richard Biener  <rguenther@suse.de>

	Backport from mainline
	2019-02-12  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/89253
	* tree-ssa-loop-split.c (tree_ssa_split_loops): Check we can
	duplicate the loop.

	* gfortran.dg/pr89253.f: New testcase.

	2019-02-08  Richard Biener  <rguenther@suse.de>

	PR middle-end/89223
	* tree-data-ref.c (initialize_matrix_A): Fail if constant
	doesn't fit in HWI.
	(analyze_subscript_affine_affine): Handle failure from
	initialize_matrix_A.

	* gcc.dg/torture/pr89223.c: New testcase.

	2019-01-28  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/88739
	* tree-ssa-sccvn.c (vn_reference_lookup_3): Avoid generating
	BIT_FIELD_REFs of non-mode-precision integral operands.

	* gcc.c-torture/execute/pr88739.c: New test.


git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-7-branch@269942 138bc75d-0d04-0410-961f-82ee72b054a4
---
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index 2480f4e..a349e3e 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -2118,6 +2118,8 @@ initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
   switch (TREE_CODE (chrec))
     {
     case POLYNOMIAL_CHREC:
+      if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
+        return chrec_dont_know;
       A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
       return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);

@@ -2499,7 +2501,7 @@ analyze_subscript_affine_affine (tree chrec_a,
                                  tree *last_conflicts)
 {
   unsigned nb_vars_a, nb_vars_b, dim;
-  HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
+  HOST_WIDE_INT gamma, gcd_alpha_beta;
   lambda_matrix A, U, S;
   struct obstack scratch_obstack;

@@ -2536,9 +2538,20 @@ analyze_subscript_affine_affine (tree chrec_a,
   A = lambda_matrix_new (dim, 1, &scratch_obstack);
   S = lambda_matrix_new (dim, 1, &scratch_obstack);

-  init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
-  init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
-  gamma = init_b - init_a;
+  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
+  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
+  if (init_a == chrec_dont_know
+      || init_b == chrec_dont_know)
+    {
+      if (dump_file && (dump_flags & TDF_DETAILS))
+        fprintf (dump_file, "affine-affine test failed: "
+                 "representation issue.\n");
+      *overlaps_a = conflict_fn_not_known ();
+      *overlaps_b = conflict_fn_not_known ();
+      *last_conflicts = chrec_dont_know;
+      goto end_analyze_subs_aa;
+    }
+  gamma = int_cst_value (init_b) - int_cst_value (init_a);

   /* Don't do all the hard work of solving the Diophantine equation
      when we already know the solution: for example,
diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index fd97213..3992597 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -649,7 +649,8 @@ tree_ssa_split_loops (void)
                              false, true)
         && niter.cmp != ERROR_MARK
         /* We can't yet handle loops controlled by a != predicate.  */
-        && niter.cmp != NE_EXPR)
+        && niter.cmp != NE_EXPR
+        && can_duplicate_loop_p (loop))
       {
         if (split_loop (loop, &niter))
           {
diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
index c93f1f2..a2e3ce2 100644
--- a/gcc/tree-ssa-sccvn.c
+++ b/gcc/tree-ssa-sccvn.c
@@ -2029,6 +2029,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *vr_,
       base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
                                        &offset2, &size2, &maxsize2,
                                        &reverse);
+      tree def_rhs = gimple_assign_rhs1 (def_stmt);
       if (!reverse
           && maxsize2 != -1
           && maxsize2 == size2
@@ -2041,11 +2042,14 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *vr_,
              according to endianness.  */
           && (! INTEGRAL_TYPE_P (vr->type)
               || ref->size == TYPE_PRECISION (vr->type))
-          && ref->size % BITS_PER_UNIT == 0)
+          && ref->size % BITS_PER_UNIT == 0
+          && (! INTEGRAL_TYPE_P (TREE_TYPE (def_rhs))
+              || (TYPE_PRECISION (TREE_TYPE (def_rhs))
+                  == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (def_rhs))))))
         {
           code_helper rcode = BIT_FIELD_REF;
           tree ops[3];
-          ops[0] = SSA_VAL (gimple_assign_rhs1 (def_stmt));
+          ops[0] = SSA_VAL (def_rhs);
           ops[1] = bitsize_int (ref->size);
           ops[2] = bitsize_int (offset - offset2);
           tree val = vn_nary_build_or_lookup (rcode, vr->type, ops);
--
2.9.3
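The PR88739 hunk above stops value numbering from folding a load into a
BIT_FIELD_REF of a stored value whose integral type has fewer precision bits
than its machine mode (for example a bitfield), which picked the wrong bits on
big-endian targets. A minimal sketch of the affected pattern follows; it is
illustrative only, not the upstream gcc.c-torture/execute/pr88739.c testcase:

/* Illustrative sketch of the union-bitfield pattern guarded by the
   PR88739 fix: a store through a bitfield member followed by a load
   of the full container word through another union member.  */
#include <stdio.h>

union u
{
  struct { unsigned a : 3; unsigned b : 29; } s;
  unsigned whole;
};

int
main (void)
{
  union u x;
  x.whole = 0;
  x.s.a = 5;             /* store with 3-bit precision */
  unsigned v = x.whole;  /* load of the full 32-bit container */
  printf ("%08x\n", v);  /* must observe the bitfield store */
  return 0;
}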
==== CVE-2018-12886.patch (new file, 655 lines) ====
diff -urpN a/gcc/cfgexpand.c b/gcc/cfgexpand.c
--- a/gcc/cfgexpand.c	2019-05-30 16:58:45.350508770 +0800
+++ b/gcc/cfgexpand.c	2019-05-30 11:53:13.315156625 +0800
@@ -6094,6 +6094,23 @@ stack_protect_prologue (void)
   rtx x, y;

   x = expand_normal (crtl->stack_protect_guard);
+
+  if (targetm.have_stack_protect_combined_set () && guard_decl)
+    {
+      gcc_assert (DECL_P (guard_decl));
+      y = DECL_RTL (guard_decl);
+
+      /* Allow the target to compute address of Y and copy it to X without
+         leaking Y into a register.  This combined address + copy pattern
+         allows the target to prevent spilling of any intermediate results by
+         splitting it after register allocator.  */
+      if (rtx_insn *insn = targetm.gen_stack_protect_combined_set (x, y))
+        {
+          emit_insn (insn);
+          return;
+        }
+    }
+
   if (guard_decl)
     y = expand_normal (guard_decl);
   else
diff -urpN a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
--- a/gcc/config/arm/arm.c	2019-05-30 16:58:45.354508770 +0800
+++ b/gcc/config/arm/arm.c	2019-05-30 16:59:05.058508073 +0800
@@ -7236,21 +7236,34 @@ legitimate_pic_operand_p (rtx x)
   return 1;
 }

-/* Record that the current function needs a PIC register.  Initialize
-   cfun->machine->pic_reg if we have not already done so.  */
+/* Record that the current function needs a PIC register.  If PIC_REG is null,
+   a new pseudo is allocated as PIC register, otherwise PIC_REG is used.  In
+   both case cfun->machine->pic_reg is initialized if we have not already done
+   so.  COMPUTE_NOW decide whether and where to set the PIC register.  If true,
+   PIC register is reloaded in the current position of the instruction stream
+   irregardless of whether it was loaded before.  Otherwise, it is only loaded
+   if not already done so (crtl->uses_pic_offset_table is null).  Note that
+   nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
+   is only supported iff COMPUTE_NOW is false.  */

 static void
-require_pic_register (void)
+require_pic_register (rtx pic_reg, bool compute_now)
 {
+  gcc_assert (compute_now == (pic_reg != NULL_RTX));
+
   /* A lot of the logic here is made obscure by the fact that this
      routine gets called as part of the rtx cost estimation process.
      We don't want those calls to affect any assumptions about the real
      function; and further, we can't call entry_of_function() until we
      start the real expansion process.  */
-  if (!crtl->uses_pic_offset_table)
+  if (!crtl->uses_pic_offset_table || compute_now)
     {
-      gcc_assert (can_create_pseudo_p ());
+      gcc_assert (can_create_pseudo_p ()
+                  || (pic_reg != NULL_RTX
+                      && REG_P (pic_reg)
+                      && GET_MODE (pic_reg) == Pmode));
       if (arm_pic_register != INVALID_REGNUM
+          && !compute_now
           && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
         {
           if (!cfun->machine->pic_reg)
@@ -7266,8 +7279,19 @@ require_pic_register (void)
         {
           rtx_insn *seq, *insn;

-          if (!cfun->machine->pic_reg)
-            cfun->machine->pic_reg = gen_reg_rtx (Pmode);
+          if (pic_reg == NULL_RTX && cfun->machine->pic_reg == NULL_RTX)
+            {
+              pic_reg = gen_reg_rtx (Pmode);
+              cfun->machine->pic_reg = pic_reg;
+            }
+          else if (pic_reg == NULL_RTX)
+            {
+              pic_reg = cfun->machine->pic_reg;
+            }
+          else if (cfun->machine->pic_reg == NULL_RTX)
+            {
+              cfun->machine->pic_reg = pic_reg;
+            }

           /* Play games to avoid marking the function as needing pic
              if we are being called as part of the cost-estimation
@@ -7278,11 +7306,12 @@ require_pic_register (void)
               start_sequence ();

               if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
-                  && arm_pic_register > LAST_LO_REGNUM)
+                  && arm_pic_register > LAST_LO_REGNUM
+                  && !compute_now)
                 emit_move_insn (cfun->machine->pic_reg,
                                 gen_rtx_REG (Pmode, arm_pic_register));
               else
-                arm_load_pic_register (0UL);
+                arm_load_pic_register (0UL, pic_reg);

               seq = get_insns ();
               end_sequence ();
@@ -7295,16 +7324,33 @@ require_pic_register (void)
                  we can't yet emit instructions directly in the final
                  insn stream.  Queue the insns on the entry edge, they will
                  be committed after everything else is expanded.  */
-              insert_insn_on_edge (seq,
-                                   single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+              if (currently_expanding_to_rtl)
+                insert_insn_on_edge (seq,
+                                     single_succ_edge
+                                     (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+              else
+                emit_insn (seq);
             }
         }
     }
 }

+/* Legitimize PIC load to ORIG into REG.  If REG is NULL, a new pseudo is
+   created to hold the result of the load.  If not NULL, PIC_REG indicates
+   which register to use as PIC register, otherwise it is decided by register
+   allocator.  COMPUTE_NOW forces the PIC register to be loaded at the current
+   location in the instruction stream, irregardless of whether it was loaded
+   previously.  Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
+   true and null PIC_REG is only supported iff COMPUTE_NOW is false.
+
+   Returns the register REG into which the PIC load is performed.  */
+
 rtx
-legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
+legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
+                        bool compute_now)
 {
+  gcc_assert (compute_now == (pic_reg != NULL_RTX));
+
   if (GET_CODE (orig) == SYMBOL_REF
       || GET_CODE (orig) == LABEL_REF)
     {
@@ -7337,9 +7383,12 @@ legitimize_pic_address (rtx orig, machin
       rtx mem;

       /* If this function doesn't have a pic register, create one now.  */
-      require_pic_register ();
+      require_pic_register (pic_reg, compute_now);
+
+      if (pic_reg == NULL_RTX)
+        pic_reg = cfun->machine->pic_reg;

-      pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
+      pat = gen_calculate_pic_address (reg, pic_reg, orig);

       /* Make the MEM as close to a constant as possible.  */
       mem = SET_SRC (pat);
@@ -7388,9 +7437,11 @@ legitimize_pic_address (rtx orig, machin

       gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

-      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
+      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
+                                     pic_reg, compute_now);
       offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
-                                       base == reg ? 0 : reg);
+                                       base == reg ? 0 : reg, pic_reg,
+                                       compute_now);

       if (CONST_INT_P (offset))
         {
@@ -7490,16 +7541,17 @@ static GTY(()) int pic_labelno;
    low register.  */

 void
-arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
+arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
 {
-  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
+  rtx l1, labelno, pic_tmp, pic_rtx;

   if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
     return;

   gcc_assert (flag_pic);

-  pic_reg = cfun->machine->pic_reg;
+  if (pic_reg == NULL_RTX)
+    pic_reg = cfun->machine->pic_reg;
   if (TARGET_VXWORKS_RTP)
     {
       pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
@@ -8558,7 +8610,8 @@ arm_legitimize_address (rtx x, rtx orig_
     {
       /* We need to find and carefully transform any SYMBOL and LABEL
          references; so go back to the original address expression.  */
-      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
+      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
+                                          false /*compute_now*/);

       if (new_x != orig_x)
         x = new_x;
@@ -8626,7 +8679,8 @@ thumb_legitimize_address (rtx x, rtx ori
     {
       /* We need to find and carefully transform any SYMBOL and LABEL
          references; so go back to the original address expression.  */
-      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
+      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
+                                          false /*compute_now*/);

       if (new_x != orig_x)
         x = new_x;
@@ -17800,7 +17854,7 @@ arm_emit_call_insn (rtx pat, rtx addr, b
           ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
           : !SYMBOL_REF_LOCAL_P (addr)))
     {
-      require_pic_register ();
+      require_pic_register (NULL_RTX, false /*compute_now*/);
       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
     }

@@ -21706,7 +21760,7 @@ arm_expand_prologue (void)
       mask &= THUMB2_WORK_REGS;
       if (!IS_NESTED (func_type))
         mask |= (1 << IP_REGNUM);
-      arm_load_pic_register (mask);
+      arm_load_pic_register (mask, NULL_RTX);
     }

   /* If we are profiling, make sure no instructions are scheduled before
@@ -24909,7 +24963,7 @@ thumb1_expand_prologue (void)
   /* Load the pic register before setting the frame pointer,
      so we can use r7 as a temporary work register.  */
   if (flag_pic && arm_pic_register != INVALID_REGNUM)
-    arm_load_pic_register (live_regs_mask);
+    arm_load_pic_register (live_regs_mask, NULL_RTX);

   if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
     emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
diff -urpN a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
--- a/gcc/config/arm/arm.md	2019-05-30 16:58:45.358508769 +0800
+++ b/gcc/config/arm/arm.md	2019-05-30 11:52:58.491157149 +0800
@@ -6051,7 +6051,8 @@
     operands[1] = legitimize_pic_address (operands[1], SImode,
                                           (!can_create_pseudo_p ()
                                            ? operands[0]
-                                           : 0));
+                                           : NULL_RTX), NULL_RTX,
+                                          false /*compute_now*/);
   }
   "
 )
@@ -6340,7 +6341,7 @@
     /* r3 is clobbered by set/longjmp, so we can use it as a scratch
        register.  */
     if (arm_pic_register != INVALID_REGNUM)
-      arm_load_pic_register (1UL << 3);
+      arm_load_pic_register (1UL << 3, NULL_RTX);
     DONE;
   }")

@@ -8666,6 +8667,164 @@
    (set_attr "conds" "clob")]
 )

+;; Named patterns for stack smashing protection.
+(define_expand "stack_protect_combined_set"
+  [(parallel
+     [(set (match_operand:SI 0 "memory_operand" "")
+           (unspec:SI [(match_operand:SI 1 "guard_operand" "")]
+                      UNSPEC_SP_SET))
+      (clobber (match_scratch:SI 2 ""))
+      (clobber (match_scratch:SI 3 ""))])]
+  ""
+  ""
+)
+
+;; Use a separate insn from the above expand to be able to have the mem outside
+;; the operand #1 when register allocation comes. This is needed to avoid LRA
+;; try to reload the guard since we need to control how PIC access is done in
+;; the -fpic/-fPIC case (see COMPUTE_NOW parameter when calling
+;; legitimize_pic_address ()).
+(define_insn_and_split "*stack_protect_combined_set_insn"
+  [(set (match_operand:SI 0 "memory_operand" "=m,m")
+        (unspec:SI [(mem:SI (match_operand:SI 1 "guard_addr_operand" "X,X"))]
+                   UNSPEC_SP_SET))
+   (clobber (match_scratch:SI 2 "=&l,&r"))
+   (clobber (match_scratch:SI 3 "=&l,&r"))]
+  ""
+  "#"
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:SI [(mem:SI (match_dup 2))]
+                                            UNSPEC_SP_SET))
+              (clobber (match_dup 2))])]
+  "
+{
+  if (flag_pic)
+    {
+      /* Forces recomputing of GOT base now.  */
+      legitimize_pic_address (operands[1], SImode, operands[2], operands[3],
+                              true /*compute_now*/);
+    }
+  else
+    {
+      if (address_operand (operands[1], SImode))
+        operands[2] = operands[1];
+      else
+        {
+          rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0);
+          emit_move_insn (operands[2], mem);
+        }
+    }
+}"
+  [(set_attr "arch" "t1,32")]
+)
+
+(define_insn "*stack_protect_set_insn"
+  [(set (match_operand:SI 0 "memory_operand" "=m,m")
+        (unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "+&l,&r"))]
+                   UNSPEC_SP_SET))
+   (clobber (match_dup 1))]
+  ""
+  "@
+   ldr\\t%1, [%1]\;str\\t%1, %0\;movs\t%1,#0
+   ldr\\t%1, [%1]\;str\\t%1, %0\;mov\t%1,#0"
+  [(set_attr "length" "8,12")
+   (set_attr "conds" "clob,nocond")
+   (set_attr "type" "multiple")
+   (set_attr "arch" "t1,32")]
+)
+
+(define_expand "stack_protect_combined_test"
+  [(parallel
+     [(set (pc)
+           (if_then_else
+             (eq (match_operand:SI 0 "memory_operand" "")
+                 (unspec:SI [(match_operand:SI 1 "guard_operand" "")]
+                            UNSPEC_SP_TEST))
+             (label_ref (match_operand 2))
+             (pc)))
+      (clobber (match_scratch:SI 3 ""))
+      (clobber (match_scratch:SI 4 ""))
+      (clobber (reg:CC CC_REGNUM))])]
+  ""
+  ""
+)
+
+;; Use a separate insn from the above expand to be able to have the mem outside
+;; the operand #1 when register allocation comes. This is needed to avoid LRA
+;; try to reload the guard since we need to control how PIC access is done in
+;; the -fpic/-fPIC case (see COMPUTE_NOW parameter when calling
+;; legitimize_pic_address ()).
+(define_insn_and_split "*stack_protect_combined_test_insn"
+  [(set (pc)
+        (if_then_else
+          (eq (match_operand:SI 0 "memory_operand" "m,m")
+              (unspec:SI [(mem:SI (match_operand:SI 1 "guard_addr_operand" "X,X"))]
+                         UNSPEC_SP_TEST))
+          (label_ref (match_operand 2))
+          (pc)))
+   (clobber (match_scratch:SI 3 "=&l,&r"))
+   (clobber (match_scratch:SI 4 "=&l,&r"))
+   (clobber (reg:CC CC_REGNUM))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  rtx eq;
+
+  if (flag_pic)
+    {
+      /* Forces recomputing of GOT base now.  */
+      legitimize_pic_address (operands[1], SImode, operands[3], operands[4],
+                              true /*compute_now*/);
+    }
+  else
+    {
+      if (address_operand (operands[1], SImode))
+        operands[3] = operands[1];
+      else
+        {
+          rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0);
+          emit_move_insn (operands[3], mem);
+        }
+    }
+  if (TARGET_32BIT)
+    {
+      emit_insn (gen_arm_stack_protect_test_insn (operands[4], operands[0],
+                                                  operands[3]));
+      rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
+      eq = gen_rtx_EQ (CC_Zmode, cc_reg, const0_rtx);
+      emit_jump_insn (gen_arm_cond_branch (operands[2], eq, cc_reg));
+    }
+  else
+    {
+      emit_insn (gen_thumb1_stack_protect_test_insn (operands[4], operands[0],
+                                                     operands[3]));
+      eq = gen_rtx_EQ (VOIDmode, operands[4], const0_rtx);
+      emit_jump_insn (gen_cbranchsi4 (eq, operands[4], const0_rtx,
+                                      operands[2]));
+    }
+  DONE;
+}
+  [(set_attr "arch" "t1,32")]
+)
+
+(define_insn "arm_stack_protect_test_insn"
+  [(set (reg:CC_Z CC_REGNUM)
+        (compare:CC_Z (unspec:SI [(match_operand:SI 1 "memory_operand" "m,m")
+                                  (mem:SI (match_operand:SI 2 "register_operand" "+l,r"))]
+                                 UNSPEC_SP_TEST)
+                      (const_int 0)))
+   (clobber (match_operand:SI 0 "register_operand" "=&l,&r"))
+   (clobber (match_dup 2))]
+  "TARGET_32BIT"
+  "ldr\t%0, [%2]\;ldr\t%2, %1\;eors\t%0, %2, %0"
+  [(set_attr "length" "8,12")
+   (set_attr "conds" "set")
+   (set_attr "type" "multiple")
+   (set_attr "arch" "t,32")]
+)
+
 (define_expand "casesi"
   [(match_operand:SI 0 "s_register_operand" "")  ; index to jump on
    (match_operand:SI 1 "const_int_operand" "")   ; lower bound
diff -urpN a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
--- a/gcc/config/arm/arm-protos.h	2019-05-30 16:58:45.358508769 +0800
+++ b/gcc/config/arm/arm-protos.h	2019-05-30 11:52:58.491157149 +0800
@@ -28,7 +28,7 @@ extern enum unwind_info_type arm_except_
 extern int use_return_insn (int, rtx);
 extern bool use_simple_return_p (void);
 extern enum reg_class arm_regno_class (int);
-extern void arm_load_pic_register (unsigned long);
+extern void arm_load_pic_register (unsigned long, rtx);
 extern int arm_volatile_func (void);
 extern void arm_expand_prologue (void);
 extern void arm_expand_epilogue (bool);
@@ -69,7 +69,7 @@ extern int const_ok_for_dimode_op (HOST_
 extern int arm_split_constant (RTX_CODE, machine_mode, rtx,
                                HOST_WIDE_INT, rtx, rtx, int);
 extern int legitimate_pic_operand_p (rtx);
-extern rtx legitimize_pic_address (rtx, machine_mode, rtx);
+extern rtx legitimize_pic_address (rtx, machine_mode, rtx, rtx, bool);
 extern rtx legitimize_tls_address (rtx, rtx);
 extern bool arm_legitimate_address_p (machine_mode, rtx, bool);
 extern int arm_legitimate_address_outer_p (machine_mode, rtx, RTX_CODE, int);
diff -urpN a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
--- a/gcc/config/arm/predicates.md	2019-05-30 16:58:45.358508769 +0800
+++ b/gcc/config/arm/predicates.md	2019-05-30 11:52:58.491157149 +0800
@@ -31,6 +31,23 @@
               || REGNO_REG_CLASS (REGNO (op)) != NO_REGS));
 })

+; Predicate for stack protector guard's address in
+; stack_protect_combined_set_insn and stack_protect_combined_test_insn patterns
+(define_predicate "guard_addr_operand"
+  (match_test "true")
+{
+  return (CONSTANT_ADDRESS_P (op)
+          || !targetm.cannot_force_const_mem (mode, op));
+})
+
+; Predicate for stack protector guard in stack_protect_combined_set and
+; stack_protect_combined_test patterns
+(define_predicate "guard_operand"
+  (match_code "mem")
+{
+  return guard_addr_operand (XEXP (op, 0), mode);
+})
+
 (define_predicate "imm_for_neon_inv_logic_operand"
   (match_code "const_vector")
 {
diff -urpN a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
--- a/gcc/config/arm/thumb1.md	2019-05-30 16:58:45.358508769 +0800
+++ b/gcc/config/arm/thumb1.md	2019-05-30 11:52:58.491157149 +0800
@@ -1964,4 +1964,17 @@
   }"
   [(set_attr "type" "mov_reg")]
 )
+
+(define_insn "thumb1_stack_protect_test_insn"
+  [(set (match_operand:SI 0 "register_operand" "=&l")
+        (unspec:SI [(match_operand:SI 1 "memory_operand" "m")
+                    (mem:SI (match_operand:SI 2 "register_operand" "+l"))]
+                   UNSPEC_SP_TEST))
+   (clobber (match_dup 2))]
+  "TARGET_THUMB1"
+  "ldr\t%0, [%2]\;ldr\t%2, %1\;eors\t%0, %2, %0"
+  [(set_attr "length" "8")
+   (set_attr "conds" "set")
+   (set_attr "type" "multiple")]
+)

diff -urpN a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
--- a/gcc/config/arm/unspecs.md	2019-05-30 16:58:45.358508769 +0800
+++ b/gcc/config/arm/unspecs.md	2019-05-30 11:52:58.491157149 +0800
@@ -86,6 +86,9 @@
   UNSPEC_PROBE_STACK    ; Probe stack memory reference
   UNSPEC_NONSECURE_MEM  ; Represent non-secure memory in ARMv8-M with
                         ; security extension
+  UNSPEC_SP_SET         ; Represent the setting of stack protector's canary
+  UNSPEC_SP_TEST        ; Represent the testing of stack protector's canary
+                        ; against the guard.
 ])

 (define_c_enum "unspec" [
diff -urpN a/gcc/doc/md.texi b/gcc/doc/md.texi
--- a/gcc/doc/md.texi	2019-05-30 16:58:45.362508769 +0800
+++ b/gcc/doc/md.texi	2019-05-30 11:52:58.491157149 +0800
@@ -6955,22 +6955,61 @@ builtins.
 The get/set patterns have a single output/input operand respectively,
 with @var{mode} intended to be @code{Pmode}.

+@cindex @code{stack_protect_combined_set} instruction pattern
+@item @samp{stack_protect_combined_set}
+This pattern, if defined, moves a @code{ptr_mode} value from an address
+whose declaration RTX is given in operand 1 to the memory in operand 0
+without leaving the value in a register afterward.  If several
+instructions are needed by the target to perform the operation (eg. to
+load the address from a GOT entry then load the @code{ptr_mode} value
+and finally store it), it is the backend's responsibility to ensure no
+intermediate result gets spilled.  This is to avoid leaking the value
+some place that an attacker might use to rewrite the stack guard slot
+after having clobbered it.
+
+If this pattern is not defined, then the address declaration is
+expanded first in the standard way and a @code{stack_protect_set}
+pattern is then generated to move the value from that address to the
+address in operand 0.
+
 @cindex @code{stack_protect_set} instruction pattern
 @item @samp{stack_protect_set}
-This pattern, if defined, moves a @code{ptr_mode} value from the memory
-in operand 1 to the memory in operand 0 without leaving the value in
-a register afterward.  This is to avoid leaking the value some place
-that an attacker might use to rewrite the stack guard slot after
-having clobbered it.
+This pattern, if defined, moves a @code{ptr_mode} value from the valid
+memory location in operand 1 to the memory in operand 0 without leaving
+the value in a register afterward.  This is to avoid leaking the value
+some place that an attacker might use to rewrite the stack guard slot
+after having clobbered it.
+
+Note: on targets where the addressing modes do not allow to load
+directly from stack guard address, the address is expanded in a standard
+way first which could cause some spills.

 If this pattern is not defined, then a plain move pattern is generated.

+@cindex @code{stack_protect_combined_test} instruction pattern
+@item @samp{stack_protect_combined_test}
+This pattern, if defined, compares a @code{ptr_mode} value from an
+address whose declaration RTX is given in operand 1 with the memory in
+operand 0 without leaving the value in a register afterward and
+branches to operand 2 if the values were equal.  If several
+instructions are needed by the target to perform the operation (eg. to
+load the address from a GOT entry then load the @code{ptr_mode} value
+and finally store it), it is the backend's responsibility to ensure no
+intermediate result gets spilled.  This is to avoid leaking the value
+some place that an attacker might use to rewrite the stack guard slot
+after having clobbered it.
+
+If this pattern is not defined, then the address declaration is
+expanded first in the standard way and a @code{stack_protect_test}
+pattern is then generated to compare the value from that address to the
+value at the memory in operand 0.
+
 @cindex @code{stack_protect_test} instruction pattern
 @item @samp{stack_protect_test}
 This pattern, if defined, compares a @code{ptr_mode} value from the
-memory in operand 1 with the memory in operand 0 without leaving the
-value in a register afterward and branches to operand 2 if the values
-were equal.
+valid memory location in operand 1 with the memory in operand 0 without
+leaving the value in a register afterward and branches to operand 2 if
+the values were equal.

 If this pattern is not defined, then a plain compare pattern and
 conditional branch pattern is used.
diff -urpN a/gcc/function.c b/gcc/function.c
--- a/gcc/function.c	2019-05-30 16:58:45.362508769 +0800
+++ b/gcc/function.c	2019-05-30 11:53:14.071156599 +0800
@@ -5065,18 +5065,34 @@ stack_protect_epilogue (void)
   tree guard_decl = targetm.stack_protect_guard ();
   rtx_code_label *label = gen_label_rtx ();
   rtx x, y;
-  rtx_insn *seq;
+  rtx_insn *seq = NULL;

   x = expand_normal (crtl->stack_protect_guard);
-  if (guard_decl)
-    y = expand_normal (guard_decl);
+
+  if (targetm.have_stack_protect_combined_test () && guard_decl)
+    {
+      gcc_assert (DECL_P (guard_decl));
+      y = DECL_RTL (guard_decl);
+      /* Allow the target to compute address of Y and compare it with X without
+         leaking Y into a register.  This combined address + compare pattern
+         allows the target to prevent spilling of any intermediate results by
+         splitting it after register allocator.  */
+      seq = targetm.gen_stack_protect_combined_test (x, y, label);
+    }
   else
-    y = const0_rtx;
+    {
+      if (guard_decl)
+        y = expand_normal (guard_decl);
+      else
+        y = const0_rtx;
+
+      /* Allow the target to compare Y with X without leaking either into
+         a register.  */
+      if (targetm.have_stack_protect_test ())
+        seq = targetm.gen_stack_protect_test (x, y, label);
+    }

-  /* Allow the target to compare Y with X without leaking either into
-     a register.  */
-  if (targetm.have_stack_protect_test ()
-      && ((seq = targetm.gen_stack_protect_test (x, y, label)) != NULL_RTX))
+  if (seq)
     emit_insn (seq);
   else
     emit_cmp_and_jump_insns (x, y, EQ, NULL_RTX, ptr_mode, 1, label);
diff -urpN a/gcc/genpreds.c b/gcc/genpreds.c
--- a/gcc/genpreds.c	2019-05-30 16:58:45.362508769 +0800
+++ b/gcc/genpreds.c	2019-05-30 11:53:14.163156595 +0800
@@ -1581,7 +1581,8 @@ write_insn_preds_c (void)
 #include \"reload.h\"\n\
 #include \"regs.h\"\n\
 #include \"emit-rtl.h\"\n\
-#include \"tm-constrs.h\"\n");
+#include \"tm-constrs.h\"\n\
+#include \"target.h\"\n");

   FOR_ALL_PREDICATES (p)
     write_one_predicate_function (p);
diff -urpN a/gcc/target-insns.def b/gcc/target-insns.def
--- a/gcc/target-insns.def	2019-05-30 16:58:45.362508769 +0800
+++ b/gcc/target-insns.def	2019-05-30 11:52:58.495157149 +0800
@@ -96,7 +96,9 @@ DEF_TARGET_INSN (sibcall_value, (rtx x0,
 DEF_TARGET_INSN (simple_return, (void))
 DEF_TARGET_INSN (split_stack_prologue, (void))
 DEF_TARGET_INSN (split_stack_space_check, (rtx x0, rtx x1))
+DEF_TARGET_INSN (stack_protect_combined_set, (rtx x0, rtx x1))
 DEF_TARGET_INSN (stack_protect_set, (rtx x0, rtx x1))
+DEF_TARGET_INSN (stack_protect_combined_test, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (stack_protect_test, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (store_multiple, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (tablejump, (rtx x0, rtx x1))
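Taken together, these hunks close CVE-2018-12886: with -fPIC on ARM, the
address of the stack guard was computed before register allocation and could
be spilled to the stack, so an overflow large enough to reach the spilled
address could redirect the canary comparison and defeat the check. The
combined set/test patterns keep the address computation and the canary access
in a single insn that is only split after register allocation. A minimal
sketch of the class of code being protected (illustrative only):

/* Classic canary-protected function.  Built with something like
   arm-linux-gnueabihf-gcc -fPIC -fstack-protector-all, the epilogue
   check must reload the guard's address from the GOT and compare it
   in one unspillable sequence, which the patterns above guarantee.  */
#include <string.h>

void
fill (const char *src, size_t len)
{
  char buf[16];
  memcpy (buf, src, len);   /* attacker-controlled len can clobber the
                               canary and nearby spill slots */
}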
==== CVE-2019-15847.patch (new file, 51 lines) ====
diff -urpN a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
--- a/gcc/config/rs6000/altivec.md	2018-01-15 01:47:30.483964000 +0800
+++ b/gcc/config/rs6000/altivec.md	2019-09-09 00:01:25.770835633 +0800
@@ -74,9 +74,6 @@
    UNSPEC_VUNPACK_LO_SIGN_DIRECT
    UNSPEC_VUPKHPX
    UNSPEC_VUPKLPX
-   UNSPEC_DARN
-   UNSPEC_DARN_32
-   UNSPEC_DARN_RAW
    UNSPEC_DST
    UNSPEC_DSTT
    UNSPEC_DSTST
@@ -3770,21 +3767,21 @@

 (define_insn "darn_32"
   [(set (match_operand:SI 0 "register_operand" "=r")
-        (unspec:SI [(const_int 0)] UNSPEC_DARN_32))]
+        (unspec_volatile:SI [(const_int 0)] UNSPECV_DARN_32))]
   "TARGET_P9_MISC"
   "darn %0,0"
   [(set_attr "type" "integer")])

 (define_insn "darn_raw"
   [(set (match_operand:DI 0 "register_operand" "=r")
-        (unspec:DI [(const_int 0)] UNSPEC_DARN_RAW))]
+        (unspec_volatile:DI [(const_int 0)] UNSPECV_DARN_RAW))]
   "TARGET_P9_MISC && TARGET_64BIT"
   "darn %0,2"
   [(set_attr "type" "integer")])

 (define_insn "darn"
   [(set (match_operand:DI 0 "register_operand" "=r")
-        (unspec:DI [(const_int 0)] UNSPEC_DARN))]
+        (unspec_volatile:DI [(const_int 0)] UNSPECV_DARN))]
   "TARGET_P9_MISC && TARGET_64BIT"
   "darn %0,1"
   [(set_attr "type" "integer")])
diff -urpN a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
--- a/gcc/config/rs6000/rs6000.md	2018-01-21 21:32:58.843504000 +0800
+++ b/gcc/config/rs6000/rs6000.md	2019-09-08 23:53:13.122859153 +0800
@@ -163,6 +163,9 @@
    UNSPECV_EH_RR        ; eh_reg_restore
    UNSPECV_ISYNC        ; isync instruction
    UNSPECV_MFTB         ; move from time base
+   UNSPECV_DARN         ; darn 1 (deliver a random number)
+   UNSPECV_DARN_32      ; darn 2
+   UNSPECV_DARN_RAW     ; darn 0
    UNSPECV_NLGR         ; non-local goto receiver
    UNSPECV_MFFS         ; Move from FPSCR
    UNSPECV_MTFSF        ; Move to FPSCR Fields
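The point of this patch (CVE-2019-15847) is that the POWER9 darn instruction
is a hardware random-number source, yet it was modelled as a plain unspec, so
the optimizers could legally merge two uses into one and hand the same
"random" value to both consumers. Moving it to unspec_volatile forbids that.
A short illustration (POWER9 target, -mcpu=power9; __builtin_darn is GCC's
real builtin for this instruction):

/* With the old plain-unspec modelling, CSE could fold the two calls
   below into a single darn instruction, so *a == *b always held;
   with unspec_volatile, two distinct darn instructions are emitted.  */
void
two_randoms (unsigned long long *a, unsigned long long *b)
{
  *a = __builtin_darn ();
  *b = __builtin_darn ();   /* must execute a second darn */
}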
==== aarch64-fix-tls-negative-offset.patch (new file, 24 lines) ====
diff -urpN a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c	2018-10-09 11:49:19.000000000 +0800
+++ b/gcc/config/aarch64/aarch64.c	2018-10-09 13:42:15.000000000 +0800
@@ -1619,7 +1619,7 @@ aarch64_load_symref_appropriately (rtx d
     case SYMBOL_SMALL_TLSDESC:
       {
         machine_mode mode = GET_MODE (dest);
-        rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
+        rtx x0 = gen_rtx_REG (ptr_mode, R0_REGNUM);
         rtx tp;

         gcc_assert (mode == Pmode || mode == ptr_mode);
@@ -1635,6 +1635,11 @@ aarch64_load_symref_appropriately (rtx d
         if (mode != Pmode)
           tp = gen_lowpart (mode, tp);

+        if (mode != ptr_mode)
+          {
+            x0 = force_reg (mode, gen_rtx_SIGN_EXTEND (mode, x0));
+          }
+
         emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
         set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
         return;
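A sketch of when this matters (illustrative; assumes an ILP32 aarch64
toolchain): with -mabi=ilp32 -fpic, the TLS descriptor call returns a 32-bit
offset in w0, and for a variable placed below the thread pointer that offset
is negative, so it must be sign-extended to 64 bits before being added to the
thread pointer, which is exactly what the new hunk does.

/* Any TLS access compiled as ILP32 PIC exercises this path.  */
__thread int counter;

int
bump (void)
{
  return ++counter;
}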
==== aarch64-ilp32-call-addr-dimode.patch (new file, 31 lines) ====
diff -urpN a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
--- a/gcc/config/aarch64/aarch64.md	2018-10-09 11:30:50.000000000 +0800
+++ b/gcc/config/aarch64/aarch64.md	2018-10-09 11:52:54.000000000 +0800
@@ -857,6 +857,13 @@
            : !REG_P (callee))
       XEXP (operands[0], 0) = force_reg (Pmode, callee);

+    if (TARGET_ILP32
+        && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
+        && GET_MODE (XEXP (operands[0], 0)) == SImode)
+      XEXP (operands[0], 0) = convert_memory_address (DImode,
+                                                      XEXP (operands[0], 0));
+
+
     if (operands[2] == NULL_RTX)
       operands[2] = const0_rtx;

@@ -889,6 +896,13 @@
            : !REG_P (callee))
       XEXP (operands[1], 0) = force_reg (Pmode, callee);

+    if (TARGET_ILP32
+        && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
+        && GET_MODE (XEXP (operands[1], 0)) == SImode)
+      XEXP (operands[1], 0) = convert_memory_address (DImode,
+                                                      XEXP (operands[1], 0));
+
+
     if (operands[3] == NULL_RTX)
       operands[3] = const0_rtx;

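An illustrative sketch of the call expansion this fixes (assumes an ILP32
aarch64 toolchain): under -mabi=ilp32 a code address can reach the call
expander as an SImode SYMBOL_REF, while the call insn itself expects a DImode
(Pmode) address, so the hunks above widen it with convert_memory_address.

/* A plain direct call is enough: the target of `callee' is a
   SYMBOL_REF, which under ILP32 may carry SImode.  */
extern int callee (int);

int
caller (int x)
{
  return callee (x);
}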
==== add-tsv110-pipeline-scheduling.patch (new file, 780 lines; listing truncated below) ====
diff -urpN a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c	2019-04-15 14:50:25.866378665 +0800
+++ b/gcc/config/aarch64/aarch64.c	2019-04-15 14:49:21.986376983 +0800
@@ -554,6 +554,31 @@ static const struct tune_params generic_
   (AARCH64_EXTRA_TUNE_NONE)  /* tune_flags.  */
 };

+static const struct tune_params tsv110_tunings =
+{
+  &cortexa57_extra_costs,
+  &generic_addrcost_table,
+  &generic_regmove_cost,
+  &generic_vector_cost,
+  &generic_branch_cost,
+  &generic_approx_modes,
+  4, /* memmov_cost */
+  4, /* issue_rate */
+  AARCH64_FUSE_NOTHING, /* fusible_ops */
+  16, /* function_align.  */
+  16, /* jump_align.  */
+  8,  /* loop_align.  */
+  2,  /* int_reassoc_width.  */
+  4,  /* fp_reassoc_width.  */
+  1,  /* vec_reassoc_width.  */
+  2,  /* min_div_recip_mul_sf.  */
+  2,  /* min_div_recip_mul_df.  */
+  0,  /* max_case_values.  */
+  0,  /* cache_line_size.  */
+  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
+  (AARCH64_EXTRA_TUNE_NONE)  /* tune_flags.  */
+};
+
 static const struct tune_params cortexa35_tunings =
 {
   &cortexa53_extra_costs,
diff -urpN a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
--- a/gcc/config/aarch64/aarch64-cores.def	2017-02-15 08:09:28.845771000 +0800
+++ b/gcc/config/aarch64/aarch64-cores.def	2019-04-15 14:49:21.986376983 +0800
@@ -78,6 +78,8 @@ AARCH64_CORE("xgene1", xgene1, x
 AARCH64_CORE("thunderx2t99p1",  thunderx2t99p1, thunderx2t99, 8_1A,  AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
 AARCH64_CORE("vulcan",  vulcan, thunderx2t99, 8_1A,  AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)

+AARCH64_CORE("tsv110",  tsv110, tsv110, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, tsv110, 0x48, 0xd01, -1)
+
 /* V8 big.LITTLE implementations.  */

 AARCH64_CORE("cortex-a57.cortex-a53",  cortexa57cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
diff -urpN a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
--- a/gcc/config/aarch64/aarch64.md	2019-04-15 14:50:25.870378665 +0800
+++ b/gcc/config/aarch64/aarch64.md	2019-04-15 14:49:21.986376983 +0800
@@ -226,6 +226,7 @@
 (include "thunderx.md")
 (include "../arm/xgene1.md")
 (include "thunderx2t99.md")
+(include "tsv110.md")

 ;; -------------------------------------------------------------------
 ;; Jumps and other miscellaneous insns
diff -urpN a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
--- a/gcc/config/aarch64/aarch64-tune.md	2017-02-15 08:09:28.845771000 +0800
+++ b/gcc/config/aarch64/aarch64-tune.md	2019-04-15 14:49:21.986376983 +0800
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
+	"cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,tsv110,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
 (const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff -urpN a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
--- a/gcc/config/aarch64/tsv110.md	1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/config/aarch64/tsv110.md	2019-04-15 14:55:30.420081420 +0800
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "tsv110")
+
+(define_attr "tsv110_neon_type"
+  "neon_arith_acc, neon_arith_acc_q,
+   neon_arith_basic, neon_arith_complex,
+   neon_reduc_add_acc, neon_multiply, neon_multiply_q,
+   neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
+   neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
+   neon_shift_imm_complex,
+   neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
+   neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
+   neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
+   neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
+   neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
+   neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
+   neon_bitops, neon_bitops_q, neon_from_gp,
+   neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
+   neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
+   neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
+   unknown"
+  (cond [
+	  (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
+			   neon_reduc_add_acc_q")
+	  (const_string "neon_arith_acc")
+	  (eq_attr "type" "neon_arith_acc_q")
+	  (const_string "neon_arith_acc_q")
+	  (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
+			   neon_add_widen, neon_neg, neon_neg_q,\
+			   neon_reduc_add, neon_reduc_add_q,\
+			   neon_reduc_add_long, neon_sub, neon_sub_q,\
+			   neon_sub_long, neon_sub_widen, neon_logic,\
+			   neon_logic_q, neon_tst, neon_tst_q,\
+			   neon_compare, neon_compare_q,\
+			   neon_compare_zero, neon_compare_zero_q,\
+			   neon_minmax, neon_minmax_q, neon_reduc_minmax,\
+			   neon_reduc_minmax_q")
+	  (const_string "neon_arith_basic")
+	  (eq_attr "type" "neon_add_halve_narrow_q,\
+			   neon_add_halve, neon_add_halve_q,\
+			   neon_sub_halve, neon_sub_halve_q, neon_qabs,\
+			   neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
+			   neon_qneg_q, neon_qsub, neon_qsub_q,\
+			   neon_sub_halve_narrow_q")
+	  (const_string "neon_arith_complex")
+
+	  (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
+			   neon_mul_h_scalar, neon_mul_s_scalar,\
+			   neon_sat_mul_b, neon_sat_mul_h,\
+			   neon_sat_mul_s, neon_sat_mul_h_scalar,\
+			   neon_sat_mul_s_scalar,\
+			   neon_mul_b_long, neon_mul_h_long,\
+			   neon_mul_s_long,\
+			   neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
+			   neon_sat_mul_b_long, neon_sat_mul_h_long,\
+			   neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
+			   neon_sat_mul_s_scalar_long,\
+			   neon_mla_b, neon_mla_h, neon_mla_s,\
+			   neon_mla_h_scalar, neon_mla_s_scalar,\
+			   neon_mla_b_long, neon_mla_h_long,\
+			   neon_mla_s_long,\
+			   neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
+			   neon_sat_mla_b_long, neon_sat_mla_h_long,\
+			   neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
+			   neon_sat_mla_s_scalar_long")
+	  (const_string "neon_multiply")
+	  (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
+			   neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
+			   neon_sat_mul_b_q, neon_sat_mul_h_q,\
+			   neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
+			   neon_sat_mul_s_scalar_q,\
+			   neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
+			   neon_mla_h_scalar_q, neon_mla_s_scalar_q")
+	  (const_string "neon_multiply_q")
+
+	  (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
+	  (const_string "neon_shift_acc")
+	  (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
+			   neon_shift_imm_narrow_q, neon_shift_imm_long")
+	  (const_string "neon_shift_imm_basic")
+	  (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
+			   neon_sat_shift_imm_narrow_q")
+	  (const_string "neon_shift_imm_complex")
+	  (eq_attr "type" "neon_shift_reg")
+	  (const_string "neon_shift_reg_basic")
+	  (eq_attr "type" "neon_shift_reg_q")
+	  (const_string "neon_shift_reg_basic_q")
+	  (eq_attr "type" "neon_sat_shift_reg")
+	  (const_string "neon_shift_reg_complex")
+	  (eq_attr "type" "neon_sat_shift_reg_q")
+	  (const_string "neon_shift_reg_complex_q")
+
+	  (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
+			   neon_fp_abs_s, neon_fp_abs_s_q,\
+			   neon_fp_neg_d, neon_fp_neg_d_q,\
+			   neon_fp_abs_d, neon_fp_abs_d_q,\
+			   neon_fp_minmax_s,neon_fp_minmax_d,\
+			   neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
+	  (const_string "neon_fp_negabs")
+	  (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
+			   neon_fp_reduc_add_s, neon_fp_compare_s,\
+			   neon_fp_round_s,\
+			   neon_fp_addsub_d, neon_fp_abd_d,\
+			   neon_fp_reduc_add_d, neon_fp_compare_d,\
+			   neon_fp_round_d")
+	  (const_string "neon_fp_arith")
+	  (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
+			   neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
+			   neon_fp_minmax_s_q, neon_fp_round_s_q,\
+			   neon_fp_addsub_d_q, neon_fp_abd_d_q,\
+			   neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
+			   neon_fp_minmax_d_q, neon_fp_round_d_q")
+	  (const_string "neon_fp_arith_q")
+	  (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
+			   neon_fp_reduc_minmax_d_q,\
+			   neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
+	  (const_string "neon_fp_reductions_q")
+	  (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
+			   neon_fp_to_int_d, neon_int_to_fp_d")
+	  (const_string "neon_fp_cvt_int")
+	  (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
+			   neon_fp_to_int_d_q, neon_int_to_fp_d_q")
+	  (const_string "neon_fp_cvt_int_q")
+	  (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
+	  (const_string "neon_fp_cvt16")
+	  (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
+			   neon_fp_mul_d")
+	  (const_string "neon_fp_mul")
+	  (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
+			   neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
+	  (const_string "neon_fp_mul_q")
+	  (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
+			   neon_fp_mla_d")
+	  (const_string "neon_fp_mla")
+	  (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
+			   neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
+	  (const_string "neon_fp_mla_q")
+	  (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
+			   neon_fp_recpx_s,\
+			   neon_fp_recpe_d, neon_fp_rsqrte_d,\
+			   neon_fp_recpx_d")
+	  (const_string "neon_fp_recpe_rsqrte")
+	  (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
+			   neon_fp_recpx_s_q,\
+			   neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
+			   neon_fp_recpx_d_q")
+	  (const_string "neon_fp_recpe_rsqrte_q")
+	  (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
+			   neon_fp_recps_d, neon_fp_rsqrts_d")
+	  (const_string "neon_fp_recps_rsqrts")
+	  (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
+			   neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
+	  (const_string "neon_fp_recps_rsqrts_q")
+	  (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
+			   neon_rev, neon_permute, neon_rbit,\
+			   neon_tbl1, neon_tbl2, neon_zip,\
+			   neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
+			   neon_move, neon_move_q, neon_move_narrow_q")
+	  (const_string "neon_bitops")
+	  (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
+			   neon_rev_q, neon_permute_q, neon_rbit_q")
+	  (const_string "neon_bitops_q")
+	  (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
+	  (const_string "neon_from_gp")
+	  (eq_attr "type" "neon_from_gp_q")
+	  (const_string "neon_from_gp_q")
+
+	  (eq_attr "type" "f_loads, f_loadd,\
+			   neon_load1_1reg, neon_load1_1reg_q,\
+			   neon_load1_2reg, neon_load1_2reg_q")
+	  (const_string "neon_load_a")
+	  (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
+			   neon_load1_4reg, neon_load1_4reg_q")
+	  (const_string "neon_load_b")
+	  (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
+			   neon_load1_all_lanes, neon_load1_all_lanes_q,\
+			   neon_load2_2reg, neon_load2_2reg_q,\
+			   neon_load2_all_lanes, neon_load2_all_lanes_q")
+	  (const_string "neon_load_c")
+	  (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
+			   neon_load3_3reg, neon_load3_3reg_q,\
+			   neon_load3_one_lane, neon_load3_one_lane_q,\
+			   neon_load4_4reg, neon_load4_4reg_q")
+	  (const_string "neon_load_d")
+	  (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
+			   neon_load3_all_lanes, neon_load3_all_lanes_q,\
+			   neon_load4_all_lanes, neon_load4_all_lanes_q")
+	  (const_string "neon_load_e")
+	  (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
+	  (const_string "neon_load_f")
+
+	  (eq_attr "type" "f_stores, f_stored,\
+			   neon_store1_1reg")
+	  (const_string "neon_store_a")
+	  (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
+	  (const_string "neon_store_b")
+	  (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
+			   neon_store3_3reg, neon_store3_3reg_q,\
+			   neon_store2_4reg, neon_store2_4reg_q,\
+			   neon_store4_4reg, neon_store4_4reg_q,\
+			   neon_store2_2reg, neon_store2_2reg_q,\
+			   neon_store3_one_lane, neon_store3_one_lane_q,\
+			   neon_store4_one_lane, neon_store4_one_lane_q,\
+			   neon_store1_4reg, neon_store1_4reg_q,\
+			   neon_store1_one_lane, neon_store1_one_lane_q,\
+			   neon_store2_one_lane, neon_store2_one_lane_q")
+	  (const_string "neon_store_complex")]
+	  (const_string "unknown")))
+
+;; The tsv110 core is modelled as issues pipeline that has
+;; the following functional units.
+;; 1.  Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "tsv110_alu1_issue" "tsv110")
+(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
+
+(define_cpu_unit "tsv110_alu2_issue" "tsv110")
+(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
+
+(define_cpu_unit "tsv110_alu3_issue" "tsv110")
+(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
+
+;; 2.  One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "tsv110_mdu_issue" "tsv110")
+(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
+
+;; 3.  Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "tsv110_fsu")
+
+(define_cpu_unit "tsv110_fsu1_issue"
+		 "tsv110_fsu")
+(define_cpu_unit "tsv110_fsu2_issue"
+		 "tsv110_fsu")
+
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
+
+;; 4.  Two pipeline for branch operations but same with alu2 and alu3: BRU1, BRU2
+
+;; 5.  Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "tsv110_ls1_issue" "tsv110")
+(define_cpu_unit "tsv110_ls2_issue" "tsv110")
+(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
+(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
+				    + tsv110_mdu_issue + tsv110_alu1_issue
+				    + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "tsv110_alu" 1
+  (and (eq_attr "tune" "tsv110")
+       (eq_attr "type" "alu_imm,logic_imm,\
+			alu_sreg,logic_reg,\
+			adc_imm,adc_reg,\
+			adr,bfm,clz,rbit,rev,\
+			shift_imm,shift_reg,\
+			mov_imm,mov_reg,\
+			mvn_imm,mvn_reg,\
+			mrs,multiple,no_insn"))
+  "tsv110_alu1|tsv110_alu2|tsv110_alu3")
+
+(define_insn_reservation "tsv110_alus" 1
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "alus_imm,logics_imm,\
|
||||||
|
+ alus_sreg,logics_reg,\
|
||||||
|
+ adcs_imm,adcs_reg"))
|
||||||
|
+ "tsv110_alu2|tsv110_alu3")
|
||||||
|
+
|
||||||
|
+;; ALU ops with shift
|
||||||
|
+(define_insn_reservation "tsv110_alu_shift" 2
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "extend,\
|
||||||
|
+ alu_shift_imm,alu_shift_reg,\
|
||||||
|
+ crc,logic_shift_imm,logic_shift_reg,\
|
||||||
|
+ mov_shift,mvn_shift,\
|
||||||
|
+ mov_shift_reg,mvn_shift_reg"))
|
||||||
|
+ "tsv110_mdu")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_alus_shift" 2
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
|
||||||
|
+ logics_shift_imm,logics_shift_reg"))
|
||||||
|
+ "tsv110_alu2|tsv110_alu3")
|
||||||
|
+
|
||||||
|
+;; Multiplies instructions
|
||||||
|
+(define_insn_reservation "tsv110_mult" 3
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (ior (eq_attr "mul32" "yes")
|
||||||
|
+ (eq_attr "mul64" "yes")))
|
||||||
|
+ "tsv110_mdu")
|
||||||
|
+
|
||||||
|
+;; Integer divide
|
||||||
|
+(define_insn_reservation "tsv110_div" 10
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "udiv,sdiv"))
|
||||||
|
+ "tsv110_mdu")
|
||||||
|
+
|
||||||
|
+;; Block all issue pipes for a cycle
|
||||||
|
+(define_insn_reservation "tsv110_block" 1
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "block"))
|
||||||
|
+ "tsv110_block")
|
||||||
|
+
|
||||||
|
+;; Branch execution Unit
|
||||||
|
+;;
|
||||||
|
+;; Branches take two issue slot.
|
||||||
|
+;; No latency as there is no result
|
||||||
|
+(define_insn_reservation "tsv110_branch" 0
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "branch"))
|
||||||
|
+ "tsv110_alu2|tsv110_alu3")
|
||||||
|
+
|
||||||
|
+;; Load-store execution Unit
|
||||||
|
+;;
|
||||||
|
+;; Loads of up to two words.
|
||||||
|
+(define_insn_reservation "tsv110_load1" 4
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "load1,load2"))
|
||||||
|
+ "tsv110_ls1|tsv110_ls2")
|
||||||
|
+
|
||||||
|
+;; Stores of up to two words.
|
||||||
|
+(define_insn_reservation "tsv110_store1" 0
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "store1,store2"))
|
||||||
|
+ "tsv110_ls1|tsv110_ls2")
|
||||||
|
+
|
||||||
|
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_neon_abd_aba" 4
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "neon_arith_acc_q"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_neon_arith_basic" 2
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_neon_arith_complex" 4
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+;; Integer Multiply Instructions.
|
||||||
|
+;; D-form
|
||||||
|
+(define_insn_reservation "tsv110_neon_multiply" 4
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
|
||||||
|
+ "tsv110_fsu1")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "neon_mul_d_long"))
|
||||||
|
+ "tsv110_fsu1")
|
||||||
|
+
|
||||||
|
+;; Q-form
|
||||||
|
+(define_insn_reservation "tsv110_neon_multiply_q" 8
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
|
||||||
|
+ "tsv110_fsu1")
|
||||||
|
+
|
||||||
|
+;; Integer Shift Instructions.
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_shift_acc" 4
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
|
||||||
|
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
|
||||||
|
+ neon_shift_reg_complex"))
|
||||||
|
+ "tsv110_fsu1")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_shift_acc_q" 4
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
|
||||||
|
+ neon_shift_reg_complex_q"))
|
||||||
|
+ "tsv110_fsu1")
|
||||||
|
+
|
||||||
|
+;; Floating Point Instructions.
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_fp_negabs" 2
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
|
||||||
|
+ "(tsv110_fsu1|tsv110_fsu2)")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_fp_arith" 4
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
|
||||||
|
+ "(tsv110_fsu1|tsv110_fsu2)")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_fp_arith_q" 4
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_fp_minmax_q" 2
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_fp_reductions_q" 4
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_fp_cvt_int" 2
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_fp_mul" 5
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_fp_mul_q" 5
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_fp_mla" 7
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
|
||||||
|
+ neon_fp_recps_rsqrts"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_fp_recpe_rsqrte" 3
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_fp_mla_q" 7
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
|
||||||
|
+ neon_fp_recps_rsqrts_q"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+;; Miscellaneous Instructions.
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_bitops" 2
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_dup" 2
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "neon_from_gp,f_mcr"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_mov" 2
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "f_mcrr"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_bitops_q" 2
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_from_gp_q" 4
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
|
||||||
|
+ "(tsv110_alu1+tsv110_fsu1)|(tsv110_alu1+tsv110_fsu2)")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_to_gp" 3
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
|
||||||
|
+ "tsv110_fsu1")
|
||||||
|
+
|
||||||
|
+;; Load Instructions.
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_ld1_lane" 8
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
|
||||||
|
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
|
||||||
|
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_ld1_reg1" 6
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
|
||||||
|
+ "tsv110_ls1|tsv110_ls2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_ld1_reg2" 6
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
|
||||||
|
+ "tsv110_ls1|tsv110_ls2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_ld1_reg3" 7
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
|
||||||
|
+ "tsv110_ls1|tsv110_ls2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_ld1_reg4" 7
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
|
||||||
|
+ "tsv110_ls1|tsv110_ls2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_ld2" 8
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
|
||||||
|
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
|
||||||
|
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
|
||||||
|
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_ld3" 9
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
|
||||||
|
+ neon_load3_one_lane,neon_load3_one_lane_q,\
|
||||||
|
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
|
||||||
|
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_ld4_lane" 9
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
|
||||||
|
+ neon_load4_one_lane,neon_load4_one_lane_q"))
|
||||||
|
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_ld4_reg" 11
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
|
||||||
|
+ neon_load4_one_lane,neon_load4_one_lane_q"))
|
||||||
|
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
|
||||||
|
+
|
||||||
|
+;; Store Instructions.
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_store_a" 0
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_store_b" 0
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+;; These block issue for a number of cycles proportional to the number
|
||||||
|
+;; of 64-bit chunks they will store, we don't attempt to model that
|
||||||
|
+;; precisely, treat them as blocking execution for two cycles when
|
||||||
|
+;; issued.
|
||||||
|
+(define_insn_reservation
|
||||||
|
+ "tsv110_neon_store_complex" 0
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
|
||||||
|
+ "tsv110_block*2")
|
||||||
|
+
|
||||||
|
+;; Floating-Point Operations.
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_fp_const" 2
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "fconsts,fconstd,fmov"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_fp_add_sub" 5
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_fp_mac" 7
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_fp_cvt" 3
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "f_cvt"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "f_cvtf2i"))
|
||||||
|
+ "tsv110_fsu1")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_fp_cvti2f" 5
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "f_cvti2f"))
|
||||||
|
+ "(tsv110_alu1+tsv110_fsu1)|(tsv110_alu1+tsv110_fsu2)")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_fp_cmp" 4
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "fcmps,fcmpd"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_fp_arith" 2
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "ffariths,ffarithd"))
|
||||||
|
+ "tsv110_fsu1|tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_fp_divs" 12
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
|
||||||
|
+ neon_fp_div_s_q,neon_fp_div_d_q"))
|
||||||
|
+ "tsv110_fsu1")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_fp_sqrts" 24
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
|
||||||
|
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
|
||||||
|
+ "tsv110_fsu2")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_crypto_aes" 3
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
|
||||||
|
+ "tsv110_fsu1")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
|
||||||
|
+ "(tsv110_fsu1|tsv110_fsu2)")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "crypto_sha256_fast"))
|
||||||
|
+ "tsv110_fsu1")
|
||||||
|
+
|
||||||
|
+(define_insn_reservation "tsv110_crypto_complex" 5
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
|
||||||
|
+ "tsv110_fsu1")
|
||||||
|
+
|
||||||
|
+;; We lie with calls. They take up all issue slots, but are otherwise
|
||||||
|
+;; not harmful.
|
||||||
|
+(define_insn_reservation "tsv110_call" 1
|
||||||
|
+ (and (eq_attr "tune" "tsv110")
|
||||||
|
+ (eq_attr "type" "call"))
|
||||||
|
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
|
||||||
|
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
|
||||||
|
+)
|
||||||
|
+
|
||||||
|
+;; Simple execution unit bypasses
|
||||||
|
+(define_bypass 1 "tsv110_alu"
|
||||||
|
+ "tsv110_alu,tsv110_alu_shift")
|
||||||
|
+(define_bypass 2 "tsv110_alu_shift"
|
||||||
|
+ "tsv110_alu,tsv110_alu_shift")
|
||||||
|
+
|
||||||
|
+;; An MLA or a MUL can feed a dependent MLA.
|
||||||
|
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
|
||||||
|
+ "tsv110_neon_*mla*")
|
||||||
|
+
|
||||||
|
+;; We don't need to care about control hazards, either the branch is
|
||||||
|
+;; predicted in which case we pay no penalty, or the branch is
|
||||||
|
+;; mispredicted in which case instruction scheduling will be unlikely to
|
||||||
|
+;; help.
|
||||||
|
+(define_bypass 1 "tsv110_*"
|
||||||
|
+ "tsv110_call,tsv110_branch")
|
||||||
60
arm-adjust-be-ldrd-strd.patch
Normal file
@@ -0,0 +1,60 @@
diff -urp a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
--- a/gcc/config/arm/arm.c	2019-01-18 11:25:20.840179114 +0800
+++ b/gcc/config/arm/arm.c	2019-01-18 11:25:47.548179817 +0800
@@ -14306,18 +14306,36 @@ gen_movmem_ldrd_strd (rtx *operands)
 	emit_move_insn (reg0, src);
       else
 	{
-	  emit_insn (gen_unaligned_loadsi (low_reg, src));
-	  src = next_consecutive_mem (src);
-	  emit_insn (gen_unaligned_loadsi (hi_reg, src));
+	  if (flag_lsrd_be_adjust && BYTES_BIG_ENDIAN && WORDS_BIG_ENDIAN)
+	    {
+	      emit_insn (gen_unaligned_loadsi (hi_reg, src));
+	      src = next_consecutive_mem (src);
+	      emit_insn (gen_unaligned_loadsi (low_reg, src));
+	    }
+	  else
+	    {
+	      emit_insn (gen_unaligned_loadsi (low_reg, src));
+	      src = next_consecutive_mem (src);
+	      emit_insn (gen_unaligned_loadsi (hi_reg, src));
+	    }
 	}
 
       if (dst_aligned)
 	emit_move_insn (dst, reg0);
       else
 	{
-	  emit_insn (gen_unaligned_storesi (dst, low_reg));
-	  dst = next_consecutive_mem (dst);
-	  emit_insn (gen_unaligned_storesi (dst, hi_reg));
+	  if (flag_lsrd_be_adjust && BYTES_BIG_ENDIAN && WORDS_BIG_ENDIAN)
+	    {
+	      emit_insn (gen_unaligned_storesi (dst, hi_reg));
+	      dst = next_consecutive_mem (dst);
+	      emit_insn (gen_unaligned_storesi (dst, low_reg));
+	    }
+	  else
+	    {
+	      emit_insn (gen_unaligned_storesi (dst, low_reg));
+	      dst = next_consecutive_mem (dst);
+	      emit_insn (gen_unaligned_storesi (dst, hi_reg));
+	    }
 	}
 
       src = next_consecutive_mem (src);
diff -urp a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
--- a/gcc/config/arm/arm.opt	2019-01-18 11:25:20.840179114 +0800
+++ b/gcc/config/arm/arm.opt	2019-01-18 11:28:51.744184666 +0800
@@ -274,6 +274,10 @@ masm-syntax-unified
 Target Report Var(inline_asm_unified) Init(0) Save
 Assume unified syntax for inline assembly code.
 
+mlsrd-be-adjust
+Target Report Var(flag_lsrd_be_adjust) Init(1)
+Adjust ldrd/strd splitting order when it's big-endian.
+
 mpure-code
 Target Report Var(target_pure_code) Init(0)
 Do not allow constant data to be placed in code sections.
19
arm-bigendian-disable-interleaved-LS-vectorize.patch
Normal file
@@ -0,0 +1,19 @@
diff -urpN gcc-7.3.0-bak/gcc/config/arm/arm.c gcc-7.3.0/gcc/config/arm/arm.c
--- gcc-7.3.0-bak/gcc/config/arm/arm.c	2018-11-13 14:23:21.362347728 +0800
+++ gcc-7.3.0/gcc/config/arm/arm.c	2018-11-13 14:31:15.722360215 +0800
@@ -26853,7 +26853,14 @@ static bool
 arm_array_mode_supported_p (machine_mode mode,
			    unsigned HOST_WIDE_INT nelems)
 {
-  if (TARGET_NEON
+
+
+  /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
+     for now, as the lane-swapping logic needs to be extended in the expanders.
+     See PR target/82518.  */
+
+
+  if (TARGET_NEON && !BYTES_BIG_ENDIAN
       && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
       && (nelems >= 2 && nelems <= 4))
     return true;
25
arm-fix-push-minipool.patch
Normal file
@@ -0,0 +1,25 @@
diff -Nurp a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
--- a/gcc/config/arm/arm.md	2019-08-10 00:21:12.658523444 +0800
+++ b/gcc/config/arm/arm.md	2019-08-10 00:21:53.478521496 +0800
@@ -5337,7 +5337,9 @@
    #
    ldrh%?\\t%0, %1"
   [(set_attr "type" "alu_shift_reg,load_byte")
-   (set_attr "predicable" "yes")]
+   (set_attr "predicable" "yes")
+   (set_attr "pool_range" "*,256")
+   (set_attr "neg_pool_range" "*,244")]
 )
 
 (define_insn "*arm_zero_extendhisi2_v6"
@@ -5348,7 +5350,9 @@
    uxth%?\\t%0, %1
    ldrh%?\\t%0, %1"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "extend,load_byte")]
+   (set_attr "type" "extend,load_byte")
+   (set_attr "pool_range" "*,256")
+   (set_attr "neg_pool_range" "*,244")]
 )
 
 (define_insn "*arm_zero_extendhisi2addsi"
BIN
cloog-0.18.4.tar.gz
Normal file
Binary file not shown.
21
constructor-priority-bugfix.patch
Normal file
@@ -0,0 +1,21 @@
diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c	2018-11-16 18:02:11.000000000 +0800
+++ b/gcc/config/aarch64/aarch64.c	2018-11-16 18:07:39.000000000 +0800
@@ -6102,7 +6102,7 @@ aarch64_elf_asm_constructor (rtx symbol,
	 -Wformat-truncation false positive, use a larger size.  */
       char buf[23];
       snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
-      s = get_section (buf, SECTION_WRITE, NULL);
+      s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
       switch_to_section (s);
       assemble_align (POINTER_SIZE);
       assemble_aligned_integer (POINTER_BYTES, symbol);
@@ -6122,7 +6122,7 @@ aarch64_elf_asm_destructor (rtx symbol,
	 -Wformat-truncation false positive, use a larger size.  */
       char buf[23];
       snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
-      s = get_section (buf, SECTION_WRITE, NULL);
+      s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
       switch_to_section (s);
       assemble_align (POINTER_SIZE);
       assemble_aligned_integer (POINTER_BYTES, symbol);
155
fix-operand-size-mismatch-for-i386-sse.patch
Normal file
@@ -0,0 +1,155 @@
diff -N -urp a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
--- a/gcc/config/i386/sse.md	2019-10-30 10:02:45.894920908 +0800
+++ b/gcc/config/i386/sse.md	2019-10-30 10:17:39.682887612 +0800
@@ -16012,9 +16012,11 @@
   switch (INTVAL (operands[4]))
     {
     case 3:
-      return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+      /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+	 gas changed what it requires incompatibly.  */
+      return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
-      return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+      return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -16057,9 +16059,11 @@
   switch (INTVAL (operands[4]))
     {
     case 3:
-      return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+      /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+	 gas changed what it requires incompatibly.  */
+      return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
-      return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+      return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -16103,10 +16107,12 @@
     {
     case 3:
     case 7:
-      return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+      /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+	 gas changed what it requires incompatibly.  */
+      return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
     case 6:
-      return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
+      return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -16150,10 +16156,12 @@
     {
     case 3:
     case 7:
-      return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+      /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+	 gas changed what it requires incompatibly.  */
+      return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     case 2:
     case 6:
-      return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
+      return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
     default:
       gcc_unreachable ();
     }
@@ -19153,12 +19161,6 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "<sseinsnmode>")])
 
-;; Memory operand override for -masm=intel of the v*gatherq* patterns.
-(define_mode_attr gatherq_mode
-  [(V4SI "q") (V2DI "x") (V4SF "q") (V2DF "x")
-   (V8SI "x") (V4DI "t") (V8SF "x") (V4DF "t")
-   (V16SI "t") (V8DI "g") (V16SF "t") (V8DF "g")])
-
 (define_expand "<avx512>_gathersi<mode>"
   [(parallel [(set (match_operand:VI48F 0 "register_operand")
		   (unspec:VI48F
@@ -19192,7 +19194,9 @@
	  UNSPEC_GATHER))
    (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
   "TARGET_AVX512F"
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %<xtg_mode>6}"
+;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
+;; gas changed what it requires incompatibly.
+  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -19211,7 +19215,9 @@
	  UNSPEC_GATHER))
    (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
   "TARGET_AVX512F"
-  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<xtg_mode>5}"
+;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+;; gas changed what it requires incompatibly.
+  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -19250,9 +19256,9 @@
	  UNSPEC_GATHER))
    (clobber (match_scratch:QI 2 "=&Yk"))]
   "TARGET_AVX512F"
-{
-  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %<gatherq_mode>6}";
-}
+;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
+;; gas changed what it requires incompatibly.
+  "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -19272,14 +19278,16 @@
    (clobber (match_scratch:QI 1 "=&Yk"))]
   "TARGET_AVX512F"
 {
+  /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+     gas changed what it requires incompatibly.  */
   if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
     {
       if (<MODE_SIZE> != 64)
-	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %<gatherq_mode>5}";
+	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
       else
-	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %t5}";
+	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
     }
-  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<gatherq_mode>5}";
+  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
 }
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
@@ -19316,7 +19324,9 @@
	  UNSPEC_SCATTER))
    (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
   "TARGET_AVX512F"
-  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
+;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+;; gas changed what it requires incompatibly.
+  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -19352,11 +19362,9 @@
	  UNSPEC_SCATTER))
    (clobber (match_scratch:QI 1 "=&Yk"))]
   "TARGET_AVX512F"
-{
-  if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8)
-    return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}";
-  return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%t5%{%1%}, %3}";
-}
+;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
+;; gas changed what it requires incompatibly.
+  "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
2680
floop-interchange.patch
Normal file
File diff suppressed because it is too large
905
floop-unroll-and-jam.patch
Normal file
@@ -0,0 +1,905 @@
diff -N -urp a/gcc/Makefile.in b/gcc/Makefile.in
--- a/gcc/Makefile.in	2018-11-07 11:37:24.615223860 +0800
+++ b/gcc/Makefile.in	2018-11-07 11:38:26.155223860 +0800
@@ -1292,6 +1292,7 @@ OBJS = \
	gimple-iterator.o \
	gimple-fold.o \
	gimple-laddress.o \
+	gimple-loop-jam.o \
	gimple-low.o \
	gimple-pretty-print.o \
	gimple-ssa-backprop.o \
diff -N -urp a/gcc/cfgloop.c b/gcc/cfgloop.c
--- a/gcc/cfgloop.c	2018-11-07 11:37:24.947223860 +0800
+++ b/gcc/cfgloop.c	2018-11-07 11:38:26.155223860 +0800
@@ -296,13 +296,25 @@ establish_preds (struct loop *loop, stru
 
 /* Add LOOP to the loop hierarchy tree where FATHER is father of the
    added loop.  If LOOP has some children, take care of that their
-   pred field will be initialized correctly.  */
+   pred field will be initialized correctly.  If AFTER is non-null
+   then it's expected it's a pointer into FATHERs inner sibling
+   list and LOOP is added behind AFTER, otherwise it's added in front
+   of FATHERs siblings.  */
 
 void
-flow_loop_tree_node_add (struct loop *father, struct loop *loop)
+flow_loop_tree_node_add (struct loop *father, struct loop *loop,
+			 struct loop *after)
 {
-  loop->next = father->inner;
-  father->inner = loop;
+  if (after)
+    {
+      loop->next = after->next;
+      after->next = loop;
+    }
+  else
+    {
+      loop->next = father->inner;
+      father->inner = loop;
+    }
 
   establish_preds (loop, father);
 }
diff -N -urp a/gcc/cfgloop.h b/gcc/cfgloop.h
--- a/gcc/cfgloop.h	2018-11-07 11:37:24.331223860 +0800
+++ b/gcc/cfgloop.h	2018-11-07 11:38:26.155223860 +0800
@@ -324,7 +324,8 @@ void record_loop_exits (void);
 void rescan_loop_exit (edge, bool, bool);
 
 /* Loop data structure manipulation/querying.  */
-extern void flow_loop_tree_node_add (struct loop *, struct loop *);
+extern void flow_loop_tree_node_add (struct loop *, struct loop *,
+				     struct loop * = NULL);
 extern void flow_loop_tree_node_remove (struct loop *);
 extern bool flow_loop_nested_p (const struct loop *, const struct loop *);
 extern bool flow_bb_inside_loop_p (const struct loop *, const_basic_block);
diff -N -urp a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
--- a/gcc/cfgloopmanip.c	2018-11-07 11:37:24.847223860 +0800
+++ b/gcc/cfgloopmanip.c	2018-11-07 11:38:26.155223860 +0800
@@ -1026,9 +1026,11 @@ copy_loop_info (struct loop *loop, struc
 }
 
 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
-   created loop into loops structure.  */
+   created loop into loops structure.  If AFTER is non-null
+   the new loop is added at AFTER->next, otherwise in front of TARGETs
+   sibling list.  */
 struct loop *
-duplicate_loop (struct loop *loop, struct loop *target)
+duplicate_loop (struct loop *loop, struct loop *target, struct loop *after)
 {
   struct loop *cloop;
   cloop = alloc_loop ();
@@ -1040,36 +1042,46 @@ duplicate_loop (struct loop *loop, struc
   set_loop_copy (loop, cloop);
 
   /* Add it to target.  */
-  flow_loop_tree_node_add (target, cloop);
+  flow_loop_tree_node_add (target, cloop, after);
 
   return cloop;
 }
 
 /* Copies structure of subloops of LOOP into TARGET loop, placing
-   newly created loops into loop tree.  */
+   newly created loops into loop tree at the end of TARGETs sibling
+   list in the original order.  */
 void
 duplicate_subloops (struct loop *loop, struct loop *target)
 {
-  struct loop *aloop, *cloop;
+  struct loop *aloop, *cloop, *tail;
 
+  for (tail = target->inner; tail && tail->next; tail = tail->next)
+    ;
   for (aloop = loop->inner; aloop; aloop = aloop->next)
     {
-      cloop = duplicate_loop (aloop, target);
+      cloop = duplicate_loop (aloop, target, tail);
+      tail = cloop;
+      gcc_assert (!tail->next);
       duplicate_subloops (aloop, cloop);
     }
 }
 
 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
-   into TARGET loop, placing newly created loops into loop tree.  */
+   into TARGET loop, placing newly created loops into loop tree adding
+   them to TARGETs sibling list at the end in order.  */
 static void
 copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
 {
-  struct loop *aloop;
+  struct loop *aloop, *tail;
   int i;
 
+  for (tail = target->inner; tail && tail->next; tail = tail->next)
+    ;
   for (i = 0; i < n; i++)
     {
-      aloop = duplicate_loop (copied_loops[i], target);
+      aloop = duplicate_loop (copied_loops[i], target, tail);
+      tail = aloop;
+      gcc_assert (!tail->next);
       duplicate_subloops (copied_loops[i], aloop);
     }
 }
@@ -1133,14 +1145,15 @@ set_zero_probability (edge e)
 }
 
 /* Duplicates body of LOOP to given edge E NDUPL times.  Takes care of updating
-   loop structure and dominators.  E's destination must be LOOP header for
-   this to work, i.e. it must be entry or latch edge of this loop; these are
-   unique, as the loops must have preheaders for this function to work
-   correctly (in case E is latch, the function unrolls the loop, if E is entry
-   edge, it peels the loop).  Store edges created by copying ORIG edge from
-   copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to
-   original LOOP body, the other copies are numbered in order given by control
-   flow through them) into TO_REMOVE array.  Returns false if duplication is
+   loop structure and dominators (order of inner subloops is retained).
+   E's destination must be LOOP header for this to work, i.e. it must be entry
+   or latch edge of this loop; these are unique, as the loops must have
+   preheaders for this function to work correctly (in case E is latch, the
+   function unrolls the loop, if E is entry edge, it peels the loop).  Store
+   edges created by copying ORIG edge from copies corresponding to set bits in
+   WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the other copies
+   are numbered in order given by control flow through them) into TO_REMOVE
+   array.  Returns false if duplication is
   impossible.  */
 
 bool
diff -N -urp a/gcc/cfgloopmanip.h b/gcc/cfgloopmanip.h
--- a/gcc/cfgloopmanip.h	2018-11-07 11:37:24.939223860 +0800
+++ b/gcc/cfgloopmanip.h	2018-11-07 11:38:26.155223860 +0800
@@ -47,7 +47,8 @@ extern struct loop *loopify (edge, edge,
			     unsigned, unsigned);
 extern void unloop (struct loop *, bool *, bitmap);
 extern void copy_loop_info (struct loop *loop, struct loop *target);
-extern struct loop * duplicate_loop (struct loop *, struct loop *);
+extern struct loop * duplicate_loop (struct loop *, struct loop *,
+				     struct loop * = NULL);
 extern void duplicate_subloops (struct loop *, struct loop *);
 extern bool can_duplicate_loop_p (const struct loop *loop);
 extern bool duplicate_loop_to_header_edge (struct loop *, edge,
diff -N -urp a/gcc/common.opt b/gcc/common.opt
--- a/gcc/common.opt	2018-11-07 11:37:24.859223860 +0800
+++ b/gcc/common.opt	2018-11-07 11:38:26.159223860 +0800
@@ -1496,8 +1496,8 @@ Common Alias(floop-nest-optimize)
 Enable loop nest transforms.  Same as -floop-nest-optimize.
 
 floop-unroll-and-jam
-Common Alias(floop-nest-optimize)
-Enable loop nest transforms.  Same as -floop-nest-optimize.
+Common Report Var(flag_unroll_jam) Optimization
+Perform unroll-and-jam on loops.
 
 fgnu-tm
 Common Report Var(flag_tm)
diff -N -urp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
--- a/gcc/doc/invoke.texi	2018-11-07 11:37:24.915223860 +0800
+++ b/gcc/doc/invoke.texi	2018-11-07 11:39:49.031223860 +0800
@@ -7120,7 +7120,8 @@ Optimize yet more.  @option{-O3} turns o
 by @option{-O2} and also turns on the @option{-finline-functions},
 @option{-funswitch-loops}, @option{-fpredictive-commoning},
 @option{-fgcse-after-reload}, @option{-ftree-loop-vectorize},
-@option{-ftree-loop-distribute-patterns}, @option{-fsplit-paths}
+@option{-ftree-loop-distribute-patterns}, @option{-fsplit-paths},
+@option{-floop-unroll-and-jam},
 @option{-ftree-slp-vectorize}, @option{-fvect-cost-model},
 @option{-ftree-partial-pre}, @option{-fpeel-loops}
 and @option{-fipa-cp-clone} options.
@@ -8226,12 +8227,10 @@ at @option{-O} and higher.
 @itemx -floop-interchange
 @itemx -floop-strip-mine
 @itemx -floop-block
-@itemx -floop-unroll-and-jam
 @opindex ftree-loop-linear
 @opindex floop-interchange
 @opindex floop-strip-mine
 @opindex floop-block
-@opindex floop-unroll-and-jam
 Perform loop nest optimizations.  Same as
 @option{-floop-nest-optimize}.  To use this code transformation, GCC has
 to be configured with @option{--with-isl} to enable the Graphite loop
@@ -8323,6 +8322,12 @@ ENDDO
 @end smallexample
 and the initialization loop is transformed into a call to memset zero.
 
+@item -floop-unroll-and-jam
+@opindex floop-unroll-and-jam
+Apply unroll and jam transformations on feasible loops.  In a loop
+nest this unrolls the outer loop by some factor and fuses the resulting
+multiple inner loops.  This flag is enabled by default at @option{-O3}.
+
 @item -ftree-loop-im
 @opindex ftree-loop-im
 Perform loop invariant motion on trees.  This pass moves only invariants that
@@ -10353,13 +10358,13 @@ loop in the loop nest by a given number
 length can be changed using the @option{loop-block-tile-size}
 parameter.  The default value is 51 iterations.
 
-@item loop-unroll-jam-size
-Specify the unroll factor for the @option{-floop-unroll-and-jam} option.  The
-default value is 4.
-
-@item loop-unroll-jam-depth
-Specify the dimension to be unrolled (counting from the most inner loop)
-for the @option{-floop-unroll-and-jam}.  The default value is 2.
+@item unroll-jam-min-percent
+The minimum percentage of memory references that must be optimized
+away for the unroll-and-jam transformation to be considered profitable.
+
+@item unroll-jam-max-unroll
+The maximum number of times the outer loop should be unrolled by
+the unroll-and-jam transformation.
 
 @item ipa-cp-value-list-size
 IPA-CP attempts to track all possible values and types passed to a function's
diff -N -urp a/gcc/gimple-loop-jam.c b/gcc/gimple-loop-jam.c
--- a/gcc/gimple-loop-jam.c	1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/gimple-loop-jam.c	2018-11-07 11:38:26.167223860 +0800
@@ -0,0 +1,598 @@
+/* Loop unroll-and-jam.
+   Copyright (C) 2017-2018 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "params.h"
+#include "tree-pass.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "ssa.h"
+#include "fold-const.h"
+#include "tree-cfg.h"
+#include "tree-ssa.h"
+#include "tree-ssa-loop-niter.h"
+#include "tree-ssa-loop.h"
+#include "tree-ssa-loop-manip.h"
+#include "cfgloop.h"
+#include "tree-scalar-evolution.h"
+#include "gimple-iterator.h"
+#include "cfghooks.h"
+#include "tree-data-ref.h"
+#include "tree-ssa-loop-ivopts.h"
+#include "tree-vectorizer.h"
+
+/* Unroll and Jam transformation
+
+   This is a combination of two transformations, where the second
+   is not always valid.  It's applicable if a loop nest has redundancies
+   over the iterations of an outer loop while not having that with
+   an inner loop.
+
+   Given this nest:
+       for (i) {
+	 for (j) {
+	   B (i,j)
+	 }
+       }
+
+   first unroll:
+       for (i by 2) {
+	 for (j) {
+	   B (i,j)
+	 }
+	 for (j) {
+	   B (i+1,j)
+	 }
+       }
+
+   then fuse the two adjacent inner loops resulting from that:
+       for (i by 2) {
+	 for (j) {
+	   B (i,j)
+	   B (i+1,j)
+	 }
+       }
+
+   As the order of evaluations of the body B changes this is valid
+   only in certain situations: all distance vectors need to be forward.
+   Additionally if there are multiple induction variables other than just
+   a counting control IV (j above) we can also deal with some situations.
+
+   The validity is checked by unroll_jam_possible_p, and the data-dep
+   testing below.
+
+   A trivial example where the fusion is wrong would be when
+   B (i,j) == x[j-1] = x[j];
+       for (i by 2) {
+	 for (j) {
+	   x[j-1] = x[j];
+	 }
+	 for (j) {
+	   x[j-1] = x[j];
+	 }
+       }  effect: move content to front by two elements
+       -->
+       for (i by 2) {
+	 for (j) {
+	   x[j-1] = x[j];
+	   x[j-1] = x[j];
+	 }
+       }  effect: move content to front by one element
+*/
+
+/* Modify the loop tree for the fact that all code once belonging
+   to the OLD loop or the outer loop of OLD now is inside LOOP.  */
+
+static void
+merge_loop_tree (struct loop *loop, struct loop *old)
+{
+  basic_block *bbs;
+  int i, n;
+  struct loop *subloop;
+  edge e;
+  edge_iterator ei;
+
+  /* Find its nodes.  */
+  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
+  n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
+
+  for (i = 0; i < n; i++)
+    {
+      /* If the block was direct child of OLD loop it's now part
+	 of LOOP.  If it was outside OLD, then it moved into LOOP
+	 as well.  This avoids changing the loop father for BBs
+	 in inner loops of OLD.  */
+      if (bbs[i]->loop_father == old
+	  || loop_depth (bbs[i]->loop_father) < loop_depth (old))
+	{
+	  remove_bb_from_loops (bbs[i]);
+	  add_bb_to_loop (bbs[i], loop);
+	  continue;
+	}
+
+      /* If we find a direct subloop of OLD, move it to LOOP.  */
+      subloop = bbs[i]->loop_father;
+      if (loop_outer (subloop) == old && subloop->header == bbs[i])
+	{
+	  flow_loop_tree_node_remove (subloop);
+	  flow_loop_tree_node_add (loop, subloop);
+	}
+    }
+
+  /* Update the information about loop exit edges.  */
+  for (i = 0; i < n; i++)
+    {
+      FOR_EACH_EDGE (e, ei, bbs[i]->succs)
+	{
+	  rescan_loop_exit (e, false, false);
+	}
+    }
+
+  loop->num_nodes = n;
+
+  free (bbs);
+}
+
+/* BB is part of the outer loop of an unroll-and-jam situation.
+   Check if any statements therein would prevent the transformation.  */
+
+static bool
+bb_prevents_fusion_p (basic_block bb)
+{
+  gimple_stmt_iterator gsi;
+  /* BB is duplicated by outer unrolling and then all N-1 first copies
+     move into the body of the fused inner loop.  If BB exits the outer loop
+     the last copy still does so, and the first N-1 copies are cancelled
+     by loop unrolling, so also after fusion it's the exit block.
+     But there might be other reasons that prevent fusion:
+       * stores or unknown side-effects prevent fusion
+       * loads don't
+       * computations into SSA names: these aren't problematic.  Their
+	 result will be unused on the exit edges of the first N-1 copies
+	 (those aren't taken after unrolling).  If they are used on the
+	 other edge (the one leading to the outer latch block) they are
+	 loop-carried (on the outer loop) and the Nth copy of BB will
+	 compute them again (i.e. the first N-1 copies will be dead).  */
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple *g = gsi_stmt (gsi);
+      if (gimple_vdef (g) || gimple_has_side_effects (g))
+	return true;
+    }
+  return false;
+}
+
+/* Given an inner loop LOOP (of some OUTER loop) determine if
+   we can safely fuse copies of it (generated by outer unrolling).
+   If so return true, otherwise return false.  */
+
+static bool
+unroll_jam_possible_p (struct loop *outer, struct loop *loop)
+{
+  basic_block *bbs;
+  int i, n;
+  struct tree_niter_desc niter;
+
+  /* When fusing the loops we skip the latch block
+     of the first one, so it mustn't have any effects to
+     preserve.  */
+  if (!empty_block_p (loop->latch))
+    return false;
+
+  if (!single_exit (loop))
+    return false;
+
+  /* We need a perfect nest.  Quick check for adjacent inner loops.  */
+  if (outer->inner != loop || loop->next)
+    return false;
+
+  /* Prevent head-controlled inner loops, that we usually have.
+     The guard block would need to be accepted
+     (invariant condition either entering or skipping the loop),
+     without also accepting arbitrary control flow.  When unswitching
+     ran before us (as with -O3) this won't be a problem because its
+     outer loop unswitching will have moved out the invariant condition.
+
+     If we do that we need to extend fuse_loops () to cope with this
+     by threading through the (still invariant) copied condition
+     between the two loop copies.  */
+  if (!dominated_by_p (CDI_DOMINATORS, outer->latch, loop->header))
+    return false;
+
+  /* The number of iterations of the inner loop must be loop invariant
+     with respect to the outer loop.  */
+  if (!number_of_iterations_exit (loop, single_exit (loop), &niter,
+				  false, true)
+      || niter.cmp == ERROR_MARK
+      || !integer_zerop (niter.may_be_zero)
+      || !expr_invariant_in_loop_p (outer, niter.niter))
+    return false;
+
+  /* If the inner loop produces any values that are used inside the
+     outer loop (except the virtual op) then it can flow
+     back (perhaps indirectly) into the inner loop.  This prevents
+     fusion: without fusion the value at the last iteration is used,
+     with fusion the value after the initial iteration is used.
+
+     If all uses are outside the outer loop this doesn't prevent fusion;
+     the value of the last iteration is still used (and the values from
+     all intermediate iterations are dead).  */
+  gphi_iterator psi;
+  for (psi = gsi_start_phis (single_exit (loop)->dest);
+       !gsi_end_p (psi); gsi_next (&psi))
+    {
+      imm_use_iterator imm_iter;
+      use_operand_p use_p;
+      tree op = gimple_phi_result (psi.phi ());
+      if (virtual_operand_p (op))
+	continue;
+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, op)
+	{
+	  gimple *use_stmt = USE_STMT (use_p);
+	  if (!is_gimple_debug (use_stmt)
+	      && flow_bb_inside_loop_p (outer, gimple_bb (use_stmt)))
+	    return false;
+	}
+    }
+
+  /* And check blocks belonging to just outer loop.  */
+  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
+  n = get_loop_body_with_size (outer, bbs, n_basic_blocks_for_fn (cfun));
+
+  for (i = 0; i < n; i++)
+    if (bbs[i]->loop_father == outer && bb_prevents_fusion_p (bbs[i]))
+      break;
+  free (bbs);
+  if (i != n)
+    return false;
+
+  /* For now we can safely fuse copies of LOOP only if all
+     loop carried variables are inductions (or the virtual op).
+
+     We could handle reductions as well (the initial value in the second
+     body would be the after-iter value of the first body) if it's over
+     an associative and commutative operation.  We wouldn't
+     be able to handle unknown cycles.  */
+  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
+    {
+      affine_iv iv;
+      tree op = gimple_phi_result (psi.phi ());
+
+      if (virtual_operand_p (op))
+	continue;
+      if (!simple_iv (loop, loop, op, &iv, true))
+	return false;
+      /* The inductions must be regular, loop invariant step and initial
+	 value.  */
+      if (!expr_invariant_in_loop_p (outer, iv.step)
+	  || !expr_invariant_in_loop_p (outer, iv.base))
+	return false;
+      /* XXX With more effort we could also be able to deal with inductions
+	 where the initial value is loop variant but a simple IV in the
+	 outer loop.  The initial value for the second body would be
+	 the original initial value plus iv.base.step.  The next value
+	 for the fused loop would be the original next value of the first
+	 copy, _not_ the next value of the second body.  */
+    }
+
+  return true;
+}
+
+/* Fuse LOOP with all further neighbors.  The loops are expected to
+   be in appropriate form.  */
+
+static void
+fuse_loops (struct loop *loop)
|
||||||
|
+{
|
||||||
|
+ struct loop *next = loop->next;
|
||||||
|
+
|
||||||
|
+ while (next)
|
||||||
|
+ {
|
||||||
|
+ edge e;
|
||||||
|
+
|
||||||
|
+ remove_branch (single_pred_edge (loop->latch));
|
||||||
|
+ /* Make delete_basic_block not fiddle with the loop structure. */
|
||||||
|
+ basic_block oldlatch = loop->latch;
|
||||||
|
+ loop->latch = NULL;
|
||||||
|
+ delete_basic_block (oldlatch);
|
||||||
|
+ e = redirect_edge_and_branch (loop_latch_edge (next),
|
||||||
|
+ loop->header);
|
||||||
|
+ loop->latch = e->src;
|
||||||
|
+ flush_pending_stmts (e);
|
||||||
|
+
|
||||||
|
+ gcc_assert (EDGE_COUNT (next->header->preds) == 1);
|
||||||
|
+
|
||||||
|
+ /* The PHI nodes of the second body (single-argument now)
|
||||||
|
+ need adjustments to use the right values: either directly
|
||||||
|
+ the value of the corresponding PHI in the first copy or
|
||||||
|
+ the one leaving the first body which unrolling did for us.
|
||||||
|
+
|
||||||
|
+ See also unroll_jam_possible_p () for further possibilities. */
|
||||||
|
+ gphi_iterator psi_first, psi_second;
|
||||||
|
+ e = single_pred_edge (next->header);
|
||||||
|
+ for (psi_first = gsi_start_phis (loop->header),
|
||||||
|
+ psi_second = gsi_start_phis (next->header);
|
||||||
|
+ !gsi_end_p (psi_first);
|
||||||
|
+ gsi_next (&psi_first), gsi_next (&psi_second))
|
||||||
|
+ {
|
||||||
|
+ gphi *phi_first = psi_first.phi ();
|
||||||
|
+ gphi *phi_second = psi_second.phi ();
|
||||||
|
+ tree firstop = gimple_phi_result (phi_first);
|
||||||
|
+ /* The virtual operand is correct already as it's
|
||||||
|
+ always live at exit, hence has a LCSSA node and outer
|
||||||
|
+ loop unrolling updated SSA form. */
|
||||||
|
+ if (virtual_operand_p (firstop))
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ /* Due to unroll_jam_possible_p () we know that this is
|
||||||
|
+ an induction. The second body goes over the same
|
||||||
|
+ iteration space. */
|
||||||
|
+ add_phi_arg (phi_second, firstop, e,
|
||||||
|
+ gimple_location (phi_first));
|
||||||
|
+ }
|
||||||
|
+ gcc_assert (gsi_end_p (psi_second));
|
||||||
|
+
|
||||||
|
+ merge_loop_tree (loop, next);
|
||||||
|
+ gcc_assert (!next->num_nodes);
|
||||||
|
+ struct loop *ln = next->next;
|
||||||
|
+ delete_loop (next);
|
||||||
|
+ next = ln;
|
||||||
|
+ }
|
||||||
|
+ rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Returns true if the distance in DDR can be determined and adjusts
|
||||||
|
+ the unroll factor in *UNROLL to make unrolling valid for that distance.
|
||||||
|
+ Otherwise return false.
|
||||||
|
+
|
||||||
|
+ If this data dep can lead to a removed memory reference, increment
|
||||||
|
+ *REMOVED and adjust *PROFIT_UNROLL to be the necessary unroll factor
|
||||||
|
+ for this to happen. */
|
||||||
|
+
|
||||||
|
+static bool
|
||||||
|
+adjust_unroll_factor (struct data_dependence_relation *ddr,
|
||||||
|
+ unsigned *unroll, unsigned *profit_unroll,
|
||||||
|
+ unsigned *removed)
|
||||||
|
+{
|
||||||
|
+ bool ret = false;
|
||||||
|
+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
|
||||||
|
+ {
|
||||||
|
+ if (DDR_NUM_DIST_VECTS (ddr) == 0)
|
||||||
|
+ return false;
|
||||||
|
+ unsigned i;
|
||||||
|
+ lambda_vector dist_v;
|
||||||
|
+ FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
|
||||||
|
+ {
|
||||||
|
+ /* A distance (a,b) is at worst transformed into (a/N,b) by the
|
||||||
|
+ unrolling (factor N), so the transformation is valid if
|
||||||
|
+ a >= N, or b > 0, or b is zero and a > 0. Otherwise the unroll
|
||||||
|
+ factor needs to be limited so that the first condition holds.
|
||||||
|
+ That may limit the factor down to zero in the worst case. */
|
||||||
|
+ int dist = dist_v[0];
|
||||||
|
+ if (dist < 0)
|
||||||
|
+ gcc_unreachable ();
|
||||||
|
+ else if ((unsigned)dist >= *unroll)
|
||||||
|
+ ;
|
||||||
|
+ else if (lambda_vector_lexico_pos (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)
|
||||||
|
+ || (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)
|
||||||
|
+ && dist > 0))
|
||||||
|
+ ;
|
||||||
|
+ else
|
||||||
|
+ *unroll = dist;
|
||||||
|
+
|
||||||
|
+ /* With a distance (a,0) it's always profitable to unroll-and-jam
|
||||||
|
+ (by a+1), because one memory reference will go away. With
|
||||||
|
+ (a,b) and b != 0 that's less clear. We will increase the
|
||||||
|
+ number of streams without lowering the number of mem refs.
|
||||||
|
+ So for now only handle the first situation. */
|
||||||
|
+ if (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
|
||||||
|
+ {
|
||||||
|
+ *profit_unroll = MAX (*profit_unroll, (unsigned)dist + 1);
|
||||||
|
+ (*removed)++;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ret = true;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Main entry point for the unroll-and-jam transformation
|
||||||
|
+ described above. */
|
||||||
|
+
|
||||||
|
+static unsigned int
|
||||||
|
+tree_loop_unroll_and_jam (void)
|
||||||
|
+{
|
||||||
|
+ struct loop *loop;
|
||||||
|
+ bool changed = false;
|
||||||
|
+
|
||||||
|
+ gcc_assert (scev_initialized_p ());
|
||||||
|
+
|
||||||
|
+ /* Go through all innermost loops. */
|
||||||
|
+ FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
|
||||||
|
+ {
|
||||||
|
+ struct loop *outer = loop_outer (loop);
|
||||||
|
+
|
||||||
|
+ if (loop_depth (loop) < 2
|
||||||
|
+ || optimize_loop_nest_for_size_p (outer))
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ if (!unroll_jam_possible_p (outer, loop))
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ vec<data_reference_p> datarefs;
|
||||||
|
+ vec<ddr_p> dependences;
|
||||||
|
+ unsigned unroll_factor, profit_unroll, removed;
|
||||||
|
+ struct tree_niter_desc desc;
|
||||||
|
+ bool unroll = false;
|
||||||
|
+
|
||||||
|
+ auto_vec<loop_p, 3> loop_nest;
|
||||||
|
+ dependences.create (10);
|
||||||
|
+ datarefs.create (10);
|
||||||
|
+ if (!compute_data_dependences_for_loop (outer, true, &loop_nest,
|
||||||
|
+ &datarefs, &dependences))
|
||||||
|
+ {
|
||||||
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||||
|
+ fprintf (dump_file, "Cannot analyze data dependencies\n");
|
||||||
|
+ free_data_refs (datarefs);
|
||||||
|
+ free_dependence_relations (dependences);
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
+ if (!datarefs.length ())
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||||
|
+ dump_data_dependence_relations (dump_file, dependences);
|
||||||
|
+
|
||||||
|
+ unroll_factor = (unsigned)-1;
|
||||||
|
+ profit_unroll = 1;
|
||||||
|
+ removed = 0;
|
||||||
|
+
|
||||||
|
+ /* Check all dependencies. */
|
||||||
|
+ unsigned i;
|
||||||
|
+ struct data_dependence_relation *ddr;
|
||||||
|
+ FOR_EACH_VEC_ELT (dependences, i, ddr)
|
||||||
|
+ {
|
||||||
|
+ struct data_reference *dra, *drb;
|
||||||
|
+
|
||||||
|
+ /* If the refs are independend there's nothing to do. */
|
||||||
|
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
|
||||||
|
+ continue;
|
||||||
|
+ dra = DDR_A (ddr);
|
||||||
|
+ drb = DDR_B (ddr);
|
||||||
|
+ /* Nothing interesting for the self dependencies. */
|
||||||
|
+ if (dra == drb)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ /* Now check the distance vector, for determining a sensible
|
||||||
|
+ outer unroll factor, and for validity of merging the inner
|
||||||
|
+ loop copies. */
|
||||||
|
+ if (!adjust_unroll_factor (ddr, &unroll_factor, &profit_unroll,
|
||||||
|
+ &removed))
|
||||||
|
+ {
|
||||||
|
+ /* Couldn't get the distance vector. For two reads that's
|
||||||
|
+ harmless (we assume we should unroll). For at least
|
||||||
|
+ one write this means we can't check the dependence direction
|
||||||
|
+ and hence can't determine safety. */
|
||||||
|
+
|
||||||
|
+ if (DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
|
||||||
|
+ {
|
||||||
|
+ unroll_factor = 0;
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* We regard a user-specified minimum percentage of zero as a request
|
||||||
|
+ to ignore all profitability concerns and apply the transformation
|
||||||
|
+ always. */
|
||||||
|
+ if (!PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT))
|
||||||
|
+ profit_unroll = 2;
|
||||||
|
+ else if (removed * 100 / datarefs.length ()
|
||||||
|
+ < (unsigned)PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT))
|
||||||
|
+ profit_unroll = 1;
|
||||||
|
+ if (unroll_factor > profit_unroll)
|
||||||
|
+ unroll_factor = profit_unroll;
|
||||||
|
+ if (unroll_factor > (unsigned)PARAM_VALUE (PARAM_UNROLL_JAM_MAX_UNROLL))
|
||||||
|
+ unroll_factor = PARAM_VALUE (PARAM_UNROLL_JAM_MAX_UNROLL);
|
||||||
|
+ unroll = (unroll_factor > 1
|
||||||
|
+ && can_unroll_loop_p (outer, unroll_factor, &desc));
|
||||||
|
+
|
||||||
|
+ if (unroll)
|
||||||
|
+ {
|
||||||
|
+ if (dump_enabled_p ())
|
||||||
|
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
|
||||||
|
+ find_loop_location (outer),
|
||||||
|
+ "applying unroll and jam with factor %d\n",
|
||||||
|
+ unroll_factor);
|
||||||
|
+ initialize_original_copy_tables ();
|
||||||
|
+ tree_unroll_loop (outer, unroll_factor, single_dom_exit (outer),
|
||||||
|
+ &desc);
|
||||||
|
+ free_original_copy_tables ();
|
||||||
|
+ fuse_loops (outer->inner);
|
||||||
|
+ changed = true;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loop_nest.release ();
|
||||||
|
+ free_dependence_relations (dependences);
|
||||||
|
+ free_data_refs (datarefs);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (changed)
|
||||||
|
+ {
|
||||||
|
+ scev_reset ();
|
||||||
|
+ free_dominance_info (CDI_DOMINATORS);
|
||||||
|
+ return TODO_cleanup_cfg;
|
||||||
|
+ }
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Pass boilerplate. */
|
||||||
|
+
|
||||||
|
+namespace {
|
||||||
|
+
|
||||||
|
+const pass_data pass_data_loop_jam =
|
||||||
|
+{
|
||||||
|
+ GIMPLE_PASS, /* type. */
|
||||||
|
+ "unrolljam", /* name. */
|
||||||
|
+ OPTGROUP_LOOP, /* optinfo_flags. */
|
||||||
|
+ TV_LOOP_JAM, /* tv_id. */
|
||||||
|
+ PROP_cfg, /* properties_required. */
|
||||||
|
+ 0, /* properties_provided. */
|
||||||
|
+ 0, /* properties_destroyed. */
|
||||||
|
+ 0, /* todo_flags_start. */
|
||||||
|
+ 0, /* todo_flags_finish. */
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+class pass_loop_jam : public gimple_opt_pass
|
||||||
|
+{
|
||||||
|
+public:
|
||||||
|
+ pass_loop_jam (gcc::context *ctxt)
|
||||||
|
+ : gimple_opt_pass (pass_data_loop_jam, ctxt)
|
||||||
|
+ {}
|
||||||
|
+
|
||||||
|
+ /* opt_pass methods: */
|
||||||
|
+ virtual bool gate (function *)
|
||||||
|
+ {
|
||||||
|
+ return flag_unroll_jam != 0;
|
||||||
|
+ }
|
||||||
|
+ virtual unsigned int execute (function *);
|
||||||
|
+
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+unsigned int
|
||||||
|
+pass_loop_jam::execute (function *fun)
|
||||||
|
+{
|
||||||
|
+ if (number_of_loops (fun) <= 1)
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ return tree_loop_unroll_and_jam ();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+gimple_opt_pass *
|
||||||
|
+make_pass_loop_jam (gcc::context *ctxt)
|
||||||
|
+{
|
||||||
|
+ return new pass_loop_jam (ctxt);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
diff -N -urp a/gcc/opts.c b/gcc/opts.c
|
||||||
|
--- a/gcc/opts.c 2018-11-07 11:37:24.891223860 +0800
|
||||||
|
+++ b/gcc/opts.c 2018-11-07 11:38:26.171223860 +0800
|
||||||
|
@@ -534,6 +534,7 @@ static const struct default_options defa
|
||||||
|
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_finline_functions_called_once, NULL, 1 },
|
||||||
|
{ OPT_LEVELS_3_PLUS, OPT_fsplit_loops, NULL, 1 },
|
||||||
|
{ OPT_LEVELS_3_PLUS, OPT_funswitch_loops, NULL, 1 },
|
||||||
|
+ { OPT_LEVELS_3_PLUS, OPT_floop_unroll_and_jam, NULL, 1 },
|
||||||
|
{ OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
|
||||||
|
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
|
||||||
|
{ OPT_LEVELS_3_PLUS, OPT_ftree_slp_vectorize, NULL, 1 },
|
||||||
|
diff -N -urp a/gcc/params.def b/gcc/params.def
|
||||||
|
--- a/gcc/params.def 2018-11-07 11:37:27.543223860 +0800
|
||||||
|
+++ b/gcc/params.def 2018-11-07 11:38:26.171223860 +0800
|
||||||
|
@@ -1280,6 +1280,16 @@ DEFPARAM (PARAM_VECT_EPILOGUES_NOMASK,
|
||||||
|
"Enable loop epilogue vectorization using smaller vector size.",
|
||||||
|
0, 0, 1)
|
||||||
|
|
||||||
|
+DEFPARAM (PARAM_UNROLL_JAM_MIN_PERCENT,
|
||||||
|
+ "unroll-jam-min-percent",
|
||||||
|
+ "Minimum percentage of memrefs that must go away for unroll-and-jam to be considered profitable.",
|
||||||
|
+ 1, 0, 100)
|
||||||
|
+
|
||||||
|
+DEFPARAM (PARAM_UNROLL_JAM_MAX_UNROLL,
|
||||||
|
+ "unroll-jam-max-unroll",
|
||||||
|
+ "Maximum unroll factor for the unroll-and-jam transformation.",
|
||||||
|
+ 4, 0, 0)
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
|
||||||
|
Local variables:
|
||||||
|
diff -N -urp a/gcc/passes.def b/gcc/passes.def
|
||||||
|
--- a/gcc/passes.def 2018-11-07 11:37:24.859223860 +0800
|
||||||
|
+++ b/gcc/passes.def 2018-11-07 11:38:26.171223860 +0800
|
||||||
|
@@ -272,6 +272,7 @@ along with GCC; see the file COPYING3.
|
||||||
|
NEXT_PASS (pass_tree_unswitch);
|
||||||
|
NEXT_PASS (pass_scev_cprop);
|
||||||
|
NEXT_PASS (pass_loop_split);
|
||||||
|
+ NEXT_PASS (pass_loop_jam);
|
||||||
|
/* All unswitching, final value replacement and splitting can expose
|
||||||
|
empty loops. Remove them now. */
|
||||||
|
NEXT_PASS (pass_cd_dce);
|
||||||
|
diff -N -urp a/gcc/timevar.def b/gcc/timevar.def
|
||||||
|
--- a/gcc/timevar.def 2018-11-07 11:37:24.935223860 +0800
|
||||||
|
+++ b/gcc/timevar.def 2018-11-07 11:38:26.175223860 +0800
|
||||||
|
@@ -186,6 +186,7 @@ DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "
|
||||||
|
DEFTIMEVAR (TV_SCEV_CONST , "scev constant prop")
|
||||||
|
DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching")
|
||||||
|
DEFTIMEVAR (TV_LOOP_SPLIT , "loop splitting")
|
||||||
|
+DEFTIMEVAR (TV_LOOP_JAM , "unroll and jam")
|
||||||
|
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
|
||||||
|
DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
|
||||||
|
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
|
||||||
|
diff -N -urp a/gcc/tree-pass.h b/gcc/tree-pass.h
|
||||||
|
--- a/gcc/tree-pass.h 2018-11-07 11:37:24.887223860 +0800
|
||||||
|
+++ b/gcc/tree-pass.h 2018-11-07 11:38:26.175223860 +0800
|
||||||
|
@@ -369,6 +369,7 @@ extern gimple_opt_pass *make_pass_tree_l
|
||||||
|
extern gimple_opt_pass *make_pass_lim (gcc::context *ctxt);
|
||||||
|
extern gimple_opt_pass *make_pass_tree_unswitch (gcc::context *ctxt);
|
||||||
|
extern gimple_opt_pass *make_pass_loop_split (gcc::context *ctxt);
|
||||||
|
+extern gimple_opt_pass *make_pass_loop_jam (gcc::context *ctxt);
|
||||||
|
extern gimple_opt_pass *make_pass_predcom (gcc::context *ctxt);
|
||||||
|
extern gimple_opt_pass *make_pass_iv_canon (gcc::context *ctxt);
|
||||||
|
extern gimple_opt_pass *make_pass_scev_cprop (gcc::context *ctxt);
|
||||||
768
fstack-clash-protection.patch
Normal file
768
fstack-clash-protection.patch
Normal file
@ -0,0 +1,768 @@
|
|||||||
|
diff -N -urp a/gcc/combine-stack-adj.c b/gcc/combine-stack-adj.c
|
||||||
|
--- a/gcc/combine-stack-adj.c 2017-01-20 08:05:30.925466000 +0800
|
||||||
|
+++ b/gcc/combine-stack-adj.c 2019-01-10 17:10:16.606528459 +0800
|
||||||
|
@@ -508,6 +508,8 @@ combine_stack_adjustments_for_block (bas
|
||||||
|
continue;
|
||||||
|
|
||||||
|
set = single_set_for_csa (insn);
|
||||||
|
+ if (set && find_reg_note (insn, REG_STACK_CHECK, NULL_RTX))
|
||||||
|
+ set = NULL_RTX;
|
||||||
|
if (set)
|
||||||
|
{
|
||||||
|
rtx dest = SET_DEST (set);
|
||||||
|
diff -N -urp a/gcc/common.opt b/gcc/common.opt
|
||||||
|
--- a/gcc/common.opt 2019-01-10 13:33:20.926185828 +0800
|
||||||
|
+++ b/gcc/common.opt 2019-01-10 16:37:35.238476827 +0800
|
||||||
|
@@ -2336,13 +2336,18 @@ Common Report Var(flag_variable_expansio
|
||||||
|
Apply variable expansion when loops are unrolled.
|
||||||
|
|
||||||
|
fstack-check=
|
||||||
|
-Common Report RejectNegative Joined
|
||||||
|
+Common Report RejectNegative Joined Optimization
|
||||||
|
-fstack-check=[no|generic|specific] Insert stack checking code into the program.
|
||||||
|
|
||||||
|
fstack-check
|
||||||
|
Common Alias(fstack-check=, specific, no)
|
||||||
|
Insert stack checking code into the program. Same as -fstack-check=specific.
|
||||||
|
|
||||||
|
+fstack-clash-protection
|
||||||
|
+Common Report Var(flag_stack_clash_protection) Optimization
|
||||||
|
+Insert code to probe each page of stack space as it is allocated to protect
|
||||||
|
+from stack-clash style attacks.
|
||||||
|
+
|
||||||
|
fstack-limit
|
||||||
|
Common Var(common_deferred_options) Defer
|
||||||
|
|
||||||
|
diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||||||
|
--- a/gcc/config/aarch64/aarch64.c 2019-01-10 13:33:20.914185828 +0800
|
||||||
|
+++ b/gcc/config/aarch64/aarch64.c 2019-01-11 14:12:22.248521895 +0800
|
||||||
|
@@ -3881,12 +3881,14 @@ aarch64_expand_prologue (void)
|
||||||
|
{
|
||||||
|
if (crtl->is_leaf && !cfun->calls_alloca)
|
||||||
|
{
|
||||||
|
- if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT)
|
||||||
|
- aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT,
|
||||||
|
- frame_size - STACK_CHECK_PROTECT);
|
||||||
|
+ if (frame_size > PROBE_INTERVAL
|
||||||
|
+ && frame_size > get_stack_check_protect ())
|
||||||
|
+ aarch64_emit_probe_stack_range (get_stack_check_protect (),
|
||||||
|
+ (frame_size
|
||||||
|
+ - get_stack_check_protect ()));
|
||||||
|
}
|
||||||
|
else if (frame_size > 0)
|
||||||
|
- aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
|
||||||
|
+ aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
|
||||||
|
diff -N -urp a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
|
||||||
|
--- a/gcc/config/i386/i386.c 2019-01-10 13:33:20.674185822 +0800
|
||||||
|
+++ b/gcc/config/i386/i386.c 2019-01-28 10:55:37.006876481 +0800
|
||||||
|
@@ -14396,7 +14396,7 @@ ix86_expand_prologue (void)
|
||||||
|
HOST_WIDE_INT size = allocate;
|
||||||
|
|
||||||
|
if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
|
||||||
|
- size = 0x80000000 - STACK_CHECK_PROTECT - 1;
|
||||||
|
+ size = 0x80000000 - get_stack_check_protect () - 1;
|
||||||
|
|
||||||
|
if (TARGET_STACK_PROBE)
|
||||||
|
{
|
||||||
|
@@ -14406,18 +14406,21 @@ ix86_expand_prologue (void)
|
||||||
|
ix86_emit_probe_stack_range (0, size);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
- ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
|
||||||
|
+ ix86_emit_probe_stack_range (0,
|
||||||
|
+ size + get_stack_check_protect ());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (crtl->is_leaf && !cfun->calls_alloca)
|
||||||
|
{
|
||||||
|
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
|
||||||
|
- ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
|
||||||
|
- size - STACK_CHECK_PROTECT);
|
||||||
|
+ if (size > PROBE_INTERVAL
|
||||||
|
+ && size > get_stack_check_protect ())
|
||||||
|
+ ix86_emit_probe_stack_range (get_stack_check_protect (),
|
||||||
|
+ (size
|
||||||
|
+ - get_stack_check_protect ()));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
- ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
|
||||||
|
+ ix86_emit_probe_stack_range (get_stack_check_protect (), size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
diff -N -urp a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
|
||||||
|
--- a/gcc/config/ia64/ia64.c 2017-01-01 20:07:43.905435000 +0800
|
||||||
|
+++ b/gcc/config/ia64/ia64.c 2019-01-28 10:58:37.582881234 +0800
|
||||||
|
@@ -3481,15 +3481,16 @@ ia64_expand_prologue (void)
|
||||||
|
|
||||||
|
if (crtl->is_leaf && !cfun->calls_alloca)
|
||||||
|
{
|
||||||
|
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
|
||||||
|
- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
|
||||||
|
- size - STACK_CHECK_PROTECT,
|
||||||
|
+ if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
|
||||||
|
+ ia64_emit_probe_stack_range (get_stack_check_protect (),
|
||||||
|
+ size - get_stack_check_protect (),
|
||||||
|
bs_size);
|
||||||
|
- else if (size + bs_size > STACK_CHECK_PROTECT)
|
||||||
|
- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
|
||||||
|
+ else if (size + bs_size > get_stack_check_protect ())
|
||||||
|
+ ia64_emit_probe_stack_range (get_stack_check_protect (),
|
||||||
|
+ 0, bs_size);
|
||||||
|
}
|
||||||
|
else if (size + bs_size > 0)
|
||||||
|
- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
|
||||||
|
+ ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (dump_file)
|
||||||
|
diff -N -urp a/gcc/coretypes.h b/gcc/coretypes.h
|
||||||
|
--- a/gcc/coretypes.h 2017-01-01 20:07:43.905435000 +0800
|
||||||
|
+++ b/gcc/coretypes.h 2019-01-11 14:09:58.612518114 +0800
|
||||||
|
@@ -371,6 +371,7 @@ typedef unsigned char uchar;
|
||||||
|
#include "input.h"
|
||||||
|
#include "is-a.h"
|
||||||
|
#include "memory-block.h"
|
||||||
|
+#include "dumpfile.h"
|
||||||
|
#endif /* GENERATOR_FILE && !USED_FOR_TARGET */
|
||||||
|
|
||||||
|
#endif /* coretypes.h */
|
||||||
|
diff -N -urp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||||
|
--- a/gcc/doc/invoke.texi 2019-01-10 13:33:20.882185827 +0800
|
||||||
|
+++ b/gcc/doc/invoke.texi 2019-01-10 16:40:40.066481692 +0800
|
||||||
|
@@ -10050,6 +10050,21 @@ compilation without. The value for comp
|
||||||
|
needs to be more conservative (higher) in order to make tracer
|
||||||
|
effective.
|
||||||
|
|
||||||
|
+@item stack-clash-protection-guard-size
|
||||||
|
+Specify the size of the operating system provided stack guard as
|
||||||
|
+2 raised to @var{num} bytes. The default value is 12 (4096 bytes).
|
||||||
|
+Acceptable values are between 12 and 30. Higher values may reduce the
|
||||||
|
+number of explicit probes, but a value larger than the operating system
|
||||||
|
+provided guard will leave code vulnerable to stack clash style attacks.
|
||||||
|
+
|
||||||
|
+@item stack-clash-protection-probe-interval
|
||||||
|
+Stack clash protection involves probing stack space as it is allocated. This
|
||||||
|
+param controls the maximum distance between probes into the stack as 2 raised
|
||||||
|
+to @var{num} bytes. Acceptable values are between 10 and 16 and defaults to
|
||||||
|
+12. Higher values may reduce the number of explicit probes, but a value
|
||||||
|
+larger than the operating system provided guard will leave code vulnerable to
|
||||||
|
+stack clash style attacks.
|
||||||
|
+
|
||||||
|
@item max-cse-path-length
|
||||||
|
|
||||||
|
The maximum number of basic blocks on path that CSE considers.
|
||||||
|
@@ -11248,7 +11263,8 @@ target support in the compiler but comes
|
||||||
|
@enumerate
|
||||||
|
@item
|
||||||
|
Modified allocation strategy for large objects: they are always
|
||||||
|
-allocated dynamically if their size exceeds a fixed threshold.
|
||||||
|
+allocated dynamically if their size exceeds a fixed threshold. Note this
|
||||||
|
+may change the semantics of some code.
|
||||||
|
|
||||||
|
@item
|
||||||
|
Fixed limit on the size of the static frame of functions: when it is
|
||||||
|
@@ -11263,6 +11279,25 @@ generic implementation, code performance
|
||||||
|
Note that old-style stack checking is also the fallback method for
|
||||||
|
@samp{specific} if no target support has been added in the compiler.
|
||||||
|
|
||||||
|
+@samp{-fstack-check=} is designed for Ada's needs to detect infinite recursion
|
||||||
|
+and stack overflows. @samp{specific} is an excellent choice when compiling
|
||||||
|
+Ada code. It is not generally sufficient to protect against stack-clash
|
||||||
|
+attacks. To protect against those you want @samp{-fstack-clash-protection}.
|
||||||
|
+
|
||||||
|
+@item -fstack-clash-protection
|
||||||
|
+@opindex fstack-clash-protection
|
||||||
|
+Generate code to prevent stack clash style attacks. When this option is
|
||||||
|
+enabled, the compiler will only allocate one page of stack space at a time
|
||||||
|
+and each page is accessed immediately after allocation. Thus, it prevents
|
||||||
|
+allocations from jumping over any stack guard page provided by the
|
||||||
|
+operating system.
|
||||||
|
+
|
||||||
|
+Most targets do not fully support stack clash protection. However, on
|
||||||
|
+those targets @option{-fstack-clash-protection} will protect dynamic stack
|
||||||
|
+allocations. @option{-fstack-clash-protection} may also provide limited
|
||||||
|
+protection for static stack allocations if the target supports
|
||||||
|
+@option{-fstack-check=specific}.
|
||||||
|
+
|
||||||
|
@item -fstack-limit-register=@var{reg}
|
||||||
|
@itemx -fstack-limit-symbol=@var{sym}
|
||||||
|
@itemx -fno-stack-limit
|
||||||
|
diff -N -urp a/gcc/doc/tm.texi b/gcc/doc/tm.texi
|
||||||
|
--- a/gcc/doc/tm.texi 2017-04-05 01:52:27.193766000 +0800
|
||||||
|
+++ b/gcc/doc/tm.texi 2019-01-10 16:50:44.006497591 +0800
|
||||||
|
@@ -3419,6 +3419,10 @@ GCC computed the default from the values
|
||||||
|
normally not need to override that default.
|
||||||
|
@end defmac
|
||||||
|
|
||||||
|
+@deftypefn {Target Hook} bool TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE (rtx @var{residual})
|
||||||
|
+Some targets make optimistic assumptions about the state of stack probing when they emit their prologues. On such targets a probe into the end of any dynamically allocated space is likely required for safety against stack clash style attacks. Define this variable to return nonzero if such a probe is required or zero otherwise. You need not define this macro if it would always have the value zero.
|
||||||
|
+@end deftypefn
|
||||||
|
+
|
||||||
|
@need 2000
|
||||||
|
@node Frame Registers
|
||||||
|
@subsection Registers That Address the Stack Frame
|
||||||
|
diff -N -urp a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
|
||||||
|
--- a/gcc/doc/tm.texi.in 2017-04-05 01:52:27.193766000 +0800
|
||||||
|
+++ b/gcc/doc/tm.texi.in 2019-01-10 16:51:41.530499105 +0800
|
||||||
|
@@ -2999,6 +2999,8 @@ GCC computed the default from the values
|
||||||
|
normally not need to override that default.
|
||||||
|
@end defmac
|
||||||
|
|
||||||
|
+@hook TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE
|
||||||
|
+
|
||||||
|
@need 2000
|
||||||
|
@node Frame Registers
|
||||||
|
@subsection Registers That Address the Stack Frame
|
||||||
|
diff -N -urp a/gcc/explow.c b/gcc/explow.c
|
||||||
|
--- a/gcc/explow.c 2017-02-02 20:39:09.589196000 +0800
|
||||||
|
+++ b/gcc/explow.c 2019-01-10 16:56:07.454506105 +0800
|
||||||
|
@@ -39,8 +39,10 @@ along with GCC; see the file COPYING3.
|
||||||
|
#include "expr.h"
|
||||||
|
#include "common/common-target.h"
|
||||||
|
#include "output.h"
|
||||||
|
+#include "params.h"
|
||||||
|
|
||||||
|
static rtx break_out_memory_refs (rtx);
|
||||||
|
+static void anti_adjust_stack_and_probe_stack_clash (rtx);
|
||||||
|
|
||||||
|
|
||||||
|
/* Truncate and perhaps sign-extend C as appropriate for MODE. */
|
||||||
|
@@ -1271,6 +1273,29 @@ get_dynamic_stack_size (rtx *psize, unsi
|
||||||
|
*psize = size;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Return the number of bytes to "protect" on the stack for -fstack-check.
|
||||||
|
+
|
||||||
|
+ "protect" in the context of -fstack-check means how many bytes we
|
||||||
|
+ should always ensure are available on the stack. More importantly
|
||||||
|
+ this is how many bytes are skipped when probing the stack.
|
||||||
|
+
|
||||||
|
+ On some targets we want to reuse the -fstack-check prologue support
|
||||||
|
+ to give a degree of protection against stack clashing style attacks.
|
||||||
|
+
|
||||||
|
+ In that scenario we do not want to skip bytes before probing as that
|
||||||
|
+ would render the stack clash protections useless.
|
||||||
|
+
|
||||||
|
+ So we never use STACK_CHECK_PROTECT directly. Instead we indirect though
|
||||||
|
+ this helper which allows us to provide different values for
|
||||||
|
+ -fstack-check and -fstack-clash-protection. */
|
||||||
|
+HOST_WIDE_INT
|
||||||
|
+get_stack_check_protect (void)
|
||||||
|
+{
|
||||||
|
+ if (flag_stack_clash_protection)
|
||||||
|
+ return 0;
|
||||||
|
+ return STACK_CHECK_PROTECT;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Return an rtx representing the address of an area of memory dynamically
|
||||||
|
pushed on the stack.
|
||||||
|
|
||||||
|
@@ -1429,7 +1454,7 @@ allocate_dynamic_stack_space (rtx size,
|
||||||
|
probe_stack_range (STACK_OLD_CHECK_PROTECT + STACK_CHECK_MAX_FRAME_SIZE,
|
||||||
|
size);
|
||||||
|
else if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
|
||||||
|
- probe_stack_range (STACK_CHECK_PROTECT, size);
|
||||||
|
+ probe_stack_range (get_stack_check_protect (), size);
|
||||||
|
|
||||||
|
/* Don't let anti_adjust_stack emit notes. */
|
||||||
|
suppress_reg_args_size = true;
|
||||||
|
@@ -1482,6 +1507,8 @@ allocate_dynamic_stack_space (rtx size,
|
||||||
|
|
||||||
|
if (flag_stack_check && STACK_CHECK_MOVING_SP)
|
||||||
|
anti_adjust_stack_and_probe (size, false);
|
||||||
|
+ else if (flag_stack_clash_protection)
|
||||||
|
+ anti_adjust_stack_and_probe_stack_clash (size);
|
||||||
|
else
|
||||||
|
anti_adjust_stack (size);
|
||||||
|
|
||||||
|
@@ -1757,6 +1784,237 @@ probe_stack_range (HOST_WIDE_INT first,
|
||||||
|
emit_insn (gen_blockage ());
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Compute parameters for stack clash probing a dynamic stack
|
||||||
|
+ allocation of SIZE bytes.
|
||||||
|
+
|
||||||
|
+ We compute ROUNDED_SIZE, LAST_ADDR, RESIDUAL and PROBE_INTERVAL.
|
||||||
|
+
|
||||||
|
+ Additionally we conditionally dump the type of probing that will
|
||||||
|
+ be needed given the values computed. */
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compute_stack_clash_protection_loop_data (rtx *rounded_size, rtx *last_addr,
|
||||||
|
+ rtx *residual,
|
||||||
|
+ HOST_WIDE_INT *probe_interval,
|
||||||
|
+ rtx size)
|
||||||
|
+{
|
||||||
|
+ /* Round SIZE down to STACK_CLASH_PROTECTION_PROBE_INTERVAL. */
|
||||||
|
+ *probe_interval
|
||||||
|
+ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
|
||||||
|
+ *rounded_size = simplify_gen_binary (AND, Pmode, size,
|
||||||
|
+ GEN_INT (-*probe_interval));
|
||||||
|
+
|
||||||
|
+ /* Compute the value of the stack pointer for the last iteration.
|
||||||
|
+ It's just SP + ROUNDED_SIZE. */
|
||||||
|
+ rtx rounded_size_op = force_operand (*rounded_size, NULL_RTX);
|
||||||
|
+ *last_addr = force_operand (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode,
|
||||||
|
+ stack_pointer_rtx,
|
||||||
|
+ rounded_size_op),
|
||||||
|
+ NULL_RTX);
|
||||||
|
+
|
||||||
|
+ /* Compute any residuals not allocated by the loop above. Residuals
|
||||||
|
+ are just the ROUNDED_SIZE - SIZE. */
|
||||||
|
+ *residual = simplify_gen_binary (MINUS, Pmode, size, *rounded_size);
|
||||||
|
+
|
||||||
|
+ /* Dump key information to make writing tests easy. */
|
||||||
|
+ if (dump_file)
|
||||||
|
+ {
|
||||||
|
+ if (*rounded_size == CONST0_RTX (Pmode))
|
||||||
|
+ fprintf (dump_file,
|
||||||
|
+ "Stack clash skipped dynamic allocation and probing loop.\n");
|
||||||
|
+ else if (CONST_INT_P (*rounded_size)
|
||||||
|
+ && INTVAL (*rounded_size) <= 4 * *probe_interval)
|
||||||
|
+ fprintf (dump_file,
|
||||||
|
+ "Stack clash dynamic allocation and probing inline.\n");
|
||||||
|
+ else if (CONST_INT_P (*rounded_size))
|
||||||
|
+ fprintf (dump_file,
|
||||||
|
+ "Stack clash dynamic allocation and probing in "
|
||||||
|
+ "rotated loop.\n");
|
||||||
|
+ else
|
||||||
|
+ fprintf (dump_file,
|
||||||
|
+ "Stack clash dynamic allocation and probing in loop.\n");
|
||||||
|
+
|
||||||
|
+ if (*residual != CONST0_RTX (Pmode))
|
||||||
|
+ fprintf (dump_file,
|
||||||
|
+ "Stack clash dynamic allocation and probing residuals.\n");
|
||||||
|
+ else
|
||||||
|
+ fprintf (dump_file,
|
||||||
|
+ "Stack clash skipped dynamic allocation and "
|
||||||
|
+ "probing residuals.\n");
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Emit the start of an allocate/probe loop for stack
|
||||||
|
+ clash protection.
|
||||||
|
+
|
||||||
|
+ LOOP_LAB and END_LAB are returned for use when we emit the
|
||||||
|
+ end of the loop.
|
||||||
|
+
|
||||||
|
+ LAST addr is the value for SP which stops the loop. */
|
||||||
|
+void
|
||||||
|
+emit_stack_clash_protection_probe_loop_start (rtx *loop_lab,
|
||||||
|
+ rtx *end_lab,
|
||||||
|
+ rtx last_addr,
|
||||||
|
+ bool rotated)
|
||||||
|
+{
|
||||||
|
+ /* Essentially we want to emit any setup code, the top of loop
|
||||||
|
+ label and the comparison at the top of the loop. */
|
||||||
|
+ *loop_lab = gen_label_rtx ();
|
||||||
|
+ *end_lab = gen_label_rtx ();
|
||||||
|
+
|
||||||
|
+ emit_label (*loop_lab);
|
||||||
|
+ if (!rotated)
|
||||||
|
+ emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, EQ, NULL_RTX,
|
||||||
|
+ Pmode, 1, *end_lab);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Emit the end of a stack clash probing loop.
|
||||||
|
+
|
||||||
|
+ This consists of just the jump back to LOOP_LAB and
|
||||||
|
+ emitting END_LOOP after the loop. */
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+emit_stack_clash_protection_probe_loop_end (rtx loop_lab, rtx end_loop,
|
||||||
|
+ rtx last_addr, bool rotated)
|
||||||
|
+{
|
||||||
|
+ if (rotated)
|
||||||
|
+ emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, NE, NULL_RTX,
|
||||||
|
+ Pmode, 1, loop_lab);
|
||||||
|
+ else
|
||||||
|
+ emit_jump (loop_lab);
|
||||||
|
+
|
||||||
|
+ emit_label (end_loop);
|
||||||
|
+
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes)
|
||||||
|
+ while probing it. This pushes when SIZE is positive. SIZE need not
|
||||||
|
+ be constant.
|
||||||
|
+
|
||||||
|
+ This is subtly different than anti_adjust_stack_and_probe to try and
|
||||||
|
+ prevent stack-clash attacks
|
||||||
|
+
|
||||||
|
+ 1. It must assume no knowledge of the probing state, any allocation
|
||||||
|
+ must probe.
|
||||||
|
+
|
||||||
|
+ Consider the case of a 1 byte alloca in a loop. If the sum of the
|
||||||
|
+ allocations is large, then this could be used to jump the guard if
|
||||||
|
+ probes were not emitted.
|
||||||
|
+
|
||||||
|
+ 2. It never skips probes, whereas anti_adjust_stack_and_probe will
|
||||||
|
+ skip probes on the first couple PROBE_INTERVALs on the assumption
|
||||||
|
+ they're done elsewhere.
|
||||||
|
+
|
||||||
|
+ 3. It only allocates and probes SIZE bytes, it does not need to
|
||||||
|
+ allocate/probe beyond that because this probing style does not
|
||||||
|
+ guarantee signal handling capability if the guard is hit. */
|
||||||
|
+
|
||||||
|
+static void
|
||||||
|
+anti_adjust_stack_and_probe_stack_clash (rtx size)
|
||||||
|
+{
|
||||||
|
+ /* First ensure SIZE is Pmode. */
|
||||||
|
+ if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode)
|
||||||
|
+ size = convert_to_mode (Pmode, size, 1);
|
||||||
|
+
|
||||||
|
+ /* We can get here with a constant size on some targets. */
|
||||||
|
+ rtx rounded_size, last_addr, residual;
|
||||||
|
+ HOST_WIDE_INT probe_interval;
|
||||||
|
+ compute_stack_clash_protection_loop_data (&rounded_size, &last_addr,
|
||||||
|
+ &residual, &probe_interval, size);
|
||||||
|
+
|
||||||
|
+ if (rounded_size != CONST0_RTX (Pmode))
|
||||||
|
+ {
|
||||||
|
+ if (CONST_INT_P (rounded_size)
|
||||||
|
+ && INTVAL (rounded_size) <= 4 * probe_interval)
|
||||||
|
+ {
|
||||||
|
+ for (HOST_WIDE_INT i = 0;
|
||||||
|
+ i < INTVAL (rounded_size);
|
||||||
|
+ i += probe_interval)
|
||||||
|
+ {
|
||||||
|
+ anti_adjust_stack (GEN_INT (probe_interval));
|
||||||
|
+
|
||||||
|
+ /* The prologue does not probe residuals. Thus the offset
|
||||||
|
+ here to probe just beyond what the prologue had already
|
||||||
|
+ allocated. */
|
||||||
|
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
|
||||||
|
+ (probe_interval
|
||||||
|
+ - GET_MODE_SIZE (word_mode))));
|
||||||
|
+ emit_insn (gen_blockage ());
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ rtx loop_lab, end_loop;
|
||||||
|
+ bool rotate_loop = CONST_INT_P (rounded_size);
|
||||||
|
+ emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop,
|
||||||
|
+ last_addr, rotate_loop);
|
||||||
|
+
|
||||||
|
+ anti_adjust_stack (GEN_INT (probe_interval));
|
||||||
|
+
|
||||||
|
+ /* The prologue does not probe residuals. Thus the offset here
|
||||||
|
+ to probe just beyond what the prologue had already allocated. */
|
||||||
|
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
|
||||||
|
+ (probe_interval
|
||||||
|
+ - GET_MODE_SIZE (word_mode))));
|
||||||
|
+
|
||||||
|
+ emit_stack_clash_protection_probe_loop_end (loop_lab, end_loop,
|
||||||
|
+ last_addr, rotate_loop);
|
||||||
|
+ emit_insn (gen_blockage ());
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (residual != CONST0_RTX (Pmode))
|
||||||
|
+ {
|
||||||
|
+ rtx label = NULL_RTX;
|
||||||
|
+ /* RESIDUAL could be zero at runtime and in that case *sp could
|
||||||
|
+ hold live data. Furthermore, we do not want to probe into the
|
||||||
|
+ red zone.
|
||||||
|
+
|
||||||
|
+ Go ahead and just guard the probe at *sp on RESIDUAL != 0 at
|
||||||
|
+ runtime if RESIDUAL is not a compile time constant. */
|
||||||
|
+ if (!CONST_INT_P (residual))
|
||||||
|
+ {
|
||||||
|
+ label = gen_label_rtx ();
|
||||||
|
+ emit_cmp_and_jump_insns (residual, CONST0_RTX (GET_MODE (residual)),
|
||||||
|
+ EQ, NULL_RTX, Pmode, 1, label);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ rtx x = force_reg (Pmode, plus_constant (Pmode, residual,
|
||||||
|
+ -GET_MODE_SIZE (word_mode)));
|
||||||
|
+ anti_adjust_stack (residual);
|
||||||
|
+ emit_stack_probe (gen_rtx_PLUS (Pmode, stack_pointer_rtx, x));
|
||||||
|
+ emit_insn (gen_blockage ());
|
||||||
|
+ if (!CONST_INT_P (residual))
|
||||||
|
+ emit_label (label);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* Some targets make optimistic assumptions in their prologues about
|
||||||
|
+ how the caller may have probed the stack. Make sure we honor
|
||||||
|
+ those assumptions when needed. */
|
||||||
|
+ if (size != CONST0_RTX (Pmode)
|
||||||
|
+ && targetm.stack_clash_protection_final_dynamic_probe (residual))
|
||||||
|
+ {
|
||||||
|
+ /* SIZE could be zero at runtime and in that case *sp could hold
|
||||||
|
+ live data. Furthermore, we don't want to probe into the red
|
||||||
|
+ zone.
|
||||||
|
+
|
||||||
|
+ Go ahead and just guard the probe at *sp on SIZE != 0 at runtime
|
||||||
|
+ if SIZE is not a compile time constant. */
|
||||||
|
+ rtx label = NULL_RTX;
|
||||||
|
+ if (!CONST_INT_P (size))
|
||||||
|
+ {
|
||||||
|
+ label = gen_label_rtx ();
|
||||||
|
+ emit_cmp_and_jump_insns (size, CONST0_RTX (GET_MODE (size)),
|
||||||
|
+ EQ, NULL_RTX, Pmode, 1, label);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ emit_stack_probe (stack_pointer_rtx);
|
||||||
|
+ emit_insn (gen_blockage ());
|
||||||
|
+ if (!CONST_INT_P (size))
|
||||||
|
+ emit_label (label);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes)
|
||||||
|
while probing it. This pushes when SIZE is positive. SIZE need not
|
||||||
|
be constant. If ADJUST_BACK is true, adjust back the stack pointer
|
||||||
|
diff -N -urp a/gcc/explow.h b/gcc/explow.h
|
||||||
|
--- a/gcc/explow.h 2017-01-01 20:07:43.905435000 +0800
|
||||||
|
+++ b/gcc/explow.h 2019-01-10 16:57:37.934508487 +0800
|
||||||
|
@@ -69,6 +69,15 @@ extern void anti_adjust_stack (rtx);
|
||||||
|
/* Add some bytes to the stack while probing it. An rtx says how many. */
|
||||||
|
extern void anti_adjust_stack_and_probe (rtx, bool);
|
||||||
|
|
||||||
|
+/* Support for building allocation/probing loops for stack-clash
|
||||||
|
+ protection of dyamically allocated stack space. */
|
||||||
|
+extern void compute_stack_clash_protection_loop_data (rtx *, rtx *, rtx *,
|
||||||
|
+ HOST_WIDE_INT *, rtx);
|
||||||
|
+extern void emit_stack_clash_protection_probe_loop_start (rtx *, rtx *,
|
||||||
|
+ rtx, bool);
|
||||||
|
+extern void emit_stack_clash_protection_probe_loop_end (rtx, rtx,
|
||||||
|
+ rtx, bool);
|
||||||
|
+
|
||||||
|
/* This enum is used for the following two functions. */
|
||||||
|
enum save_level {SAVE_BLOCK, SAVE_FUNCTION, SAVE_NONLOCAL};
|
||||||
|
|
||||||
|
diff -N -urp a/gcc/flag-types.h b/gcc/flag-types.h
|
||||||
|
--- a/gcc/flag-types.h 2017-01-01 20:07:43.905435000 +0800
|
||||||
|
+++ b/gcc/flag-types.h 2019-01-10 16:42:11.490484099 +0800
|
||||||
|
@@ -166,7 +166,14 @@ enum permitted_flt_eval_methods
|
||||||
|
PERMITTED_FLT_EVAL_METHODS_C11
|
||||||
|
};
|
||||||
|
|
||||||
|
-/* Type of stack check. */
|
||||||
|
+/* Type of stack check.
|
||||||
|
+
|
||||||
|
+ Stack checking is designed to detect infinite recursion and stack
|
||||||
|
+ overflows for Ada programs. Furthermore stack checking tries to ensure
|
||||||
|
+ in that scenario that enough stack space is left to run a signal handler.
|
||||||
|
+
|
||||||
|
+ -fstack-check= does not prevent stack-clash style attacks. For that
|
||||||
|
+ you want -fstack-clash-protection. */
|
||||||
|
enum stack_check_type
|
||||||
|
{
|
||||||
|
/* Do not check the stack. */
|
||||||
|
diff -N -urp a/gcc/function.c b/gcc/function.c
|
||||||
|
--- a/gcc/function.c 2017-08-08 21:21:12.755378000 +0800
|
||||||
|
+++ b/gcc/function.c 2019-01-10 17:07:17.414523742 +0800
|
||||||
|
@@ -5695,6 +5695,58 @@ get_arg_pointer_save_area (void)
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
+
|
||||||
|
+/* If debugging dumps are requested, dump information about how the
|
||||||
|
+ target handled -fstack-check=clash for the prologue.
|
||||||
|
+
|
||||||
|
+ PROBES describes what if any probes were emitted.
|
||||||
|
+
|
||||||
|
+ RESIDUALS indicates if the prologue had any residual allocation
|
||||||
|
+ (i.e. total allocation was not a multiple of PROBE_INTERVAL). */
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+dump_stack_clash_frame_info (enum stack_clash_probes probes, bool residuals)
|
||||||
|
+{
|
||||||
|
+ if (!dump_file)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ switch (probes)
|
||||||
|
+ {
|
||||||
|
+ case NO_PROBE_NO_FRAME:
|
||||||
|
+ fprintf (dump_file,
|
||||||
|
+ "Stack clash no probe no stack adjustment in prologue.\n");
|
||||||
|
+ break;
|
||||||
|
+ case NO_PROBE_SMALL_FRAME:
|
||||||
|
+ fprintf (dump_file,
|
||||||
|
+ "Stack clash no probe small stack adjustment in prologue.\n");
|
||||||
|
+ break;
|
||||||
|
+ case PROBE_INLINE:
|
||||||
|
+ fprintf (dump_file, "Stack clash inline probes in prologue.\n");
|
||||||
|
+ break;
|
||||||
|
+ case PROBE_LOOP:
|
||||||
|
+ fprintf (dump_file, "Stack clash probe loop in prologue.\n");
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (residuals)
|
||||||
|
+ fprintf (dump_file, "Stack clash residual allocation in prologue.\n");
|
||||||
|
+ else
|
||||||
|
+ fprintf (dump_file, "Stack clash no residual allocation in prologue.\n");
|
||||||
|
+
|
||||||
|
+ if (frame_pointer_needed)
|
||||||
|
+ fprintf (dump_file, "Stack clash frame pointer needed.\n");
|
||||||
|
+ else
|
||||||
|
+ fprintf (dump_file, "Stack clash no frame pointer needed.\n");
|
||||||
|
+
|
||||||
|
+ if (TREE_THIS_VOLATILE (cfun->decl))
|
||||||
|
+ fprintf (dump_file,
|
||||||
|
+ "Stack clash noreturn prologue, assuming no implicit"
|
||||||
|
+ " probes in caller.\n");
|
||||||
|
+ else
|
||||||
|
+ fprintf (dump_file,
|
||||||
|
+ "Stack clash not noreturn prologue.\n");
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Add a list of INSNS to the hash HASHP, possibly allocating HASHP
|
||||||
|
for the first time. */
|
||||||
|
|
||||||
|
diff -N -urp a/gcc/function.h b/gcc/function.h
|
||||||
|
--- a/gcc/function.h 2017-01-25 01:07:36.015431000 +0800
|
||||||
|
+++ b/gcc/function.h 2019-01-10 17:08:12.806525200 +0800
|
||||||
|
@@ -553,6 +553,14 @@ do { \
|
||||||
|
((TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_pfn) \
|
||||||
|
? MAX (FUNCTION_BOUNDARY, 2 * BITS_PER_UNIT) : FUNCTION_BOUNDARY)
|
||||||
|
|
||||||
|
+enum stack_clash_probes {
|
||||||
|
+ NO_PROBE_NO_FRAME,
|
||||||
|
+ NO_PROBE_SMALL_FRAME,
|
||||||
|
+ PROBE_INLINE,
|
||||||
|
+ PROBE_LOOP
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+extern void dump_stack_clash_frame_info (enum stack_clash_probes, bool);
|
||||||
|
|
||||||
|
|
||||||
|
extern void push_function_context (void);
|
||||||
|
diff -N -urp a/gcc/params.def b/gcc/params.def
|
||||||
|
--- a/gcc/params.def 2019-01-10 13:33:20.894185827 +0800
|
||||||
|
+++ b/gcc/params.def 2019-01-10 16:43:15.414485782 +0800
|
||||||
|
@@ -213,6 +213,16 @@ DEFPARAM(PARAM_STACK_FRAME_GROWTH,
|
||||||
|
"Maximal stack frame growth due to inlining (in percent).",
|
||||||
|
1000, 0, 0)
|
||||||
|
|
||||||
|
+DEFPARAM(PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE,
|
||||||
|
+ "stack-clash-protection-guard-size",
|
||||||
|
+ "Size of the stack guard expressed as a power of two.",
|
||||||
|
+ 12, 12, 30)
|
||||||
|
+
|
||||||
|
+DEFPARAM(PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL,
|
||||||
|
+ "stack-clash-protection-probe-interval",
|
||||||
|
+ "Interval in which to probe the stack expressed as a power of two.",
|
||||||
|
+ 12, 10, 16)
|
||||||
|
+
|
||||||
|
/* The GCSE optimization will be disabled if it would require
|
||||||
|
significantly more memory than this value. */
|
||||||
|
DEFPARAM(PARAM_MAX_GCSE_MEMORY,
|
||||||
|
diff -N -urp a/gcc/reg-notes.def b/gcc/reg-notes.def
|
||||||
|
--- a/gcc/reg-notes.def 2017-03-28 05:00:35.674561000 +0800
|
||||||
|
+++ b/gcc/reg-notes.def 2019-01-10 17:12:11.678531488 +0800
|
||||||
|
@@ -223,6 +223,10 @@ REG_NOTE (ARGS_SIZE)
|
||||||
|
pseudo reg. */
|
||||||
|
REG_NOTE (RETURNED)
|
||||||
|
|
||||||
|
+/* Indicates the instruction is a stack check probe that should not
|
||||||
|
+ be combined with other stack adjustments. */
|
||||||
|
+REG_NOTE (STACK_CHECK)
|
||||||
|
+
|
||||||
|
/* Used to mark a call with the function decl called by the call.
|
||||||
|
The decl might not be available in the call due to splitting of the call
|
||||||
|
insn. This note is a SYMBOL_REF. */
|
||||||
|
diff -N -urp a/gcc/rtl.h b/gcc/rtl.h
|
||||||
|
--- a/gcc/rtl.h 2017-03-14 20:47:42.745690000 +0800
|
||||||
|
+++ b/gcc/rtl.h 2019-01-10 16:59:15.574511058 +0800
|
||||||
|
@@ -2707,6 +2707,7 @@ get_full_set_src_cost (rtx x, machine_mo
|
||||||
|
/* In explow.c */
|
||||||
|
extern HOST_WIDE_INT trunc_int_for_mode (HOST_WIDE_INT, machine_mode);
|
||||||
|
extern rtx plus_constant (machine_mode, rtx, HOST_WIDE_INT, bool = false);
|
||||||
|
+extern HOST_WIDE_INT get_stack_check_protect (void);
|
||||||
|
|
||||||
|
/* In rtl.c */
|
||||||
|
extern rtx rtx_alloc_stat (RTX_CODE MEM_STAT_DECL);
|
||||||
|
diff -N -urp a/gcc/sched-deps.c b/gcc/sched-deps.c
|
||||||
|
--- a/gcc/sched-deps.c 2017-01-01 20:07:43.905435000 +0800
|
||||||
|
+++ b/gcc/sched-deps.c 2019-01-10 17:13:37.470533746 +0800
|
||||||
|
@@ -4717,6 +4717,11 @@ parse_add_or_inc (struct mem_inc_info *m
|
||||||
|
if (RTX_FRAME_RELATED_P (insn) || !pat)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
+ /* Do not allow breaking data dependencies for insns that are marked
|
||||||
|
+ with REG_STACK_CHECK. */
|
||||||
|
+ if (find_reg_note (insn, REG_STACK_CHECK, NULL))
|
||||||
|
+ return false;
|
||||||
|
+
|
||||||
|
/* Result must be single reg. */
|
||||||
|
if (!REG_P (SET_DEST (pat)))
|
||||||
|
return false;
|
||||||
|
diff -N -urp a/gcc/target.def b/gcc/target.def
|
||||||
|
--- a/gcc/target.def 2019-01-10 13:33:20.762185824 +0800
|
||||||
|
+++ b/gcc/target.def 2019-01-10 17:01:49.146515100 +0800
|
||||||
|
@@ -5490,6 +5490,12 @@ these registers when the target switches
|
||||||
|
void, (void),
|
||||||
|
hook_void_void)
|
||||||
|
|
||||||
|
+DEFHOOK
|
||||||
|
+(stack_clash_protection_final_dynamic_probe,
|
||||||
|
+ "Some targets make optimistic assumptions about the state of stack probing when they emit their prologues. On such targets a probe into the end of any dynamically allocated space is likely required for safety against stack clash style attacks. Define this variable to return nonzero if such a probe is required or zero otherwise. You need not define this macro if it would always have the value zero.",
|
||||||
|
+ bool, (rtx residual),
|
||||||
|
+ default_stack_clash_protection_final_dynamic_probe)
|
||||||
|
+
|
||||||
|
/* Functions specific to the C family of frontends. */
|
||||||
|
#undef HOOK_PREFIX
|
||||||
|
#define HOOK_PREFIX "TARGET_C_"
|
||||||
|
diff -N -urp a/gcc/targhooks.c b/gcc/targhooks.c
|
||||||
|
--- a/gcc/targhooks.c 2017-02-07 19:29:06.644837000 +0800
|
||||||
|
+++ b/gcc/targhooks.c 2019-01-10 17:03:23.818517592 +0800
|
||||||
|
@@ -2107,4 +2107,10 @@ default_excess_precision (enum excess_pr
|
||||||
|
return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
|
||||||
|
}
|
||||||
|
|
||||||
|
+bool
|
||||||
|
+default_stack_clash_protection_final_dynamic_probe (rtx residual ATTRIBUTE_UNUSED)
|
||||||
|
+{
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
#include "gt-targhooks.h"
|
||||||
|
diff -N -urp a/gcc/targhooks.h b/gcc/targhooks.h
|
||||||
|
--- a/gcc/targhooks.h 2017-04-05 01:52:27.193766000 +0800
|
||||||
|
+++ b/gcc/targhooks.h 2019-01-10 17:04:11.438518846 +0800
|
||||||
|
@@ -263,5 +263,6 @@ extern unsigned int default_min_arithmet
|
||||||
|
|
||||||
|
extern enum flt_eval_method
|
||||||
|
default_excess_precision (enum excess_precision_type ATTRIBUTE_UNUSED);
|
||||||
|
+extern bool default_stack_clash_protection_final_dynamic_probe (rtx);
|
||||||
|
|
||||||
|
#endif /* GCC_TARGHOOKS_H */
|
||||||
|
diff -N -urp a/gcc/toplev.c b/gcc/toplev.c
|
||||||
|
--- a/gcc/toplev.c 2017-09-15 16:18:34.015147000 +0800
|
||||||
|
+++ b/gcc/toplev.c 2019-01-10 16:45:33.626489420 +0800
|
||||||
|
@@ -1573,6 +1573,26 @@ process_options (void)
|
||||||
|
flag_associative_math = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ /* -fstack-clash-protection is not currently supported on targets
|
||||||
|
+ where the stack grows up. */
|
||||||
|
+ if (flag_stack_clash_protection && !STACK_GROWS_DOWNWARD)
|
||||||
|
+ {
|
||||||
|
+ warning_at (UNKNOWN_LOCATION, 0,
|
||||||
|
+ "%<-fstack-clash-protection%> is not supported on targets "
|
||||||
|
+ "where the stack grows from lower to higher addresses");
|
||||||
|
+ flag_stack_clash_protection = 0;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ /* We can not support -fstack-check= and -fstack-clash-protection at
|
||||||
|
+ the same time. */
|
||||||
|
+ if (flag_stack_check != NO_STACK_CHECK && flag_stack_clash_protection)
|
||||||
|
+ {
|
||||||
|
+ warning_at (UNKNOWN_LOCATION, 0,
|
||||||
|
+ "%<-fstack-check=%> and %<-fstack-clash_protection%> are "
|
||||||
|
+ "mutually exclusive. Disabling %<-fstack-check=%>");
|
||||||
|
+ flag_stack_check = NO_STACK_CHECK;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
/* With -fcx-limited-range, we do cheap and quick complex arithmetic. */
|
||||||
|
if (flag_cx_limited_range)
|
||||||
|
flag_complex_method = 0;
|
||||||
BIN
gcc-7.3.0.tar.gz
Normal file
BIN
gcc-7.3.0.tar.gz
Normal file
Binary file not shown.
12
gcc-adapt-to-isl.patch
Normal file
12
gcc-adapt-to-isl.patch
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
diff --git a/gcc/graphite.h b/gcc/graphite.h
|
||||||
|
index 4e0e58c..be0a22b 100644 (file)
|
||||||
|
--- a/gcc/graphite.h
|
||||||
|
+++ b/gcc/graphite.h
|
||||||
|
@@ -37,6 +37,8 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#include <isl/schedule.h>
|
||||||
|
#include <isl/ast_build.h>
|
||||||
|
#include <isl/schedule_node.h>
|
||||||
|
+#include <isl/id.h>
|
||||||
|
+#include <isl/space.h>
|
||||||
|
|
||||||
|
typedef struct poly_dr *poly_dr_p;
|
||||||
BIN
isl-0.14.tar.xz
Normal file
BIN
isl-0.14.tar.xz
Normal file
Binary file not shown.
13
mark-pattern-as-clobbering-CC-REGNUM.patch
Normal file
13
mark-pattern-as-clobbering-CC-REGNUM.patch
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
diff -N -urp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||||
|
--- a/gcc/config/aarch64/aarch64.md 2019-05-30 16:12:52.950606040 +0800
|
||||||
|
+++ b/gcc/config/aarch64/aarch64.md 2019-05-30 16:15:56.606599549 +0800
|
||||||
|
@@ -3110,7 +3110,8 @@
|
||||||
|
(define_insn_and_split "*compare_cstore<mode>_insn"
|
||||||
|
[(set (match_operand:GPI 0 "register_operand" "=r")
|
||||||
|
(EQL:GPI (match_operand:GPI 1 "register_operand" "r")
|
||||||
|
- (match_operand:GPI 2 "aarch64_imm24" "n")))]
|
||||||
|
+ (match_operand:GPI 2 "aarch64_imm24" "n")))
|
||||||
|
+ (clobber (reg:CC CC_REGNUM))]
|
||||||
|
"!aarch64_move_imm (INTVAL (operands[2]), <MODE>mode)
|
||||||
|
&& !aarch64_plus_operand (operands[2], <MODE>mode)
|
||||||
|
&& !reload_completed"
|
||||||
108
option-mfentry-and-mlong-calls-bugfix.patch
Normal file
108
option-mfentry-and-mlong-calls-bugfix.patch
Normal file
@@ -0,0 +1,108 @@
diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2018-09-19 17:11:42.583520820 +0800
+++ b/gcc/config/aarch64/aarch64.c 2018-09-19 17:10:22.715520820 +0800
@@ -1260,29 +1260,32 @@ aarch64_is_long_call_p (rtx sym)
void
aarch64_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
- if (!TARGET_LONG_CALLS)
+ if (flag_fentry)
{
- fprintf (file, "\tmov\tx9, x30\n");
- fprintf (file, "\tbl\t__fentry__\n");
- fprintf (file, "\tmov\tx30, x9\n");
- }
- else
- {
- if (flag_pic)
+ if (!TARGET_LONG_CALLS)
{
fprintf (file, "\tmov\tx9, x30\n");
- fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
- fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
- fprintf (file, "\tblr\tx10\n");
+ fprintf (file, "\tbl\t__fentry__\n");
fprintf (file, "\tmov\tx30, x9\n");
}
else
{
- fprintf (file, "\tmov\tx9, x30\n");
- fprintf (file, "\tadrp\tx10, __fentry__\n");
- fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
- fprintf (file, "\tblr\tx10\n");
- fprintf (file, "\tmov\tx30, x9\n");
+ if (flag_pic)
+ {
+ fprintf (file, "\tmov\tx9, x30\n");
+ fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
+ fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
+ fprintf (file, "\tblr\tx10\n");
+ fprintf (file, "\tmov\tx30, x9\n");
+ }
+ else
+ {
+ fprintf (file, "\tmov\tx9, x30\n");
+ fprintf (file, "\tadrp\tx10, __fentry__\n");
+ fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
+ fprintf (file, "\tblr\tx10\n");
+ fprintf (file, "\tmov\tx30, x9\n");
+ }
}
}
}
@@ -12020,6 +12023,15 @@ aarch64_emit_unlikely_jump (rtx insn)
add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
}

+/* Return true, if profiling code should be emitted before
+ prologue. Otherwise it returns false.
+ Note: For x86 with "hotfix" it is sorried. */
+static bool
+aarch64_profile_before_prologue (void)
+{
+ return flag_fentry != 0;
+}
+
/* Expand a compare and swap pattern. */

void
@@ -14952,6 +14964,9 @@ aarch64_run_selftests (void)
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

+#undef TARGET_PROFILE_BEFORE_PROLOGUE
+#define TARGET_PROFILE_BEFORE_PROLOGUE aarch64_profile_before_prologue
+
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
hook_bool_const_tree_hwi_hwi_const_tree_true
diff -N -urp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
--- a/gcc/config/aarch64/aarch64.h 2018-09-19 17:11:42.587520820 +0800
+++ b/gcc/config/aarch64/aarch64.h 2018-09-19 17:10:22.715520820 +0800
@@ -850,9 +850,12 @@ typedef struct
{ \
rtx fun, lr; \
const rtx_insn* tmp = get_insns (); \
- lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
- fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
- emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
+ if (!flag_fentry) \
+ { \
+ lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
+ fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
+ emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
+ } \
if (TARGET_LONG_CALLS) \
{ \
emit_insn (gen_blockage ()); \
diff -N -urp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
--- a/gcc/config/aarch64/aarch64.opt 2018-09-19 17:11:42.587520820 +0800
+++ b/gcc/config/aarch64/aarch64.opt 2018-09-19 17:10:22.715520820 +0800
@@ -192,3 +192,7 @@ single precision and to 32 bits for doub
mverbose-cost-dump
Common Undocumented Var(flag_aarch64_verbose_cost)
Enables verbose cost model dumping in the debug dump files.
+
+mfentry
+Target Report Var(flag_fentry) Init(0)
+Emit profiling counter call at function entry immediately after prologue.
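A short, hedged illustration of what the new option changes; the compile line and file name are assumptions. With -pg -mfentry, aarch64_profile_before_prologue returns true, so the profiling call lands before the prologue via aarch64_function_profiler instead of the PROFILE_HOOK library call to mcount.

/* traced.c (hypothetical) -- compile with the patched compiler:
     gcc -O2 -pg -mfentry -S traced.c
   Per the patch (non-long-call case), the function is preceded by:
       mov  x9, x30
       bl   __fentry__
       mov  x30, x9  */
void
traced (void)
{
}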
362
option-mlong-calls.patch
Normal file
@@ -0,0 +1,362 @@
diff -N -urp a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
--- a/gcc/config/aarch64/aarch64-protos.h 2018-11-06 10:43:27.862079389 +0800
+++ b/gcc/config/aarch64/aarch64-protos.h 2018-11-06 10:44:34.930081154 +0800
@@ -353,6 +353,10 @@ bool aarch64_use_return_insn_p (void);
const char *aarch64_mangle_builtin_type (const_tree);
const char *aarch64_output_casesi (rtx *);

+extern void aarch64_pr_long_calls (struct cpp_reader *);
+extern void aarch64_pr_no_long_calls (struct cpp_reader *);
+extern void aarch64_pr_long_calls_off (struct cpp_reader *);
+
enum aarch64_symbol_type aarch64_classify_symbol (rtx, rtx);
enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
enum reg_class aarch64_regno_regclass (unsigned);
@@ -384,6 +388,7 @@ void aarch64_expand_epilogue (bool);
void aarch64_expand_mov_immediate (rtx, rtx);
void aarch64_expand_prologue (void);
void aarch64_expand_vector_init (rtx, rtx);
+void aarch64_function_profiler (FILE *, int);
void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx,
const_tree, unsigned);
void aarch64_init_expanders (void);
diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2018-11-06 10:43:27.870079389 +0800
+++ b/gcc/config/aarch64/aarch64.c 2018-11-06 10:44:34.934081154 +0800
@@ -70,6 +70,9 @@
/* This file should be included last. */
#include "target-def.h"

+static void aarch64_set_default_type_attributes (tree);
+static int aarch64_comp_type_attributes (const_tree, const_tree);
+
/* Defined for convenience. */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)

@@ -1092,12 +1095,163 @@ aarch64_hard_regno_caller_save_mode (uns
return choose_hard_reg_mode (regno, nregs, false);
}

+/* Table of machine attributes. */
+static const struct attribute_spec aarch64_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
+ affects_type_identity }. */
+ /* Function calls made to this symbol must be done indirectly, because
+ it may lie outside of the 26 bit addressing range of a normal function
+ call. */
+ { "long_call", 0, 0, false, true, true, NULL, false },
+ /* Whereas these functions are always known to reside within the 26 bit
+ addressing range. */
+ { "short_call", 0, 0, false, true, true, NULL, false },
+ { NULL, 0, 0, false, false, false, NULL, false }
+};
+
+/* Encode the current state of the #pragma[no_]long_calls. */
+typedef enum
+{
+ OFF, /* No #pragma[no_]long_calls is in effect. */
+ LONG, /* #pragma long_calls is in effect. */
+ SHORT /* #pragma no_long_calls is in effect. */
+} aarch64_pragma_enum;
+
+static aarch64_pragma_enum aarch64_pragma_long_calls = OFF;
+
+void
+aarch64_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ aarch64_pragma_long_calls = LONG;
+}
+
+void
+aarch64_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ aarch64_pragma_long_calls = SHORT;
+}
+
+void
+aarch64_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
+{
+ aarch64_pragma_long_calls = OFF;
+}
+
+/* Return 0 if the attributes for two types are incompatible, 1 if they
+ are compatible. */
+static int
+aarch64_comp_type_attributes (const_tree type1, const_tree type2)
+{
+ int l1, l2, s1, s2;
+
+ /* Check for mismatch of non-default calling convention. */
+ if (TREE_CODE (type1) != FUNCTION_TYPE)
+ return 1;
+
+ /* Check for mismatched call attributes. */
+ l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
+ l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
+ s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
+ s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
+
+ /* Only bother to check if an attribute is defined. */
+ if (l1 | l2 | s1 | s2)
+ {
+ /* If one type has an attribute, the other
+ must have the same attribute. */
+ if ((l1 != l2) || (s1 != s2))
+ {
+ return 0;
+ }
+
+ /* Disallow mixed attributes. */
+ if ((l1 && s2) || (l2 && s1))
+ {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/* Assigns default attributes to newly defined type. This is used to
+ set short_call/long_call attributes for function types of
+ functions defined inside corresponding #pragma scopes. */
+static void
+aarch64_set_default_type_attributes (tree type)
+{
+ /* Add __attribute__ ((long_call)) to all functions, when
+ inside #pragma long_calls or __attribute__ ((short_call)),
+ when inside #pragma no_long_calls. */
+ if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
+ {
+ tree type_attr_list = NULL;
+ tree attr_name = NULL;
+ type_attr_list = TYPE_ATTRIBUTES (type);
+
+ if (aarch64_pragma_long_calls == LONG)
+ {
+ attr_name = get_identifier ("long_call");
+ }
+ else if (aarch64_pragma_long_calls == SHORT)
+ {
+ attr_name = get_identifier ("short_call");
+ }
+ else
+ {
+ return;
+ }
+
+ type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
+ TYPE_ATTRIBUTES (type) = type_attr_list;
+ }
+}
+
+/* Return true if DECL is known to be linked into section SECTION. */
+static bool
+aarch64_function_in_section_p (tree decl, section *section)
+{
+ /* We can only be certain about the prevailing symbol definition. */
+ if (!decl_binds_to_current_def_p (decl))
+ return false;
+
+ /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
+ if (!DECL_SECTION_NAME (decl))
+ {
+ /* Make sure that we will not create a unique section for DECL. */
+ if (flag_function_sections || DECL_COMDAT_GROUP (decl))
+ return false;
+ }
+
+ return function_section (decl) == section;
+}
+
/* Return true if calls to DECL should be treated as
long-calls (ie called via a register). */
static bool
-aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
+aarch64_decl_is_long_call_p (tree decl)
{
- return false;
+ tree attrs = NULL;
+
+ if (!decl)
+ return TARGET_LONG_CALLS;
+
+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+ if (lookup_attribute ("short_call", attrs))
+ return false;
+
+ /* For "f", be conservative, and only cater for cases in which the
+ whole of the current function is placed in the same section. */
+ if (!flag_reorder_blocks_and_partition
+ && TREE_CODE (decl) == FUNCTION_DECL
+ && aarch64_function_in_section_p (decl, current_function_section ()))
+ return false;
+
+ if (lookup_attribute ("long_call", attrs))
+ return true;
+
+ return TARGET_LONG_CALLS;
}

/* Return true if calls to symbol-ref SYM should be treated as
@@ -1108,6 +1257,36 @@ aarch64_is_long_call_p (rtx sym)
return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

+void
+aarch64_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
+{
+ if (!TARGET_LONG_CALLS)
+ {
+ fprintf (file, "\tmov\tx9, x30\n");
+ fprintf (file, "\tbl\t__fentry__\n");
+ fprintf (file, "\tmov\tx30, x9\n");
+ }
+ else
+ {
+ if (flag_pic)
+ {
+ fprintf (file, "\tmov\tx9, x30\n");
+ fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
+ fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
+ fprintf (file, "\tblr\tx10\n");
+ fprintf (file, "\tmov\tx30, x9\n");
+ }
+ else
+ {
+ fprintf (file, "\tmov\tx9, x30\n");
+ fprintf (file, "\tadrp\tx10, __fentry__\n");
+ fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
+ fprintf (file, "\tblr\tx10\n");
+ fprintf (file, "\tmov\tx30, x9\n");
+ }
+ }
+}
+
/* Return true if calls to symbol-ref SYM should not go through
plt stubs. */

@@ -15099,6 +15278,15 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn

+#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
+#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES aarch64_set_default_type_attributes
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table
+
+#undef TARGET_COMP_TYPE_ATTRIBUTES
+#define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes
+
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

diff -N -urp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
--- a/gcc/config/aarch64/aarch64.h 2018-11-06 10:43:27.870079389 +0800
+++ b/gcc/config/aarch64/aarch64.h 2018-11-06 10:49:29.574088911 +0800
@@ -28,7 +28,6 @@



-#define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas ()

/* Target machine storage layout. */

@@ -659,6 +658,14 @@ typedef struct
} CUMULATIVE_ARGS;
#endif

+/* Handle pragmas for compatibility with Intel's compilers. */
+#define REGISTER_TARGET_PRAGMAS() do { \
+ c_register_pragma (0, "long_calls", aarch64_pr_long_calls); \
+ c_register_pragma (0, "no_long_calls", aarch64_pr_no_long_calls); \
+ c_register_pragma (0, "long_calls_off", aarch64_pr_long_calls_off); \
+ aarch64_register_pragmas (); \
+} while (0)
+
#define FUNCTION_ARG_PADDING(MODE, TYPE) \
(aarch64_pad_arg_upward (MODE, TYPE) ? upward : downward)

@@ -842,13 +849,20 @@ typedef struct
#define PROFILE_HOOK(LABEL) \
{ \
rtx fun, lr; \
+ const rtx_insn* tmp = get_insns (); \
lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
+ if (TARGET_LONG_CALLS) \
+ { \
+ emit_insn (gen_blockage ()); \
+ emit_insn_after (gen_blockage (), NEXT_INSN (tmp)); \
+ } \
}

/* All the work done in PROFILE_HOOK, but still required. */
-#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0)
+#define FUNCTION_PROFILER(STREAM, LABELNO) \
+ aarch64_function_profiler (STREAM, LABELNO)

/* For some reason, the Linux headers think they know how to define
these macros. They don't!!! */
diff -N -urp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
--- a/gcc/config/aarch64/aarch64.md 2018-11-06 10:43:27.874079389 +0800
+++ b/gcc/config/aarch64/aarch64.md 2018-11-06 10:44:34.934081154 +0800
@@ -850,9 +850,10 @@
{
rtx pat;
rtx callee = XEXP (operands[0], 0);
- if (!REG_P (callee)
- && ((GET_CODE (callee) != SYMBOL_REF)
- || aarch64_is_noplt_call_p (callee)))
+
+ if (GET_CODE (callee) == SYMBOL_REF
+ ? (aarch64_is_long_call_p (callee) || aarch64_is_noplt_call_p (callee))
+ : !REG_P (callee))
XEXP (operands[0], 0) = force_reg (Pmode, callee);

if (operands[2] == NULL_RTX)
@@ -881,9 +882,10 @@
{
rtx pat;
rtx callee = XEXP (operands[1], 0);
- if (!REG_P (callee)
- && ((GET_CODE (callee) != SYMBOL_REF)
- || aarch64_is_noplt_call_p (callee)))
+
+ if (GET_CODE (callee) == SYMBOL_REF
+ ? (aarch64_is_long_call_p (callee) || aarch64_is_noplt_call_p (callee))
+ : !REG_P (callee))
XEXP (operands[1], 0) = force_reg (Pmode, callee);

if (operands[3] == NULL_RTX)
diff -N -urp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
--- a/gcc/config/aarch64/aarch64.opt 2018-11-06 10:43:27.874079389 +0800
+++ b/gcc/config/aarch64/aarch64.opt 2018-11-06 10:44:34.934081154 +0800
@@ -80,6 +80,10 @@ mlittle-endian
Target Report RejectNegative InverseMask(BIG_END)
Assume target CPU is configured as little endian.

+mlong-calls
+Target Report Mask(LONG_CALLS)
+Generate call insns as indirect calls, if necessary.
+
mcmodel=
Target RejectNegative Joined Enum(cmodel) Var(aarch64_cmodel_var) Init(AARCH64_CMODEL_SMALL) Save
Specify the code model.
diff -N -urp a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
--- a/gcc/config/aarch64/predicates.md 2018-11-06 10:43:27.878079389 +0800
+++ b/gcc/config/aarch64/predicates.md 2018-11-06 10:44:34.938081154 +0800
@@ -27,8 +27,9 @@
)

(define_predicate "aarch64_call_insn_operand"
- (ior (match_code "symbol_ref")
- (match_operand 0 "register_operand")))
+ (ior (and (match_code "symbol_ref")
+ (match_test "!aarch64_is_long_call_p (op)"))
+ (match_operand 0 "register_operand")))

;; Return true if OP a (const_int 0) operand.
(define_predicate "const0_operand"
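Putting the new knobs together, a hedged usage sketch; the declarations are invented and require the patched compiler. The attributes attach to function types, and the pragmas (registered in the default namespace via c_register_pragma) switch the default for whole regions, with -mlong-calls setting the global polarity.

/* long-calls-demo.c (hypothetical) -- compile with: gcc -mlong-calls ... */
void far_func (void) __attribute__ ((long_call));   /* always via register */
void near_func (void) __attribute__ ((short_call)); /* always a direct bl */

#pragma long_calls
void covered_by_pragma (void);   /* defaults to long_call in this scope */
#pragma long_calls_off

void
caller (void)
{
  far_func ();            /* forced indirect: address built, then blr */
  near_func ();           /* plain bl */
  covered_by_pragma ();   /* indirect, via the pragma default */
}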
33
sanitizer-pr-85835.patch
Normal file
@@ -0,0 +1,33 @@
diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
index 858bb21..de18e56 100644
--- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
+++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
@@ -157,7 +157,6 @@ typedef struct user_fpregs elf_fpregset_t;
# include <sys/procfs.h>
#endif
#include <sys/user.h>
-#include <sys/ustat.h>
#include <linux/cyclades.h>
#include <linux/if_eql.h>
#include <linux/if_plip.h>
@@ -250,7 +249,19 @@ namespace __sanitizer {
#endif // SANITIZER_LINUX || SANITIZER_FREEBSD

#if SANITIZER_LINUX && !SANITIZER_ANDROID
- unsigned struct_ustat_sz = sizeof(struct ustat);
+ // Use pre-computed size of struct ustat to avoid <sys/ustat.h> which
+ // has been removed from glibc 2.28.
+#if defined(__aarch64__) || defined(__s390x__) || defined (__mips64) \
+ || defined(__powerpc64__) || defined(__arch64__) || defined(__sparcv9) \
+ || defined(__x86_64__)
+#define SIZEOF_STRUCT_USTAT 32
+#elif defined(__arm__) || defined(__i386__) || defined(__mips__) \
+ || defined(__powerpc__) || defined(__s390__)
+#define SIZEOF_STRUCT_USTAT 20
+#else
+#error Unknown size of struct ustat
+#endif
+ unsigned struct_ustat_sz = SIZEOF_STRUCT_USTAT;
unsigned struct_rlimit64_sz = sizeof(struct rlimit64);
unsigned struct_statvfs64_sz = sizeof(struct statvfs64);
#endif // SANITIZER_LINUX && !SANITIZER_ANDROID
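A hedged way to double-check the hard-coded sizes, runnable only on a system whose glibc still ships <sys/ustat.h> (pre-2.28); everything here is a verification aid, not part of the patch.

/* ustat-size.c (hypothetical) -- prints the real sizeof(struct ustat)
   so it can be compared against SIZEOF_STRUCT_USTAT (32 on the listed
   64-bit targets, 20 on the 32-bit ones).  */
#include <stdio.h>
#include <sys/ustat.h>

int
main (void)
{
  printf ("sizeof(struct ustat) = %zu\n", sizeof (struct ustat));
  return 0;
}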
11
try-unroll.patch
Normal file
@@ -0,0 +1,11 @@
--- a/gcc/tree-ssa-loop-ivcanon.c 2018-12-06 05:05:43.841181211 +0800
+++ b/gcc/tree-ssa-loop-ivcanon.c 2018-12-06 05:03:17.545185153 +0800
@@ -726,7 +726,7 @@ try_unroll_loop_completely (struct loop
edge_to_cancel = NULL;
}

- if (!n_unroll_found)
+ if (!n_unroll_found || SCEV_NOT_KNOWN == TREE_CODE (niter))
return false;

if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
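A hedged sketch of the situation the extra check guards against; the loop below is illustrative. When niter analysis yields scev_not_known, n_unroll is meaningless, so complete unrolling must bail out instead of consulting the peel-times parameter.

/* not-countable.c (hypothetical) -- the trip count of this loop is not
   a compile-time constant, so SCEV can report it as "not known".  */
void touch (int);

void
walk (const int *p)
{
  while (*p)
    touch (*p++);
}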
25
turn-on-funwind-tables-by-default.patch
Normal file
@@ -0,0 +1,25 @@
diff -N -urp a/gcc/common/config/aarch64/aarch64-common.c b/gcc/common/config/aarch64/aarch64-common.c
--- a/gcc/common/config/aarch64/aarch64-common.c 2019-07-02 09:28:49.798701181 +0800
+++ b/gcc/common/config/aarch64/aarch64-common.c 2019-07-02 09:30:15.436282799 +0800
@@ -51,6 +51,10 @@ static const struct default_options aarc
{ OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
/* Enable redundant extension instructions removal at -O2 and higher. */
{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
+#if (TARGET_DEFAULT_ASYNC_UNWIND_TABLES == 1)
+ { OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 },
+ { OPT_LEVELS_ALL, OPT_funwind_tables, NULL, 1},
+#endif
{ OPT_LEVELS_NONE, 0, NULL, 0 }
};

diff -N -urp a/gcc/config.gcc b/gcc/config.gcc
--- a/gcc/config.gcc 2019-07-02 09:28:50.114701170 +0800
+++ b/gcc/config.gcc 2019-07-02 09:31:50.636196118 +0800
@@ -966,6 +966,7 @@ aarch64*-*-linux*)
tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h"
tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-linux.h"
tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-linux"
+ tm_defines="${tm_defines} TARGET_DEFAULT_ASYNC_UNWIND_TABLES=1"
case $target in
aarch64_be-*)
tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1"
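A hedged demonstration of what the new default buys; the program is an assumption, but the calls are the standard <unwind.h> API shipped with libgcc. With unwind tables on by default for aarch64*-*-linux*, _Unwind_Backtrace can walk the stack of a binary built without any extra flags.

/* bt.c (hypothetical) -- on a compiler built with this patch, plain
   "gcc bt.c" produces a binary whose frames can be walked.  */
#include <stdio.h>
#include <unwind.h>

static _Unwind_Reason_Code
step (struct _Unwind_Context *ctx, void *data)
{
  int *depth = (int *) data;
  printf ("frame %d: pc=%p\n", (*depth)++, (void *) _Unwind_GetIP (ctx));
  return _URC_NO_REASON;
}

int
main (void)
{
  int depth = 0;
  _Unwind_Backtrace (step, &depth);
  return 0;
}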