commit
c077903c77
@ -1,126 +0,0 @@
|
||||
From 900ccfa89dda3ab5f7e44a0dd4d1e9d108b5dc8b Mon Sep 17 00:00:00 2001
|
||||
From: rguenth <rguenth@138bc75d-0d04-0410-961f-82ee72b054a4>
|
||||
Date: Tue, 26 Mar 2019 13:18:23 +0000
|
||||
Subject: [PATCH] 2019-02-26 Richard Biener <rguenther@suse.de>
|
||||
|
||||
Backport from mainline
|
||||
2019-02-12 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/89253
|
||||
* tree-ssa-loop-split.c (tree_ssa_split_loops): Check we can
|
||||
duplicate the loop.
|
||||
|
||||
* gfortran.dg/pr89253.f: New testcase.
|
||||
|
||||
2019-02-08 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR middle-end/89223
|
||||
* tree-data-ref.c (initialize_matrix_A): Fail if constant
|
||||
doesn't fit in HWI.
|
||||
(analyze_subscript_affine_affine): Handle failure from
|
||||
initialize_matrix_A.
|
||||
|
||||
* gcc.dg/torture/pr89223.c: New testcase.
|
||||
|
||||
2019-01-28 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/88739
|
||||
* tree-ssa-sccvn.c (vn_reference_lookup_3): Avoid generating
|
||||
BIT_FIELD_REFs of non-mode-precision integral operands.
|
||||
|
||||
* gcc.c-torture/execute/pr88739.c: New test.
|
||||
|
||||
|
||||
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-7-branch@269942 138bc75d-0d04-0410-961f-82ee72b054a4
|
||||
---
|
||||
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
|
||||
index 2480f4e..a349e3e 100644
|
||||
--- a/gcc/tree-data-ref.c
|
||||
+++ b/gcc/tree-data-ref.c
|
||||
@@ -2118,6 +2118,8 @@ initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
|
||||
switch (TREE_CODE (chrec))
|
||||
{
|
||||
case POLYNOMIAL_CHREC:
|
||||
+ if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
|
||||
+ return chrec_dont_know;
|
||||
A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
|
||||
return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
|
||||
|
||||
@@ -2499,7 +2501,7 @@ analyze_subscript_affine_affine (tree chrec_a,
|
||||
tree *last_conflicts)
|
||||
{
|
||||
unsigned nb_vars_a, nb_vars_b, dim;
|
||||
- HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
|
||||
+ HOST_WIDE_INT gamma, gcd_alpha_beta;
|
||||
lambda_matrix A, U, S;
|
||||
struct obstack scratch_obstack;
|
||||
|
||||
@@ -2536,9 +2538,20 @@ analyze_subscript_affine_affine (tree chrec_a,
|
||||
A = lambda_matrix_new (dim, 1, &scratch_obstack);
|
||||
S = lambda_matrix_new (dim, 1, &scratch_obstack);
|
||||
|
||||
- init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
|
||||
- init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
|
||||
- gamma = init_b - init_a;
|
||||
+ tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
|
||||
+ tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
|
||||
+ if (init_a == chrec_dont_know
|
||||
+ || init_b == chrec_dont_know)
|
||||
+ {
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file, "affine-affine test failed: "
|
||||
+ "representation issue.\n");
|
||||
+ *overlaps_a = conflict_fn_not_known ();
|
||||
+ *overlaps_b = conflict_fn_not_known ();
|
||||
+ *last_conflicts = chrec_dont_know;
|
||||
+ goto end_analyze_subs_aa;
|
||||
+ }
|
||||
+ gamma = int_cst_value (init_b) - int_cst_value (init_a);
|
||||
|
||||
/* Don't do all the hard work of solving the Diophantine equation
|
||||
when we already know the solution: for example,
|
||||
diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
|
||||
index fd97213..3992597 100644
|
||||
--- a/gcc/tree-ssa-loop-split.c
|
||||
+++ b/gcc/tree-ssa-loop-split.c
|
||||
@@ -649,7 +649,8 @@ tree_ssa_split_loops (void)
|
||||
false, true)
|
||||
&& niter.cmp != ERROR_MARK
|
||||
/* We can't yet handle loops controlled by a != predicate. */
|
||||
- && niter.cmp != NE_EXPR)
|
||||
+ && niter.cmp != NE_EXPR
|
||||
+ && can_duplicate_loop_p (loop))
|
||||
{
|
||||
if (split_loop (loop, &niter))
|
||||
{
|
||||
diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
|
||||
index c93f1f2..a2e3ce2 100644
|
||||
--- a/gcc/tree-ssa-sccvn.c
|
||||
+++ b/gcc/tree-ssa-sccvn.c
|
||||
@@ -2029,6 +2029,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *vr_,
|
||||
base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
|
||||
&offset2, &size2, &maxsize2,
|
||||
&reverse);
|
||||
+ tree def_rhs = gimple_assign_rhs1 (def_stmt);
|
||||
if (!reverse
|
||||
&& maxsize2 != -1
|
||||
&& maxsize2 == size2
|
||||
@@ -2041,11 +2042,14 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *vr_,
|
||||
according to endianness. */
|
||||
&& (! INTEGRAL_TYPE_P (vr->type)
|
||||
|| ref->size == TYPE_PRECISION (vr->type))
|
||||
- && ref->size % BITS_PER_UNIT == 0)
|
||||
+ && ref->size % BITS_PER_UNIT == 0
|
||||
+ && (! INTEGRAL_TYPE_P (TREE_TYPE (def_rhs))
|
||||
+ || (TYPE_PRECISION (TREE_TYPE (def_rhs))
|
||||
+ == GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (def_rhs))))))
|
||||
{
|
||||
code_helper rcode = BIT_FIELD_REF;
|
||||
tree ops[3];
|
||||
- ops[0] = SSA_VAL (gimple_assign_rhs1 (def_stmt));
|
||||
+ ops[0] = SSA_VAL (def_rhs);
|
||||
ops[1] = bitsize_int (ref->size);
|
||||
ops[2] = bitsize_int (offset - offset2);
|
||||
tree val = vn_nary_build_or_lookup (rcode, vr->type, ops);
|
||||
--
|
||||
2.9.3
|
||||
@ -1,655 +0,0 @@
|
||||
diff -urpN a/gcc/cfgexpand.c b/gcc/cfgexpand.c
|
||||
--- a/gcc/cfgexpand.c 2019-05-30 16:58:45.350508770 +0800
|
||||
+++ b/gcc/cfgexpand.c 2019-05-30 11:53:13.315156625 +0800
|
||||
@@ -6094,6 +6094,23 @@ stack_protect_prologue (void)
|
||||
rtx x, y;
|
||||
|
||||
x = expand_normal (crtl->stack_protect_guard);
|
||||
+
|
||||
+ if (targetm.have_stack_protect_combined_set () && guard_decl)
|
||||
+ {
|
||||
+ gcc_assert (DECL_P (guard_decl));
|
||||
+ y = DECL_RTL (guard_decl);
|
||||
+
|
||||
+ /* Allow the target to compute address of Y and copy it to X without
|
||||
+ leaking Y into a register. This combined address + copy pattern
|
||||
+ allows the target to prevent spilling of any intermediate results by
|
||||
+ splitting it after register allocation. */
|
||||
+ if (rtx_insn *insn = targetm.gen_stack_protect_combined_set (x, y))
|
||||
+ {
|
||||
+ emit_insn (insn);
|
||||
+ return;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
if (guard_decl)
|
||||
y = expand_normal (guard_decl);
|
||||
else
|
||||
diff -urpN a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
|
||||
--- a/gcc/config/arm/arm.c 2019-05-30 16:58:45.354508770 +0800
|
||||
+++ b/gcc/config/arm/arm.c 2019-05-30 16:59:05.058508073 +0800
|
||||
@@ -7236,21 +7236,34 @@ legitimate_pic_operand_p (rtx x)
|
||||
return 1;
|
||||
}
|
||||
|
||||
-/* Record that the current function needs a PIC register. Initialize
|
||||
- cfun->machine->pic_reg if we have not already done so. */
|
||||
+/* Record that the current function needs a PIC register. If PIC_REG is null,
|
||||
+ a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
|
||||
+ both cases cfun->machine->pic_reg is initialized if we have not already done
|
||||
+ so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
|
||||
+ PIC register is reloaded in the current position of the instruction stream
|
||||
+ regardless of whether it was loaded before. Otherwise, it is only loaded
|
||||
+ if not already done so (crtl->uses_pic_offset_table is null). Note that
|
||||
+ nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
|
||||
+ is only supported iff COMPUTE_NOW is false. */
|
||||
|
||||
static void
|
||||
-require_pic_register (void)
|
||||
+require_pic_register (rtx pic_reg, bool compute_now)
|
||||
{
|
||||
+ gcc_assert (compute_now == (pic_reg != NULL_RTX));
|
||||
+
|
||||
/* A lot of the logic here is made obscure by the fact that this
|
||||
routine gets called as part of the rtx cost estimation process.
|
||||
We don't want those calls to affect any assumptions about the real
|
||||
function; and further, we can't call entry_of_function() until we
|
||||
start the real expansion process. */
|
||||
- if (!crtl->uses_pic_offset_table)
|
||||
+ if (!crtl->uses_pic_offset_table || compute_now)
|
||||
{
|
||||
- gcc_assert (can_create_pseudo_p ());
|
||||
+ gcc_assert (can_create_pseudo_p ()
|
||||
+ || (pic_reg != NULL_RTX
|
||||
+ && REG_P (pic_reg)
|
||||
+ && GET_MODE (pic_reg) == Pmode));
|
||||
if (arm_pic_register != INVALID_REGNUM
|
||||
+ && !compute_now
|
||||
&& !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
|
||||
{
|
||||
if (!cfun->machine->pic_reg)
|
||||
@@ -7266,8 +7279,19 @@ require_pic_register (void)
|
||||
{
|
||||
rtx_insn *seq, *insn;
|
||||
|
||||
- if (!cfun->machine->pic_reg)
|
||||
- cfun->machine->pic_reg = gen_reg_rtx (Pmode);
|
||||
+ if (pic_reg == NULL_RTX && cfun->machine->pic_reg == NULL_RTX)
|
||||
+ {
|
||||
+ pic_reg = gen_reg_rtx (Pmode);
|
||||
+ cfun->machine->pic_reg = pic_reg;
|
||||
+ }
|
||||
+ else if (pic_reg == NULL_RTX)
|
||||
+ {
|
||||
+ pic_reg = cfun->machine->pic_reg;
|
||||
+ }
|
||||
+ else if (cfun->machine->pic_reg == NULL_RTX)
|
||||
+ {
|
||||
+ cfun->machine->pic_reg = pic_reg;
|
||||
+ }
|
||||
|
||||
/* Play games to avoid marking the function as needing pic
|
||||
if we are being called as part of the cost-estimation
|
||||
@@ -7278,11 +7306,12 @@ require_pic_register (void)
|
||||
start_sequence ();
|
||||
|
||||
if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
|
||||
- && arm_pic_register > LAST_LO_REGNUM)
|
||||
+ && arm_pic_register > LAST_LO_REGNUM
|
||||
+ && !compute_now)
|
||||
emit_move_insn (cfun->machine->pic_reg,
|
||||
gen_rtx_REG (Pmode, arm_pic_register));
|
||||
else
|
||||
- arm_load_pic_register (0UL);
|
||||
+ arm_load_pic_register (0UL, pic_reg);
|
||||
|
||||
seq = get_insns ();
|
||||
end_sequence ();
|
||||
@@ -7295,16 +7324,33 @@ require_pic_register (void)
|
||||
we can't yet emit instructions directly in the final
|
||||
insn stream. Queue the insns on the entry edge, they will
|
||||
be committed after everything else is expanded. */
|
||||
- insert_insn_on_edge (seq,
|
||||
- single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
|
||||
+ if (currently_expanding_to_rtl)
|
||||
+ insert_insn_on_edge (seq,
|
||||
+ single_succ_edge
|
||||
+ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
|
||||
+ else
|
||||
+ emit_insn (seq);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+/* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
|
||||
+ created to hold the result of the load. If not NULL, PIC_REG indicates
|
||||
+ which register to use as PIC register, otherwise it is decided by register
|
||||
+ allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
|
||||
+ location in the instruction stream, regardless of whether it was loaded
|
||||
+ previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
|
||||
+ true and null PIC_REG is only supported iff COMPUTE_NOW is false.
|
||||
+
|
||||
+ Returns the register REG into which the PIC load is performed. */
|
||||
+
|
||||
rtx
|
||||
-legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
|
||||
+legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
|
||||
+ bool compute_now)
|
||||
{
|
||||
+ gcc_assert (compute_now == (pic_reg != NULL_RTX));
|
||||
+
|
||||
if (GET_CODE (orig) == SYMBOL_REF
|
||||
|| GET_CODE (orig) == LABEL_REF)
|
||||
{
|
||||
@@ -7337,9 +7383,12 @@ legitimize_pic_address (rtx orig, machin
|
||||
rtx mem;
|
||||
|
||||
/* If this function doesn't have a pic register, create one now. */
|
||||
- require_pic_register ();
|
||||
+ require_pic_register (pic_reg, compute_now);
|
||||
+
|
||||
+ if (pic_reg == NULL_RTX)
|
||||
+ pic_reg = cfun->machine->pic_reg;
|
||||
|
||||
- pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
|
||||
+ pat = gen_calculate_pic_address (reg, pic_reg, orig);
|
||||
|
||||
/* Make the MEM as close to a constant as possible. */
|
||||
mem = SET_SRC (pat);
|
||||
@@ -7388,9 +7437,11 @@ legitimize_pic_address (rtx orig, machin
|
||||
|
||||
gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
|
||||
|
||||
- base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
|
||||
+ base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
|
||||
+ pic_reg, compute_now);
|
||||
offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
|
||||
- base == reg ? 0 : reg);
|
||||
+ base == reg ? 0 : reg, pic_reg,
|
||||
+ compute_now);
|
||||
|
||||
if (CONST_INT_P (offset))
|
||||
{
|
||||
@@ -7490,16 +7541,17 @@ static GTY(()) int pic_labelno;
|
||||
low register. */
|
||||
|
||||
void
|
||||
-arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
|
||||
+arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
|
||||
{
|
||||
- rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
|
||||
+ rtx l1, labelno, pic_tmp, pic_rtx;
|
||||
|
||||
if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
|
||||
return;
|
||||
|
||||
gcc_assert (flag_pic);
|
||||
|
||||
- pic_reg = cfun->machine->pic_reg;
|
||||
+ if (pic_reg == NULL_RTX)
|
||||
+ pic_reg = cfun->machine->pic_reg;
|
||||
if (TARGET_VXWORKS_RTP)
|
||||
{
|
||||
pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
|
||||
@@ -8558,7 +8610,8 @@ arm_legitimize_address (rtx x, rtx orig_
|
||||
{
|
||||
/* We need to find and carefully transform any SYMBOL and LABEL
|
||||
references; so go back to the original address expression. */
|
||||
- rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
|
||||
+ rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
|
||||
+ false /*compute_now*/);
|
||||
|
||||
if (new_x != orig_x)
|
||||
x = new_x;
|
||||
@@ -8626,7 +8679,8 @@ thumb_legitimize_address (rtx x, rtx ori
|
||||
{
|
||||
/* We need to find and carefully transform any SYMBOL and LABEL
|
||||
references; so go back to the original address expression. */
|
||||
- rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
|
||||
+ rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
|
||||
+ false /*compute_now*/);
|
||||
|
||||
if (new_x != orig_x)
|
||||
x = new_x;
|
||||
@@ -17800,7 +17854,7 @@ arm_emit_call_insn (rtx pat, rtx addr, b
|
||||
? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
|
||||
: !SYMBOL_REF_LOCAL_P (addr)))
|
||||
{
|
||||
- require_pic_register ();
|
||||
+ require_pic_register (NULL_RTX, false /*compute_now*/);
|
||||
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
|
||||
}
|
||||
|
||||
@@ -21706,7 +21760,7 @@ arm_expand_prologue (void)
|
||||
mask &= THUMB2_WORK_REGS;
|
||||
if (!IS_NESTED (func_type))
|
||||
mask |= (1 << IP_REGNUM);
|
||||
- arm_load_pic_register (mask);
|
||||
+ arm_load_pic_register (mask, NULL_RTX);
|
||||
}
|
||||
|
||||
/* If we are profiling, make sure no instructions are scheduled before
|
||||
@@ -24909,7 +24963,7 @@ thumb1_expand_prologue (void)
|
||||
/* Load the pic register before setting the frame pointer,
|
||||
so we can use r7 as a temporary work register. */
|
||||
if (flag_pic && arm_pic_register != INVALID_REGNUM)
|
||||
- arm_load_pic_register (live_regs_mask);
|
||||
+ arm_load_pic_register (live_regs_mask, NULL_RTX);
|
||||
|
||||
if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
|
||||
emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
|
||||
diff -urpN a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
|
||||
--- a/gcc/config/arm/arm.md 2019-05-30 16:58:45.358508769 +0800
|
||||
+++ b/gcc/config/arm/arm.md 2019-05-30 11:52:58.491157149 +0800
|
||||
@@ -6051,7 +6051,8 @@
|
||||
operands[1] = legitimize_pic_address (operands[1], SImode,
|
||||
(!can_create_pseudo_p ()
|
||||
? operands[0]
|
||||
- : 0));
|
||||
+ : NULL_RTX), NULL_RTX,
|
||||
+ false /*compute_now*/);
|
||||
}
|
||||
"
|
||||
)
|
||||
@@ -6340,7 +6341,7 @@
|
||||
/* r3 is clobbered by set/longjmp, so we can use it as a scratch
|
||||
register. */
|
||||
if (arm_pic_register != INVALID_REGNUM)
|
||||
- arm_load_pic_register (1UL << 3);
|
||||
+ arm_load_pic_register (1UL << 3, NULL_RTX);
|
||||
DONE;
|
||||
}")
|
||||
|
||||
@@ -8666,6 +8667,164 @@
|
||||
(set_attr "conds" "clob")]
|
||||
)
|
||||
|
||||
+;; Named patterns for stack smashing protection.
|
||||
+(define_expand "stack_protect_combined_set"
|
||||
+ [(parallel
|
||||
+ [(set (match_operand:SI 0 "memory_operand" "")
|
||||
+ (unspec:SI [(match_operand:SI 1 "guard_operand" "")]
|
||||
+ UNSPEC_SP_SET))
|
||||
+ (clobber (match_scratch:SI 2 ""))
|
||||
+ (clobber (match_scratch:SI 3 ""))])]
|
||||
+ ""
|
||||
+ ""
|
||||
+)
|
||||
+
|
||||
+;; Use a separate insn from the above expand to be able to have the mem outside
|
||||
+;; the operand #1 when register allocation comes. This is needed to avoid LRA
|
||||
+;; trying to reload the guard since we need to control how PIC access is done in
|
||||
+;; the -fpic/-fPIC case (see COMPUTE_NOW parameter when calling
|
||||
+;; legitimize_pic_address ()).
|
||||
+(define_insn_and_split "*stack_protect_combined_set_insn"
|
||||
+ [(set (match_operand:SI 0 "memory_operand" "=m,m")
|
||||
+ (unspec:SI [(mem:SI (match_operand:SI 1 "guard_addr_operand" "X,X"))]
|
||||
+ UNSPEC_SP_SET))
|
||||
+ (clobber (match_scratch:SI 2 "=&l,&r"))
|
||||
+ (clobber (match_scratch:SI 3 "=&l,&r"))]
|
||||
+ ""
|
||||
+ "#"
|
||||
+ "reload_completed"
|
||||
+ [(parallel [(set (match_dup 0) (unspec:SI [(mem:SI (match_dup 2))]
|
||||
+ UNSPEC_SP_SET))
|
||||
+ (clobber (match_dup 2))])]
|
||||
+ "
|
||||
+{
|
||||
+ if (flag_pic)
|
||||
+ {
|
||||
+ /* Forces recomputing of GOT base now. */
|
||||
+ legitimize_pic_address (operands[1], SImode, operands[2], operands[3],
|
||||
+ true /*compute_now*/);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if (address_operand (operands[1], SImode))
|
||||
+ operands[2] = operands[1];
|
||||
+ else
|
||||
+ {
|
||||
+ rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0);
|
||||
+ emit_move_insn (operands[2], mem);
|
||||
+ }
|
||||
+ }
|
||||
+}"
|
||||
+ [(set_attr "arch" "t1,32")]
|
||||
+)
|
||||
+
|
||||
+(define_insn "*stack_protect_set_insn"
|
||||
+ [(set (match_operand:SI 0 "memory_operand" "=m,m")
|
||||
+ (unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "+&l,&r"))]
|
||||
+ UNSPEC_SP_SET))
|
||||
+ (clobber (match_dup 1))]
|
||||
+ ""
|
||||
+ "@
|
||||
+ ldr\\t%1, [%1]\;str\\t%1, %0\;movs\t%1,#0
|
||||
+ ldr\\t%1, [%1]\;str\\t%1, %0\;mov\t%1,#0"
|
||||
+ [(set_attr "length" "8,12")
|
||||
+ (set_attr "conds" "clob,nocond")
|
||||
+ (set_attr "type" "multiple")
|
||||
+ (set_attr "arch" "t1,32")]
|
||||
+)
|
||||
+
|
||||
+(define_expand "stack_protect_combined_test"
|
||||
+ [(parallel
|
||||
+ [(set (pc)
|
||||
+ (if_then_else
|
||||
+ (eq (match_operand:SI 0 "memory_operand" "")
|
||||
+ (unspec:SI [(match_operand:SI 1 "guard_operand" "")]
|
||||
+ UNSPEC_SP_TEST))
|
||||
+ (label_ref (match_operand 2))
|
||||
+ (pc)))
|
||||
+ (clobber (match_scratch:SI 3 ""))
|
||||
+ (clobber (match_scratch:SI 4 ""))
|
||||
+ (clobber (reg:CC CC_REGNUM))])]
|
||||
+ ""
|
||||
+ ""
|
||||
+)
|
||||
+
|
||||
+;; Use a separate insn from the above expand to be able to have the mem outside
|
||||
+;; the operand #1 when register allocation comes. This is needed to avoid LRA
|
||||
+;; trying to reload the guard since we need to control how PIC access is done in
|
||||
+;; the -fpic/-fPIC case (see COMPUTE_NOW parameter when calling
|
||||
+;; legitimize_pic_address ()).
|
||||
+(define_insn_and_split "*stack_protect_combined_test_insn"
|
||||
+ [(set (pc)
|
||||
+ (if_then_else
|
||||
+ (eq (match_operand:SI 0 "memory_operand" "m,m")
|
||||
+ (unspec:SI [(mem:SI (match_operand:SI 1 "guard_addr_operand" "X,X"))]
|
||||
+ UNSPEC_SP_TEST))
|
||||
+ (label_ref (match_operand 2))
|
||||
+ (pc)))
|
||||
+ (clobber (match_scratch:SI 3 "=&l,&r"))
|
||||
+ (clobber (match_scratch:SI 4 "=&l,&r"))
|
||||
+ (clobber (reg:CC CC_REGNUM))]
|
||||
+ ""
|
||||
+ "#"
|
||||
+ "reload_completed"
|
||||
+ [(const_int 0)]
|
||||
+{
|
||||
+ rtx eq;
|
||||
+
|
||||
+ if (flag_pic)
|
||||
+ {
|
||||
+ /* Forces recomputing of GOT base now. */
|
||||
+ legitimize_pic_address (operands[1], SImode, operands[3], operands[4],
|
||||
+ true /*compute_now*/);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if (address_operand (operands[1], SImode))
|
||||
+ operands[3] = operands[1];
|
||||
+ else
|
||||
+ {
|
||||
+ rtx mem = XEXP (force_const_mem (SImode, operands[1]), 0);
|
||||
+ emit_move_insn (operands[3], mem);
|
||||
+ }
|
||||
+ }
|
||||
+ if (TARGET_32BIT)
|
||||
+ {
|
||||
+ emit_insn (gen_arm_stack_protect_test_insn (operands[4], operands[0],
|
||||
+ operands[3]));
|
||||
+ rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
|
||||
+ eq = gen_rtx_EQ (CC_Zmode, cc_reg, const0_rtx);
|
||||
+ emit_jump_insn (gen_arm_cond_branch (operands[2], eq, cc_reg));
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ emit_insn (gen_thumb1_stack_protect_test_insn (operands[4], operands[0],
|
||||
+ operands[3]));
|
||||
+ eq = gen_rtx_EQ (VOIDmode, operands[4], const0_rtx);
|
||||
+ emit_jump_insn (gen_cbranchsi4 (eq, operands[4], const0_rtx,
|
||||
+ operands[2]));
|
||||
+ }
|
||||
+ DONE;
|
||||
+}
|
||||
+ [(set_attr "arch" "t1,32")]
|
||||
+)
|
||||
+
|
||||
+(define_insn "arm_stack_protect_test_insn"
|
||||
+ [(set (reg:CC_Z CC_REGNUM)
|
||||
+ (compare:CC_Z (unspec:SI [(match_operand:SI 1 "memory_operand" "m,m")
|
||||
+ (mem:SI (match_operand:SI 2 "register_operand" "+l,r"))]
|
||||
+ UNSPEC_SP_TEST)
|
||||
+ (const_int 0)))
|
||||
+ (clobber (match_operand:SI 0 "register_operand" "=&l,&r"))
|
||||
+ (clobber (match_dup 2))]
|
||||
+ "TARGET_32BIT"
|
||||
+ "ldr\t%0, [%2]\;ldr\t%2, %1\;eors\t%0, %2, %0"
|
||||
+ [(set_attr "length" "8,12")
|
||||
+ (set_attr "conds" "set")
|
||||
+ (set_attr "type" "multiple")
|
||||
+ (set_attr "arch" "t,32")]
|
||||
+)
|
||||
+
|
||||
(define_expand "casesi"
|
||||
[(match_operand:SI 0 "s_register_operand" "") ; index to jump on
|
||||
(match_operand:SI 1 "const_int_operand" "") ; lower bound
|
||||
diff -urpN a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
|
||||
--- a/gcc/config/arm/arm-protos.h 2019-05-30 16:58:45.358508769 +0800
|
||||
+++ b/gcc/config/arm/arm-protos.h 2019-05-30 11:52:58.491157149 +0800
|
||||
@@ -28,7 +28,7 @@ extern enum unwind_info_type arm_except_
|
||||
extern int use_return_insn (int, rtx);
|
||||
extern bool use_simple_return_p (void);
|
||||
extern enum reg_class arm_regno_class (int);
|
||||
-extern void arm_load_pic_register (unsigned long);
|
||||
+extern void arm_load_pic_register (unsigned long, rtx);
|
||||
extern int arm_volatile_func (void);
|
||||
extern void arm_expand_prologue (void);
|
||||
extern void arm_expand_epilogue (bool);
|
||||
@@ -69,7 +69,7 @@ extern int const_ok_for_dimode_op (HOST_
|
||||
extern int arm_split_constant (RTX_CODE, machine_mode, rtx,
|
||||
HOST_WIDE_INT, rtx, rtx, int);
|
||||
extern int legitimate_pic_operand_p (rtx);
|
||||
-extern rtx legitimize_pic_address (rtx, machine_mode, rtx);
|
||||
+extern rtx legitimize_pic_address (rtx, machine_mode, rtx, rtx, bool);
|
||||
extern rtx legitimize_tls_address (rtx, rtx);
|
||||
extern bool arm_legitimate_address_p (machine_mode, rtx, bool);
|
||||
extern int arm_legitimate_address_outer_p (machine_mode, rtx, RTX_CODE, int);
|
||||
diff -urpN a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
|
||||
--- a/gcc/config/arm/predicates.md 2019-05-30 16:58:45.358508769 +0800
|
||||
+++ b/gcc/config/arm/predicates.md 2019-05-30 11:52:58.491157149 +0800
|
||||
@@ -31,6 +31,23 @@
|
||||
|| REGNO_REG_CLASS (REGNO (op)) != NO_REGS));
|
||||
})
|
||||
|
||||
+; Predicate for stack protector guard's address in
|
||||
+; stack_protect_combined_set_insn and stack_protect_combined_test_insn patterns
|
||||
+(define_predicate "guard_addr_operand"
|
||||
+ (match_test "true")
|
||||
+{
|
||||
+ return (CONSTANT_ADDRESS_P (op)
|
||||
+ || !targetm.cannot_force_const_mem (mode, op));
|
||||
+})
|
||||
+
|
||||
+; Predicate for stack protector guard in stack_protect_combined_set and
|
||||
+; stack_protect_combined_test patterns
|
||||
+(define_predicate "guard_operand"
|
||||
+ (match_code "mem")
|
||||
+{
|
||||
+ return guard_addr_operand (XEXP (op, 0), mode);
|
||||
+})
|
||||
+
|
||||
(define_predicate "imm_for_neon_inv_logic_operand"
|
||||
(match_code "const_vector")
|
||||
{
|
||||
diff -urpN a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
|
||||
--- a/gcc/config/arm/thumb1.md 2019-05-30 16:58:45.358508769 +0800
|
||||
+++ b/gcc/config/arm/thumb1.md 2019-05-30 11:52:58.491157149 +0800
|
||||
@@ -1964,4 +1964,17 @@
|
||||
}"
|
||||
[(set_attr "type" "mov_reg")]
|
||||
)
|
||||
+
|
||||
+(define_insn "thumb1_stack_protect_test_insn"
|
||||
+ [(set (match_operand:SI 0 "register_operand" "=&l")
|
||||
+ (unspec:SI [(match_operand:SI 1 "memory_operand" "m")
|
||||
+ (mem:SI (match_operand:SI 2 "register_operand" "+l"))]
|
||||
+ UNSPEC_SP_TEST))
|
||||
+ (clobber (match_dup 2))]
|
||||
+ "TARGET_THUMB1"
|
||||
+ "ldr\t%0, [%2]\;ldr\t%2, %1\;eors\t%0, %2, %0"
|
||||
+ [(set_attr "length" "8")
|
||||
+ (set_attr "conds" "set")
|
||||
+ (set_attr "type" "multiple")]
|
||||
+)
|
||||
|
||||
diff -urpN a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
|
||||
--- a/gcc/config/arm/unspecs.md 2019-05-30 16:58:45.358508769 +0800
|
||||
+++ b/gcc/config/arm/unspecs.md 2019-05-30 11:52:58.491157149 +0800
|
||||
@@ -86,6 +86,9 @@
|
||||
UNSPEC_PROBE_STACK ; Probe stack memory reference
|
||||
UNSPEC_NONSECURE_MEM ; Represent non-secure memory in ARMv8-M with
|
||||
; security extension
|
||||
+ UNSPEC_SP_SET ; Represent the setting of stack protector's canary
|
||||
+ UNSPEC_SP_TEST ; Represent the testing of stack protector's canary
|
||||
+ ; against the guard.
|
||||
])
|
||||
|
||||
(define_c_enum "unspec" [
|
||||
diff -urpN a/gcc/doc/md.texi b/gcc/doc/md.texi
|
||||
--- a/gcc/doc/md.texi 2019-05-30 16:58:45.362508769 +0800
|
||||
+++ b/gcc/doc/md.texi 2019-05-30 11:52:58.491157149 +0800
|
||||
@@ -6955,22 +6955,61 @@ builtins.
|
||||
The get/set patterns have a single output/input operand respectively,
|
||||
with @var{mode} intended to be @code{Pmode}.
|
||||
|
||||
+@cindex @code{stack_protect_combined_set} instruction pattern
|
||||
+@item @samp{stack_protect_combined_set}
|
||||
+This pattern, if defined, moves a @code{ptr_mode} value from an address
|
||||
+whose declaration RTX is given in operand 1 to the memory in operand 0
|
||||
+without leaving the value in a register afterward. If several
|
||||
+instructions are needed by the target to perform the operation (e.g.@: to
|
||||
+load the address from a GOT entry then load the @code{ptr_mode} value
|
||||
+and finally store it), it is the backend's responsibility to ensure no
|
||||
+intermediate result gets spilled. This is to avoid leaking the value
|
||||
+some place that an attacker might use to rewrite the stack guard slot
|
||||
+after having clobbered it.
|
||||
+
|
||||
+If this pattern is not defined, then the address declaration is
|
||||
+expanded first in the standard way and a @code{stack_protect_set}
|
||||
+pattern is then generated to move the value from that address to the
|
||||
+address in operand 0.
|
||||
+
|
||||
@cindex @code{stack_protect_set} instruction pattern
|
||||
@item @samp{stack_protect_set}
|
||||
-This pattern, if defined, moves a @code{ptr_mode} value from the memory
|
||||
-in operand 1 to the memory in operand 0 without leaving the value in
|
||||
-a register afterward. This is to avoid leaking the value some place
|
||||
-that an attacker might use to rewrite the stack guard slot after
|
||||
-having clobbered it.
|
||||
+This pattern, if defined, moves a @code{ptr_mode} value from the valid
|
||||
+memory location in operand 1 to the memory in operand 0 without leaving
|
||||
+the value in a register afterward. This is to avoid leaking the value
|
||||
+some place that an attacker might use to rewrite the stack guard slot
|
||||
+after having clobbered it.
|
||||
+
|
||||
+Note: on targets where the addressing modes do not allow loading
|
||||
+directly from stack guard address, the address is expanded in a standard
|
||||
+way first which could cause some spills.
|
||||
|
||||
If this pattern is not defined, then a plain move pattern is generated.
|
||||
|
||||
+@cindex @code{stack_protect_combined_test} instruction pattern
|
||||
+@item @samp{stack_protect_combined_test}
|
||||
+This pattern, if defined, compares a @code{ptr_mode} value from an
|
||||
+address whose declaration RTX is given in operand 1 with the memory in
|
||||
+operand 0 without leaving the value in a register afterward and
|
||||
+branches to operand 2 if the values were equal. If several
|
||||
+instructions are needed by the target to perform the operation (e.g.@: to
|
||||
+load the address from a GOT entry then load the @code{ptr_mode} value
|
||||
+and finally compare it), it is the backend's responsibility to ensure no
|
||||
+intermediate result gets spilled. This is to avoid leaking the value
|
||||
+some place that an attacker might use to rewrite the stack guard slot
|
||||
+after having clobbered it.
|
||||
+
|
||||
+If this pattern is not defined, then the address declaration is
|
||||
+expanded first in the standard way and a @code{stack_protect_test}
|
||||
+pattern is then generated to compare the value from that address to the
|
||||
+value at the memory in operand 0.
|
||||
+
|
||||
@cindex @code{stack_protect_test} instruction pattern
|
||||
@item @samp{stack_protect_test}
|
||||
This pattern, if defined, compares a @code{ptr_mode} value from the
|
||||
-memory in operand 1 with the memory in operand 0 without leaving the
|
||||
-value in a register afterward and branches to operand 2 if the values
|
||||
-were equal.
|
||||
+valid memory location in operand 1 with the memory in operand 0 without
|
||||
+leaving the value in a register afterward and branches to operand 2 if
|
||||
+the values were equal.
|
||||
|
||||
If this pattern is not defined, then a plain compare pattern and
|
||||
conditional branch pattern is used.
|
||||
diff -urpN a/gcc/function.c b/gcc/function.c
|
||||
--- a/gcc/function.c 2019-05-30 16:58:45.362508769 +0800
|
||||
+++ b/gcc/function.c 2019-05-30 11:53:14.071156599 +0800
|
||||
@@ -5065,18 +5065,34 @@ stack_protect_epilogue (void)
|
||||
tree guard_decl = targetm.stack_protect_guard ();
|
||||
rtx_code_label *label = gen_label_rtx ();
|
||||
rtx x, y;
|
||||
- rtx_insn *seq;
|
||||
+ rtx_insn *seq = NULL;
|
||||
|
||||
x = expand_normal (crtl->stack_protect_guard);
|
||||
- if (guard_decl)
|
||||
- y = expand_normal (guard_decl);
|
||||
+
|
||||
+ if (targetm.have_stack_protect_combined_test () && guard_decl)
|
||||
+ {
|
||||
+ gcc_assert (DECL_P (guard_decl));
|
||||
+ y = DECL_RTL (guard_decl);
|
||||
+ /* Allow the target to compute address of Y and compare it with X without
|
||||
+ leaking Y into a register. This combined address + compare pattern
|
||||
+ allows the target to prevent spilling of any intermediate results by
|
||||
+ splitting it after register allocation. */
|
||||
+ seq = targetm.gen_stack_protect_combined_test (x, y, label);
|
||||
+ }
|
||||
else
|
||||
- y = const0_rtx;
|
||||
+ {
|
||||
+ if (guard_decl)
|
||||
+ y = expand_normal (guard_decl);
|
||||
+ else
|
||||
+ y = const0_rtx;
|
||||
+
|
||||
+ /* Allow the target to compare Y with X without leaking either into
|
||||
+ a register. */
|
||||
+ if (targetm.have_stack_protect_test ())
|
||||
+ seq = targetm.gen_stack_protect_test (x, y, label);
|
||||
+ }
|
||||
|
||||
- /* Allow the target to compare Y with X without leaking either into
|
||||
- a register. */
|
||||
- if (targetm.have_stack_protect_test ()
|
||||
- && ((seq = targetm.gen_stack_protect_test (x, y, label)) != NULL_RTX))
|
||||
+ if (seq)
|
||||
emit_insn (seq);
|
||||
else
|
||||
emit_cmp_and_jump_insns (x, y, EQ, NULL_RTX, ptr_mode, 1, label);
|
||||
diff -urpN a/gcc/genpreds.c b/gcc/genpreds.c
|
||||
--- a/gcc/genpreds.c 2019-05-30 16:58:45.362508769 +0800
|
||||
+++ b/gcc/genpreds.c 2019-05-30 11:53:14.163156595 +0800
|
||||
@@ -1581,7 +1581,8 @@ write_insn_preds_c (void)
|
||||
#include \"reload.h\"\n\
|
||||
#include \"regs.h\"\n\
|
||||
#include \"emit-rtl.h\"\n\
|
||||
-#include \"tm-constrs.h\"\n");
|
||||
+#include \"tm-constrs.h\"\n\
|
||||
+#include \"target.h\"\n");
|
||||
|
||||
FOR_ALL_PREDICATES (p)
|
||||
write_one_predicate_function (p);
|
||||
diff -urpN a/gcc/target-insns.def b/gcc/target-insns.def
|
||||
--- a/gcc/target-insns.def 2019-05-30 16:58:45.362508769 +0800
|
||||
+++ b/gcc/target-insns.def 2019-05-30 11:52:58.495157149 +0800
|
||||
@@ -96,7 +96,9 @@ DEF_TARGET_INSN (sibcall_value, (rtx x0,
|
||||
DEF_TARGET_INSN (simple_return, (void))
|
||||
DEF_TARGET_INSN (split_stack_prologue, (void))
|
||||
DEF_TARGET_INSN (split_stack_space_check, (rtx x0, rtx x1))
|
||||
+DEF_TARGET_INSN (stack_protect_combined_set, (rtx x0, rtx x1))
|
||||
DEF_TARGET_INSN (stack_protect_set, (rtx x0, rtx x1))
|
||||
+DEF_TARGET_INSN (stack_protect_combined_test, (rtx x0, rtx x1, rtx x2))
|
||||
DEF_TARGET_INSN (stack_protect_test, (rtx x0, rtx x1, rtx x2))
|
||||
DEF_TARGET_INSN (store_multiple, (rtx x0, rtx x1, rtx x2))
|
||||
DEF_TARGET_INSN (tablejump, (rtx x0, rtx x1))
|
||||
@ -1,51 +0,0 @@
|
||||
diff -urpN a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
|
||||
--- a/gcc/config/rs6000/altivec.md 2018-01-15 01:47:30.483964000 +0800
|
||||
+++ b/gcc/config/rs6000/altivec.md 2019-09-09 00:01:25.770835633 +0800
|
||||
@@ -74,9 +74,6 @@
|
||||
UNSPEC_VUNPACK_LO_SIGN_DIRECT
|
||||
UNSPEC_VUPKHPX
|
||||
UNSPEC_VUPKLPX
|
||||
- UNSPEC_DARN
|
||||
- UNSPEC_DARN_32
|
||||
- UNSPEC_DARN_RAW
|
||||
UNSPEC_DST
|
||||
UNSPEC_DSTT
|
||||
UNSPEC_DSTST
|
||||
@@ -3770,21 +3767,21 @@
|
||||
|
||||
(define_insn "darn_32"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
- (unspec:SI [(const_int 0)] UNSPEC_DARN_32))]
|
||||
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_DARN_32))]
|
||||
"TARGET_P9_MISC"
|
||||
"darn %0,0"
|
||||
[(set_attr "type" "integer")])
|
||||
|
||||
(define_insn "darn_raw"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
- (unspec:DI [(const_int 0)] UNSPEC_DARN_RAW))]
|
||||
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_DARN_RAW))]
|
||||
"TARGET_P9_MISC && TARGET_64BIT"
|
||||
"darn %0,2"
|
||||
[(set_attr "type" "integer")])
|
||||
|
||||
(define_insn "darn"
|
||||
[(set (match_operand:DI 0 "register_operand" "=r")
|
||||
- (unspec:DI [(const_int 0)] UNSPEC_DARN))]
|
||||
+ (unspec_volatile:DI [(const_int 0)] UNSPECV_DARN))]
|
||||
"TARGET_P9_MISC && TARGET_64BIT"
|
||||
"darn %0,1"
|
||||
[(set_attr "type" "integer")])
|
||||
diff -urpN a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
|
||||
--- a/gcc/config/rs6000/rs6000.md 2018-01-21 21:32:58.843504000 +0800
|
||||
+++ b/gcc/config/rs6000/rs6000.md 2019-09-08 23:53:13.122859153 +0800
|
||||
@@ -163,6 +163,9 @@
|
||||
UNSPECV_EH_RR ; eh_reg_restore
|
||||
UNSPECV_ISYNC ; isync instruction
|
||||
UNSPECV_MFTB ; move from time base
|
||||
+ UNSPECV_DARN ; darn 1 (deliver a random number)
|
||||
+ UNSPECV_DARN_32 ; darn 0
|
||||
+ UNSPECV_DARN_RAW ; darn 2
|
||||
UNSPECV_NLGR ; non-local goto receiver
|
||||
UNSPECV_MFFS ; Move from FPSCR
|
||||
UNSPECV_MTFSF ; Move to FPSCR Fields
|
||||
@ -1,24 +0,0 @@
|
||||
diff -urpN a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||||
--- a/gcc/config/aarch64/aarch64.c 2018-10-09 11:49:19.000000000 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64.c 2018-10-09 13:42:15.000000000 +0800
|
||||
@@ -1619,7 +1619,7 @@ aarch64_load_symref_appropriately (rtx d
|
||||
case SYMBOL_SMALL_TLSDESC:
|
||||
{
|
||||
machine_mode mode = GET_MODE (dest);
|
||||
- rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
|
||||
+ rtx x0 = gen_rtx_REG (ptr_mode, R0_REGNUM);
|
||||
rtx tp;
|
||||
|
||||
gcc_assert (mode == Pmode || mode == ptr_mode);
|
||||
@@ -1635,6 +1635,11 @@ aarch64_load_symref_appropriately (rtx d
|
||||
if (mode != Pmode)
|
||||
tp = gen_lowpart (mode, tp);
|
||||
|
||||
+ if (mode != ptr_mode)
|
||||
+ {
|
||||
+ x0 = force_reg (mode, gen_rtx_SIGN_EXTEND (mode, x0));
|
||||
+ }
|
||||
+
|
||||
emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
|
||||
set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
|
||||
return;
|
||||
@ -1,31 +0,0 @@
|
||||
diff -urpN a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
--- a/gcc/config/aarch64/aarch64.md 2018-10-09 11:30:50.000000000 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64.md 2018-10-09 11:52:54.000000000 +0800
|
||||
@@ -857,6 +857,13 @@
|
||||
: !REG_P (callee))
|
||||
XEXP (operands[0], 0) = force_reg (Pmode, callee);
|
||||
|
||||
+ if (TARGET_ILP32
|
||||
+ && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
|
||||
+ && GET_MODE (XEXP (operands[0], 0)) == SImode)
|
||||
+ XEXP (operands[0], 0) = convert_memory_address (DImode,
|
||||
+ XEXP (operands[0], 0));
|
||||
+
|
||||
+
|
||||
if (operands[2] == NULL_RTX)
|
||||
operands[2] = const0_rtx;
|
||||
|
||||
@@ -889,6 +896,13 @@
|
||||
: !REG_P (callee))
|
||||
XEXP (operands[1], 0) = force_reg (Pmode, callee);
|
||||
|
||||
+ if (TARGET_ILP32
|
||||
+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
|
||||
+ && GET_MODE (XEXP (operands[1], 0)) == SImode)
|
||||
+ XEXP (operands[1], 0) = convert_memory_address (DImode,
|
||||
+ XEXP (operands[1], 0));
|
||||
+
|
||||
+
|
||||
if (operands[3] == NULL_RTX)
|
||||
operands[3] = const0_rtx;
|
||||
|
||||
@ -1,780 +0,0 @@
|
||||
diff -urpN a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||||
--- a/gcc/config/aarch64/aarch64.c 2019-04-15 14:50:25.866378665 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64.c 2019-04-15 14:49:21.986376983 +0800
|
||||
@@ -554,6 +554,31 @@ static const struct tune_params generic_
|
||||
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
|
||||
};
|
||||
|
||||
+static const struct tune_params tsv110_tunings =
|
||||
+{
|
||||
+ &cortexa57_extra_costs,
|
||||
+ &generic_addrcost_table,
|
||||
+ &generic_regmove_cost,
|
||||
+ &generic_vector_cost,
|
||||
+ &generic_branch_cost,
|
||||
+ &generic_approx_modes,
|
||||
+ 4, /* memmov_cost */
|
||||
+ 4, /* issue_rate */
|
||||
+ AARCH64_FUSE_NOTHING, /* fusible_ops */
|
||||
+ 16, /* function_align. */
|
||||
+ 16, /* jump_align. */
|
||||
+ 8, /* loop_align. */
|
||||
+ 2, /* int_reassoc_width. */
|
||||
+ 4, /* fp_reassoc_width. */
|
||||
+ 1, /* vec_reassoc_width. */
|
||||
+ 2, /* min_div_recip_mul_sf. */
|
||||
+ 2, /* min_div_recip_mul_df. */
|
||||
+ 0, /* max_case_values. */
|
||||
+ 0, /* cache_line_size. */
|
||||
+ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
|
||||
+ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
|
||||
+};
|
||||
+
|
||||
static const struct tune_params cortexa35_tunings =
|
||||
{
|
||||
&cortexa53_extra_costs,
|
||||
diff -urpN a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
|
||||
--- a/gcc/config/aarch64/aarch64-cores.def 2017-02-15 08:09:28.845771000 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64-cores.def 2019-04-15 14:49:21.986376983 +0800
|
||||
@@ -78,6 +78,8 @@ AARCH64_CORE("xgene1", xgene1, x
|
||||
AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
|
||||
AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
|
||||
|
||||
+AARCH64_CORE("tsv110", tsv110, tsv110, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, tsv110, 0x48, 0xd01, -1)
|
||||
+
|
||||
/* V8 big.LITTLE implementations. */
|
||||
|
||||
AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
|
||||
diff -urpN a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
--- a/gcc/config/aarch64/aarch64.md 2019-04-15 14:50:25.870378665 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64.md 2019-04-15 14:49:21.986376983 +0800
|
||||
@@ -226,6 +226,7 @@
|
||||
(include "thunderx.md")
|
||||
(include "../arm/xgene1.md")
|
||||
(include "thunderx2t99.md")
|
||||
+(include "tsv110.md")
|
||||
|
||||
;; -------------------------------------------------------------------
|
||||
;; Jumps and other miscellaneous insns
|
||||
diff -urpN a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
|
||||
--- a/gcc/config/aarch64/aarch64-tune.md 2017-02-15 08:09:28.845771000 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64-tune.md 2019-04-15 14:49:21.986376983 +0800
|
||||
@@ -1,5 +1,5 @@
|
||||
;; -*- buffer-read-only: t -*-
|
||||
;; Generated automatically by gentune.sh from aarch64-cores.def
|
||||
(define_attr "tune"
|
||||
- "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
|
||||
+ "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,tsv110,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
|
||||
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
|
||||
diff -urpN a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
|
||||
--- a/gcc/config/aarch64/tsv110.md 1970-01-01 08:00:00.000000000 +0800
|
||||
+++ b/gcc/config/aarch64/tsv110.md 2019-04-15 14:55:30.420081420 +0800
|
||||
@@ -0,0 +1,708 @@
|
||||
+;; tsv110 pipeline description
|
||||
+;; Copyright (C) 2018 Free Software Foundation, Inc.
|
||||
+;;
|
||||
+;; This file is part of GCC.
|
||||
+;;
|
||||
+;; GCC is free software; you can redistribute it and/or modify it
|
||||
+;; under the terms of the GNU General Public License as published by
|
||||
+;; the Free Software Foundation; either version 3, or (at your option)
|
||||
+;; any later version.
|
||||
+;;
|
||||
+;; GCC is distributed in the hope that it will be useful, but
|
||||
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+;; General Public License for more details.
|
||||
+;;
|
||||
+;; You should have received a copy of the GNU General Public License
|
||||
+;; along with GCC; see the file COPYING3. If not see
|
||||
+;; <http://www.gnu.org/licenses/>.
|
||||
+
|
||||
+(define_automaton "tsv110")
|
||||
+
|
||||
+(define_attr "tsv110_neon_type"
|
||||
+ "neon_arith_acc, neon_arith_acc_q,
|
||||
+ neon_arith_basic, neon_arith_complex,
|
||||
+ neon_reduc_add_acc, neon_multiply, neon_multiply_q,
|
||||
+ neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
|
||||
+ neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
|
||||
+ neon_shift_imm_complex,
|
||||
+ neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
|
||||
+ neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
|
||||
+ neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
|
||||
+ neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
|
||||
+ neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
|
||||
+ neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
|
||||
+ neon_bitops, neon_bitops_q, neon_from_gp,
|
||||
+ neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
|
||||
+ neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
|
||||
+ neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
|
||||
+ unknown"
|
||||
+ (cond [
|
||||
+ (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
|
||||
+ neon_reduc_add_acc_q")
|
||||
+ (const_string "neon_arith_acc")
|
||||
+ (eq_attr "type" "neon_arith_acc_q")
|
||||
+ (const_string "neon_arith_acc_q")
|
||||
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add, neon_add_q, neon_add_long,\
|
||||
+ neon_add_widen, neon_neg, neon_neg_q,\
|
||||
+ neon_reduc_add, neon_reduc_add_q,\
|
||||
+ neon_reduc_add_long, neon_sub, neon_sub_q,\
|
||||
+ neon_sub_long, neon_sub_widen, neon_logic,\
|
||||
+ neon_logic_q, neon_tst, neon_tst_q,\
|
||||
+ neon_compare, neon_compare_q,\
|
||||
+ neon_compare_zero, neon_compare_zero_q,\
|
||||
+ neon_minmax, neon_minmax_q, neon_reduc_minmax,\
|
||||
+ neon_reduc_minmax_q")
|
||||
+ (const_string "neon_arith_basic")
|
||||
+ (eq_attr "type" "neon_add_halve_narrow_q,\
|
||||
+ neon_add_halve, neon_add_halve_q,\
|
||||
+ neon_sub_halve, neon_sub_halve_q, neon_qabs,\
|
||||
+ neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
|
||||
+ neon_qneg_q, neon_qsub, neon_qsub_q,\
|
||||
+ neon_sub_halve_narrow_q")
|
||||
+ (const_string "neon_arith_complex")
|
||||
+
|
||||
+ (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
|
||||
+ neon_mul_h_scalar, neon_mul_s_scalar,\
|
||||
+ neon_sat_mul_b, neon_sat_mul_h,\
|
||||
+ neon_sat_mul_s, neon_sat_mul_h_scalar,\
|
||||
+ neon_sat_mul_s_scalar,\
|
||||
+ neon_mul_b_long, neon_mul_h_long,\
|
||||
+ neon_mul_s_long,\
|
||||
+ neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
|
||||
+ neon_sat_mul_b_long, neon_sat_mul_h_long,\
|
||||
+ neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
|
||||
+ neon_sat_mul_s_scalar_long,\
|
||||
+ neon_mla_b, neon_mla_h, neon_mla_s,\
|
||||
+ neon_mla_h_scalar, neon_mla_s_scalar,\
|
||||
+ neon_mla_b_long, neon_mla_h_long,\
|
||||
+ neon_mla_s_long,\
|
||||
+ neon_mla_h_scalar_long, neon_mla_s_scalar_long,\
|
||||
+ neon_sat_mla_b_long, neon_sat_mla_h_long,\
|
||||
+ neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
|
||||
+ neon_sat_mla_s_scalar_long")
|
||||
+ (const_string "neon_multiply")
|
||||
+ (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
|
||||
+ neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
|
||||
+ neon_sat_mul_b_q, neon_sat_mul_h_q,\
|
||||
+ neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
|
||||
+ neon_sat_mul_s_scalar_q,\
|
||||
+ neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
|
||||
+ neon_mla_h_scalar_q, neon_mla_s_scalar_q")
|
||||
+ (const_string "neon_multiply_q")
|
||||
+
|
||||
+ (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
|
||||
+ (const_string "neon_shift_acc")
|
||||
+ (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
|
||||
+ neon_shift_imm_narrow_q, neon_shift_imm_long")
|
||||
+ (const_string "neon_shift_imm_basic")
|
||||
+ (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
|
||||
+ neon_sat_shift_imm_narrow_q")
|
||||
+ (const_string "neon_shift_imm_complex")
|
||||
+ (eq_attr "type" "neon_shift_reg")
|
||||
+ (const_string "neon_shift_reg_basic")
|
||||
+ (eq_attr "type" "neon_shift_reg_q")
|
||||
+ (const_string "neon_shift_reg_basic_q")
|
||||
+ (eq_attr "type" "neon_sat_shift_reg")
|
||||
+ (const_string "neon_shift_reg_complex")
|
||||
+ (eq_attr "type" "neon_sat_shift_reg_q")
|
||||
+ (const_string "neon_shift_reg_complex_q")
|
||||
+
|
||||
+ (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
|
||||
+ neon_fp_abs_s, neon_fp_abs_s_q,\
|
||||
+ neon_fp_neg_d, neon_fp_neg_d_q,\
|
||||
+ neon_fp_abs_d, neon_fp_abs_d_q,\
|
||||
+ neon_fp_minmax_s,neon_fp_minmax_d,\
|
||||
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d")
|
||||
+ (const_string "neon_fp_negabs")
|
||||
+ (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
|
||||
+ neon_fp_reduc_add_s, neon_fp_compare_s,\
|
||||
+ neon_fp_round_s,\
|
||||
+ neon_fp_addsub_d, neon_fp_abd_d,\
|
||||
+ neon_fp_reduc_add_d, neon_fp_compare_d,\
|
||||
+ neon_fp_round_d")
|
||||
+ (const_string "neon_fp_arith")
|
||||
+ (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
|
||||
+ neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
|
||||
+ neon_fp_minmax_s_q, neon_fp_round_s_q,\
|
||||
+ neon_fp_addsub_d_q, neon_fp_abd_d_q,\
|
||||
+ neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
|
||||
+ neon_fp_minmax_d_q, neon_fp_round_d_q")
|
||||
+ (const_string "neon_fp_arith_q")
|
||||
+ (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
|
||||
+ neon_fp_reduc_minmax_d_q,\
|
||||
+ neon_fp_reduc_add_s_q, neon_fp_reduc_add_d_q")
|
||||
+ (const_string "neon_fp_reductions_q")
|
||||
+ (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
|
||||
+ neon_fp_to_int_d, neon_int_to_fp_d")
|
||||
+ (const_string "neon_fp_cvt_int")
|
||||
+ (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
|
||||
+ neon_fp_to_int_d_q, neon_int_to_fp_d_q")
|
||||
+ (const_string "neon_fp_cvt_int_q")
|
||||
+ (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
|
||||
+ (const_string "neon_fp_cvt16")
|
||||
+ (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
|
||||
+ neon_fp_mul_d")
|
||||
+ (const_string "neon_fp_mul")
|
||||
+ (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
|
||||
+ neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
|
||||
+ (const_string "neon_fp_mul_q")
|
||||
+ (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
|
||||
+ neon_fp_mla_d")
|
||||
+ (const_string "neon_fp_mla")
|
||||
+ (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,
|
||||
+ neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
|
||||
+ (const_string "neon_fp_mla_q")
|
||||
+ (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
|
||||
+ neon_fp_recpx_s,\
|
||||
+ neon_fp_recpe_d, neon_fp_rsqrte_d,\
|
||||
+ neon_fp_recpx_d")
|
||||
+ (const_string "neon_fp_recpe_rsqrte")
|
||||
+ (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
|
||||
+ neon_fp_recpx_s_q,\
|
||||
+ neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
|
||||
+ neon_fp_recpx_d_q")
|
||||
+ (const_string "neon_fp_recpe_rsqrte_q")
|
||||
+ (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
|
||||
+ neon_fp_recps_d, neon_fp_rsqrts_d")
|
||||
+ (const_string "neon_fp_recps_rsqrts")
|
||||
+ (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
|
||||
+ neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
|
||||
+ (const_string "neon_fp_recps_rsqrts_q")
|
||||
+ (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
|
||||
+ neon_rev, neon_permute, neon_rbit,\
|
||||
+ neon_tbl1, neon_tbl2, neon_zip,\
|
||||
+ neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
|
||||
+ neon_move, neon_move_q, neon_move_narrow_q")
|
||||
+ (const_string "neon_bitops")
|
||||
+ (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
|
||||
+ neon_rev_q, neon_permute_q, neon_rbit_q")
|
||||
+ (const_string "neon_bitops_q")
|
||||
+ (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
|
||||
+ (const_string "neon_from_gp")
|
||||
+ (eq_attr "type" "neon_from_gp_q")
|
||||
+ (const_string "neon_from_gp_q")
|
||||
+
|
||||
+ (eq_attr "type" "f_loads, f_loadd,\
|
||||
+ neon_load1_1reg, neon_load1_1reg_q,\
|
||||
+ neon_load1_2reg, neon_load1_2reg_q")
|
||||
+ (const_string "neon_load_a")
|
||||
+ (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
|
||||
+ neon_load1_4reg, neon_load1_4reg_q")
|
||||
+ (const_string "neon_load_b")
|
||||
+ (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
|
||||
+ neon_load1_all_lanes, neon_load1_all_lanes_q,\
|
||||
+ neon_load2_2reg, neon_load2_2reg_q,\
|
||||
+ neon_load2_all_lanes, neon_load2_all_lanes_q")
|
||||
+ (const_string "neon_load_c")
|
||||
+ (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
|
||||
+ neon_load3_3reg, neon_load3_3reg_q,\
|
||||
+ neon_load3_one_lane, neon_load3_one_lane_q,\
|
||||
+ neon_load4_4reg, neon_load4_4reg_q")
|
||||
+ (const_string "neon_load_d")
|
||||
+ (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
|
||||
+ neon_load3_all_lanes, neon_load3_all_lanes_q,\
|
||||
+ neon_load4_all_lanes, neon_load4_all_lanes_q")
|
||||
+ (const_string "neon_load_e")
|
||||
+ (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
|
||||
+ (const_string "neon_load_f")
|
||||
+
|
||||
+ (eq_attr "type" "f_stores, f_stored,\
|
||||
+ neon_store1_1reg")
|
||||
+ (const_string "neon_store_a")
|
||||
+ (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
|
||||
+ (const_string "neon_store_b")
|
||||
+ (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
|
||||
+ neon_store3_3reg, neon_store3_3reg_q,\
|
||||
+ neon_store2_4reg, neon_store2_4reg_q,\
|
||||
+ neon_store4_4reg, neon_store4_4reg_q,\
|
||||
+ neon_store2_2reg, neon_store2_2reg_q,\
|
||||
+ neon_store3_one_lane, neon_store3_one_lane_q,\
|
||||
+ neon_store4_one_lane, neon_store4_one_lane_q,\
|
||||
+ neon_store1_4reg, neon_store1_4reg_q,\
|
||||
+ neon_store1_one_lane, neon_store1_one_lane_q,\
|
||||
+ neon_store2_one_lane, neon_store2_one_lane_q")
|
||||
+ (const_string "neon_store_complex")]
|
||||
+ (const_string "unknown")))
|
||||
+
|
||||
+;; The tsv110 core is modelled as a multiple-issue pipeline that has
|
||||
+;; the following functional units.
|
||||
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
|
||||
+
|
||||
+(define_cpu_unit "tsv110_alu1_issue" "tsv110")
|
||||
+(define_reservation "tsv110_alu1" "tsv110_alu1_issue")
|
||||
+
|
||||
+(define_cpu_unit "tsv110_alu2_issue" "tsv110")
|
||||
+(define_reservation "tsv110_alu2" "tsv110_alu2_issue")
|
||||
+
|
||||
+(define_cpu_unit "tsv110_alu3_issue" "tsv110")
|
||||
+(define_reservation "tsv110_alu3" "tsv110_alu3_issue")
|
||||
+
|
||||
+;; 2. One pipeline for complex integer operations: MDU
|
||||
+
|
||||
+(define_cpu_unit "tsv110_mdu_issue" "tsv110")
|
||||
+(define_reservation "tsv110_mdu" "tsv110_mdu_issue")
|
||||
+
|
||||
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
|
||||
+(define_automaton "tsv110_fsu")
|
||||
+
|
||||
+(define_cpu_unit "tsv110_fsu1_issue"
|
||||
+ "tsv110_fsu")
|
||||
+(define_cpu_unit "tsv110_fsu2_issue"
|
||||
+ "tsv110_fsu")
|
||||
+
|
||||
+(define_reservation "tsv110_fsu1" "tsv110_fsu1_issue")
|
||||
+(define_reservation "tsv110_fsu2" "tsv110_fsu2_issue")
|
||||
+
|
||||
+;; 4. Two pipelines for branch operations, shared with alu2 and alu3: BRU1, BRU2
|
||||
+
|
||||
+;; 5. Two pipelines for load and store operations: LS1, LS2.
|
||||
+
|
||||
+(define_cpu_unit "tsv110_ls1_issue" "tsv110")
|
||||
+(define_cpu_unit "tsv110_ls2_issue" "tsv110")
|
||||
+(define_reservation "tsv110_ls1" "tsv110_ls1_issue")
|
||||
+(define_reservation "tsv110_ls2" "tsv110_ls2_issue")
|
||||
+
|
||||
+;; Block all issue queues.
|
||||
+
|
||||
+(define_reservation "tsv110_block" "tsv110_fsu1_issue + tsv110_fsu2_issue
|
||||
+ + tsv110_mdu_issue + tsv110_alu1_issue
|
||||
+ + tsv110_alu2_issue + tsv110_alu3_issue + tsv110_ls1_issue + tsv110_ls2_issue")
|
||||
+
|
||||
+;; Simple Execution Unit:
|
||||
+;;
|
||||
+;; Simple ALU without shift
|
||||
+(define_insn_reservation "tsv110_alu" 1
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "alu_imm,logic_imm,\
|
||||
+ alu_sreg,logic_reg,\
|
||||
+ adc_imm,adc_reg,\
|
||||
+ adr,bfm,clz,rbit,rev,\
|
||||
+ shift_imm,shift_reg,\
|
||||
+ mov_imm,mov_reg,\
|
||||
+ mvn_imm,mvn_reg,\
|
||||
+ mrs,multiple,no_insn"))
|
||||
+ "tsv110_alu1|tsv110_alu2|tsv110_alu3")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_alus" 1
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "alus_imm,logics_imm,\
|
||||
+ alus_sreg,logics_reg,\
|
||||
+ adcs_imm,adcs_reg"))
|
||||
+ "tsv110_alu2|tsv110_alu3")
|
||||
+
|
||||
+;; ALU ops with shift
|
||||
+(define_insn_reservation "tsv110_alu_shift" 2
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "extend,\
|
||||
+ alu_shift_imm,alu_shift_reg,\
|
||||
+ crc,logic_shift_imm,logic_shift_reg,\
|
||||
+ mov_shift,mvn_shift,\
|
||||
+ mov_shift_reg,mvn_shift_reg"))
|
||||
+ "tsv110_mdu")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_alus_shift" 2
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
|
||||
+ logics_shift_imm,logics_shift_reg"))
|
||||
+ "tsv110_alu2|tsv110_alu3")
|
||||
+
|
||||
+;; Multiply instructions
|
||||
+(define_insn_reservation "tsv110_mult" 3
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (ior (eq_attr "mul32" "yes")
|
||||
+ (eq_attr "mul64" "yes")))
|
||||
+ "tsv110_mdu")
|
||||
+
|
||||
+;; Integer divide
|
||||
+(define_insn_reservation "tsv110_div" 10
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "udiv,sdiv"))
|
||||
+ "tsv110_mdu")
|
||||
+
|
||||
+;; Block all issue pipes for a cycle
|
||||
+(define_insn_reservation "tsv110_block" 1
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "block"))
|
||||
+ "tsv110_block")
|
||||
+
|
||||
+;; Branch execution Unit
|
||||
+;;
|
||||
+;; Branches take one issue slot, on either alu2 or alu3.
|
||||
+;; No latency as there is no result
|
||||
+(define_insn_reservation "tsv110_branch" 0
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "branch"))
|
||||
+ "tsv110_alu2|tsv110_alu3")
|
||||
+
|
||||
+;; Load-store execution Unit
|
||||
+;;
|
||||
+;; Loads of up to two words.
|
||||
+(define_insn_reservation "tsv110_load1" 4
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "load1,load2"))
|
||||
+ "tsv110_ls1|tsv110_ls2")
|
||||
+
|
||||
+;; Stores of up to two words.
|
||||
+(define_insn_reservation "tsv110_store1" 0
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "store1,store2"))
|
||||
+ "tsv110_ls1|tsv110_ls2")
|
||||
+
|
||||
+;; Advanced SIMD Unit - Integer Arithmetic Instructions.
|
||||
+
|
||||
+(define_insn_reservation "tsv110_neon_abd_aba" 4
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "neon_abd,neon_arith_acc"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_neon_abd_aba_q" 4
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "neon_arith_acc_q"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_neon_arith_basic" 2
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_arith_basic"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_neon_arith_complex" 4
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_arith_complex"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+;; Integer Multiply Instructions.
|
||||
+;; D-form
|
||||
+(define_insn_reservation "tsv110_neon_multiply" 4
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_multiply"))
|
||||
+ "tsv110_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_neon_multiply_dlong" 2
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "neon_mul_d_long"))
|
||||
+ "tsv110_fsu1")
|
||||
+
|
||||
+;; Q-form
|
||||
+(define_insn_reservation "tsv110_neon_multiply_q" 8
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_multiply_q"))
|
||||
+ "tsv110_fsu1")
|
||||
+
|
||||
+;; Integer Shift Instructions.
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_shift_acc" 4
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_shift_acc,\
|
||||
+ neon_shift_imm_basic,neon_shift_imm_complex,neon_shift_reg_basic,\
|
||||
+ neon_shift_reg_complex"))
|
||||
+ "tsv110_fsu1")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_shift_acc_q" 4
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_shift_reg_basic_q,\
|
||||
+ neon_shift_reg_complex_q"))
|
||||
+ "tsv110_fsu1")
|
||||
+
|
||||
+;; Floating Point Instructions.
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_fp_negabs" 2
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_fp_negabs"))
|
||||
+ "(tsv110_fsu1|tsv110_fsu2)")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_fp_arith" 4
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_fp_arith"))
|
||||
+ "(tsv110_fsu1|tsv110_fsu2)")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_fp_arith_q" 4
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_fp_arith_q"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_fp_minmax_q" 2
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "neon_fp_minmax_s_q,neon_fp_minmax_d_q"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_fp_reductions_q" 4
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_fp_reductions_q"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_fp_cvt_int" 2
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_fp_cvt_int,neon_fp_cvt_int_q"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_fp_mul" 5
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_fp_mul"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_fp_mul_q" 5
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_fp_mul_q"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_fp_mla" 7
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_fp_mla,\
|
||||
+ neon_fp_recps_rsqrts"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_fp_recpe_rsqrte" 3
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_fp_mla_q" 7
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_fp_mla_q,\
|
||||
+ neon_fp_recps_rsqrts_q"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_fp_recpe_rsqrte_q" 3
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_fp_recpe_rsqrte_q"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+;; Miscellaneous Instructions.
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_bitops" 2
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_bitops"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_dup" 2
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "neon_from_gp,f_mcr"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_mov" 2
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "f_mcrr"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_bitops_q" 2
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_bitops_q"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_from_gp_q" 4
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_from_gp_q"))
|
||||
+ "(tsv110_alu1+tsv110_fsu1)|(tsv110_alu1+tsv110_fsu2)")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_to_gp" 3
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
|
||||
+ "tsv110_fsu1")
|
||||
+
|
||||
+;; Load Instructions.
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_ld1_lane" 8
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
|
||||
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
|
||||
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_ld1_reg1" 6
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "f_loads,f_loadd,neon_load1_1reg,neon_load1_1reg_q"))
|
||||
+ "tsv110_ls1|tsv110_ls2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_ld1_reg2" 6
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
|
||||
+ "tsv110_ls1|tsv110_ls2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_ld1_reg3" 7
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
|
||||
+ "tsv110_ls1|tsv110_ls2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_ld1_reg4" 7
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
|
||||
+ "tsv110_ls1|tsv110_ls2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_ld2" 8
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q,\
|
||||
+ neon_load2_2reg,neon_load2_2reg_q,neon_load2_all_lanes,\
|
||||
+ neon_load2_all_lanes_q,neon_load2_one_lane,neon_load2_one_lane_q"))
|
||||
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_ld3" 9
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
|
||||
+ neon_load3_one_lane,neon_load3_one_lane_q,\
|
||||
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
|
||||
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_ld4_lane" 9
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "neon_load4_all_lanes,neon_load4_all_lanes_q,\
|
||||
+ neon_load4_one_lane,neon_load4_one_lane_q"))
|
||||
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
|
||||
+
|
||||
+(define_insn_reservation ; whole-register LD4 forms; lane forms are matched by tsv110_neon_ld4_lane
|
||||
+ "tsv110_neon_ld4_reg" 11
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "neon_load4_4reg,\
|
||||
+ neon_load4_4reg_q"))
|
||||
+ "(tsv110_ls1 + tsv110_fsu1)|(tsv110_ls1 + tsv110_fsu2)|(tsv110_ls2 + tsv110_fsu1)|(tsv110_ls2 + tsv110_fsu2)")
|
||||
+
|
||||
+;; Store Instructions.
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_store_a" 0
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_store_a"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_store_b" 0
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_store_b"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+;; These block issue for a number of cycles proportional to the number
|
||||
+;; of 64-bit chunks they will store.  We don't attempt to model that
|
||||
+;; precisely; we treat them as blocking execution for two cycles when
|
||||
+;; issued.
|
||||
+(define_insn_reservation
|
||||
+ "tsv110_neon_store_complex" 0
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "tsv110_neon_type" "neon_store_complex"))
|
||||
+ "tsv110_block*2")
|
||||
+
|
||||
+;; Floating-Point Operations.
|
||||
+
|
||||
+(define_insn_reservation "tsv110_fp_const" 2
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "fconsts,fconstd,fmov"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_fp_add_sub" 5
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_fp_mac" 7
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_fp_cvt" 3
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "f_cvt"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_fp_cvtf2i" 4
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "f_cvtf2i"))
|
||||
+ "tsv110_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_fp_cvti2f" 5
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "f_cvti2f"))
|
||||
+ "(tsv110_alu1+tsv110_fsu1)|(tsv110_alu1+tsv110_fsu2)")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_fp_cmp" 4
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "fcmps,fcmpd"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_fp_arith" 2
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "ffariths,ffarithd"))
|
||||
+ "tsv110_fsu1|tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_fp_divs" 12
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "fdivs,neon_fp_div_s,fdivd,neon_fp_div_d,\
|
||||
+ neon_fp_div_s_q,neon_fp_div_d_q"))
|
||||
+ "tsv110_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_fp_sqrts" 24
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "fsqrts,neon_fp_sqrt_s,fsqrtd,neon_fp_sqrt_d,\
|
||||
+ neon_fp_sqrt_s_q,neon_fp_sqrt_d_q"))
|
||||
+ "tsv110_fsu2")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_crypto_aes" 3
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
|
||||
+ "tsv110_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_crypto_sha1_fast" 2
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor"))
|
||||
+ "(tsv110_fsu1|tsv110_fsu2)")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_crypto_sha256_fast" 2
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "crypto_sha256_fast"))
|
||||
+ "tsv110_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "tsv110_crypto_complex" 5
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
|
||||
+ "tsv110_fsu1")
|
||||
+
|
||||
+;; We lie with calls. They take up all issue slots, but are otherwise
|
||||
+;; not harmful.
|
||||
+(define_insn_reservation "tsv110_call" 1
|
||||
+ (and (eq_attr "tune" "tsv110")
|
||||
+ (eq_attr "type" "call"))
|
||||
+ "tsv110_alu1_issue+tsv110_alu2_issue+tsv110_alu3_issue+tsv110_fsu1_issue+tsv110_fsu2_issue\
|
||||
+ +tsv110_mdu_issue+tsv110_ls1_issue+tsv110_ls2_issue"
|
||||
+)
|
||||
+
|
||||
+;; Simple execution unit bypasses
|
||||
+(define_bypass 1 "tsv110_alu"
|
||||
+ "tsv110_alu,tsv110_alu_shift")
|
||||
+(define_bypass 2 "tsv110_alu_shift"
|
||||
+ "tsv110_alu,tsv110_alu_shift")
|
||||
+
|
||||
+;; An MLA or a MUL can feed a dependent MLA.
|
||||
+(define_bypass 3 "tsv110_neon_*mla*,tsv110_neon_*mul*"
|
||||
+ "tsv110_neon_*mla*")
|
||||
+
|
||||
+;; We don't need to care about control hazards, either the branch is
|
||||
+;; predicted in which case we pay no penalty, or the branch is
|
||||
+;; mispredicted in which case instruction scheduling will be unlikely to
|
||||
+;; help.
|
||||
+(define_bypass 1 "tsv110_*"
|
||||
+ "tsv110_call,tsv110_branch")
|
||||
57
address-calculation-optimization-within-loop.patch
Normal file
57
address-calculation-optimization-within-loop.patch
Normal file
@ -0,0 +1,57 @@
|
||||
diff -Nurp a/gcc/testsuite/gcc.dg/pr94269.c b/gcc/testsuite/gcc.dg/pr94269.c
|
||||
--- a/gcc/testsuite/gcc.dg/pr94269.c 1970-01-01 08:00:00.000000000 +0800
|
||||
+++ b/gcc/testsuite/gcc.dg/pr94269.c 2020-04-17 17:04:50.608000000 +0800
|
||||
@@ -0,0 +1,26 @@
|
||||
+/* { dg-do compile { target aarch64*-*-* } } */
|
||||
+/* { dg-options "-O2 -ftree-loop-vectorize -funsafe-math-optimizations -march=armv8.2-a+sve -msve-vector-bits=256" } */
|
||||
+
|
||||
+float
|
||||
+foo(long n, float *x, int inc_x,
|
||||
+ float *y, int inc_y)
|
||||
+{
|
||||
+ float dot = 0.0;
|
||||
+ int ix = 0, iy = 0;
|
||||
+
|
||||
+ if (n < 0) {
|
||||
+ return dot;
|
||||
+ }
|
||||
+
|
||||
+ int i = 0;
|
||||
+ while (i < n) {
|
||||
+ dot += y[iy] * x[ix];
|
||||
+ ix += inc_x;
|
||||
+ iy += inc_y;
|
||||
+ i++;
|
||||
+ }
|
||||
+
|
||||
+ return dot;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler-not "smaddl" { target aarch64*-*-* } } } */
|
||||
diff -Nurp a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
|
||||
--- a/gcc/tree-ssa-math-opts.c 2020-04-17 16:43:59.540000000 +0800
|
||||
+++ b/gcc/tree-ssa-math-opts.c 2020-04-17 16:48:34.072036000 +0800
|
||||
@@ -2721,11 +2721,14 @@ convert_plusminus_to_widen (gimple_stmt_
|
||||
multiply-and-accumulate instructions.
|
||||
|
||||
If the widened-multiplication result has more than one uses, it is
|
||||
- probably wiser not to do the conversion. */
|
||||
+ probably wiser not to do the conversion. Also restrict this operation
|
||||
+ to single basic block to avoid moving the multiply to a different block
|
||||
+ with a higher execution frequency. */
|
||||
if (code == PLUS_EXPR
|
||||
&& (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
|
||||
{
|
||||
if (!has_single_use (rhs1)
|
||||
+ || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
|
||||
|| !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
|
||||
&type2, &mult_rhs2))
|
||||
return false;
|
||||
@@ -2735,6 +2738,7 @@ convert_plusminus_to_widen (gimple_stmt_
|
||||
else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
|
||||
{
|
||||
if (!has_single_use (rhs2)
|
||||
+ || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
|
||||
|| !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
|
||||
&type2, &mult_rhs2))
|
||||
return false;
|
||||
@ -1,60 +0,0 @@
|
||||
diff -urp a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
|
||||
--- a/gcc/config/arm/arm.c 2019-01-18 11:25:20.840179114 +0800
|
||||
+++ b/gcc/config/arm/arm.c 2019-01-18 11:25:47.548179817 +0800
|
||||
@@ -14306,18 +14306,36 @@ gen_movmem_ldrd_strd (rtx *operands)
|
||||
emit_move_insn (reg0, src);
|
||||
else
|
||||
{
|
||||
- emit_insn (gen_unaligned_loadsi (low_reg, src));
|
||||
- src = next_consecutive_mem (src);
|
||||
- emit_insn (gen_unaligned_loadsi (hi_reg, src));
|
||||
+ if (flag_lsrd_be_adjust && BYTES_BIG_ENDIAN && WORDS_BIG_ENDIAN)
|
||||
+ {
|
||||
+ emit_insn (gen_unaligned_loadsi (hi_reg, src));
|
||||
+ src = next_consecutive_mem (src);
|
||||
+ emit_insn (gen_unaligned_loadsi (low_reg, src));
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ emit_insn (gen_unaligned_loadsi (low_reg, src));
|
||||
+ src = next_consecutive_mem (src);
|
||||
+ emit_insn (gen_unaligned_loadsi (hi_reg, src));
|
||||
+ }
|
||||
}
|
||||
|
||||
if (dst_aligned)
|
||||
emit_move_insn (dst, reg0);
|
||||
else
|
||||
{
|
||||
- emit_insn (gen_unaligned_storesi (dst, low_reg));
|
||||
- dst = next_consecutive_mem (dst);
|
||||
- emit_insn (gen_unaligned_storesi (dst, hi_reg));
|
||||
+ if (flag_lsrd_be_adjust && BYTES_BIG_ENDIAN && WORDS_BIG_ENDIAN)
|
||||
+ {
|
||||
+ emit_insn (gen_unaligned_storesi (dst, hi_reg));
|
||||
+ dst = next_consecutive_mem (dst);
|
||||
+ emit_insn (gen_unaligned_storesi (dst, low_reg));
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ emit_insn (gen_unaligned_storesi (dst, low_reg));
|
||||
+ dst = next_consecutive_mem (dst);
|
||||
+ emit_insn (gen_unaligned_storesi (dst, hi_reg));
|
||||
+ }
|
||||
}
|
||||
|
||||
src = next_consecutive_mem (src);
|
||||
diff -urp a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
|
||||
--- a/gcc/config/arm/arm.opt 2019-01-18 11:25:20.840179114 +0800
|
||||
+++ b/gcc/config/arm/arm.opt 2019-01-18 11:28:51.744184666 +0800
|
||||
@@ -274,6 +274,10 @@ masm-syntax-unified
|
||||
Target Report Var(inline_asm_unified) Init(0) Save
|
||||
Assume unified syntax for inline assembly code.
|
||||
|
||||
+mlsrd-be-adjust
|
||||
+Target Report Var(flag_lsrd_be_adjust) Init(1)
|
||||
+Adjust ldrd/strd splitting order when it's big-endian.
|
||||
+
|
||||
mpure-code
|
||||
Target Report Var(target_pure_code) Init(0)
|
||||
Do not allow constant data to be placed in code sections.
|
||||
@ -1,19 +0,0 @@
|
||||
diff -urpN gcc-7.3.0-bak/gcc/config/arm/arm.c gcc-7.3.0/gcc/config/arm/arm.c
|
||||
--- gcc-7.3.0-bak/gcc/config/arm/arm.c 2018-11-13 14:23:21.362347728 +0800
|
||||
+++ gcc-7.3.0/gcc/config/arm/arm.c 2018-11-13 14:31:15.722360215 +0800
|
||||
@@ -26853,7 +26853,14 @@ static bool
|
||||
arm_array_mode_supported_p (machine_mode mode,
|
||||
unsigned HOST_WIDE_INT nelems)
|
||||
{
|
||||
- if (TARGET_NEON
|
||||
+
|
||||
+
|
||||
+ /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
|
||||
+ for now, as the lane-swapping logic needs to be extended in the expanders.
|
||||
+ See PR target/82518. */
|
||||
+
|
||||
+
|
||||
+ if (TARGET_NEON && !BYTES_BIG_ENDIAN
|
||||
&& (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
|
||||
&& (nelems >= 2 && nelems <= 4))
|
||||
return true;
|
||||
@ -1,25 +0,0 @@
|
||||
diff -Nurp a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
|
||||
--- a/gcc/config/arm/arm.md 2019-08-10 00:21:12.658523444 +0800
|
||||
+++ b/gcc/config/arm/arm.md 2019-08-10 00:21:53.478521496 +0800
|
||||
@@ -5337,7 +5337,9 @@
|
||||
#
|
||||
ldrh%?\\t%0, %1"
|
||||
[(set_attr "type" "alu_shift_reg,load_byte")
|
||||
- (set_attr "predicable" "yes")]
|
||||
+ (set_attr "predicable" "yes")
|
||||
+ (set_attr "pool_range" "*,256")
|
||||
+ (set_attr "neg_pool_range" "*,244")]
|
||||
)
|
||||
|
||||
(define_insn "*arm_zero_extendhisi2_v6"
|
||||
@@ -5348,7 +5350,9 @@
|
||||
uxth%?\\t%0, %1
|
||||
ldrh%?\\t%0, %1"
|
||||
[(set_attr "predicable" "yes")
|
||||
- (set_attr "type" "extend,load_byte")]
|
||||
+ (set_attr "type" "extend,load_byte")
|
||||
+ (set_attr "pool_range" "*,256")
|
||||
+ (set_attr "neg_pool_range" "*,244")]
|
||||
)
|
||||
|
||||
(define_insn "*arm_zero_extendhisi2addsi"
|
||||
Binary file not shown.
@ -1,21 +0,0 @@
|
||||
diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||||
--- a/gcc/config/aarch64/aarch64.c 2018-11-16 18:02:11.000000000 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64.c 2018-11-16 18:07:39.000000000 +0800
|
||||
@@ -6102,7 +6102,7 @@ aarch64_elf_asm_constructor (rtx symbol,
|
||||
-Wformat-truncation false positive, use a larger size. */
|
||||
char buf[23];
|
||||
snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
|
||||
- s = get_section (buf, SECTION_WRITE, NULL);
|
||||
+ s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
|
||||
switch_to_section (s);
|
||||
assemble_align (POINTER_SIZE);
|
||||
assemble_aligned_integer (POINTER_BYTES, symbol);
|
||||
@@ -6122,7 +6122,7 @@ aarch64_elf_asm_destructor (rtx symbol,
|
||||
-Wformat-truncation false positive, use a larger size. */
|
||||
char buf[23];
|
||||
snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
|
||||
- s = get_section (buf, SECTION_WRITE, NULL);
|
||||
+ s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL);
|
||||
switch_to_section (s);
|
||||
assemble_align (POINTER_SIZE);
|
||||
assemble_aligned_integer (POINTER_BYTES, symbol);
|
||||
52
delete-incorrect-smw.patch
Normal file
52
delete-incorrect-smw.patch
Normal file
@ -0,0 +1,52 @@
|
||||
diff -uprN a/gcc/testsuite/gcc.dg/pr91195.c b/gcc/testsuite/gcc.dg/pr91195.c
|
||||
new file mode 100644
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/pr91195.c
|
||||
@@ -0,0 +1,25 @@
|
||||
+/* PR middle-end/91195 */
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-Wmaybe-uninitialized -O2" } */
|
||||
+
|
||||
+int bar (char*);
|
||||
+
|
||||
+void
|
||||
+foo (char *x, char *y)
|
||||
+{
|
||||
+ char *a[2];
|
||||
+ int b = 0;
|
||||
+
|
||||
+ if (x)
|
||||
+ a[b++] = x; /* { dg-bogus "may be used uninitialized in this function" } */
|
||||
+ if (y)
|
||||
+ a[b++] = y;
|
||||
+
|
||||
+ for (int j = 0; j < 4; j++)
|
||||
+ switch (j)
|
||||
+ {
|
||||
+ case 0:
|
||||
+ if (b == 0 || bar (a[0]))
|
||||
+ break;
|
||||
+ }
|
||||
+}
|
||||
diff -uprN a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||
--- a/gcc/tree-ssa-phiopt.c
|
||||
+++ b/gcc/tree-ssa-phiopt.c
|
||||
@@ -2269,6 +2269,10 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb,
|
||||
name = make_temp_ssa_name (TREE_TYPE (lhs), NULL, "cstore");
|
||||
new_stmt = gimple_build_assign (name, lhs);
|
||||
gimple_set_location (new_stmt, locus);
|
||||
+ lhs = unshare_expr (lhs);
|
||||
+ /* Set TREE_NO_WARNING on the rhs of the load to avoid uninit
|
||||
+ warnings. */
|
||||
+ TREE_NO_WARNING (gimple_assign_rhs1 (new_stmt)) = 1;
|
||||
gsi_insert_on_edge (e1, new_stmt);
|
||||
|
||||
/* 3) Create a PHI node at the join block, with one argument
|
||||
@@ -2279,7 +2283,6 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb,
|
||||
add_phi_arg (newphi, rhs, e0, locus);
|
||||
add_phi_arg (newphi, name, e1, locus);
|
||||
|
||||
- lhs = unshare_expr (lhs);
|
||||
new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi));
|
||||
|
||||
/* 4) Insert that PHI node. */
|
||||
69
div-opti.patch
Normal file
69
div-opti.patch
Normal file
@ -0,0 +1,69 @@
|
||||
From dbf3dc75888623e9d4bb7cc5e9c30caa9b24ffe7 Mon Sep 17 00:00:00 2001
|
||||
From: Bu Le <bule1@huawei.com>
|
||||
Date: Thu, 12 Mar 2020 22:39:12 +0000
|
||||
Subject: [PATCH] aarch64: Add --params to control the number of recip steps
|
||||
[PR94154]
|
||||
|
||||
-mlow-precision-div hard-coded the number of iterations to 2 for double
|
||||
and 1 for float. This patch adds a --param to control the number.
|
||||
|
||||
2020-03-13 Bu Le <bule1@huawei.com>
|
||||
|
||||
gcc/
|
||||
PR target/94154
|
||||
* config/aarch64/aarch64.opt (-param=aarch64-float-recp-precision=)
|
||||
(-param=aarch64-double-recp-precision=): New options.
|
||||
* doc/invoke.texi: Document them.
|
||||
* config/aarch64/aarch64.c (aarch64_emit_approx_div): Use them
|
||||
instead of hard-coding the choice of 1 for float and 2 for double.
|
||||
---
|
||||
gcc/ChangeLog | 9 +++++++++
|
||||
gcc/config/aarch64/aarch64.c | 8 +++++---
|
||||
gcc/config/aarch64/aarch64.opt | 9 +++++++++
|
||||
gcc/doc/invoke.texi | 11 +++++++++++
|
||||
4 files changed, 34 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||||
index c320d5ba51d..2c81f86dd2a 100644
|
||||
--- a/gcc/config/aarch64/aarch64.c
|
||||
+++ b/gcc/config/aarch64/aarch64.c
|
||||
@@ -12911,10 +12911,12 @@ aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
|
||||
/* Iterate over the series twice for SF and thrice for DF. */
|
||||
int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
|
||||
|
||||
- /* Optionally iterate over the series once less for faster performance,
|
||||
- while sacrificing the accuracy. */
|
||||
+ /* Optionally iterate over the series less for faster performance,
|
||||
+ while sacrificing the accuracy. The default is 2 for DF and 1 for SF. */
|
||||
if (flag_mlow_precision_div)
|
||||
- iterations--;
|
||||
+ iterations = (GET_MODE_INNER (mode) == DFmode
|
||||
+ ? PARAM_VALUE (PARAM_AARCH64_DOUBLE_RECP_PRECISION)
|
||||
+ : PARAM_VALUE (PARAM_AARCH64_FLOAT_RECP_PRECISION));
|
||||
|
||||
/* Iterate over the series to calculate the approximate reciprocal. */
|
||||
rtx xtmp = gen_reg_rtx (mode);
|
||||
|
||||
--- a/gcc/params.def 2020-04-15 17:24:31.984000000 +0800
|
||||
+++ b/gcc/params.def 2020-04-15 16:59:21.752000000 +0800
|
||||
@@ -1420,6 +1414,17 @@ DEFPARAM(PARAM_SSA_NAME_DEF_CHAIN_LIMIT,
|
||||
"a value.",
|
||||
512, 0, 0)
|
||||
|
||||
+DEFPARAM(PARAM_AARCH64_FLOAT_RECP_PRECISION,
|
||||
+ "aarch64-float-recp-precision",
|
||||
+ "The number of Newton iterations for calculating the reciprocal "
|
||||
+ "for float type. ",
|
||||
+ 1, 1, 5)
|
||||
+
|
||||
+DEFPARAM(PARAM_AARCH64_DOUBLE_RECP_PRECISION,
|
||||
+ "aarch64-double-recp-precision",
|
||||
+ "The number of Newton iterations for calculating the reciprocal "
|
||||
+ "for double type.",
|
||||
+ 2, 1, 5)
|
||||
/*
|
||||
|
||||
Local variables:
|
||||
--
|
||||
2.18.2
|
||||
|
||||
19
dont-generate-IF_THEN_ELSE.patch
Normal file
19
dont-generate-IF_THEN_ELSE.patch
Normal file
@ -0,0 +1,19 @@
|
||||
diff --git a/gcc/combine.c b/gcc/combine.c
|
||||
index 4de759a8e6b..ce7aeecb5c2 100644
|
||||
--- a/gcc/combine.c
|
||||
+++ b/gcc/combine.c
|
||||
@@ -5909,14 +5909,6 @@ combine_simplify_rtx (rtx x, machine_mode op0_mode, int in_dest,
|
||||
mode, VOIDmode,
|
||||
cond, cop1),
|
||||
mode);
|
||||
- else
|
||||
- return gen_rtx_IF_THEN_ELSE (mode,
|
||||
- simplify_gen_relational (cond_code,
|
||||
- mode,
|
||||
- VOIDmode,
|
||||
- cond,
|
||||
- cop1),
|
||||
- true_rtx, false_rtx);
|
||||
|
||||
code = GET_CODE (x);
|
||||
op0_mode = VOIDmode;
|
||||
460
enable-aarch64-libquadmath.patch
Normal file
460
enable-aarch64-libquadmath.patch
Normal file
@ -0,0 +1,460 @@
|
||||
diff -urpN a/libquadmath/Makefile.in b/libquadmath/Makefile.in
|
||||
--- a/libquadmath/Makefile.in 2020-03-31 09:51:59.000000000 +0800
|
||||
+++ b/libquadmath/Makefile.in 2020-04-06 11:52:45.650793256 +0800
|
||||
@@ -90,7 +90,7 @@ POST_UNINSTALL = :
|
||||
build_triplet = @build@
|
||||
host_triplet = @host@
|
||||
target_triplet = @target@
|
||||
-@BUILD_LIBQUADMATH_FALSE@libquadmath_la_DEPENDENCIES =
|
||||
+#libquadmath_la_DEPENDENCIES =
|
||||
subdir = .
|
||||
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||
am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
|
||||
@@ -146,68 +146,68 @@ am__installdirs = "$(DESTDIR)$(toolexecl
|
||||
"$(DESTDIR)$(libsubincludedir)"
|
||||
LTLIBRARIES = $(toolexeclib_LTLIBRARIES)
|
||||
am__dirstamp = $(am__leading_dot)dirstamp
|
||||
-@BUILD_LIBQUADMATH_TRUE@am_libquadmath_la_OBJECTS = math/x2y2m1q.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/acoshq.lo math/fmodq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/acosq.lo math/frexpq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/rem_pio2q.lo math/asinhq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/hypotq.lo math/remainderq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/asinq.lo math/rintq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/atan2q.lo math/isinfq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/roundq.lo math/atanhq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/isnanq.lo math/scalblnq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/atanq.lo math/j0q.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/scalbnq.lo math/cbrtq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/j1q.lo math/signbitq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/ceilq.lo math/jnq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/sincos_table.lo math/complex.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/ldexpq.lo math/sincosq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/copysignq.lo math/lgammaq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/sincosq_kernel.lo math/coshq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/llroundq.lo math/sinhq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/cosq.lo math/log10q.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/sinq.lo math/cosq_kernel.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/log1pq.lo math/sinq_kernel.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/erfq.lo math/logq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/sqrtq.lo math/expm1q.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/lroundq.lo math/tanhq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/expq.lo math/modfq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/tanq.lo math/fabsq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/nanq.lo math/tgammaq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/finiteq.lo math/nextafterq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/truncq.lo math/floorq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/powq.lo math/fmaq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/logbq.lo math/exp2q.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/issignalingq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/lgammaq_neg.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/lgammaq_product.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/tanq_kernel.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/tgammaq_product.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/casinhq_kernel.lo math/cacoshq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/cacosq.lo math/casinhq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/casinq.lo math/catanhq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/catanq.lo math/cimagq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/conjq.lo math/cprojq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/crealq.lo math/fdimq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/fmaxq.lo math/fminq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/ilogbq.lo math/llrintq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/log2q.lo math/lrintq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/nearbyintq.lo math/remquoq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/ccoshq.lo math/cexpq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/clog10q.lo math/clogq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/csinq.lo math/csinhq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/csqrtq.lo math/ctanq.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/ctanhq.lo printf/addmul_1.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ printf/add_n.lo printf/cmp.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ printf/divrem.lo printf/flt1282mpn.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ printf/fpioconst.lo printf/lshift.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ printf/mul_1.lo printf/mul_n.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ printf/mul.lo printf/printf_fphex.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ printf/printf_fp.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ printf/quadmath-printf.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ printf/rshift.lo printf/submul_1.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ printf/sub_n.lo strtod/strtoflt128.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ strtod/mpn2flt128.lo \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ strtod/tens_in_limb.lo
|
||||
+am_libquadmath_la_OBJECTS = math/x2y2m1q.lo \
|
||||
+ math/acoshq.lo math/fmodq.lo \
|
||||
+ math/acosq.lo math/frexpq.lo \
|
||||
+ math/rem_pio2q.lo math/asinhq.lo \
|
||||
+ math/hypotq.lo math/remainderq.lo \
|
||||
+ math/asinq.lo math/rintq.lo \
|
||||
+ math/atan2q.lo math/isinfq.lo \
|
||||
+ math/roundq.lo math/atanhq.lo \
|
||||
+ math/isnanq.lo math/scalblnq.lo \
|
||||
+ math/atanq.lo math/j0q.lo \
|
||||
+ math/scalbnq.lo math/cbrtq.lo \
|
||||
+ math/j1q.lo math/signbitq.lo \
|
||||
+ math/ceilq.lo math/jnq.lo \
|
||||
+ math/sincos_table.lo math/complex.lo \
|
||||
+ math/ldexpq.lo math/sincosq.lo \
|
||||
+ math/copysignq.lo math/lgammaq.lo \
|
||||
+ math/sincosq_kernel.lo math/coshq.lo \
|
||||
+ math/llroundq.lo math/sinhq.lo \
|
||||
+ math/cosq.lo math/log10q.lo \
|
||||
+ math/sinq.lo math/cosq_kernel.lo \
|
||||
+ math/log1pq.lo math/sinq_kernel.lo \
|
||||
+ math/erfq.lo math/logq.lo \
|
||||
+ math/sqrtq.lo math/expm1q.lo \
|
||||
+ math/lroundq.lo math/tanhq.lo \
|
||||
+ math/expq.lo math/modfq.lo \
|
||||
+ math/tanq.lo math/fabsq.lo \
|
||||
+ math/nanq.lo math/tgammaq.lo \
|
||||
+ math/finiteq.lo math/nextafterq.lo \
|
||||
+ math/truncq.lo math/floorq.lo \
|
||||
+ math/powq.lo math/fmaq.lo \
|
||||
+ math/logbq.lo math/exp2q.lo \
|
||||
+ math/issignalingq.lo \
|
||||
+ math/lgammaq_neg.lo \
|
||||
+ math/lgammaq_product.lo \
|
||||
+ math/tanq_kernel.lo \
|
||||
+ math/tgammaq_product.lo \
|
||||
+ math/casinhq_kernel.lo math/cacoshq.lo \
|
||||
+ math/cacosq.lo math/casinhq.lo \
|
||||
+ math/casinq.lo math/catanhq.lo \
|
||||
+ math/catanq.lo math/cimagq.lo \
|
||||
+ math/conjq.lo math/cprojq.lo \
|
||||
+ math/crealq.lo math/fdimq.lo \
|
||||
+ math/fmaxq.lo math/fminq.lo \
|
||||
+ math/ilogbq.lo math/llrintq.lo \
|
||||
+ math/log2q.lo math/lrintq.lo \
|
||||
+ math/nearbyintq.lo math/remquoq.lo \
|
||||
+ math/ccoshq.lo math/cexpq.lo \
|
||||
+ math/clog10q.lo math/clogq.lo \
|
||||
+ math/csinq.lo math/csinhq.lo \
|
||||
+ math/csqrtq.lo math/ctanq.lo \
|
||||
+ math/ctanhq.lo printf/addmul_1.lo \
|
||||
+ printf/add_n.lo printf/cmp.lo \
|
||||
+ printf/divrem.lo printf/flt1282mpn.lo \
|
||||
+ printf/fpioconst.lo printf/lshift.lo \
|
||||
+ printf/mul_1.lo printf/mul_n.lo \
|
||||
+ printf/mul.lo printf/printf_fphex.lo \
|
||||
+ printf/printf_fp.lo \
|
||||
+ printf/quadmath-printf.lo \
|
||||
+ printf/rshift.lo printf/submul_1.lo \
|
||||
+ printf/sub_n.lo strtod/strtoflt128.lo \
|
||||
+ strtod/mpn2flt128.lo \
|
||||
+ strtod/tens_in_limb.lo
|
||||
libquadmath_la_OBJECTS = $(am_libquadmath_la_OBJECTS)
|
||||
AM_V_lt = $(am__v_lt_@AM_V@)
|
||||
am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
|
||||
@@ -217,8 +217,8 @@ libquadmath_la_LINK = $(LIBTOOL) $(AM_V_
|
||||
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \
|
||||
$(AM_CFLAGS) $(CFLAGS) $(libquadmath_la_LDFLAGS) $(LDFLAGS) -o \
|
||||
$@
|
||||
-@BUILD_LIBQUADMATH_TRUE@am_libquadmath_la_rpath = -rpath \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ $(toolexeclibdir)
|
||||
+am_libquadmath_la_rpath = -rpath \
|
||||
+ $(toolexeclibdir)
|
||||
AM_V_P = $(am__v_P_@AM_V@)
|
||||
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
|
||||
am__v_P_0 = false
|
||||
@@ -336,7 +336,7 @@ CFLAGS = @CFLAGS@
|
||||
CPP = @CPP@
|
||||
CPPFLAGS = @CPPFLAGS@
|
||||
CYGPATH_W = @CYGPATH_W@
|
||||
-DEFS = @DEFS@
|
||||
+DEFS = @DEFS@ -D__float128="long double"
|
||||
DEPDIR = @DEPDIR@
|
||||
DSYMUTIL = @DSYMUTIL@
|
||||
DUMPBIN = @DUMPBIN@
|
||||
@@ -408,7 +408,7 @@ datadir = @datadir@
|
||||
datarootdir = @datarootdir@
|
||||
docdir = @docdir@
|
||||
dvidir = @dvidir@
|
||||
-enable_shared = @enable_shared@
|
||||
+enable_shared = yes
|
||||
enable_static = @enable_static@
|
||||
exec_prefix = @exec_prefix@
|
||||
get_gcc_base_ver = @get_gcc_base_ver@
|
||||
@@ -450,109 +450,109 @@ top_build_prefix = @top_build_prefix@
|
||||
top_builddir = @top_builddir@
|
||||
top_srcdir = @top_srcdir@
|
||||
AUTOMAKE_OPTIONS = foreign info-in-builddir
|
||||
-@BUILD_LIBQUADMATH_TRUE@ACLOCAL_AMFLAGS = -I .. -I ../config
|
||||
-@BUILD_LIBQUADMATH_TRUE@AM_CPPFLAGS = -I $(top_srcdir)/../include
|
||||
-@BUILD_LIBQUADMATH_TRUE@AM_CFLAGS = $(XCFLAGS)
|
||||
-@BUILD_LIBQUADMATH_TRUE@gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER)
|
||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_FALSE@version_arg =
|
||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,--version-script=$(srcdir)/quadmath.map
|
||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,-M,quadmath.map-sun
|
||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_FALSE@version_dep =
|
||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = $(srcdir)/quadmath.map
|
||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = quadmath.map-sun
|
||||
-@BUILD_LIBQUADMATH_TRUE@toolexeclib_LTLIBRARIES = libquadmath.la
|
||||
-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_LIBADD =
|
||||
-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ $(version_arg) $(lt_host_flags) -lm
|
||||
-
|
||||
-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_DEPENDENCIES = $(version_dep) $(libquadmath_la_LIBADD)
|
||||
-@BUILD_LIBQUADMATH_TRUE@nodist_libsubinclude_HEADERS = quadmath.h quadmath_weak.h
|
||||
-@BUILD_LIBQUADMATH_TRUE@libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
|
||||
-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_SOURCES = \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/x2y2m1q.c math/acoshq.c math/fmodq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/acosq.c math/frexpq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/rem_pio2q.c math/asinhq.c math/hypotq.c math/remainderq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/asinq.c math/rintq.c math/atan2q.c math/isinfq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/roundq.c math/atanhq.c math/isnanq.c math/scalblnq.c math/atanq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/j0q.c math/scalbnq.c math/cbrtq.c math/j1q.c math/signbitq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/ceilq.c math/jnq.c math/sincos_table.c math/complex.c math/ldexpq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/sincosq.c math/copysignq.c math/lgammaq.c math/sincosq_kernel.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/coshq.c math/llroundq.c math/sinhq.c math/cosq.c math/log10q.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/sinq.c math/cosq_kernel.c math/log1pq.c math/sinq_kernel.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/erfq.c math/logq.c math/sqrtq.c math/expm1q.c math/lroundq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/tanhq.c math/expq.c math/modfq.c math/tanq.c math/fabsq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/nanq.c math/tgammaq.c math/finiteq.c math/nextafterq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/truncq.c math/floorq.c math/powq.c math/fmaq.c math/logbq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/exp2q.c math/issignalingq.c math/lgammaq_neg.c math/lgammaq_product.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/tanq_kernel.c math/tgammaq_product.c math/casinhq_kernel.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/cacoshq.c math/cacosq.c math/casinhq.c math/casinq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/catanhq.c math/catanq.c math/cimagq.c math/conjq.c math/cprojq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/crealq.c math/fdimq.c math/fmaxq.c math/fminq.c math/ilogbq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/llrintq.c math/log2q.c math/lrintq.c math/nearbyintq.c math/remquoq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/ccoshq.c math/cexpq.c math/clog10q.c math/clogq.c math/csinq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ math/csinhq.c math/csqrtq.c math/ctanq.c math/ctanhq.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ printf/addmul_1.c printf/add_n.c printf/cmp.c printf/divrem.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ printf/flt1282mpn.c printf/fpioconst.c printf/lshift.c printf/mul_1.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ printf/mul_n.c printf/mul.c printf/printf_fphex.c printf/printf_fp.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c
|
||||
+ACLOCAL_AMFLAGS = -I .. -I ../config
|
||||
+AM_CPPFLAGS = -I $(top_srcdir)/../include
|
||||
+AM_CFLAGS = $(XCFLAGS)
|
||||
+gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER)
|
||||
+@LIBQUAD_USE_SYMVER_FALSE@version_arg =
|
||||
+@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,--version-script=$(srcdir)/quadmath.map
|
||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,-M,quadmath.map-sun
|
||||
+@LIBQUAD_USE_SYMVER_FALSE@version_dep =
|
||||
+@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = $(srcdir)/quadmath.map
|
||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = quadmath.map-sun
|
||||
+toolexeclib_LTLIBRARIES = libquadmath.la
|
||||
+libquadmath_la_LIBADD =
|
||||
+libquadmath_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \
|
||||
+ $(version_arg) $(lt_host_flags) -lm
|
||||
+
|
||||
+libquadmath_la_DEPENDENCIES = $(version_dep) $(libquadmath_la_LIBADD)
|
||||
+nodist_libsubinclude_HEADERS = quadmath.h quadmath_weak.h
|
||||
+libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include
|
||||
+libquadmath_la_SOURCES = \
|
||||
+ math/x2y2m1q.c math/acoshq.c math/fmodq.c \
|
||||
+ math/acosq.c math/frexpq.c \
|
||||
+ math/rem_pio2q.c math/asinhq.c math/hypotq.c math/remainderq.c \
|
||||
+ math/asinq.c math/rintq.c math/atan2q.c math/isinfq.c \
|
||||
+ math/roundq.c math/atanhq.c math/isnanq.c math/scalblnq.c math/atanq.c \
|
||||
+ math/j0q.c math/scalbnq.c math/cbrtq.c math/j1q.c math/signbitq.c \
|
||||
+ math/ceilq.c math/jnq.c math/sincos_table.c math/complex.c math/ldexpq.c \
|
||||
+ math/sincosq.c math/copysignq.c math/lgammaq.c math/sincosq_kernel.c \
|
||||
+ math/coshq.c math/llroundq.c math/sinhq.c math/cosq.c math/log10q.c \
|
||||
+ math/sinq.c math/cosq_kernel.c math/log1pq.c math/sinq_kernel.c \
|
||||
+ math/erfq.c math/logq.c math/sqrtq.c math/expm1q.c math/lroundq.c \
|
||||
+ math/tanhq.c math/expq.c math/modfq.c math/tanq.c math/fabsq.c \
|
||||
+ math/nanq.c math/tgammaq.c math/finiteq.c math/nextafterq.c \
|
||||
+ math/truncq.c math/floorq.c math/powq.c math/fmaq.c math/logbq.c \
|
||||
+ math/exp2q.c math/issignalingq.c math/lgammaq_neg.c math/lgammaq_product.c \
|
||||
+ math/tanq_kernel.c math/tgammaq_product.c math/casinhq_kernel.c \
|
||||
+ math/cacoshq.c math/cacosq.c math/casinhq.c math/casinq.c \
|
||||
+ math/catanhq.c math/catanq.c math/cimagq.c math/conjq.c math/cprojq.c \
|
||||
+ math/crealq.c math/fdimq.c math/fmaxq.c math/fminq.c math/ilogbq.c \
|
||||
+ math/llrintq.c math/log2q.c math/lrintq.c math/nearbyintq.c math/remquoq.c \
|
||||
+ math/ccoshq.c math/cexpq.c math/clog10q.c math/clogq.c math/csinq.c \
|
||||
+ math/csinhq.c math/csqrtq.c math/ctanq.c math/ctanhq.c \
|
||||
+ printf/addmul_1.c printf/add_n.c printf/cmp.c printf/divrem.c \
|
||||
+ printf/flt1282mpn.c printf/fpioconst.c printf/lshift.c printf/mul_1.c \
|
||||
+ printf/mul_n.c printf/mul.c printf/printf_fphex.c printf/printf_fp.c \
|
||||
+ printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \
|
||||
+ strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c
|
||||
|
||||
|
||||
# Work around what appears to be a GNU make bug handling MAKEFLAGS
|
||||
# values defined in terms of make variables, as is the case for CC and
|
||||
# friends when we are called from the top level Makefile.
|
||||
-@BUILD_LIBQUADMATH_TRUE@AM_MAKEFLAGS = \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "AR_FLAGS=$(AR_FLAGS)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "CC_FOR_BUILD=$(CC_FOR_BUILD)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS=$(CFLAGS)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "CXXFLAGS=$(CXXFLAGS)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "INSTALL=$(INSTALL)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_DATA=$(INSTALL_DATA)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "JC1FLAGS=$(JC1FLAGS)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "LDFLAGS=$(LDFLAGS)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS=$(LIBCFLAGS)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "MAKE=$(MAKE)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG=$(PICFLAG)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "SHELL=$(SHELL)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "RUNTESTFLAGS=$(RUNTESTFLAGS)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "exec_prefix=$(exec_prefix)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "infodir=$(infodir)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "libdir=$(libdir)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "prefix=$(prefix)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "includedir=$(includedir)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "AR=$(AR)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "AS=$(AS)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "CC=$(CC)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "CXX=$(CXX)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "LD=$(LD)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS=$(LIBCFLAGS)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "NM=$(NM)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG=$(PICFLAG)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "RANLIB=$(RANLIB)" \
|
||||
-@BUILD_LIBQUADMATH_TRUE@ "DESTDIR=$(DESTDIR)"
|
||||
+AM_MAKEFLAGS = \
|
||||
+ "AR_FLAGS=$(AR_FLAGS)" \
|
||||
+ "CC_FOR_BUILD=$(CC_FOR_BUILD)" \
|
||||
+ "CFLAGS=$(CFLAGS)" \
|
||||
+ "CXXFLAGS=$(CXXFLAGS)" \
|
||||
+ "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \
|
||||
+ "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \
|
||||
+ "INSTALL=$(INSTALL)" \
|
||||
+ "INSTALL_DATA=$(INSTALL_DATA)" \
|
||||
+ "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \
|
||||
+ "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \
|
||||
+ "JC1FLAGS=$(JC1FLAGS)" \
|
||||
+ "LDFLAGS=$(LDFLAGS)" \
|
||||
+ "LIBCFLAGS=$(LIBCFLAGS)" \
|
||||
+ "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \
|
||||
+ "MAKE=$(MAKE)" \
|
||||
+ "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \
|
||||
+ "PICFLAG=$(PICFLAG)" \
|
||||
+ "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \
|
||||
+ "SHELL=$(SHELL)" \
|
||||
+ "RUNTESTFLAGS=$(RUNTESTFLAGS)" \
|
||||
+ "exec_prefix=$(exec_prefix)" \
|
||||
+ "infodir=$(infodir)" \
|
||||
+ "libdir=$(libdir)" \
|
||||
+ "prefix=$(prefix)" \
|
||||
+ "includedir=$(includedir)" \
|
||||
+ "AR=$(AR)" \
|
||||
+ "AS=$(AS)" \
|
||||
+ "CC=$(CC)" \
|
||||
+ "CXX=$(CXX)" \
|
||||
+ "LD=$(LD)" \
|
||||
+ "LIBCFLAGS=$(LIBCFLAGS)" \
|
||||
+ "NM=$(NM)" \
|
||||
+ "PICFLAG=$(PICFLAG)" \
|
||||
+ "RANLIB=$(RANLIB)" \
|
||||
+ "DESTDIR=$(DESTDIR)"
|
||||
|
||||
|
||||
# Subdir rules rely on $(FLAGS_TO_PASS)
|
||||
-@BUILD_LIBQUADMATH_TRUE@FLAGS_TO_PASS = $(AM_MAKEFLAGS)
|
||||
-@BUILD_LIBQUADMATH_TRUE@MAKEOVERRIDES =
|
||||
-@BUILD_LIBQUADMATH_TRUE@@GENINSRC_FALSE@STAMP_GENINSRC =
|
||||
+FLAGS_TO_PASS = $(AM_MAKEFLAGS)
|
||||
+MAKEOVERRIDES =
|
||||
+@GENINSRC_FALSE@STAMP_GENINSRC =
|
||||
|
||||
# AM_CONDITIONAL on configure option --generated-files-in-srcdir
|
||||
-@BUILD_LIBQUADMATH_TRUE@@GENINSRC_TRUE@STAMP_GENINSRC = stamp-geninsrc
|
||||
-@BUILD_LIBQUADMATH_TRUE@ALL_LOCAL_DEPS = $(STAMP_GENINSRC)
|
||||
-@BUILD_INFO_FALSE@@BUILD_LIBQUADMATH_TRUE@STAMP_BUILD_INFO =
|
||||
+@GENINSRC_TRUE@STAMP_GENINSRC = stamp-geninsrc
|
||||
+ALL_LOCAL_DEPS = $(STAMP_GENINSRC)
|
||||
+@BUILD_INFO_FALSE@STAMP_BUILD_INFO =
|
||||
|
||||
# AM_CONDITIONAL on configure check ACX_CHECK_PROG_VER([MAKEINFO])
|
||||
-@BUILD_INFO_TRUE@@BUILD_LIBQUADMATH_TRUE@STAMP_BUILD_INFO = stamp-build-info
|
||||
-@BUILD_LIBQUADMATH_TRUE@CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO)
|
||||
-@BUILD_LIBQUADMATH_TRUE@MAINTAINERCLEANFILES = $(srcdir)/libquadmath.info
|
||||
+@BUILD_INFO_TRUE@STAMP_BUILD_INFO = stamp-build-info
|
||||
+CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO)
|
||||
+MAINTAINERCLEANFILES = $(srcdir)/libquadmath.info
|
||||
|
||||
# Automake Documentation:
|
||||
# If your package has Texinfo files in many directories, you can use the
|
||||
@@ -563,8 +563,8 @@ TEXINFO_TEX = ../gcc/doc/include/texinfo
|
||||
|
||||
# Defines info, dvi, pdf and html targets
|
||||
MAKEINFOFLAGS = -I $(srcdir)/../gcc/doc/include
|
||||
-@BUILD_LIBQUADMATH_FALSE@info_TEXINFOS =
|
||||
-@BUILD_LIBQUADMATH_TRUE@info_TEXINFOS = libquadmath.texi
|
||||
+info_TEXINFOS =
|
||||
+info_TEXINFOS = libquadmath.texi
|
||||
libquadmath_TEXINFOS = libquadmath-vers.texi
|
||||
MULTISRCTOP =
|
||||
MULTIBUILDTOP =
|
||||
@@ -1186,6 +1186,7 @@ distclean-tags:
|
||||
-rm -f cscope.out cscope.in.out cscope.po.out cscope.files
|
||||
check-am: all-am
|
||||
check: check-am
|
||||
+#all-local
|
||||
all-am: Makefile $(INFO_DEPS) $(LTLIBRARIES) $(HEADERS) config.h \
|
||||
all-local
|
||||
installdirs:
|
||||
@@ -1424,22 +1425,22 @@ uninstall-am: uninstall-dvi-am uninstall
|
||||
|
||||
.PRECIOUS: Makefile
|
||||
|
||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@quadmath.map-sun : $(srcdir)/quadmath.map \
|
||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(top_srcdir)/../contrib/make_sunver.pl \
|
||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD)
|
||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ perl $(top_srcdir)/../contrib/make_sunver.pl \
|
||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(srcdir)/quadmath.map \
|
||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ `echo $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) | \
|
||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ sed 's,\([^/ ]*\)\.l\([ao]\),.libs/\1.\2,g'` \
|
||||
-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1)
|
||||
-
|
||||
-@BUILD_LIBQUADMATH_TRUE@stamp-geninsrc: libquadmath.info
|
||||
-@BUILD_LIBQUADMATH_TRUE@ cp -p $(top_builddir)/libquadmath.info $(srcdir)/libquadmath.info
|
||||
-@BUILD_LIBQUADMATH_TRUE@ @touch $@
|
||||
-
|
||||
-@BUILD_LIBQUADMATH_TRUE@stamp-build-info: libquadmath.texi $(libquadmath_TEXINFOS)
|
||||
-@BUILD_LIBQUADMATH_TRUE@ $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o libquadmath.info $(srcdir)/libquadmath.texi
|
||||
-@BUILD_LIBQUADMATH_TRUE@ @touch $@
|
||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@quadmath.map-sun : $(srcdir)/quadmath.map \
|
||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(top_srcdir)/../contrib/make_sunver.pl \
|
||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD)
|
||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ perl $(top_srcdir)/../contrib/make_sunver.pl \
|
||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(srcdir)/quadmath.map \
|
||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ `echo $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) | \
|
||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ sed 's,\([^/ ]*\)\.l\([ao]\),.libs/\1.\2,g'` \
|
||||
+@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1)
|
||||
+
|
||||
+stamp-geninsrc: libquadmath.info
|
||||
+ cp -p $(top_builddir)/libquadmath.info $(srcdir)/libquadmath.info
|
||||
+ @touch $@
|
||||
+
|
||||
+stamp-build-info: libquadmath.texi $(libquadmath_TEXINFOS)
|
||||
+ $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o libquadmath.info $(srcdir)/libquadmath.texi
|
||||
+ @touch $@
|
||||
|
||||
all-local: $(ALL_LOCAL_DEPS)
|
||||
|
||||
diff -Nurp a/libquadmath/quadmath.h b/libquadmath/quadmath.h
|
||||
--- a/libquadmath/quadmath.h 2020-03-31 09:51:59.000000000 +0800
|
||||
+++ b/libquadmath/quadmath.h 2020-04-06 11:52:45.650793256 +0800
|
||||
@@ -27,6 +27,9 @@ Boston, MA 02110-1301, USA. */
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
+#ifdef AARCH64_QUADMATH
|
||||
+typedef long double __float128;
|
||||
+#endif
|
||||
/* Define the complex type corresponding to __float128
|
||||
("_Complex __float128" is not allowed) */
|
||||
#if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__)
|
||||
diff -Nurp a/libquadmath/quadmath.h b/libquadmath/quadmath.h
|
||||
--- a/libquadmath/quadmath.h 2015-08-09 16:46:52.541904000 +0800
|
||||
+++ b/libquadmath/quadmath.h 2019-08-17 18:25:51.923399149 +0800
|
||||
@@ -154,10 +154,9 @@ extern int quadmath_snprintf (char *str,
|
||||
#define FLT128_MAX_10_EXP 4932
|
||||
|
||||
|
||||
-#define HUGE_VALQ __builtin_huge_valq()
|
||||
/* The following alternative is valid, but brings the warning:
|
||||
(floating constant exceeds range of ‘__float128’) */
|
||||
-/* #define HUGE_VALQ (__extension__ 0x1.0p32767Q) */
|
||||
+ #define HUGE_VALQ (__extension__ 0x1.0p32767Q)
|
||||
|
||||
#define M_Eq 2.718281828459045235360287471352662498Q /* e */
|
||||
#define M_LOG2Eq 1.442695040888963407359924681001892137Q /* log_2 e */
|
||||
|
||||
32
fix-ICE-during-pass-ccp.patch
Normal file
32
fix-ICE-during-pass-ccp.patch
Normal file
@ -0,0 +1,32 @@
|
||||
diff -uprN a/gcc/testsuite/gcc.dg/pr94574.c b/gcc/testsuite/gcc.dg/pr94574.c
|
||||
--- a/gcc/testsuite/gcc.dg/pr94574.c 1970-01-01 00:00:00.000000000 +0000
|
||||
+++ b/gcc/testsuite/gcc.dg/pr94574.c 2020-04-15 21:08:48.972000000 +0000
|
||||
@@ -0,0 +1,15 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2" } */
|
||||
+
|
||||
+typedef unsigned int v4si __attribute__((vector_size(16)));
|
||||
+typedef unsigned int v2si __attribute__((vector_size(8)));
|
||||
+
|
||||
+/* The aliasing is somewhat dubious here, but it must compile. */
|
||||
+
|
||||
+v2si
|
||||
+foo (v4si v)
|
||||
+{
|
||||
+ v2si res;
|
||||
+ *(v4si *) &res = v;
|
||||
+ return res;
|
||||
+}
|
||||
diff -uprN a/gcc/tree-ssa.c b/gcc/tree-ssa.c
|
||||
--- a/gcc/tree-ssa.c 2020-03-31 01:51:30.000000000 +0000
|
||||
+++ b/gcc/tree-ssa.c 2020-04-15 21:26:09.828000000 +0000
|
||||
@@ -1528,7 +1528,9 @@ non_rewritable_lvalue_p (tree lhs)
|
||||
&& known_gt (wi::to_poly_offset (TYPE_SIZE_UNIT (TREE_TYPE (decl))),
|
||||
mem_ref_offset (lhs))
|
||||
&& multiple_of_p (sizetype, TREE_OPERAND (lhs, 1),
|
||||
- TYPE_SIZE_UNIT (TREE_TYPE (lhs))))
|
||||
+ TYPE_SIZE_UNIT (TREE_TYPE (lhs)))
|
||||
+ && known_ge (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (decl))),
|
||||
+ wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (lhs)))))
|
||||
return false;
|
||||
}
|
||||
65
fix-ICE-in-vectorizable-load.patch
Normal file
65
fix-ICE-in-vectorizable-load.patch
Normal file
@ -0,0 +1,65 @@
|
||||
diff -Nurp a/gcc/testsuite/gcc.target/aarch64/pr94398.c b/gcc/testsuite/gcc.target/aarch64/pr94398.c
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/pr94398.c 1970-01-01 08:00:00.000000000 +0800
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/pr94398.c 2020-04-17 17:15:58.176000000 +0800
|
||||
@@ -0,0 +1,24 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -ftree-loop-vectorize -funsafe-math-optimizations -march=armv8.2-a+sve -mstrict-align" } */
|
||||
+
|
||||
+float
|
||||
+foo(long n, float *x, int inc_x,
|
||||
+ float *y, int inc_y)
|
||||
+{
|
||||
+ float dot = 0.0;
|
||||
+ int ix = 0, iy = 0;
|
||||
+
|
||||
+ if (n < 0) {
|
||||
+ return dot;
|
||||
+ }
|
||||
+
|
||||
+ int i = 0;
|
||||
+ while (i < n) {
|
||||
+ dot += y[iy] * x[ix];
|
||||
+ ix += inc_x;
|
||||
+ iy += inc_y;
|
||||
+ i++;
|
||||
+ }
|
||||
+
|
||||
+ return dot;
|
||||
+}
|
||||
diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
|
||||
--- a/gcc/tree-vect-stmts.c 2020-04-17 17:10:14.796000000 +0800
|
||||
+++ b/gcc/tree-vect-stmts.c 2020-04-17 17:15:08.611850850 +0800
|
||||
@@ -7025,8 +7025,14 @@ vectorizable_store (stmt_vec_info stmt_i
|
||||
auto_vec<tree> dr_chain (group_size);
|
||||
oprnds.create (group_size);
|
||||
|
||||
- alignment_support_scheme
|
||||
- = vect_supportable_dr_alignment (first_dr_info, false);
|
||||
+ /* Gather-scatter accesses perform only component accesses, alignment
|
||||
+ is irrelevant for them. */
|
||||
+ if (memory_access_type == VMAT_GATHER_SCATTER)
|
||||
+ alignment_support_scheme = dr_unaligned_supported;
|
||||
+ else
|
||||
+ alignment_support_scheme
|
||||
+ = vect_supportable_dr_alignment (first_dr_info, false);
|
||||
+
|
||||
gcc_assert (alignment_support_scheme);
|
||||
vec_loop_masks *loop_masks
|
||||
= (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
|
||||
@@ -8162,8 +8168,14 @@ vectorizable_load (stmt_vec_info stmt_in
|
||||
ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
|
||||
}
|
||||
|
||||
- alignment_support_scheme
|
||||
- = vect_supportable_dr_alignment (first_dr_info, false);
|
||||
+ /* Gather-scatter accesses perform only component accesses, alignment
|
||||
+ is irrelevant for them. */
|
||||
+ if (memory_access_type == VMAT_GATHER_SCATTER)
|
||||
+ alignment_support_scheme = dr_unaligned_supported;
|
||||
+ else
|
||||
+ alignment_support_scheme
|
||||
+ = vect_supportable_dr_alignment (first_dr_info, false);
|
||||
+
|
||||
gcc_assert (alignment_support_scheme);
|
||||
vec_loop_masks *loop_masks
|
||||
= (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
|
||||
81
fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch
Normal file
81
fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch
Normal file
@ -0,0 +1,81 @@
|
||||
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||||
index b0cbb6e2d55..58d38f74bde 100644
|
||||
--- a/gcc/config/aarch64/aarch64.c
|
||||
+++ b/gcc/config/aarch64/aarch64.c
|
||||
@@ -2739,8 +2739,21 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
|
||||
}
|
||||
|
||||
case SYMBOL_TINY_GOT:
|
||||
- emit_insn (gen_ldr_got_tiny (dest, imm));
|
||||
- return;
|
||||
+ {
|
||||
+ rtx insn;
|
||||
+ machine_mode mode = GET_MODE (dest);
|
||||
+
|
||||
+ if (mode == ptr_mode)
|
||||
+ insn = gen_ldr_got_tiny (mode, dest, imm);
|
||||
+ else
|
||||
+ {
|
||||
+ gcc_assert (mode == Pmode);
|
||||
+ insn = gen_ldr_got_tiny_sidi (dest, imm);
|
||||
+ }
|
||||
+
|
||||
+ emit_insn (insn);
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
case SYMBOL_TINY_TLSIE:
|
||||
{
|
||||
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
index 7ad4e918578..c7c4d1dd519 100644
|
||||
--- a/gcc/config/aarch64/aarch64.md
|
||||
+++ b/gcc/config/aarch64/aarch64.md
|
||||
@@ -6766,13 +6766,23 @@
|
||||
[(set_attr "type" "load_4")]
|
||||
)
|
||||
|
||||
-(define_insn "ldr_got_tiny"
|
||||
- [(set (match_operand:DI 0 "register_operand" "=r")
|
||||
- (unspec:DI [(match_operand:DI 1 "aarch64_valid_symref" "S")]
|
||||
- UNSPEC_GOTTINYPIC))]
|
||||
+(define_insn "@ldr_got_tiny_<mode>"
|
||||
+ [(set (match_operand:PTR 0 "register_operand" "=r")
|
||||
+ (unspec:PTR [(match_operand:PTR 1 "aarch64_valid_symref" "S")]
|
||||
+ UNSPEC_GOTTINYPIC))]
|
||||
""
|
||||
- "ldr\\t%0, %L1"
|
||||
- [(set_attr "type" "load_8")]
|
||||
+ "ldr\t%<w>0, %L1"
|
||||
+ [(set_attr "type" "load_<ldst_sz>")]
|
||||
+)
|
||||
+
|
||||
+(define_insn "ldr_got_tiny_sidi"
|
||||
+ [(set (match_operand:DI 0 "register_operand" "=r")
|
||||
+ (zero_extend:DI
|
||||
+ (unspec:SI [(match_operand:DI 1 "aarch64_valid_symref" "S")]
|
||||
+ UNSPEC_GOTTINYPIC)))]
|
||||
+ "TARGET_ILP32"
|
||||
+ "ldr\t%w0, %L1"
|
||||
+ [(set_attr "type" "load_4")]
|
||||
)
|
||||
|
||||
(define_insn "aarch64_load_tp_hard"
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/pr94201.c b/gcc/testsuite/gcc.target/aarch64/pr94201.c
|
||||
new file mode 100644
|
||||
index 00000000000..69176169186
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/pr94201.c
|
||||
@@ -0,0 +1,13 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mcmodel=tiny -mabi=ilp32 -fPIC" } */
|
||||
+
|
||||
+extern int bar (void *);
|
||||
+extern long long a;
|
||||
+
|
||||
+int
|
||||
+foo (void)
|
||||
+{
|
||||
+ a = 1;
|
||||
+ return bar ((void *)bar);
|
||||
+}
|
||||
+
|
||||
13
fix-cost-of-plus.patch
Normal file
13
fix-cost-of-plus.patch
Normal file
@ -0,0 +1,13 @@
|
||||
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||||
index 56a4a47db73..71d44de1d0a 100644
|
||||
--- a/gcc/config/aarch64/aarch64.c
|
||||
+++ b/gcc/config/aarch64/aarch64.c
|
||||
@@ -10753,7 +10753,7 @@ cost_plus:
|
||||
}
|
||||
|
||||
if (GET_MODE_CLASS (mode) == MODE_INT
|
||||
- && ((CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
|
||||
+ && (aarch64_plus_immediate (op1, mode)
|
||||
|| aarch64_sve_addvl_addpl_immediate (op1, mode)))
|
||||
{
|
||||
*cost += rtx_cost (op0, mode, PLUS, 0, speed);
|
||||
@ -1,155 +0,0 @@
|
||||
diff -N -urp a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
|
||||
--- a/gcc/config/i386/sse.md 2019-10-30 10:02:45.894920908 +0800
|
||||
+++ b/gcc/config/i386/sse.md 2019-10-30 10:17:39.682887612 +0800
|
||||
@@ -16012,9 +16012,11 @@
|
||||
switch (INTVAL (operands[4]))
|
||||
{
|
||||
case 3:
|
||||
- return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
|
||||
+ /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
|
||||
+ gas changed what it requires incompatibly. */
|
||||
+ return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
|
||||
case 2:
|
||||
- return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
|
||||
+ return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
@@ -16057,9 +16059,11 @@
|
||||
switch (INTVAL (operands[4]))
|
||||
{
|
||||
case 3:
|
||||
- return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
|
||||
+ /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
|
||||
+ gas changed what it requires incompatibly. */
|
||||
+ return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
|
||||
case 2:
|
||||
- return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
|
||||
+ return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
@@ -16103,10 +16107,12 @@
|
||||
{
|
||||
case 3:
|
||||
case 7:
|
||||
- return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
|
||||
+ /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
|
||||
+ gas changed what it requires incompatibly. */
|
||||
+ return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
|
||||
case 2:
|
||||
case 6:
|
||||
- return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
|
||||
+ return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
@@ -16150,10 +16156,12 @@
|
||||
{
|
||||
case 3:
|
||||
case 7:
|
||||
- return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
|
||||
+ /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
|
||||
+ gas changed what it requires incompatibly. */
|
||||
+ return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
|
||||
case 2:
|
||||
case 6:
|
||||
- return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
|
||||
+ return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
@@ -19153,12 +19161,6 @@
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
|
||||
-;; Memory operand override for -masm=intel of the v*gatherq* patterns.
|
||||
-(define_mode_attr gatherq_mode
|
||||
- [(V4SI "q") (V2DI "x") (V4SF "q") (V2DF "x")
|
||||
- (V8SI "x") (V4DI "t") (V8SF "x") (V4DF "t")
|
||||
- (V16SI "t") (V8DI "g") (V16SF "t") (V8DF "g")])
|
||||
-
|
||||
(define_expand "<avx512>_gathersi<mode>"
|
||||
[(parallel [(set (match_operand:VI48F 0 "register_operand")
|
||||
(unspec:VI48F
|
||||
@@ -19192,7 +19194,9 @@
|
||||
UNSPEC_GATHER))
|
||||
(clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
|
||||
"TARGET_AVX512F"
|
||||
- "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %<xtg_mode>6}"
|
||||
+;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
|
||||
+;; gas changed what it requires incompatibly.
|
||||
+ "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
@@ -19211,7 +19215,9 @@
|
||||
UNSPEC_GATHER))
|
||||
(clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
|
||||
"TARGET_AVX512F"
|
||||
- "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<xtg_mode>5}"
|
||||
+;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
|
||||
+;; gas changed what it requires incompatibly.
|
||||
+ "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
@@ -19250,9 +19256,9 @@
|
||||
UNSPEC_GATHER))
|
||||
(clobber (match_scratch:QI 2 "=&Yk"))]
|
||||
"TARGET_AVX512F"
|
||||
-{
|
||||
- return "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %<gatherq_mode>6}";
|
||||
-}
|
||||
+;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
|
||||
+;; gas changed what it requires incompatibly.
|
||||
+ "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
@@ -19272,14 +19278,16 @@
|
||||
(clobber (match_scratch:QI 1 "=&Yk"))]
|
||||
"TARGET_AVX512F"
|
||||
{
|
||||
+ /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
|
||||
+ gas changed what it requires incompatibly. */
|
||||
if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
|
||||
{
|
||||
if (<MODE_SIZE> != 64)
|
||||
- return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %<gatherq_mode>5}";
|
||||
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
|
||||
else
|
||||
- return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %t5}";
|
||||
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
|
||||
}
|
||||
- return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %<gatherq_mode>5}";
|
||||
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
|
||||
}
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "prefix" "evex")
|
||||
@@ -19316,7 +19324,9 @@
|
||||
UNSPEC_SCATTER))
|
||||
(clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
|
||||
"TARGET_AVX512F"
|
||||
- "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
|
||||
+;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
|
||||
+;; gas changed what it requires incompatibly.
|
||||
+ "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
@@ -19352,11 +19362,9 @@
|
||||
UNSPEC_SCATTER))
|
||||
(clobber (match_scratch:QI 1 "=&Yk"))]
|
||||
"TARGET_AVX512F"
|
||||
-{
|
||||
- if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 8)
|
||||
- return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}";
|
||||
- return "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%t5%{%1%}, %3}";
|
||||
-}
|
||||
+;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
|
||||
+;; gas changed what it requires incompatibly.
|
||||
+ "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "prefix" "evex")
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
12
fix-regno-out-of-range.patch
Normal file
12
fix-regno-out-of-range.patch
Normal file
@ -0,0 +1,12 @@
|
||||
diff -Nurp a/gcc/lra-assigns.c b/gcc/lra-assigns.c
|
||||
--- a/gcc/lra-assigns.c 2020-04-17 16:27:46.192000000 +0800
|
||||
+++ b/gcc/lra-assigns.c 2020-04-17 16:29:37.125688580 +0800
|
||||
@@ -968,6 +968,8 @@ spill_for (int regno, bitmap spilled_pse
|
||||
bitmap_clear (&spill_pseudos_bitmap);
|
||||
for (j = hard_regno_nregs (hard_regno, mode) - 1; j >= 0; j--)
|
||||
{
|
||||
+ if (hard_regno + j >= FIRST_PSEUDO_REGISTER)
|
||||
+ break;
|
||||
if (try_hard_reg_pseudos_check[hard_regno + j] != curr_pseudo_check)
|
||||
continue;
|
||||
lra_assert (!bitmap_empty_p (&try_hard_reg_pseudos[hard_regno + j]));
|
||||
File diff suppressed because it is too large
Load Diff
@ -1,905 +0,0 @@
|
||||
diff -N -urp a/gcc/Makefile.in b/gcc/Makefile.in
|
||||
--- a/gcc/Makefile.in 2018-11-07 11:37:24.615223860 +0800
|
||||
+++ b/gcc/Makefile.in 2018-11-07 11:38:26.155223860 +0800
|
||||
@@ -1292,6 +1292,7 @@ OBJS = \
|
||||
gimple-iterator.o \
|
||||
gimple-fold.o \
|
||||
gimple-laddress.o \
|
||||
+ gimple-loop-jam.o \
|
||||
gimple-low.o \
|
||||
gimple-pretty-print.o \
|
||||
gimple-ssa-backprop.o \
|
||||
diff -N -urp a/gcc/cfgloop.c b/gcc/cfgloop.c
|
||||
--- a/gcc/cfgloop.c 2018-11-07 11:37:24.947223860 +0800
|
||||
+++ b/gcc/cfgloop.c 2018-11-07 11:38:26.155223860 +0800
|
||||
@@ -296,13 +296,25 @@ establish_preds (struct loop *loop, stru
|
||||
|
||||
/* Add LOOP to the loop hierarchy tree where FATHER is father of the
|
||||
added loop. If LOOP has some children, take care of that their
|
||||
- pred field will be initialized correctly. */
|
||||
+ pred field will be initialized correctly. If AFTER is non-null
|
||||
+ then it's expected it's a pointer into FATHERs inner sibling
|
||||
+ list and LOOP is added behind AFTER, otherwise it's added in front
|
||||
+ of FATHERs siblings. */
|
||||
|
||||
void
|
||||
-flow_loop_tree_node_add (struct loop *father, struct loop *loop)
|
||||
+flow_loop_tree_node_add (struct loop *father, struct loop *loop,
|
||||
+ struct loop *after)
|
||||
{
|
||||
- loop->next = father->inner;
|
||||
- father->inner = loop;
|
||||
+ if (after)
|
||||
+ {
|
||||
+ loop->next = after->next;
|
||||
+ after->next = loop;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ loop->next = father->inner;
|
||||
+ father->inner = loop;
|
||||
+ }
|
||||
|
||||
establish_preds (loop, father);
|
||||
}
|
||||
diff -N -urp a/gcc/cfgloop.h b/gcc/cfgloop.h
|
||||
--- a/gcc/cfgloop.h 2018-11-07 11:37:24.331223860 +0800
|
||||
+++ b/gcc/cfgloop.h 2018-11-07 11:38:26.155223860 +0800
|
||||
@@ -324,7 +324,8 @@ void record_loop_exits (void);
|
||||
void rescan_loop_exit (edge, bool, bool);
|
||||
|
||||
/* Loop data structure manipulation/querying. */
|
||||
-extern void flow_loop_tree_node_add (struct loop *, struct loop *);
|
||||
+extern void flow_loop_tree_node_add (struct loop *, struct loop *,
|
||||
+ struct loop * = NULL);
|
||||
extern void flow_loop_tree_node_remove (struct loop *);
|
||||
extern bool flow_loop_nested_p (const struct loop *, const struct loop *);
|
||||
extern bool flow_bb_inside_loop_p (const struct loop *, const_basic_block);
|
||||
diff -N -urp a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
|
||||
--- a/gcc/cfgloopmanip.c 2018-11-07 11:37:24.847223860 +0800
|
||||
+++ b/gcc/cfgloopmanip.c 2018-11-07 11:38:26.155223860 +0800
|
||||
@@ -1026,9 +1026,11 @@ copy_loop_info (struct loop *loop, struc
|
||||
}
|
||||
|
||||
/* Copies copy of LOOP as subloop of TARGET loop, placing newly
|
||||
- created loop into loops structure. */
|
||||
+ created loop into loops structure. If AFTER is non-null
|
||||
+ the new loop is added at AFTER->next, otherwise in front of TARGETs
|
||||
+ sibling list. */
|
||||
struct loop *
|
||||
-duplicate_loop (struct loop *loop, struct loop *target)
|
||||
+duplicate_loop (struct loop *loop, struct loop *target, struct loop *after)
|
||||
{
|
||||
struct loop *cloop;
|
||||
cloop = alloc_loop ();
|
||||
@@ -1040,36 +1042,46 @@ duplicate_loop (struct loop *loop, struc
|
||||
set_loop_copy (loop, cloop);
|
||||
|
||||
/* Add it to target. */
|
||||
- flow_loop_tree_node_add (target, cloop);
|
||||
+ flow_loop_tree_node_add (target, cloop, after);
|
||||
|
||||
return cloop;
|
||||
}
|
||||
|
||||
/* Copies structure of subloops of LOOP into TARGET loop, placing
|
||||
- newly created loops into loop tree. */
|
||||
+ newly created loops into loop tree at the end of TARGETs sibling
|
||||
+ list in the original order. */
|
||||
void
|
||||
duplicate_subloops (struct loop *loop, struct loop *target)
|
||||
{
|
||||
- struct loop *aloop, *cloop;
|
||||
+ struct loop *aloop, *cloop, *tail;
|
||||
|
||||
+ for (tail = target->inner; tail && tail->next; tail = tail->next)
|
||||
+ ;
|
||||
for (aloop = loop->inner; aloop; aloop = aloop->next)
|
||||
{
|
||||
- cloop = duplicate_loop (aloop, target);
|
||||
+ cloop = duplicate_loop (aloop, target, tail);
|
||||
+ tail = cloop;
|
||||
+ gcc_assert (!tail->next);
|
||||
duplicate_subloops (aloop, cloop);
|
||||
}
|
||||
}
|
||||
|
||||
/* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
|
||||
- into TARGET loop, placing newly created loops into loop tree. */
|
||||
+ into TARGET loop, placing newly created loops into loop tree adding
|
||||
+ them to TARGETs sibling list at the end in order. */
|
||||
static void
|
||||
copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
|
||||
{
|
||||
- struct loop *aloop;
|
||||
+ struct loop *aloop, *tail;
|
||||
int i;
|
||||
|
||||
+ for (tail = target->inner; tail && tail->next; tail = tail->next)
|
||||
+ ;
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
- aloop = duplicate_loop (copied_loops[i], target);
|
||||
+ aloop = duplicate_loop (copied_loops[i], target, tail);
|
||||
+ tail = aloop;
|
||||
+ gcc_assert (!tail->next);
|
||||
duplicate_subloops (copied_loops[i], aloop);
|
||||
}
|
||||
}
|
||||
@@ -1133,14 +1145,15 @@ set_zero_probability (edge e)
|
||||
}
|
||||
|
||||
/* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
|
||||
- loop structure and dominators. E's destination must be LOOP header for
|
||||
- this to work, i.e. it must be entry or latch edge of this loop; these are
|
||||
- unique, as the loops must have preheaders for this function to work
|
||||
- correctly (in case E is latch, the function unrolls the loop, if E is entry
|
||||
- edge, it peels the loop). Store edges created by copying ORIG edge from
|
||||
- copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to
|
||||
- original LOOP body, the other copies are numbered in order given by control
|
||||
- flow through them) into TO_REMOVE array. Returns false if duplication is
|
||||
+ loop structure and dominators (order of inner subloops is retained).
|
||||
+ E's destination must be LOOP header for this to work, i.e. it must be entry
|
||||
+ or latch edge of this loop; these are unique, as the loops must have
|
||||
+ preheaders for this function to work correctly (in case E is latch, the
|
||||
+ function unrolls the loop, if E is entry edge, it peels the loop). Store
|
||||
+ edges created by copying ORIG edge from copies corresponding to set bits in
|
||||
+ WONT_EXIT bitmap (bit 0 corresponds to original LOOP body, the other copies
|
||||
+ are numbered in order given by control flow through them) into TO_REMOVE
|
||||
+ array. Returns false if duplication is
|
||||
impossible. */
|
||||
|
||||
bool
|
||||
diff -N -urp a/gcc/cfgloopmanip.h b/gcc/cfgloopmanip.h
|
||||
--- a/gcc/cfgloopmanip.h 2018-11-07 11:37:24.939223860 +0800
|
||||
+++ b/gcc/cfgloopmanip.h 2018-11-07 11:38:26.155223860 +0800
|
||||
@@ -47,7 +47,8 @@ extern struct loop *loopify (edge, edge,
|
||||
unsigned, unsigned);
|
||||
extern void unloop (struct loop *, bool *, bitmap);
|
||||
extern void copy_loop_info (struct loop *loop, struct loop *target);
|
||||
-extern struct loop * duplicate_loop (struct loop *, struct loop *);
|
||||
+extern struct loop * duplicate_loop (struct loop *, struct loop *,
|
||||
+ struct loop * = NULL);
|
||||
extern void duplicate_subloops (struct loop *, struct loop *);
|
||||
extern bool can_duplicate_loop_p (const struct loop *loop);
|
||||
extern bool duplicate_loop_to_header_edge (struct loop *, edge,
|
||||
diff -N -urp a/gcc/common.opt b/gcc/common.opt
|
||||
--- a/gcc/common.opt 2018-11-07 11:37:24.859223860 +0800
|
||||
+++ b/gcc/common.opt 2018-11-07 11:38:26.159223860 +0800
|
||||
@@ -1496,8 +1496,8 @@ Common Alias(floop-nest-optimize)
|
||||
Enable loop nest transforms. Same as -floop-nest-optimize.
|
||||
|
||||
floop-unroll-and-jam
|
||||
-Common Alias(floop-nest-optimize)
|
||||
-Enable loop nest transforms. Same as -floop-nest-optimize.
|
||||
+Common Report Var(flag_unroll_jam) Optimization
|
||||
+Perform unroll-and-jam on loops.
|
||||
|
||||
fgnu-tm
|
||||
Common Report Var(flag_tm)
|
||||
diff -N -urp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
--- a/gcc/doc/invoke.texi 2018-11-07 11:37:24.915223860 +0800
|
||||
+++ b/gcc/doc/invoke.texi 2018-11-07 11:39:49.031223860 +0800
|
||||
@@ -7120,7 +7120,8 @@ Optimize yet more. @option{-O3} turns o
|
||||
by @option{-O2} and also turns on the @option{-finline-functions},
|
||||
@option{-funswitch-loops}, @option{-fpredictive-commoning},
|
||||
@option{-fgcse-after-reload}, @option{-ftree-loop-vectorize},
|
||||
-@option{-ftree-loop-distribute-patterns}, @option{-fsplit-paths}
|
||||
+@option{-ftree-loop-distribute-patterns}, @option{-fsplit-paths},
|
||||
+@option{-floop-unroll-and-jam},
|
||||
@option{-ftree-slp-vectorize}, @option{-fvect-cost-model},
|
||||
@option{-ftree-partial-pre}, @option{-fpeel-loops}
|
||||
and @option{-fipa-cp-clone} options.
|
||||
@@ -8226,12 +8227,10 @@ at @option{-O} and higher.
|
||||
@itemx -floop-interchange
|
||||
@itemx -floop-strip-mine
|
||||
@itemx -floop-block
|
||||
-@itemx -floop-unroll-and-jam
|
||||
@opindex ftree-loop-linear
|
||||
@opindex floop-interchange
|
||||
@opindex floop-strip-mine
|
||||
@opindex floop-block
|
||||
-@opindex floop-unroll-and-jam
|
||||
Perform loop nest optimizations. Same as
|
||||
@option{-floop-nest-optimize}. To use this code transformation, GCC has
|
||||
to be configured with @option{--with-isl} to enable the Graphite loop
|
||||
@@ -8323,6 +8322,12 @@ ENDDO
|
||||
@end smallexample
|
||||
and the initialization loop is transformed into a call to memset zero.
|
||||
|
||||
+@item -floop-unroll-and-jam
|
||||
+@opindex floop-unroll-and-jam
|
||||
+Apply unroll and jam transformations on feasible loops. In a loop
|
||||
+nest this unrolls the outer loop by some factor and fuses the resulting
|
||||
+multiple inner loops. This flag is enabled by default at @option{-O3}.
|
||||
+
|
||||
@item -ftree-loop-im
|
||||
@opindex ftree-loop-im
|
||||
Perform loop invariant motion on trees. This pass moves only invariants that
|
||||
@@ -10353,13 +10358,13 @@ loop in the loop nest by a given number
|
||||
length can be changed using the @option{loop-block-tile-size}
|
||||
parameter. The default value is 51 iterations.
|
||||
|
||||
-@item loop-unroll-jam-size
|
||||
-Specify the unroll factor for the @option{-floop-unroll-and-jam} option. The
|
||||
-default value is 4.
|
||||
-
|
||||
-@item loop-unroll-jam-depth
|
||||
-Specify the dimension to be unrolled (counting from the most inner loop)
|
||||
-for the @option{-floop-unroll-and-jam}. The default value is 2.
|
||||
+@item unroll-jam-min-percent
|
||||
+The minimum percentage of memory references that must be optimized
|
||||
+away for the unroll-and-jam transformation to be considered profitable.
|
||||
+
|
||||
+@item unroll-jam-max-unroll
|
||||
+The maximum number of times the outer loop should be unrolled by
|
||||
+the unroll-and-jam transformation.
|
||||
|
||||
@item ipa-cp-value-list-size
|
||||
IPA-CP attempts to track all possible values and types passed to a function's
|
||||
diff -N -urp a/gcc/gimple-loop-jam.c b/gcc/gimple-loop-jam.c
|
||||
--- a/gcc/gimple-loop-jam.c 1970-01-01 08:00:00.000000000 +0800
|
||||
+++ b/gcc/gimple-loop-jam.c 2018-11-07 11:38:26.167223860 +0800
|
||||
@@ -0,0 +1,598 @@
|
||||
+/* Loop unroll-and-jam.
|
||||
+ Copyright (C) 2017-2018 Free Software Foundation, Inc.
|
||||
+
|
||||
+This file is part of GCC.
|
||||
+
|
||||
+GCC is free software; you can redistribute it and/or modify it
|
||||
+under the terms of the GNU General Public License as published by the
|
||||
+Free Software Foundation; either version 3, or (at your option) any
|
||||
+later version.
|
||||
+
|
||||
+GCC is distributed in the hope that it will be useful, but WITHOUT
|
||||
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
+for more details.
|
||||
+
|
||||
+You should have received a copy of the GNU General Public License
|
||||
+along with GCC; see the file COPYING3. If not see
|
||||
+<http://www.gnu.org/licenses/>. */
|
||||
+
|
||||
+#include "config.h"
|
||||
+#include "system.h"
|
||||
+#include "coretypes.h"
|
||||
+#include "params.h"
|
||||
+#include "tree-pass.h"
|
||||
+#include "backend.h"
|
||||
+#include "tree.h"
|
||||
+#include "gimple.h"
|
||||
+#include "ssa.h"
|
||||
+#include "fold-const.h"
|
||||
+#include "tree-cfg.h"
|
||||
+#include "tree-ssa.h"
|
||||
+#include "tree-ssa-loop-niter.h"
|
||||
+#include "tree-ssa-loop.h"
|
||||
+#include "tree-ssa-loop-manip.h"
|
||||
+#include "cfgloop.h"
|
||||
+#include "tree-scalar-evolution.h"
|
||||
+#include "gimple-iterator.h"
|
||||
+#include "cfghooks.h"
|
||||
+#include "tree-data-ref.h"
|
||||
+#include "tree-ssa-loop-ivopts.h"
|
||||
+#include "tree-vectorizer.h"
|
||||
+
|
||||
+/* Unroll and Jam transformation
|
||||
+
|
||||
+ This is a combination of two transformations, where the second
|
||||
+ is not always valid. It's applicable if a loop nest has redundancies
|
||||
+ over the iterations of an outer loop while not having that with
|
||||
+ an inner loop.
|
||||
+
|
||||
+ Given this nest:
|
||||
+ for (i) {
|
||||
+ for (j) {
|
||||
+ B (i,j)
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ first unroll:
|
||||
+ for (i by 2) {
|
||||
+ for (j) {
|
||||
+ B (i,j)
|
||||
+ }
|
||||
+ for (j) {
|
||||
+ B (i+1,j)
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ then fuse the two adjacent inner loops resulting from that:
|
||||
+ for (i by 2) {
|
||||
+ for (j) {
|
||||
+ B (i,j)
|
||||
+ B (i+1,j)
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ As the order of evaluations of the body B changes this is valid
|
||||
+ only in certain situations: all distance vectors need to be forward.
|
||||
+ Additionally if there are more induction variables than just
|
||||
+ a counting control IV (j above) we can also deal with some situations.
|
||||
+
|
||||
+ The validity is checked by unroll_jam_possible_p, and the data-dep
|
||||
+ testing below.
|
||||
+
|
||||
+ A trivial example where the fusion is wrong would be when
|
||||
+ B (i,j) == x[j-1] = x[j];
|
||||
+ for (i by 2) {
|
||||
+ for (j) {
|
||||
+ x[j-1] = x[j];
|
||||
+ }
|
||||
+ for (j) {
|
||||
+ x[j-1] = x[j];
|
||||
+ }
|
||||
+ } effect: move content to front by two elements
|
||||
+ -->
|
||||
+ for (i by 2) {
|
||||
+ for (j) {
|
||||
+ x[j-1] = x[j];
|
||||
+ x[j-1] = x[j];
|
||||
+ }
|
||||
+ } effect: move content to front by one element
|
||||
+*/
|
||||
+
|
||||
+/* Modify the loop tree for the fact that all code once belonging
|
||||
+ to the OLD loop or the outer loop of OLD now is inside LOOP. */
|
||||
+
|
||||
+static void
|
||||
+merge_loop_tree (struct loop *loop, struct loop *old)
|
||||
+{
|
||||
+ basic_block *bbs;
|
||||
+ int i, n;
|
||||
+ struct loop *subloop;
|
||||
+ edge e;
|
||||
+ edge_iterator ei;
|
||||
+
|
||||
+ /* Find its nodes. */
|
||||
+ bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
|
||||
+ n = get_loop_body_with_size (loop, bbs, n_basic_blocks_for_fn (cfun));
|
||||
+
|
||||
+ for (i = 0; i < n; i++)
|
||||
+ {
|
||||
+ /* If the block was direct child of OLD loop it's now part
|
||||
+ of LOOP. If it was outside OLD, then it moved into LOOP
|
||||
+ as well. This avoids changing the loop father for BBs
|
||||
+ in inner loops of OLD. */
|
||||
+ if (bbs[i]->loop_father == old
|
||||
+ || loop_depth (bbs[i]->loop_father) < loop_depth (old))
|
||||
+ {
|
||||
+ remove_bb_from_loops (bbs[i]);
|
||||
+ add_bb_to_loop (bbs[i], loop);
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ /* If we find a direct subloop of OLD, move it to LOOP. */
|
||||
+ subloop = bbs[i]->loop_father;
|
||||
+ if (loop_outer (subloop) == old && subloop->header == bbs[i])
|
||||
+ {
|
||||
+ flow_loop_tree_node_remove (subloop);
|
||||
+ flow_loop_tree_node_add (loop, subloop);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /* Update the information about loop exit edges. */
|
||||
+ for (i = 0; i < n; i++)
|
||||
+ {
|
||||
+ FOR_EACH_EDGE (e, ei, bbs[i]->succs)
|
||||
+ {
|
||||
+ rescan_loop_exit (e, false, false);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ loop->num_nodes = n;
|
||||
+
|
||||
+ free (bbs);
|
||||
+}
|
||||
+
|
||||
+/* BB is part of the outer loop of an unroll-and-jam situation.
|
||||
+ Check if any statements therein would prevent the transformation. */
|
||||
+
|
||||
+static bool
|
||||
+bb_prevents_fusion_p (basic_block bb)
|
||||
+{
|
||||
+ gimple_stmt_iterator gsi;
|
||||
+ /* BB is duplicated by outer unrolling and then all N-1 first copies
|
||||
+ move into the body of the fused inner loop. If BB exits the outer loop
|
||||
+ the last copy still does so, and the first N-1 copies are cancelled
|
||||
+ by loop unrolling, so also after fusion it's the exit block.
|
||||
+ But there might be other reasons that prevent fusion:
|
||||
+ * stores or unknown side-effects prevent fusion
|
||||
+ * loads don't
|
||||
+ * computations into SSA names: these aren't problematic. Their
|
||||
+ result will be unused on the exit edges of the first N-1 copies
|
||||
+ (those aren't taken after unrolling). If they are used on the
|
||||
+ other edge (the one leading to the outer latch block) they are
|
||||
+ loop-carried (on the outer loop) and the Nth copy of BB will
|
||||
+ compute them again (i.e. the first N-1 copies will be dead). */
|
||||
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
||||
+ {
|
||||
+ gimple *g = gsi_stmt (gsi);
|
||||
+ if (gimple_vdef (g) || gimple_has_side_effects (g))
|
||||
+ return true;
|
||||
+ }
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+/* Given an inner loop LOOP (of some OUTER loop) determine if
|
||||
+ we can safely fuse copies of it (generated by outer unrolling).
|
||||
+ If so return true, otherwise return false. */
|
||||
+
|
||||
+static bool
|
||||
+unroll_jam_possible_p (struct loop *outer, struct loop *loop)
|
||||
+{
|
||||
+ basic_block *bbs;
|
||||
+ int i, n;
|
||||
+ struct tree_niter_desc niter;
|
||||
+
|
||||
+ /* When fusing the loops we skip the latch block
|
||||
+ of the first one, so it mustn't have any effects to
|
||||
+ preserve. */
|
||||
+ if (!empty_block_p (loop->latch))
|
||||
+ return false;
|
||||
+
|
||||
+ if (!single_exit (loop))
|
||||
+ return false;
|
||||
+
|
||||
+ /* We need a perfect nest. Quick check for adjacent inner loops. */
|
||||
+ if (outer->inner != loop || loop->next)
|
||||
+ return false;
|
||||
+
|
||||
+ /* Prevent head-controlled inner loops, that we usually have.
|
||||
+ The guard block would need to be accepted
|
||||
+ (invariant condition either entering or skipping the loop),
|
||||
+ without also accepting arbitrary control flow. When unswitching
|
||||
+ ran before us (as with -O3) this won't be a problem because its
|
||||
+ outer loop unswitching will have moved out the invariant condition.
|
||||
+
|
||||
+ If we do that we need to extend fuse_loops () to cope with this
|
||||
+ by threading through the (still invariant) copied condition
|
||||
+ between the two loop copies. */
|
||||
+ if (!dominated_by_p (CDI_DOMINATORS, outer->latch, loop->header))
|
||||
+ return false;
|
||||
+
|
||||
+ /* The number of iterations of the inner loop must be loop invariant
|
||||
+ with respect to the outer loop. */
|
||||
+ if (!number_of_iterations_exit (loop, single_exit (loop), &niter,
|
||||
+ false, true)
|
||||
+ || niter.cmp == ERROR_MARK
|
||||
+ || !integer_zerop (niter.may_be_zero)
|
||||
+ || !expr_invariant_in_loop_p (outer, niter.niter))
|
||||
+ return false;
|
||||
+
|
||||
+ /* If the inner loop produces any values that are used inside the
|
||||
+ outer loop (except the virtual op) then it can flow
|
||||
+ back (perhaps indirectly) into the inner loop. This prevents
|
||||
+ fusion: without fusion the value at the last iteration is used,
|
||||
+ with fusion the value after the initial iteration is used.
|
||||
+
|
||||
+ If all uses are outside the outer loop this doesn't prevent fusion;
|
||||
+ the value of the last iteration is still used (and the values from
|
||||
+ all intermediate iterations are dead). */
|
||||
+ gphi_iterator psi;
|
||||
+ for (psi = gsi_start_phis (single_exit (loop)->dest);
|
||||
+ !gsi_end_p (psi); gsi_next (&psi))
|
||||
+ {
|
||||
+ imm_use_iterator imm_iter;
|
||||
+ use_operand_p use_p;
|
||||
+ tree op = gimple_phi_result (psi.phi ());
|
||||
+ if (virtual_operand_p (op))
|
||||
+ continue;
|
||||
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, op)
|
||||
+ {
|
||||
+ gimple *use_stmt = USE_STMT (use_p);
|
||||
+ if (!is_gimple_debug (use_stmt)
|
||||
+ && flow_bb_inside_loop_p (outer, gimple_bb (use_stmt)))
|
||||
+ return false;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /* And check blocks belonging to just outer loop. */
|
||||
+ bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
|
||||
+ n = get_loop_body_with_size (outer, bbs, n_basic_blocks_for_fn (cfun));
|
||||
+
|
||||
+ for (i = 0; i < n; i++)
|
||||
+ if (bbs[i]->loop_father == outer && bb_prevents_fusion_p (bbs[i]))
|
||||
+ break;
|
||||
+ free (bbs);
|
||||
+ if (i != n)
|
||||
+ return false;
|
||||
+
|
||||
+ /* For now we can safely fuse copies of LOOP only if all
|
||||
+ loop carried variables are inductions (or the virtual op).
|
||||
+
|
||||
+ We could handle reductions as well (the initial value in the second
|
||||
+ body would be the after-iter value of the first body) if it's over
|
||||
+ an associative and commutative operation. We wouldn't
|
||||
+ be able to handle unknown cycles. */
|
||||
+ for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
|
||||
+ {
|
||||
+ affine_iv iv;
|
||||
+ tree op = gimple_phi_result (psi.phi ());
|
||||
+
|
||||
+ if (virtual_operand_p (op))
|
||||
+ continue;
|
||||
+ if (!simple_iv (loop, loop, op, &iv, true))
|
||||
+ return false;
|
||||
+ /* The inductions must be regular, loop invariant step and initial
|
||||
+ value. */
|
||||
+ if (!expr_invariant_in_loop_p (outer, iv.step)
|
||||
+ || !expr_invariant_in_loop_p (outer, iv.base))
|
||||
+ return false;
|
||||
+ /* XXX With more effort we could also be able to deal with inductions
|
||||
+ where the initial value is loop variant but a simple IV in the
|
||||
+ outer loop. The initial value for the second body would be
|
||||
+ the original initial value plus iv.base.step. The next value
|
||||
+ for the fused loop would be the original next value of the first
|
||||
+ copy, _not_ the next value of the second body. */
|
||||
+ }
|
||||
+
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+/* Fuse LOOP with all further neighbors. The loops are expected to
|
||||
+ be in appropriate form. */
|
||||
+
|
||||
+static void
|
||||
+fuse_loops (struct loop *loop)
|
||||
+{
|
||||
+ struct loop *next = loop->next;
|
||||
+
|
||||
+ while (next)
|
||||
+ {
|
||||
+ edge e;
|
||||
+
|
||||
+ remove_branch (single_pred_edge (loop->latch));
|
||||
+ /* Make delete_basic_block not fiddle with the loop structure. */
|
||||
+ basic_block oldlatch = loop->latch;
|
||||
+ loop->latch = NULL;
|
||||
+ delete_basic_block (oldlatch);
|
||||
+ e = redirect_edge_and_branch (loop_latch_edge (next),
|
||||
+ loop->header);
|
||||
+ loop->latch = e->src;
|
||||
+ flush_pending_stmts (e);
|
||||
+
|
||||
+ gcc_assert (EDGE_COUNT (next->header->preds) == 1);
|
||||
+
|
||||
+ /* The PHI nodes of the second body (single-argument now)
|
||||
+ need adjustments to use the right values: either directly
|
||||
+ the value of the corresponding PHI in the first copy or
|
||||
+ the one leaving the first body which unrolling did for us.
|
||||
+
|
||||
+ See also unroll_jam_possible_p () for further possibilities. */
|
||||
+ gphi_iterator psi_first, psi_second;
|
||||
+ e = single_pred_edge (next->header);
|
||||
+ for (psi_first = gsi_start_phis (loop->header),
|
||||
+ psi_second = gsi_start_phis (next->header);
|
||||
+ !gsi_end_p (psi_first);
|
||||
+ gsi_next (&psi_first), gsi_next (&psi_second))
|
||||
+ {
|
||||
+ gphi *phi_first = psi_first.phi ();
|
||||
+ gphi *phi_second = psi_second.phi ();
|
||||
+ tree firstop = gimple_phi_result (phi_first);
|
||||
+ /* The virtual operand is correct already as it's
|
||||
+ always live at exit, hence has a LCSSA node and outer
|
||||
+ loop unrolling updated SSA form. */
|
||||
+ if (virtual_operand_p (firstop))
|
||||
+ continue;
|
||||
+
|
||||
+ /* Due to unroll_jam_possible_p () we know that this is
|
||||
+ an induction. The second body goes over the same
|
||||
+ iteration space. */
|
||||
+ add_phi_arg (phi_second, firstop, e,
|
||||
+ gimple_location (phi_first));
|
||||
+ }
|
||||
+ gcc_assert (gsi_end_p (psi_second));
|
||||
+
|
||||
+ merge_loop_tree (loop, next);
|
||||
+ gcc_assert (!next->num_nodes);
|
||||
+ struct loop *ln = next->next;
|
||||
+ delete_loop (next);
|
||||
+ next = ln;
|
||||
+ }
|
||||
+ rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
|
||||
+}
|
||||
+
|
||||
+/* Returns true if the distance in DDR can be determined and adjusts
|
||||
+ the unroll factor in *UNROLL to make unrolling valid for that distance.
|
||||
+ Otherwise return false.
|
||||
+
|
||||
+ If this data dep can lead to a removed memory reference, increment
|
||||
+ *REMOVED and adjust *PROFIT_UNROLL to be the necessary unroll factor
|
||||
+ for this to happen. */
|
||||
+
|
||||
+static bool
|
||||
+adjust_unroll_factor (struct data_dependence_relation *ddr,
|
||||
+ unsigned *unroll, unsigned *profit_unroll,
|
||||
+ unsigned *removed)
|
||||
+{
|
||||
+ bool ret = false;
|
||||
+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
|
||||
+ {
|
||||
+ if (DDR_NUM_DIST_VECTS (ddr) == 0)
|
||||
+ return false;
|
||||
+ unsigned i;
|
||||
+ lambda_vector dist_v;
|
||||
+ FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
|
||||
+ {
|
||||
+ /* A distance (a,b) is at worst transformed into (a/N,b) by the
|
||||
+ unrolling (factor N), so the transformation is valid if
|
||||
+ a >= N, or b > 0, or b is zero and a > 0. Otherwise the unroll
|
||||
+ factor needs to be limited so that the first condition holds.
|
||||
+ That may limit the factor down to zero in the worst case. */
|
||||
+ int dist = dist_v[0];
|
||||
+ if (dist < 0)
|
||||
+ gcc_unreachable ();
|
||||
+ else if ((unsigned)dist >= *unroll)
|
||||
+ ;
|
||||
+ else if (lambda_vector_lexico_pos (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)
|
||||
+ || (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1)
|
||||
+ && dist > 0))
|
||||
+ ;
|
||||
+ else
|
||||
+ *unroll = dist;
|
||||
+
|
||||
+ /* With a distance (a,0) it's always profitable to unroll-and-jam
|
||||
+ (by a+1), because one memory reference will go away. With
|
||||
+ (a,b) and b != 0 that's less clear. We will increase the
|
||||
+ number of streams without lowering the number of mem refs.
|
||||
+ So for now only handle the first situation. */
|
||||
+ if (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
|
||||
+ {
|
||||
+ *profit_unroll = MAX (*profit_unroll, (unsigned)dist + 1);
|
||||
+ (*removed)++;
|
||||
+ }
|
||||
+
|
||||
+ ret = true;
|
||||
+ }
|
||||
+ }
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+/* Main entry point for the unroll-and-jam transformation
|
||||
+ described above. */
|
||||
+
|
||||
+static unsigned int
|
||||
+tree_loop_unroll_and_jam (void)
|
||||
+{
|
||||
+ struct loop *loop;
|
||||
+ bool changed = false;
|
||||
+
|
||||
+ gcc_assert (scev_initialized_p ());
|
||||
+
|
||||
+ /* Go through all innermost loops. */
|
||||
+ FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
|
||||
+ {
|
||||
+ struct loop *outer = loop_outer (loop);
|
||||
+
|
||||
+ if (loop_depth (loop) < 2
|
||||
+ || optimize_loop_nest_for_size_p (outer))
|
||||
+ continue;
|
||||
+
|
||||
+ if (!unroll_jam_possible_p (outer, loop))
|
||||
+ continue;
|
||||
+
|
||||
+ vec<data_reference_p> datarefs;
|
||||
+ vec<ddr_p> dependences;
|
||||
+ unsigned unroll_factor, profit_unroll, removed;
|
||||
+ struct tree_niter_desc desc;
|
||||
+ bool unroll = false;
|
||||
+
|
||||
+ auto_vec<loop_p, 3> loop_nest;
|
||||
+ dependences.create (10);
|
||||
+ datarefs.create (10);
|
||||
+ if (!compute_data_dependences_for_loop (outer, true, &loop_nest,
|
||||
+ &datarefs, &dependences))
|
||||
+ {
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file, "Cannot analyze data dependencies\n");
|
||||
+ free_data_refs (datarefs);
|
||||
+ free_dependence_relations (dependences);
|
||||
+ return false;
|
||||
+ }
|
||||
+ if (!datarefs.length ())
|
||||
+ continue;
|
||||
+
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ dump_data_dependence_relations (dump_file, dependences);
|
||||
+
|
||||
+ unroll_factor = (unsigned)-1;
|
||||
+ profit_unroll = 1;
|
||||
+ removed = 0;
|
||||
+
|
||||
+ /* Check all dependencies. */
|
||||
+ unsigned i;
|
||||
+ struct data_dependence_relation *ddr;
|
||||
+ FOR_EACH_VEC_ELT (dependences, i, ddr)
|
||||
+ {
|
||||
+ struct data_reference *dra, *drb;
|
||||
+
|
||||
+ /* If the refs are independent there's nothing to do. */
|
||||
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
|
||||
+ continue;
|
||||
+ dra = DDR_A (ddr);
|
||||
+ drb = DDR_B (ddr);
|
||||
+ /* Nothing interesting for the self dependencies. */
|
||||
+ if (dra == drb)
|
||||
+ continue;
|
||||
+
|
||||
+ /* Now check the distance vector, for determining a sensible
|
||||
+ outer unroll factor, and for validity of merging the inner
|
||||
+ loop copies. */
|
||||
+ if (!adjust_unroll_factor (ddr, &unroll_factor, &profit_unroll,
|
||||
+ &removed))
|
||||
+ {
|
||||
+ /* Couldn't get the distance vector. For two reads that's
|
||||
+ harmless (we assume we should unroll). For at least
|
||||
+ one write this means we can't check the dependence direction
|
||||
+ and hence can't determine safety. */
|
||||
+
|
||||
+ if (DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
|
||||
+ {
|
||||
+ unroll_factor = 0;
|
||||
+ break;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /* We regard a user-specified minimum percentage of zero as a request
|
||||
+ to ignore all profitability concerns and apply the transformation
|
||||
+ always. */
|
||||
+ if (!PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT))
|
||||
+ profit_unroll = 2;
|
||||
+ else if (removed * 100 / datarefs.length ()
|
||||
+ < (unsigned)PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT))
|
||||
+ profit_unroll = 1;
|
||||
+ if (unroll_factor > profit_unroll)
|
||||
+ unroll_factor = profit_unroll;
|
||||
+ if (unroll_factor > (unsigned)PARAM_VALUE (PARAM_UNROLL_JAM_MAX_UNROLL))
|
||||
+ unroll_factor = PARAM_VALUE (PARAM_UNROLL_JAM_MAX_UNROLL);
|
||||
+ unroll = (unroll_factor > 1
|
||||
+ && can_unroll_loop_p (outer, unroll_factor, &desc));
|
||||
+
|
||||
+ if (unroll)
|
||||
+ {
|
||||
+ if (dump_enabled_p ())
|
||||
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
|
||||
+ find_loop_location (outer),
|
||||
+ "applying unroll and jam with factor %d\n",
|
||||
+ unroll_factor);
|
||||
+ initialize_original_copy_tables ();
|
||||
+ tree_unroll_loop (outer, unroll_factor, single_dom_exit (outer),
|
||||
+ &desc);
|
||||
+ free_original_copy_tables ();
|
||||
+ fuse_loops (outer->inner);
|
||||
+ changed = true;
|
||||
+ }
|
||||
+
|
||||
+ loop_nest.release ();
|
||||
+ free_dependence_relations (dependences);
|
||||
+ free_data_refs (datarefs);
|
||||
+ }
|
||||
+
|
||||
+ if (changed)
|
||||
+ {
|
||||
+ scev_reset ();
|
||||
+ free_dominance_info (CDI_DOMINATORS);
|
||||
+ return TODO_cleanup_cfg;
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* Pass boilerplate. */
|
||||
+
|
||||
+namespace {
|
||||
+
|
||||
+const pass_data pass_data_loop_jam =
|
||||
+{
|
||||
+ GIMPLE_PASS, /* type. */
|
||||
+ "unrolljam", /* name. */
|
||||
+ OPTGROUP_LOOP, /* optinfo_flags. */
|
||||
+ TV_LOOP_JAM, /* tv_id. */
|
||||
+ PROP_cfg, /* properties_required. */
|
||||
+ 0, /* properties_provided. */
|
||||
+ 0, /* properties_destroyed. */
|
||||
+ 0, /* todo_flags_start. */
|
||||
+ 0, /* todo_flags_finish. */
|
||||
+};
|
||||
+
|
||||
+class pass_loop_jam : public gimple_opt_pass
|
||||
+{
|
||||
+public:
|
||||
+ pass_loop_jam (gcc::context *ctxt)
|
||||
+ : gimple_opt_pass (pass_data_loop_jam, ctxt)
|
||||
+ {}
|
||||
+
|
||||
+ /* opt_pass methods: */
|
||||
+ virtual bool gate (function *)
|
||||
+ {
|
||||
+ return flag_unroll_jam != 0;
|
||||
+ }
|
||||
+ virtual unsigned int execute (function *);
|
||||
+
|
||||
+};
|
||||
+
|
||||
+unsigned int
|
||||
+pass_loop_jam::execute (function *fun)
|
||||
+{
|
||||
+ if (number_of_loops (fun) <= 1)
|
||||
+ return 0;
|
||||
+
|
||||
+ return tree_loop_unroll_and_jam ();
|
||||
+}
|
||||
+
|
||||
+}
|
||||
+
|
||||
+gimple_opt_pass *
|
||||
+make_pass_loop_jam (gcc::context *ctxt)
|
||||
+{
|
||||
+ return new pass_loop_jam (ctxt);
|
||||
+}
|
||||
+
|
||||
diff -N -urp a/gcc/opts.c b/gcc/opts.c
|
||||
--- a/gcc/opts.c 2018-11-07 11:37:24.891223860 +0800
|
||||
+++ b/gcc/opts.c 2018-11-07 11:38:26.171223860 +0800
|
||||
@@ -534,6 +534,7 @@ static const struct default_options defa
|
||||
{ OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_finline_functions_called_once, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_fsplit_loops, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_funswitch_loops, NULL, 1 },
|
||||
+ { OPT_LEVELS_3_PLUS, OPT_floop_unroll_and_jam, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
|
||||
{ OPT_LEVELS_3_PLUS, OPT_ftree_slp_vectorize, NULL, 1 },
|
||||
diff -N -urp a/gcc/params.def b/gcc/params.def
|
||||
--- a/gcc/params.def 2018-11-07 11:37:27.543223860 +0800
|
||||
+++ b/gcc/params.def 2018-11-07 11:38:26.171223860 +0800
|
||||
@@ -1280,6 +1280,16 @@ DEFPARAM (PARAM_VECT_EPILOGUES_NOMASK,
|
||||
"Enable loop epilogue vectorization using smaller vector size.",
|
||||
0, 0, 1)
|
||||
|
||||
+DEFPARAM (PARAM_UNROLL_JAM_MIN_PERCENT,
|
||||
+ "unroll-jam-min-percent",
|
||||
+ "Minimum percentage of memrefs that must go away for unroll-and-jam to be considered profitable.",
|
||||
+ 1, 0, 100)
|
||||
+
|
||||
+DEFPARAM (PARAM_UNROLL_JAM_MAX_UNROLL,
|
||||
+ "unroll-jam-max-unroll",
|
||||
+ "Maximum unroll factor for the unroll-and-jam transformation.",
|
||||
+ 4, 0, 0)
|
||||
+
|
||||
/*
|
||||
|
||||
Local variables:
|
||||
diff -N -urp a/gcc/passes.def b/gcc/passes.def
|
||||
--- a/gcc/passes.def 2018-11-07 11:37:24.859223860 +0800
|
||||
+++ b/gcc/passes.def 2018-11-07 11:38:26.171223860 +0800
|
||||
@@ -272,6 +272,7 @@ along with GCC; see the file COPYING3.
|
||||
NEXT_PASS (pass_tree_unswitch);
|
||||
NEXT_PASS (pass_scev_cprop);
|
||||
NEXT_PASS (pass_loop_split);
|
||||
+ NEXT_PASS (pass_loop_jam);
|
||||
/* All unswitching, final value replacement and splitting can expose
|
||||
empty loops. Remove them now. */
|
||||
NEXT_PASS (pass_cd_dce);
|
||||
diff -N -urp a/gcc/timevar.def b/gcc/timevar.def
|
||||
--- a/gcc/timevar.def 2018-11-07 11:37:24.935223860 +0800
|
||||
+++ b/gcc/timevar.def 2018-11-07 11:38:26.175223860 +0800
|
||||
@@ -186,6 +186,7 @@ DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "
|
||||
DEFTIMEVAR (TV_SCEV_CONST , "scev constant prop")
|
||||
DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching")
|
||||
DEFTIMEVAR (TV_LOOP_SPLIT , "loop splitting")
|
||||
+DEFTIMEVAR (TV_LOOP_JAM , "unroll and jam")
|
||||
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
|
||||
DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
|
||||
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
|
||||
diff -N -urp a/gcc/tree-pass.h b/gcc/tree-pass.h
|
||||
--- a/gcc/tree-pass.h 2018-11-07 11:37:24.887223860 +0800
|
||||
+++ b/gcc/tree-pass.h 2018-11-07 11:38:26.175223860 +0800
|
||||
@@ -369,6 +369,7 @@ extern gimple_opt_pass *make_pass_tree_l
|
||||
extern gimple_opt_pass *make_pass_lim (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_tree_unswitch (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_loop_split (gcc::context *ctxt);
|
||||
+extern gimple_opt_pass *make_pass_loop_jam (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_predcom (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_iv_canon (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_scev_cprop (gcc::context *ctxt);
|
||||
@ -1,768 +0,0 @@
|
||||
diff -N -urp a/gcc/combine-stack-adj.c b/gcc/combine-stack-adj.c
|
||||
--- a/gcc/combine-stack-adj.c 2017-01-20 08:05:30.925466000 +0800
|
||||
+++ b/gcc/combine-stack-adj.c 2019-01-10 17:10:16.606528459 +0800
|
||||
@@ -508,6 +508,8 @@ combine_stack_adjustments_for_block (bas
|
||||
continue;
|
||||
|
||||
set = single_set_for_csa (insn);
|
||||
+ if (set && find_reg_note (insn, REG_STACK_CHECK, NULL_RTX))
|
||||
+ set = NULL_RTX;
|
||||
if (set)
|
||||
{
|
||||
rtx dest = SET_DEST (set);
|
||||
diff -N -urp a/gcc/common.opt b/gcc/common.opt
|
||||
--- a/gcc/common.opt 2019-01-10 13:33:20.926185828 +0800
|
||||
+++ b/gcc/common.opt 2019-01-10 16:37:35.238476827 +0800
|
||||
@@ -2336,13 +2336,18 @@ Common Report Var(flag_variable_expansio
|
||||
Apply variable expansion when loops are unrolled.
|
||||
|
||||
fstack-check=
|
||||
-Common Report RejectNegative Joined
|
||||
+Common Report RejectNegative Joined Optimization
|
||||
-fstack-check=[no|generic|specific] Insert stack checking code into the program.
|
||||
|
||||
fstack-check
|
||||
Common Alias(fstack-check=, specific, no)
|
||||
Insert stack checking code into the program. Same as -fstack-check=specific.
|
||||
|
||||
+fstack-clash-protection
|
||||
+Common Report Var(flag_stack_clash_protection) Optimization
|
||||
+Insert code to probe each page of stack space as it is allocated to protect
|
||||
+from stack-clash style attacks.
|
||||
+
|
||||
fstack-limit
|
||||
Common Var(common_deferred_options) Defer
|
||||
|
||||
diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||||
--- a/gcc/config/aarch64/aarch64.c 2019-01-10 13:33:20.914185828 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64.c 2019-01-11 14:12:22.248521895 +0800
|
||||
@@ -3881,12 +3881,14 @@ aarch64_expand_prologue (void)
|
||||
{
|
||||
if (crtl->is_leaf && !cfun->calls_alloca)
|
||||
{
|
||||
- if (frame_size > PROBE_INTERVAL && frame_size > STACK_CHECK_PROTECT)
|
||||
- aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT,
|
||||
- frame_size - STACK_CHECK_PROTECT);
|
||||
+ if (frame_size > PROBE_INTERVAL
|
||||
+ && frame_size > get_stack_check_protect ())
|
||||
+ aarch64_emit_probe_stack_range (get_stack_check_protect (),
|
||||
+ (frame_size
|
||||
+ - get_stack_check_protect ()));
|
||||
}
|
||||
else if (frame_size > 0)
|
||||
- aarch64_emit_probe_stack_range (STACK_CHECK_PROTECT, frame_size);
|
||||
+ aarch64_emit_probe_stack_range (get_stack_check_protect (), frame_size);
|
||||
}
|
||||
|
||||
aarch64_sub_sp (IP0_REGNUM, initial_adjust, true);
|
||||
diff -N -urp a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
|
||||
--- a/gcc/config/i386/i386.c 2019-01-10 13:33:20.674185822 +0800
|
||||
+++ b/gcc/config/i386/i386.c 2019-01-28 10:55:37.006876481 +0800
|
||||
@@ -14396,7 +14396,7 @@ ix86_expand_prologue (void)
|
||||
HOST_WIDE_INT size = allocate;
|
||||
|
||||
if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
|
||||
- size = 0x80000000 - STACK_CHECK_PROTECT - 1;
|
||||
+ size = 0x80000000 - get_stack_check_protect () - 1;
|
||||
|
||||
if (TARGET_STACK_PROBE)
|
||||
{
|
||||
@@ -14406,18 +14406,21 @@ ix86_expand_prologue (void)
|
||||
ix86_emit_probe_stack_range (0, size);
|
||||
}
|
||||
else
|
||||
- ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
|
||||
+ ix86_emit_probe_stack_range (0,
|
||||
+ size + get_stack_check_protect ());
|
||||
}
|
||||
else
|
||||
{
|
||||
if (crtl->is_leaf && !cfun->calls_alloca)
|
||||
{
|
||||
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
|
||||
- ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
|
||||
- size - STACK_CHECK_PROTECT);
|
||||
+ if (size > PROBE_INTERVAL
|
||||
+ && size > get_stack_check_protect ())
|
||||
+ ix86_emit_probe_stack_range (get_stack_check_protect (),
|
||||
+ (size
|
||||
+ - get_stack_check_protect ()));
|
||||
}
|
||||
else
|
||||
- ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
|
||||
+ ix86_emit_probe_stack_range (get_stack_check_protect (), size);
|
||||
}
|
||||
}
|
||||
}
|
||||
diff -N -urp a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
|
||||
--- a/gcc/config/ia64/ia64.c 2017-01-01 20:07:43.905435000 +0800
|
||||
+++ b/gcc/config/ia64/ia64.c 2019-01-28 10:58:37.582881234 +0800
|
||||
@@ -3481,15 +3481,16 @@ ia64_expand_prologue (void)
|
||||
|
||||
if (crtl->is_leaf && !cfun->calls_alloca)
|
||||
{
|
||||
- if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
|
||||
- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT,
|
||||
- size - STACK_CHECK_PROTECT,
|
||||
+ if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
|
||||
+ ia64_emit_probe_stack_range (get_stack_check_protect (),
|
||||
+ size - get_stack_check_protect (),
|
||||
bs_size);
|
||||
- else if (size + bs_size > STACK_CHECK_PROTECT)
|
||||
- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, 0, bs_size);
|
||||
+ else if (size + bs_size > get_stack_check_protect ())
|
||||
+ ia64_emit_probe_stack_range (get_stack_check_protect (),
|
||||
+ 0, bs_size);
|
||||
}
|
||||
else if (size + bs_size > 0)
|
||||
- ia64_emit_probe_stack_range (STACK_CHECK_PROTECT, size, bs_size);
|
||||
+ ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size);
|
||||
}
|
||||
|
||||
if (dump_file)
|
||||
diff -N -urp a/gcc/coretypes.h b/gcc/coretypes.h
|
||||
--- a/gcc/coretypes.h 2017-01-01 20:07:43.905435000 +0800
|
||||
+++ b/gcc/coretypes.h 2019-01-11 14:09:58.612518114 +0800
|
||||
@@ -371,6 +371,7 @@ typedef unsigned char uchar;
|
||||
#include "input.h"
|
||||
#include "is-a.h"
|
||||
#include "memory-block.h"
|
||||
+#include "dumpfile.h"
|
||||
#endif /* GENERATOR_FILE && !USED_FOR_TARGET */
|
||||
|
||||
#endif /* coretypes.h */
|
||||
diff -N -urp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
--- a/gcc/doc/invoke.texi 2019-01-10 13:33:20.882185827 +0800
|
||||
+++ b/gcc/doc/invoke.texi 2019-01-10 16:40:40.066481692 +0800
|
||||
@@ -10050,6 +10050,21 @@ compilation without. The value for comp
|
||||
needs to be more conservative (higher) in order to make tracer
|
||||
effective.
|
||||
|
||||
+@item stack-clash-protection-guard-size
|
||||
+Specify the size of the operating system provided stack guard as
|
||||
+2 raised to @var{num} bytes. The default value is 12 (4096 bytes).
|
||||
+Acceptable values are between 12 and 30. Higher values may reduce the
|
||||
+number of explicit probes, but a value larger than the operating system
|
||||
+provided guard will leave code vulnerable to stack clash style attacks.
|
||||
+
|
||||
+@item stack-clash-protection-probe-interval
|
||||
+Stack clash protection involves probing stack space as it is allocated. This
|
||||
+param controls the maximum distance between probes into the stack as 2 raised
|
||||
+to @var{num} bytes. Acceptable values are between 10 and 16; the default is
|
||||
+12. Higher values may reduce the number of explicit probes, but a value
|
||||
+larger than the operating system provided guard will leave code vulnerable to
|
||||
+stack clash style attacks.
|
||||
+
|
||||
@item max-cse-path-length
|
||||
|
||||
The maximum number of basic blocks on path that CSE considers.
|
||||
@@ -11248,7 +11263,8 @@ target support in the compiler but comes
|
||||
@enumerate
|
||||
@item
|
||||
Modified allocation strategy for large objects: they are always
|
||||
-allocated dynamically if their size exceeds a fixed threshold.
|
||||
+allocated dynamically if their size exceeds a fixed threshold. Note this
|
||||
+may change the semantics of some code.
|
||||
|
||||
@item
|
||||
Fixed limit on the size of the static frame of functions: when it is
|
||||
@@ -11263,6 +11279,25 @@ generic implementation, code performance
|
||||
Note that old-style stack checking is also the fallback method for
|
||||
@samp{specific} if no target support has been added in the compiler.
|
||||
|
||||
+@samp{-fstack-check=} is designed for Ada's needs to detect infinite recursion
|
||||
+and stack overflows. @samp{specific} is an excellent choice when compiling
|
||||
+Ada code. It is not generally sufficient to protect against stack-clash
|
||||
+attacks. To protect against those you want @samp{-fstack-clash-protection}.
|
||||
+
|
||||
+@item -fstack-clash-protection
|
||||
+@opindex fstack-clash-protection
|
||||
+Generate code to prevent stack clash style attacks. When this option is
|
||||
+enabled, the compiler will only allocate one page of stack space at a time
|
||||
+and each page is accessed immediately after allocation. Thus, it prevents
|
||||
+allocations from jumping over any stack guard page provided by the
|
||||
+operating system.
|
||||
+
|
||||
+Most targets do not fully support stack clash protection. However, on
|
||||
+those targets @option{-fstack-clash-protection} will protect dynamic stack
|
||||
+allocations. @option{-fstack-clash-protection} may also provide limited
|
||||
+protection for static stack allocations if the target supports
|
||||
+@option{-fstack-check=specific}.
|
||||
+
|
||||
@item -fstack-limit-register=@var{reg}
|
||||
@itemx -fstack-limit-symbol=@var{sym}
|
||||
@itemx -fno-stack-limit
|
||||
diff -N -urp a/gcc/doc/tm.texi b/gcc/doc/tm.texi
|
||||
--- a/gcc/doc/tm.texi 2017-04-05 01:52:27.193766000 +0800
|
||||
+++ b/gcc/doc/tm.texi 2019-01-10 16:50:44.006497591 +0800
|
||||
@@ -3419,6 +3419,10 @@ GCC computed the default from the values
|
||||
normally not need to override that default.
|
||||
@end defmac
|
||||
|
||||
+@deftypefn {Target Hook} bool TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE (rtx @var{residual})
|
||||
+Some targets make optimistic assumptions about the state of stack probing when they emit their prologues. On such targets a probe into the end of any dynamically allocated space is likely required for safety against stack clash style attacks. Define this variable to return nonzero if such a probe is required or zero otherwise. You need not define this macro if it would always have the value zero.
|
||||
+@end deftypefn
|
||||
+
|
||||
@need 2000
|
||||
@node Frame Registers
|
||||
@subsection Registers That Address the Stack Frame
|
||||
diff -N -urp a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
|
||||
--- a/gcc/doc/tm.texi.in 2017-04-05 01:52:27.193766000 +0800
|
||||
+++ b/gcc/doc/tm.texi.in 2019-01-10 16:51:41.530499105 +0800
|
||||
@@ -2999,6 +2999,8 @@ GCC computed the default from the values
|
||||
normally not need to override that default.
|
||||
@end defmac
|
||||
|
||||
+@hook TARGET_STACK_CLASH_PROTECTION_FINAL_DYNAMIC_PROBE
|
||||
+
|
||||
@need 2000
|
||||
@node Frame Registers
|
||||
@subsection Registers That Address the Stack Frame
|
||||
diff -N -urp a/gcc/explow.c b/gcc/explow.c
|
||||
--- a/gcc/explow.c 2017-02-02 20:39:09.589196000 +0800
|
||||
+++ b/gcc/explow.c 2019-01-10 16:56:07.454506105 +0800
|
||||
@@ -39,8 +39,10 @@ along with GCC; see the file COPYING3.
|
||||
#include "expr.h"
|
||||
#include "common/common-target.h"
|
||||
#include "output.h"
|
||||
+#include "params.h"
|
||||
|
||||
static rtx break_out_memory_refs (rtx);
|
||||
+static void anti_adjust_stack_and_probe_stack_clash (rtx);
|
||||
|
||||
|
||||
/* Truncate and perhaps sign-extend C as appropriate for MODE. */
|
||||
@@ -1271,6 +1273,29 @@ get_dynamic_stack_size (rtx *psize, unsi
|
||||
*psize = size;
|
||||
}
|
||||
|
||||
+/* Return the number of bytes to "protect" on the stack for -fstack-check.
|
||||
+
|
||||
+ "protect" in the context of -fstack-check means how many bytes we
|
||||
+ should always ensure are available on the stack. More importantly
|
||||
+ this is how many bytes are skipped when probing the stack.
|
||||
+
|
||||
+ On some targets we want to reuse the -fstack-check prologue support
|
||||
+ to give a degree of protection against stack clashing style attacks.
|
||||
+
|
||||
+ In that scenario we do not want to skip bytes before probing as that
|
||||
+ would render the stack clash protections useless.
|
||||
+
|
||||
+ So we never use STACK_CHECK_PROTECT directly. Instead we indirect through
|
||||
+ this helper which allows us to provide different values for
|
||||
+ -fstack-check and -fstack-clash-protection. */
|
||||
+HOST_WIDE_INT
|
||||
+get_stack_check_protect (void)
|
||||
+{
|
||||
+ if (flag_stack_clash_protection)
|
||||
+ return 0;
|
||||
+ return STACK_CHECK_PROTECT;
|
||||
+}
|
||||
+
|
||||
/* Return an rtx representing the address of an area of memory dynamically
|
||||
pushed on the stack.
|
||||
|
||||
@@ -1429,7 +1454,7 @@ allocate_dynamic_stack_space (rtx size,
|
||||
probe_stack_range (STACK_OLD_CHECK_PROTECT + STACK_CHECK_MAX_FRAME_SIZE,
|
||||
size);
|
||||
else if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
|
||||
- probe_stack_range (STACK_CHECK_PROTECT, size);
|
||||
+ probe_stack_range (get_stack_check_protect (), size);
|
||||
|
||||
/* Don't let anti_adjust_stack emit notes. */
|
||||
suppress_reg_args_size = true;
|
||||
@@ -1482,6 +1507,8 @@ allocate_dynamic_stack_space (rtx size,
|
||||
|
||||
if (flag_stack_check && STACK_CHECK_MOVING_SP)
|
||||
anti_adjust_stack_and_probe (size, false);
|
||||
+ else if (flag_stack_clash_protection)
|
||||
+ anti_adjust_stack_and_probe_stack_clash (size);
|
||||
else
|
||||
anti_adjust_stack (size);
|
||||
|
||||
@@ -1757,6 +1784,237 @@ probe_stack_range (HOST_WIDE_INT first,
|
||||
emit_insn (gen_blockage ());
|
||||
}
|
||||
|
||||
+/* Compute parameters for stack clash probing a dynamic stack
|
||||
+ allocation of SIZE bytes.
|
||||
+
|
||||
+ We compute ROUNDED_SIZE, LAST_ADDR, RESIDUAL and PROBE_INTERVAL.
|
||||
+
|
||||
+ Additionally we conditionally dump the type of probing that will
|
||||
+ be needed given the values computed. */
|
||||
+
|
||||
+void
|
||||
+compute_stack_clash_protection_loop_data (rtx *rounded_size, rtx *last_addr,
|
||||
+ rtx *residual,
|
||||
+ HOST_WIDE_INT *probe_interval,
|
||||
+ rtx size)
|
||||
+{
|
||||
+ /* Round SIZE down to STACK_CLASH_PROTECTION_PROBE_INTERVAL. */
|
||||
+ *probe_interval
|
||||
+ = 1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL);
|
||||
+ *rounded_size = simplify_gen_binary (AND, Pmode, size,
|
||||
+ GEN_INT (-*probe_interval));
|
||||
+
|
||||
+ /* Compute the value of the stack pointer for the last iteration.
|
||||
+ It's just SP + ROUNDED_SIZE. */
|
||||
+ rtx rounded_size_op = force_operand (*rounded_size, NULL_RTX);
|
||||
+ *last_addr = force_operand (gen_rtx_fmt_ee (STACK_GROW_OP, Pmode,
|
||||
+ stack_pointer_rtx,
|
||||
+ rounded_size_op),
|
||||
+ NULL_RTX);
|
||||
+
|
||||
+ /* Compute any residuals not allocated by the loop above. Residuals
|
||||
+ are just SIZE - ROUNDED_SIZE. */
|
||||
+ *residual = simplify_gen_binary (MINUS, Pmode, size, *rounded_size);
|
||||
+
|
||||
+ /* Dump key information to make writing tests easy. */
|
||||
+ if (dump_file)
|
||||
+ {
|
||||
+ if (*rounded_size == CONST0_RTX (Pmode))
|
||||
+ fprintf (dump_file,
|
||||
+ "Stack clash skipped dynamic allocation and probing loop.\n");
|
||||
+ else if (CONST_INT_P (*rounded_size)
|
||||
+ && INTVAL (*rounded_size) <= 4 * *probe_interval)
|
||||
+ fprintf (dump_file,
|
||||
+ "Stack clash dynamic allocation and probing inline.\n");
|
||||
+ else if (CONST_INT_P (*rounded_size))
|
||||
+ fprintf (dump_file,
|
||||
+ "Stack clash dynamic allocation and probing in "
|
||||
+ "rotated loop.\n");
|
||||
+ else
|
||||
+ fprintf (dump_file,
|
||||
+ "Stack clash dynamic allocation and probing in loop.\n");
|
||||
+
|
||||
+ if (*residual != CONST0_RTX (Pmode))
|
||||
+ fprintf (dump_file,
|
||||
+ "Stack clash dynamic allocation and probing residuals.\n");
|
||||
+ else
|
||||
+ fprintf (dump_file,
|
||||
+ "Stack clash skipped dynamic allocation and "
|
||||
+ "probing residuals.\n");
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Emit the start of an allocate/probe loop for stack
|
||||
+ clash protection.
|
||||
+
|
||||
+ LOOP_LAB and END_LAB are returned for use when we emit the
|
||||
+ end of the loop.
|
||||
+
|
||||
+ LAST_ADDR is the value for SP which stops the loop. */
|
||||
+void
|
||||
+emit_stack_clash_protection_probe_loop_start (rtx *loop_lab,
|
||||
+ rtx *end_lab,
|
||||
+ rtx last_addr,
|
||||
+ bool rotated)
|
||||
+{
|
||||
+ /* Essentially we want to emit any setup code, the top of loop
|
||||
+ label and the comparison at the top of the loop. */
|
||||
+ *loop_lab = gen_label_rtx ();
|
||||
+ *end_lab = gen_label_rtx ();
|
||||
+
|
||||
+ emit_label (*loop_lab);
|
||||
+ if (!rotated)
|
||||
+ emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, EQ, NULL_RTX,
|
||||
+ Pmode, 1, *end_lab);
|
||||
+}
|
||||
+
|
||||
+/* Emit the end of a stack clash probing loop.
|
||||
+
|
||||
+ This consists of just the jump back to LOOP_LAB and
|
||||
+ emitting END_LOOP after the loop. */
|
||||
+
|
||||
+void
|
||||
+emit_stack_clash_protection_probe_loop_end (rtx loop_lab, rtx end_loop,
|
||||
+ rtx last_addr, bool rotated)
|
||||
+{
|
||||
+ if (rotated)
|
||||
+ emit_cmp_and_jump_insns (stack_pointer_rtx, last_addr, NE, NULL_RTX,
|
||||
+ Pmode, 1, loop_lab);
|
||||
+ else
|
||||
+ emit_jump (loop_lab);
|
||||
+
|
||||
+ emit_label (end_loop);
|
||||
+
|
||||
+}
|
||||
+
|
||||
+/* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes)
|
||||
+ while probing it. This pushes when SIZE is positive. SIZE need not
|
||||
+ be constant.
|
||||
+
|
||||
+ This is subtly different than anti_adjust_stack_and_probe to try and
|
||||
+ prevent stack-clash attacks:
|
||||
+
|
||||
+ 1. It must assume no knowledge of the probing state, any allocation
|
||||
+ must probe.
|
||||
+
|
||||
+ Consider the case of a 1 byte alloca in a loop. If the sum of the
|
||||
+ allocations is large, then this could be used to jump the guard if
|
||||
+ probes were not emitted.
|
||||
+
|
||||
+ 2. It never skips probes, whereas anti_adjust_stack_and_probe will
|
||||
+ skip probes on the first couple PROBE_INTERVALs on the assumption
|
||||
+ they're done elsewhere.
|
||||
+
|
||||
+ 3. It only allocates and probes SIZE bytes, it does not need to
|
||||
+ allocate/probe beyond that because this probing style does not
|
||||
+ guarantee signal handling capability if the guard is hit. */
|
||||
+
|
||||
+static void
|
||||
+anti_adjust_stack_and_probe_stack_clash (rtx size)
|
||||
+{
|
||||
+ /* First ensure SIZE is Pmode. */
|
||||
+ if (GET_MODE (size) != VOIDmode && GET_MODE (size) != Pmode)
|
||||
+ size = convert_to_mode (Pmode, size, 1);
|
||||
+
|
||||
+ /* We can get here with a constant size on some targets. */
|
||||
+ rtx rounded_size, last_addr, residual;
|
||||
+ HOST_WIDE_INT probe_interval;
|
||||
+ compute_stack_clash_protection_loop_data (&rounded_size, &last_addr,
|
||||
+ &residual, &probe_interval, size);
|
||||
+
|
||||
+ if (rounded_size != CONST0_RTX (Pmode))
|
||||
+ {
|
||||
+ if (CONST_INT_P (rounded_size)
|
||||
+ && INTVAL (rounded_size) <= 4 * probe_interval)
|
||||
+ {
|
||||
+ for (HOST_WIDE_INT i = 0;
|
||||
+ i < INTVAL (rounded_size);
|
||||
+ i += probe_interval)
|
||||
+ {
|
||||
+ anti_adjust_stack (GEN_INT (probe_interval));
|
||||
+
|
||||
+ /* The prologue does not probe residuals. Thus the offset
|
||||
+ here to probe just beyond what the prologue had already
|
||||
+ allocated. */
|
||||
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
|
||||
+ (probe_interval
|
||||
+ - GET_MODE_SIZE (word_mode))));
|
||||
+ emit_insn (gen_blockage ());
|
||||
+ }
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ rtx loop_lab, end_loop;
|
||||
+ bool rotate_loop = CONST_INT_P (rounded_size);
|
||||
+ emit_stack_clash_protection_probe_loop_start (&loop_lab, &end_loop,
|
||||
+ last_addr, rotate_loop);
|
||||
+
|
||||
+ anti_adjust_stack (GEN_INT (probe_interval));
|
||||
+
|
||||
+ /* The prologue does not probe residuals. Thus the offset here
|
||||
+ to probe just beyond what the prologue had already allocated. */
|
||||
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
|
||||
+ (probe_interval
|
||||
+ - GET_MODE_SIZE (word_mode))));
|
||||
+
|
||||
+ emit_stack_clash_protection_probe_loop_end (loop_lab, end_loop,
|
||||
+ last_addr, rotate_loop);
|
||||
+ emit_insn (gen_blockage ());
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (residual != CONST0_RTX (Pmode))
|
||||
+ {
|
||||
+ rtx label = NULL_RTX;
|
||||
+ /* RESIDUAL could be zero at runtime and in that case *sp could
|
||||
+ hold live data. Furthermore, we do not want to probe into the
|
||||
+ red zone.
|
||||
+
|
||||
+ Go ahead and just guard the probe at *sp on RESIDUAL != 0 at
|
||||
+ runtime if RESIDUAL is not a compile time constant. */
|
||||
+ if (!CONST_INT_P (residual))
|
||||
+ {
|
||||
+ label = gen_label_rtx ();
|
||||
+ emit_cmp_and_jump_insns (residual, CONST0_RTX (GET_MODE (residual)),
|
||||
+ EQ, NULL_RTX, Pmode, 1, label);
|
||||
+ }
|
||||
+
|
||||
+ rtx x = force_reg (Pmode, plus_constant (Pmode, residual,
|
||||
+ -GET_MODE_SIZE (word_mode)));
|
||||
+ anti_adjust_stack (residual);
|
||||
+ emit_stack_probe (gen_rtx_PLUS (Pmode, stack_pointer_rtx, x));
|
||||
+ emit_insn (gen_blockage ());
|
||||
+ if (!CONST_INT_P (residual))
|
||||
+ emit_label (label);
|
||||
+ }
|
||||
+
|
||||
+ /* Some targets make optimistic assumptions in their prologues about
|
||||
+ how the caller may have probed the stack. Make sure we honor
|
||||
+ those assumptions when needed. */
|
||||
+ if (size != CONST0_RTX (Pmode)
|
||||
+ && targetm.stack_clash_protection_final_dynamic_probe (residual))
|
||||
+ {
|
||||
+ /* SIZE could be zero at runtime and in that case *sp could hold
|
||||
+ live data. Furthermore, we don't want to probe into the red
|
||||
+ zone.
|
||||
+
|
||||
+ Go ahead and just guard the probe at *sp on SIZE != 0 at runtime
|
||||
+ if SIZE is not a compile time constant. */
|
||||
+ rtx label = NULL_RTX;
|
||||
+ if (!CONST_INT_P (size))
|
||||
+ {
|
||||
+ label = gen_label_rtx ();
|
||||
+ emit_cmp_and_jump_insns (size, CONST0_RTX (GET_MODE (size)),
|
||||
+ EQ, NULL_RTX, Pmode, 1, label);
|
||||
+ }
|
||||
+
|
||||
+ emit_stack_probe (stack_pointer_rtx);
|
||||
+ emit_insn (gen_blockage ());
|
||||
+ if (!CONST_INT_P (size))
|
||||
+ emit_label (label);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/* Adjust the stack pointer by minus SIZE (an rtx for a number of bytes)
|
||||
while probing it. This pushes when SIZE is positive. SIZE need not
|
||||
be constant. If ADJUST_BACK is true, adjust back the stack pointer
|
||||
diff -N -urp a/gcc/explow.h b/gcc/explow.h
|
||||
--- a/gcc/explow.h 2017-01-01 20:07:43.905435000 +0800
|
||||
+++ b/gcc/explow.h 2019-01-10 16:57:37.934508487 +0800
|
||||
@@ -69,6 +69,15 @@ extern void anti_adjust_stack (rtx);
|
||||
/* Add some bytes to the stack while probing it. An rtx says how many. */
|
||||
extern void anti_adjust_stack_and_probe (rtx, bool);
|
||||
|
||||
+/* Support for building allocation/probing loops for stack-clash
|
||||
+ protection of dynamically allocated stack space. */
|
||||
+extern void compute_stack_clash_protection_loop_data (rtx *, rtx *, rtx *,
|
||||
+ HOST_WIDE_INT *, rtx);
|
||||
+extern void emit_stack_clash_protection_probe_loop_start (rtx *, rtx *,
|
||||
+ rtx, bool);
|
||||
+extern void emit_stack_clash_protection_probe_loop_end (rtx, rtx,
|
||||
+ rtx, bool);
|
||||
+
|
||||
/* This enum is used for the following two functions. */
|
||||
enum save_level {SAVE_BLOCK, SAVE_FUNCTION, SAVE_NONLOCAL};
|
||||
|
||||
diff -N -urp a/gcc/flag-types.h b/gcc/flag-types.h
|
||||
--- a/gcc/flag-types.h 2017-01-01 20:07:43.905435000 +0800
|
||||
+++ b/gcc/flag-types.h 2019-01-10 16:42:11.490484099 +0800
|
||||
@@ -166,7 +166,14 @@ enum permitted_flt_eval_methods
|
||||
PERMITTED_FLT_EVAL_METHODS_C11
|
||||
};
|
||||
|
||||
-/* Type of stack check. */
|
||||
+/* Type of stack check.
|
||||
+
|
||||
+ Stack checking is designed to detect infinite recursion and stack
|
||||
+ overflows for Ada programs. Furthermore stack checking tries to ensure
|
||||
+ in that scenario that enough stack space is left to run a signal handler.
|
||||
+
|
||||
+ -fstack-check= does not prevent stack-clash style attacks. For that
|
||||
+ you want -fstack-clash-protection. */
|
||||
enum stack_check_type
|
||||
{
|
||||
/* Do not check the stack. */
|
||||
diff -N -urp a/gcc/function.c b/gcc/function.c
|
||||
--- a/gcc/function.c 2017-08-08 21:21:12.755378000 +0800
|
||||
+++ b/gcc/function.c 2019-01-10 17:07:17.414523742 +0800
|
||||
@@ -5695,6 +5695,58 @@ get_arg_pointer_save_area (void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
+
|
||||
+/* If debugging dumps are requested, dump information about how the
|
||||
+ target handled -fstack-clash-protection for the prologue.
|
||||
+
|
||||
+ PROBES describes what if any probes were emitted.
|
||||
+
|
||||
+ RESIDUALS indicates if the prologue had any residual allocation
|
||||
+ (i.e. total allocation was not a multiple of PROBE_INTERVAL). */
|
||||
+
|
||||
+void
|
||||
+dump_stack_clash_frame_info (enum stack_clash_probes probes, bool residuals)
|
||||
+{
|
||||
+ if (!dump_file)
|
||||
+ return;
|
||||
+
|
||||
+ switch (probes)
|
||||
+ {
|
||||
+ case NO_PROBE_NO_FRAME:
|
||||
+ fprintf (dump_file,
|
||||
+ "Stack clash no probe no stack adjustment in prologue.\n");
|
||||
+ break;
|
||||
+ case NO_PROBE_SMALL_FRAME:
|
||||
+ fprintf (dump_file,
|
||||
+ "Stack clash no probe small stack adjustment in prologue.\n");
|
||||
+ break;
|
||||
+ case PROBE_INLINE:
|
||||
+ fprintf (dump_file, "Stack clash inline probes in prologue.\n");
|
||||
+ break;
|
||||
+ case PROBE_LOOP:
|
||||
+ fprintf (dump_file, "Stack clash probe loop in prologue.\n");
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ if (residuals)
|
||||
+ fprintf (dump_file, "Stack clash residual allocation in prologue.\n");
|
||||
+ else
|
||||
+ fprintf (dump_file, "Stack clash no residual allocation in prologue.\n");
|
||||
+
|
||||
+ if (frame_pointer_needed)
|
||||
+ fprintf (dump_file, "Stack clash frame pointer needed.\n");
|
||||
+ else
|
||||
+ fprintf (dump_file, "Stack clash no frame pointer needed.\n");
|
||||
+
|
||||
+ if (TREE_THIS_VOLATILE (cfun->decl))
|
||||
+ fprintf (dump_file,
|
||||
+ "Stack clash noreturn prologue, assuming no implicit"
|
||||
+ " probes in caller.\n");
|
||||
+ else
|
||||
+ fprintf (dump_file,
|
||||
+ "Stack clash not noreturn prologue.\n");
|
||||
+}
|
||||
+
|
||||
/* Add a list of INSNS to the hash HASHP, possibly allocating HASHP
|
||||
for the first time. */
|
||||
|
||||
diff -N -urp a/gcc/function.h b/gcc/function.h
|
||||
--- a/gcc/function.h 2017-01-25 01:07:36.015431000 +0800
|
||||
+++ b/gcc/function.h 2019-01-10 17:08:12.806525200 +0800
|
||||
@@ -553,6 +553,14 @@ do { \
|
||||
((TARGET_PTRMEMFUNC_VBIT_LOCATION == ptrmemfunc_vbit_in_pfn) \
|
||||
? MAX (FUNCTION_BOUNDARY, 2 * BITS_PER_UNIT) : FUNCTION_BOUNDARY)
|
||||
|
||||
+enum stack_clash_probes {
|
||||
+ NO_PROBE_NO_FRAME,
|
||||
+ NO_PROBE_SMALL_FRAME,
|
||||
+ PROBE_INLINE,
|
||||
+ PROBE_LOOP
|
||||
+};
|
||||
+
|
||||
+extern void dump_stack_clash_frame_info (enum stack_clash_probes, bool);
|
||||
|
||||
|
||||
extern void push_function_context (void);
|
||||
diff -N -urp a/gcc/params.def b/gcc/params.def
|
||||
--- a/gcc/params.def 2019-01-10 13:33:20.894185827 +0800
|
||||
+++ b/gcc/params.def 2019-01-10 16:43:15.414485782 +0800
|
||||
@@ -213,6 +213,16 @@ DEFPARAM(PARAM_STACK_FRAME_GROWTH,
|
||||
"Maximal stack frame growth due to inlining (in percent).",
|
||||
1000, 0, 0)
|
||||
|
||||
+DEFPARAM(PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE,
|
||||
+ "stack-clash-protection-guard-size",
|
||||
+ "Size of the stack guard expressed as a power of two.",
|
||||
+ 12, 12, 30)
|
||||
+
|
||||
+DEFPARAM(PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL,
|
||||
+ "stack-clash-protection-probe-interval",
|
||||
+ "Interval in which to probe the stack expressed as a power of two.",
|
||||
+ 12, 10, 16)
|
||||
+
|
||||
/* The GCSE optimization will be disabled if it would require
|
||||
significantly more memory than this value. */
|
||||
DEFPARAM(PARAM_MAX_GCSE_MEMORY,
|
||||
diff -N -urp a/gcc/reg-notes.def b/gcc/reg-notes.def
|
||||
--- a/gcc/reg-notes.def 2017-03-28 05:00:35.674561000 +0800
|
||||
+++ b/gcc/reg-notes.def 2019-01-10 17:12:11.678531488 +0800
|
||||
@@ -223,6 +223,10 @@ REG_NOTE (ARGS_SIZE)
|
||||
pseudo reg. */
|
||||
REG_NOTE (RETURNED)
|
||||
|
||||
+/* Indicates the instruction is a stack check probe that should not
|
||||
+ be combined with other stack adjustments. */
|
||||
+REG_NOTE (STACK_CHECK)
|
||||
+
|
||||
/* Used to mark a call with the function decl called by the call.
|
||||
The decl might not be available in the call due to splitting of the call
|
||||
insn. This note is a SYMBOL_REF. */
|
||||
diff -N -urp a/gcc/rtl.h b/gcc/rtl.h
|
||||
--- a/gcc/rtl.h 2017-03-14 20:47:42.745690000 +0800
|
||||
+++ b/gcc/rtl.h 2019-01-10 16:59:15.574511058 +0800
|
||||
@@ -2707,6 +2707,7 @@ get_full_set_src_cost (rtx x, machine_mo
|
||||
/* In explow.c */
|
||||
extern HOST_WIDE_INT trunc_int_for_mode (HOST_WIDE_INT, machine_mode);
|
||||
extern rtx plus_constant (machine_mode, rtx, HOST_WIDE_INT, bool = false);
|
||||
+extern HOST_WIDE_INT get_stack_check_protect (void);
|
||||
|
||||
/* In rtl.c */
|
||||
extern rtx rtx_alloc_stat (RTX_CODE MEM_STAT_DECL);
|
||||
diff -N -urp a/gcc/sched-deps.c b/gcc/sched-deps.c
|
||||
--- a/gcc/sched-deps.c 2017-01-01 20:07:43.905435000 +0800
|
||||
+++ b/gcc/sched-deps.c 2019-01-10 17:13:37.470533746 +0800
|
||||
@@ -4717,6 +4717,11 @@ parse_add_or_inc (struct mem_inc_info *m
|
||||
if (RTX_FRAME_RELATED_P (insn) || !pat)
|
||||
return false;
|
||||
|
||||
+ /* Do not allow breaking data dependencies for insns that are marked
|
||||
+ with REG_STACK_CHECK. */
|
||||
+ if (find_reg_note (insn, REG_STACK_CHECK, NULL))
|
||||
+ return false;
|
||||
+
|
||||
/* Result must be single reg. */
|
||||
if (!REG_P (SET_DEST (pat)))
|
||||
return false;
|
||||
diff -N -urp a/gcc/target.def b/gcc/target.def
|
||||
--- a/gcc/target.def 2019-01-10 13:33:20.762185824 +0800
|
||||
+++ b/gcc/target.def 2019-01-10 17:01:49.146515100 +0800
|
||||
@@ -5490,6 +5490,12 @@ these registers when the target switches
|
||||
void, (void),
|
||||
hook_void_void)
|
||||
|
||||
+DEFHOOK
|
||||
+(stack_clash_protection_final_dynamic_probe,
|
||||
+ "Some targets make optimistic assumptions about the state of stack probing when they emit their prologues. On such targets a probe into the end of any dynamically allocated space is likely required for safety against stack clash style attacks. Define this variable to return nonzero if such a probe is required or zero otherwise. You need not define this macro if it would always have the value zero.",
|
||||
+ bool, (rtx residual),
|
||||
+ default_stack_clash_protection_final_dynamic_probe)
|
||||
+
|
||||
/* Functions specific to the C family of frontends. */
|
||||
#undef HOOK_PREFIX
|
||||
#define HOOK_PREFIX "TARGET_C_"
|
||||
diff -N -urp a/gcc/targhooks.c b/gcc/targhooks.c
|
||||
--- a/gcc/targhooks.c 2017-02-07 19:29:06.644837000 +0800
|
||||
+++ b/gcc/targhooks.c 2019-01-10 17:03:23.818517592 +0800
|
||||
@@ -2107,4 +2107,10 @@ default_excess_precision (enum excess_pr
|
||||
return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
|
||||
}
|
||||
|
||||
+bool
|
||||
+default_stack_clash_protection_final_dynamic_probe (rtx residual ATTRIBUTE_UNUSED)
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
#include "gt-targhooks.h"
|
||||
diff -N -urp a/gcc/targhooks.h b/gcc/targhooks.h
|
||||
--- a/gcc/targhooks.h 2017-04-05 01:52:27.193766000 +0800
|
||||
+++ b/gcc/targhooks.h 2019-01-10 17:04:11.438518846 +0800
|
||||
@@ -263,5 +263,6 @@ extern unsigned int default_min_arithmet
|
||||
|
||||
extern enum flt_eval_method
|
||||
default_excess_precision (enum excess_precision_type ATTRIBUTE_UNUSED);
|
||||
+extern bool default_stack_clash_protection_final_dynamic_probe (rtx);
|
||||
|
||||
#endif /* GCC_TARGHOOKS_H */
|
||||
diff -N -urp a/gcc/toplev.c b/gcc/toplev.c
|
||||
--- a/gcc/toplev.c 2017-09-15 16:18:34.015147000 +0800
|
||||
+++ b/gcc/toplev.c 2019-01-10 16:45:33.626489420 +0800
|
||||
@@ -1573,6 +1573,26 @@ process_options (void)
|
||||
flag_associative_math = 0;
|
||||
}
|
||||
|
||||
+ /* -fstack-clash-protection is not currently supported on targets
|
||||
+ where the stack grows up. */
|
||||
+ if (flag_stack_clash_protection && !STACK_GROWS_DOWNWARD)
|
||||
+ {
|
||||
+ warning_at (UNKNOWN_LOCATION, 0,
|
||||
+ "%<-fstack-clash-protection%> is not supported on targets "
|
||||
+ "where the stack grows from lower to higher addresses");
|
||||
+ flag_stack_clash_protection = 0;
|
||||
+ }
|
||||
+
|
||||
+ /* We can not support -fstack-check= and -fstack-clash-protection at
|
||||
+ the same time. */
|
||||
+ if (flag_stack_check != NO_STACK_CHECK && flag_stack_clash_protection)
|
||||
+ {
|
||||
+ warning_at (UNKNOWN_LOCATION, 0,
|
||||
+ "%<-fstack-check=%> and %<-fstack-clash_protection%> are "
|
||||
+ "mutually exclusive. Disabling %<-fstack-check=%>");
|
||||
+ flag_stack_check = NO_STACK_CHECK;
|
||||
+ }
|
||||
+
|
||||
/* With -fcx-limited-range, we do cheap and quick complex arithmetic. */
|
||||
if (flag_cx_limited_range)
|
||||
flag_complex_method = 0;
|
||||
Binary file not shown.
@ -1,12 +0,0 @@
|
||||
diff --git a/gcc/graphite.h b/gcc/graphite.h
|
||||
index 4e0e58c..be0a22b 100644 (file)
|
||||
--- a/gcc/graphite.h
|
||||
+++ b/gcc/graphite.h
|
||||
@@ -37,6 +37,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include <isl/schedule.h>
|
||||
#include <isl/ast_build.h>
|
||||
#include <isl/schedule_node.h>
|
||||
+#include <isl/id.h>
|
||||
+#include <isl/space.h>
|
||||
|
||||
typedef struct poly_dr *poly_dr_p;
|
||||
171
generate-csel.patch
Normal file
171
generate-csel.patch
Normal file
@ -0,0 +1,171 @@
|
||||
diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
|
||||
new file mode 100644
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c
|
||||
@@ -0,0 +1,12 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
|
||||
+
|
||||
+unsigned test(unsigned k, unsigned b) {
|
||||
+ unsigned a[2];
|
||||
+ if (b < a[k]) {
|
||||
+ a[k] = b;
|
||||
+ }
|
||||
+ return a[0]+a[1];
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
|
||||
diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
|
||||
new file mode 100644
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c
|
||||
@@ -0,0 +1,14 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
|
||||
+
|
||||
+int c;
|
||||
+unsigned test(unsigned k, unsigned b) {
|
||||
+ unsigned a[2];
|
||||
+ a[k] = c;
|
||||
+ if (b < a[k]) {
|
||||
+ a[k] = b;
|
||||
+ }
|
||||
+ return a[0]+a[1];
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
|
||||
diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-3.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-3.c
|
||||
new file mode 100644
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-3.c
|
||||
@@ -0,0 +1,12 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
|
||||
+
|
||||
+unsigned a[2];
|
||||
+unsigned test(unsigned k, unsigned b) {
|
||||
+ if (b < a[k]) {
|
||||
+ a[k] = b;
|
||||
+ }
|
||||
+ return a[0]+a[1];
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-not "Conditional store replacement" "cselim" } } */
|
||||
diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-4.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-4.c
|
||||
new file mode 100644
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-4.c
|
||||
@@ -0,0 +1,14 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
|
||||
+
|
||||
+int *p;
|
||||
+unsigned test(unsigned k, unsigned b) {
|
||||
+ unsigned a[2];
|
||||
+ p = a;
|
||||
+ if (b < a[k]) {
|
||||
+ a[k] = b;
|
||||
+ }
|
||||
+ return a[0]+a[1];
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-not "Conditional store replacement" "cselim" } } */
|
||||
diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
|
||||
new file mode 100644
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c
|
||||
@@ -0,0 +1,16 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
|
||||
+
|
||||
+int test(int b, int k) {
|
||||
+ struct {
|
||||
+ int data[2];
|
||||
+ } a;
|
||||
+
|
||||
+ if (b < a.data[k]) {
|
||||
+ a.data[k] = b;
|
||||
+ }
|
||||
+
|
||||
+ return a.data[0] + a.data[1];
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
|
||||
diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
|
||||
new file mode 100644
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c
|
||||
@@ -0,0 +1,19 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-tree-cselim-details" } */
|
||||
+
|
||||
+int test(int b, int k) {
|
||||
+ typedef struct {
|
||||
+ int x;
|
||||
+ } SS;
|
||||
+ struct {
|
||||
+ SS data[2];
|
||||
+ } a;
|
||||
+
|
||||
+ if (b < a.data[k].x) {
|
||||
+ a.data[k].x = b;
|
||||
+ }
|
||||
+
|
||||
+ return a.data[0].x + a.data[1].x;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */
|
||||
diff -uprN a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
|
||||
--- a/gcc/tree-ssa-phiopt.c
|
||||
+++ b/gcc/tree-ssa-phiopt.c
|
||||
@@ -2196,7 +2196,8 @@ get_non_trapping (void)
|
||||
|
||||
We check that MIDDLE_BB contains only one store, that that store
|
||||
doesn't trap (not via NOTRAP, but via checking if an access to the same
|
||||
- memory location dominates us) and that the store has a "simple" RHS. */
|
||||
+ memory location dominates us, or the store is to a local addressable
|
||||
+ object) and that the store has a "simple" RHS. */
|
||||
|
||||
static bool
|
||||
cond_store_replacement (basic_block middle_bb, basic_block join_bb,
|
||||
@@ -2218,8 +2219,9 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb,
|
||||
locus = gimple_location (assign);
|
||||
lhs = gimple_assign_lhs (assign);
|
||||
rhs = gimple_assign_rhs1 (assign);
|
||||
- if (TREE_CODE (lhs) != MEM_REF
|
||||
- || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME
|
||||
+ if ((TREE_CODE (lhs) != MEM_REF
|
||||
+ && TREE_CODE (lhs) != ARRAY_REF
|
||||
+ && TREE_CODE (lhs) != COMPONENT_REF)
|
||||
|| !is_gimple_reg_type (TREE_TYPE (lhs)))
|
||||
return false;
|
||||
|
||||
@@ -2227,7 +2229,13 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb,
|
||||
TREE_THIS_NOTRAP here, but in that case we also could move stores,
|
||||
whose value is not available readily, which we want to avoid. */
|
||||
if (!nontrap->contains (lhs))
|
||||
- return false;
|
||||
+ {
|
||||
+ /* If LHS is a local variable without address-taken, we could
|
||||
+ always safely move down the store. */
|
||||
+ tree base = get_base_address (lhs);
|
||||
+ if (!auto_var_p (base) || TREE_ADDRESSABLE (base))
|
||||
+ return false;
|
||||
+ }
|
||||
|
||||
/* Now we've checked the constraints, so do the transformation:
|
||||
1) Remove the single store. */
|
||||
@@ -2280,6 +2288,14 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb,
|
||||
else
|
||||
gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
|
||||
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ {
|
||||
+ fprintf (dump_file, "\nConditional store replacement happened!");
|
||||
+ fprintf (dump_file, "\nReplaced the store with a load.");
|
||||
+ fprintf (dump_file, "\nInserted a new PHI statement in joint block:\n");
|
||||
+ print_gimple_stmt (dump_file, new_stmt, 0, TDF_VOPS|TDF_MEMSYMS);
|
||||
+ }
|
||||
+
|
||||
return true;
|
||||
}
|
||||
BIN
isl-0.14.tar.xz
BIN
isl-0.14.tar.xz
Binary file not shown.
178
ivopts-1.patch
Normal file
178
ivopts-1.patch
Normal file
@ -0,0 +1,178 @@
|
||||
diff -urpN a/gcc/testsuite/gfortran.dg/graphite/pr90240.f b/gcc/testsuite/gfortran.dg/graphite/pr90240.f
|
||||
new file mode 100644
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gfortran.dg/graphite/pr90240.f
|
||||
@@ -0,0 +1,18 @@
|
||||
+! { dg-do compile }
|
||||
+! { dg-options "-O1 -floop-nest-optimize" }
|
||||
+
|
||||
+ PARAMETER (n=1335, N2=1335)
|
||||
+ COMMON a(n,N2), b(n,N2), c(n,N2),
|
||||
+ * d(n,N2),
|
||||
+ 2 e(n,N2), f(n,N2),
|
||||
+ * g(n,N2), h(n,N2)
|
||||
+ DO 200 j=1,i
|
||||
+ DO 300 k=1,l
|
||||
+ a(k,j) = c(k,j)*g(k,j)*f(k+1,m)+f(k,m)+f(k,j)
|
||||
+ 2 +f(k+1,j)*h(k+1,j)
|
||||
+ b(k,j+1) = d(k,j+1)*g(k,m)+g(k,j+1)
|
||||
+ 1 *e(k,m)+e(k,j+1)+e(k,j)+e(k+1,j)
|
||||
+ 2 *h(k,j+1)-h(k,j)
|
||||
+ 300 ENDDO
|
||||
+ 200 ENDDO
|
||||
+ END
|
||||
diff -urpN a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
|
||||
--- a/gcc/tree-ssa-loop-ivopts.c
|
||||
+++ b/gcc/tree-ssa-loop-ivopts.c
|
||||
@@ -4557,22 +4557,25 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
|
||||
static comp_cost
|
||||
get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
|
||||
{
|
||||
- int loop_freq = data->current_loop->header->count.to_frequency (cfun);
|
||||
- int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
|
||||
- if (loop_freq != 0)
|
||||
- {
|
||||
- gcc_assert (cost.scratch <= cost.cost);
|
||||
- int scaled_cost
|
||||
- = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
|
||||
+ if (data->speed
|
||||
+ && data->current_loop->header->count.to_frequency (cfun) > 0)
|
||||
+ {
|
||||
+ basic_block bb = gimple_bb (at);
|
||||
+ gcc_assert (cost.scratch <= cost.cost);
|
||||
+ int scale_factor = (int)(intptr_t) bb->aux;
|
||||
+ if (scale_factor == 1)
|
||||
+ return cost;
|
||||
|
||||
- if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "Scaling cost based on bb prob "
|
||||
- "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
|
||||
- 1.0f * bb_freq / loop_freq, cost.cost,
|
||||
- cost.scratch, scaled_cost, bb_freq, loop_freq);
|
||||
+ int scaled_cost
|
||||
+ = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
|
||||
|
||||
- cost.cost = scaled_cost;
|
||||
- }
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file, "Scaling cost based on bb prob "
|
||||
+ "by %2.2f: %d (scratch: %d) -> %d\n",
|
||||
+ 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
|
||||
+
|
||||
+ cost.cost = scaled_cost;
|
||||
+ }
|
||||
|
||||
return cost;
|
||||
}
|
||||
@@ -6678,9 +6681,8 @@ try_improve_iv_set (struct ivopts_data *data,
|
||||
}
|
||||
|
||||
iv_ca_delta_commit (data, ivs, best_delta, true);
|
||||
- gcc_assert (best_cost == iv_ca_cost (ivs));
|
||||
iv_ca_delta_free (&best_delta);
|
||||
- return true;
|
||||
+ return best_cost == iv_ca_cost (ivs);
|
||||
}
|
||||
|
||||
/* Attempts to find the optimal set of induction variables. We do simple
|
||||
@@ -6717,6 +6719,14 @@ find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
|
||||
}
|
||||
}
|
||||
|
||||
+ /* If the set has infinite_cost, it can't be optimal. */
|
||||
+ if (iv_ca_cost (set).infinite_cost_p ())
|
||||
+ {
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file,
|
||||
+ "Overflow to infinite cost in try_improve_iv_set.\n");
|
||||
+ iv_ca_free (&set);
|
||||
+ }
|
||||
return set;
|
||||
}
|
||||
|
||||
@@ -7522,6 +7532,49 @@ loop_body_includes_call (basic_block *body, unsigned num_nodes)
|
||||
return false;
|
||||
}
|
||||
|
||||
+/* Determine cost scaling factor for basic blocks in loop. */
|
||||
+#define COST_SCALING_FACTOR_BOUND (20)
|
||||
+
|
||||
+static void
|
||||
+determine_scaling_factor (struct ivopts_data *data, basic_block *body)
|
||||
+{
|
||||
+ int lfreq = data->current_loop->header->count.to_frequency (cfun);
|
||||
+ if (!data->speed || lfreq <= 0)
|
||||
+ return;
|
||||
+
|
||||
+ int max_freq = lfreq;
|
||||
+ for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
|
||||
+ {
|
||||
+ body[i]->aux = (void *)(intptr_t) 1;
|
||||
+ if (max_freq < body[i]->count.to_frequency (cfun))
|
||||
+ max_freq = body[i]->count.to_frequency (cfun);
|
||||
+ }
|
||||
+ if (max_freq > lfreq)
|
||||
+ {
|
||||
+ int divisor, factor;
|
||||
+ /* Check if scaling factor itself needs to be scaled by the bound. This
|
||||
+ is to avoid overflow when scaling cost according to profile info. */
|
||||
+ if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
|
||||
+ {
|
||||
+ divisor = max_freq;
|
||||
+ factor = COST_SCALING_FACTOR_BOUND;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ divisor = lfreq;
|
||||
+ factor = 1;
|
||||
+ }
|
||||
+ for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
|
||||
+ {
|
||||
+ int bfreq = body[i]->count.to_frequency (cfun);
|
||||
+ if (bfreq <= lfreq)
|
||||
+ continue;
|
||||
+
|
||||
+ body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/* Optimizes the LOOP. Returns true if anything changed. */
|
||||
|
||||
static bool
|
||||
@@ -7560,7 +7613,6 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
|
||||
body = get_loop_body (loop);
|
||||
data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
|
||||
renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
|
||||
- free (body);
|
||||
|
||||
data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
|
||||
|
||||
@@ -7574,6 +7626,9 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
|
||||
if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
|
||||
goto finish;
|
||||
|
||||
+ /* Determine cost scaling factor for basic blocks in loop. */
|
||||
+ determine_scaling_factor (data, body);
|
||||
+
|
||||
/* Finds candidates for the induction variables (item 2). */
|
||||
find_iv_candidates (data);
|
||||
|
||||
@@ -7584,6 +7639,9 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
|
||||
|
||||
/* Find the optimal set of induction variables (item 3, part 2). */
|
||||
iv_ca = find_optimal_iv_set (data);
|
||||
+ /* Cleanup basic block aux field. */
|
||||
+ for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
|
||||
+ body[i]->aux = NULL;
|
||||
if (!iv_ca)
|
||||
goto finish;
|
||||
changed = true;
|
||||
@@ -7599,6 +7657,7 @@ tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
|
||||
remove_unused_ivs (data, toremove);
|
||||
|
||||
finish:
|
||||
+ free (body);
|
||||
free_loop_data (data);
|
||||
|
||||
return changed;
|
||||
407
ivopts-2.patch
Normal file
407
ivopts-2.patch
Normal file
@ -0,0 +1,407 @@
|
||||
diff -urpN a/gcc/testsuite/g++.dg/tree-ssa/pr90078.C b/gcc/testsuite/g++.dg/tree-ssa/pr90078.C
|
||||
new file mode 100644
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr90078.C
|
||||
@@ -0,0 +1,199 @@
|
||||
+// { dg-do compile }
|
||||
+// { dg-options "-std=c++14 -O2 -ftemplate-depth=1000000" }
|
||||
+
|
||||
+template <class T, int Dim0, int Dim1, int Dim2> struct Tensor3;
|
||||
+template <class A, class T, int Dim0, int Dim1, int Dim2, char i, char j,
|
||||
+ char k>
|
||||
+struct Tensor3_Expr;
|
||||
+
|
||||
+template <class T, int Dim0, int Dim1, int Dim2, int Dim3> struct Tensor4;
|
||||
+template <class A, class T, int Dim0, int Dim1, int Dim2, int Dim3, char i,
|
||||
+ char j, char k, char l>
|
||||
+struct Tensor4_Expr;
|
||||
+
|
||||
+template <char i, int Dim> struct Index
|
||||
+{};
|
||||
+template <const int N> struct Number
|
||||
+{
|
||||
+ Number(){};
|
||||
+ operator int() const { return N; }
|
||||
+};
|
||||
+
|
||||
+template <class T, int Tensor_Dim0, int Tensor_Dim1, int Tensor_Dim2>
|
||||
+struct Tensor3
|
||||
+{
|
||||
+ T data[Tensor_Dim0][Tensor_Dim1][Tensor_Dim2];
|
||||
+
|
||||
+ T operator()(const int N1, const int N2, const int N3) const
|
||||
+ {
|
||||
+ return data[N1][N2][N3];
|
||||
+ }
|
||||
+
|
||||
+ template <char i, char j, char k, int Dim0, int Dim1, int Dim2>
|
||||
+ Tensor3_Expr<const Tensor3<T, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2>, T,
|
||||
+ Dim0, Dim1, Dim2, i, j, k>
|
||||
+ operator()(const Index<i, Dim0>, const Index<j, Dim1>,
|
||||
+ const Index<k, Dim2>) const
|
||||
+ {
|
||||
+ return Tensor3_Expr<const Tensor3<T, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2>,
|
||||
+ T, Dim0, Dim1, Dim2, i, j, k>(*this);
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+template <class A, class T, int Dim0, int Dim1, int Dim2, char i, char j,
|
||||
+ char k>
|
||||
+struct Tensor3_Expr
|
||||
+{
|
||||
+ A iter;
|
||||
+
|
||||
+ Tensor3_Expr(const A &a) : iter(a) {}
|
||||
+ T operator()(const int N1, const int N2, const int N3) const
|
||||
+ {
|
||||
+ return iter(N1, N2, N3);
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+template <class A, class T, int Tensor_Dim0, int Tensor_Dim1, int Tensor_Dim2,
|
||||
+ int Dim0, int Dim1, int Dim2, char i, char j, char k>
|
||||
+struct Tensor3_Expr<Tensor3<A, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2>, T, Dim0,
|
||||
+ Dim1, Dim2, i, j, k>
|
||||
+{
|
||||
+ Tensor3<A, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2> &iter;
|
||||
+
|
||||
+ Tensor3_Expr(Tensor3<A, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2> &a) : iter(a)
|
||||
+ {}
|
||||
+ T operator()(const int N1, const int N2, const int N3) const
|
||||
+ {
|
||||
+ return iter(N1, N2, N3);
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+template <class A, class B, class T, class U, int Dim0, int Dim1, int Dim23,
|
||||
+ int Dim4, int Dim5, char i, char j, char k, char l, char m>
|
||||
+struct Tensor3_times_Tensor3_21
|
||||
+{
|
||||
+ Tensor3_Expr<A, T, Dim0, Dim1, Dim23, i, j, k> iterA;
|
||||
+ Tensor3_Expr<B, U, Dim23, Dim4, Dim5, k, l, m> iterB;
|
||||
+
|
||||
+ template <int CurrentDim>
|
||||
+ T eval(const int N1, const int N2, const int N3, const int N4,
|
||||
+ const Number<CurrentDim> &) const
|
||||
+ {
|
||||
+ return iterA(N1, N2, CurrentDim - 1) * iterB(CurrentDim - 1, N3, N4)
|
||||
+ + eval(N1, N2, N3, N4, Number<CurrentDim - 1>());
|
||||
+ }
|
||||
+ T eval(const int N1, const int N2, const int N3, const int N4,
|
||||
+ const Number<1> &) const
|
||||
+ {
|
||||
+ return iterA(N1, N2, 0) * iterB(0, N3, N4);
|
||||
+ }
|
||||
+
|
||||
+ Tensor3_times_Tensor3_21(
|
||||
+ const Tensor3_Expr<A, T, Dim0, Dim1, Dim23, i, j, k> &a,
|
||||
+ const Tensor3_Expr<B, U, Dim23, Dim4, Dim5, k, l, m> &b)
|
||||
+ : iterA(a), iterB(b)
|
||||
+ {}
|
||||
+ T operator()(const int &N1, const int &N2, const int &N3,
|
||||
+ const int &N4) const
|
||||
+ {
|
||||
+ return eval(N1, N2, N3, N4, Number<Dim23>());
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+template <class A, class B, class T, class U, int Dim0, int Dim1, int Dim23,
|
||||
+ int Dim4, int Dim5, char i, char j, char k, char l, char m>
|
||||
+Tensor4_Expr<Tensor3_times_Tensor3_21<A, B, T, U, Dim0, Dim1, Dim23, Dim4,
|
||||
+ Dim5, i, j, k, l, m>,
|
||||
+ T, Dim0, Dim1, Dim4, Dim5, i, j, l, m>
|
||||
+operator*(const Tensor3_Expr<A, T, Dim0, Dim1, Dim23, i, j, k> &a,
|
||||
+ const Tensor3_Expr<B, U, Dim23, Dim4, Dim5, k, l, m> &b)
|
||||
+{
|
||||
+ using TensorExpr = Tensor3_times_Tensor3_21<A, B, T, U, Dim0, Dim1, Dim23,
|
||||
+ Dim4, Dim5, i, j, k, l, m>;
|
||||
+ return Tensor4_Expr<TensorExpr, T, Dim0, Dim1, Dim4, Dim5, i, j, l, m>(
|
||||
+ TensorExpr(a, b));
|
||||
+};
|
||||
+
|
||||
+template <class T, int Tensor_Dim0, int Tensor_Dim1, int Tensor_Dim2,
|
||||
+ int Tensor_Dim3>
|
||||
+struct Tensor4
|
||||
+{
|
||||
+ T data[Tensor_Dim0][Tensor_Dim1][Tensor_Dim2][Tensor_Dim3];
|
||||
+
|
||||
+ Tensor4() {}
|
||||
+ T &operator()(const int N1, const int N2, const int N3, const int N4)
|
||||
+ {
|
||||
+ return data[N1][N2][N3][N4];
|
||||
+ }
|
||||
+
|
||||
+ template <char i, char j, char k, char l, int Dim0, int Dim1, int Dim2,
|
||||
+ int Dim3>
|
||||
+ Tensor4_Expr<Tensor4<T, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2, Tensor_Dim3>,
|
||||
+ T, Dim0, Dim1, Dim2, Dim3, i, j, k, l>
|
||||
+ operator()(const Index<i, Dim0>, const Index<j, Dim1>, const Index<k, Dim2>,
|
||||
+ const Index<l, Dim3>)
|
||||
+ {
|
||||
+ return Tensor4_Expr<
|
||||
+ Tensor4<T, Tensor_Dim0, Tensor_Dim1, Tensor_Dim2, Tensor_Dim3>, T, Dim0,
|
||||
+ Dim1, Dim2, Dim3, i, j, k, l>(*this);
|
||||
+ };
|
||||
+};
|
||||
+
|
||||
+template <class A, class T, int Dim0, int Dim1, int Dim2, int Dim3, char i,
|
||||
+ char j, char k, char l>
|
||||
+struct Tensor4_Expr
|
||||
+{
|
||||
+ A iter;
|
||||
+
|
||||
+ Tensor4_Expr(const A &a) : iter(a) {}
|
||||
+ T operator()(const int N1, const int N2, const int N3, const int N4) const
|
||||
+ {
|
||||
+ return iter(N1, N2, N3, N4);
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+template <class A, class T, int Dim0, int Dim1, int Dim2, int Dim3, char i,
|
||||
+ char j, char k, char l>
|
||||
+struct Tensor4_Expr<Tensor4<A, Dim0, Dim1, Dim2, Dim3>, T, Dim0, Dim1, Dim2,
|
||||
+ Dim3, i, j, k, l>
|
||||
+{
|
||||
+ Tensor4<A, Dim0, Dim1, Dim2, Dim3> &iter;
|
||||
+
|
||||
+ Tensor4_Expr(Tensor4<A, Dim0, Dim1, Dim2, Dim3> &a) : iter(a) {}
|
||||
+ T operator()(const int N1, const int N2, const int N3, const int N4) const
|
||||
+ {
|
||||
+ return iter(N1, N2, N3, N4);
|
||||
+ }
|
||||
+
|
||||
+ template <class B, class U, int Dim1_0, int Dim1_1, int Dim1_2, int Dim1_3,
|
||||
+ char i_1, char j_1, char k_1, char l_1>
|
||||
+ auto &operator=(const Tensor4_Expr<B, U, Dim1_0, Dim1_1, Dim1_2, Dim1_3, i_1,
|
||||
+ j_1, k_1, l_1> &rhs)
|
||||
+ {
|
||||
+ for(int ii = 0; ii < Dim0; ++ii)
|
||||
+ for(int jj = 0; jj < Dim1; ++jj)
|
||||
+ for(int kk = 0; kk < Dim2; ++kk)
|
||||
+ for(int ll = 0; ll < Dim3; ++ll)
|
||||
+ {
|
||||
+ iter(ii, jj, kk, ll) = rhs(ii, jj, kk, ll);
|
||||
+ }
|
||||
+ return *this;
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+int main()
|
||||
+{
|
||||
+ Tensor3<float, 100, 100, 1000> t1;
|
||||
+ Tensor3<float, 1000, 100, 100> t2;
|
||||
+
|
||||
+ Index<'l', 100> l;
|
||||
+ Index<'m', 100> m;
|
||||
+ Index<'k', 1000> k;
|
||||
+ Index<'n', 100> n;
|
||||
+ Index<'o', 100> o;
|
||||
+
|
||||
+ Tensor4<float, 100, 100, 100, 100> res;
|
||||
+ res(l, m, n, o) = t1(l, m, k) * t2(k, n, o);
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
diff -urpN a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
|
||||
--- a/gcc/tree-ssa-loop-ivopts.c
|
||||
+++ b/gcc/tree-ssa-loop-ivopts.c
|
||||
@@ -114,7 +114,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
interface between the GIMPLE and RTL worlds. */
|
||||
|
||||
/* The infinite cost. */
|
||||
-#define INFTY 10000000
|
||||
+#define INFTY 1000000000
|
||||
|
||||
/* Returns the expected number of loop iterations for LOOP.
|
||||
The average trip count is computed from profile data if it
|
||||
@@ -180,7 +180,7 @@ struct comp_cost
|
||||
comp_cost (): cost (0), complexity (0), scratch (0)
|
||||
{}
|
||||
|
||||
- comp_cost (int cost, unsigned complexity, int scratch = 0)
|
||||
+ comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
|
||||
: cost (cost), complexity (complexity), scratch (scratch)
|
||||
{}
|
||||
|
||||
@@ -220,16 +220,16 @@ struct comp_cost
|
||||
/* Returns true if COST1 is smaller or equal than COST2. */
|
||||
friend bool operator<= (comp_cost cost1, comp_cost cost2);
|
||||
|
||||
- int cost; /* The runtime cost. */
|
||||
+ int64_t cost; /* The runtime cost. */
|
||||
unsigned complexity; /* The estimate of the complexity of the code for
|
||||
the computation (in no concrete units --
|
||||
complexity field should be larger for more
|
||||
complex expressions and addressing modes). */
|
||||
- int scratch; /* Scratch used during cost computation. */
|
||||
+ int64_t scratch; /* Scratch used during cost computation. */
|
||||
};
|
||||
|
||||
static const comp_cost no_cost;
|
||||
-static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
|
||||
+static const comp_cost infinite_cost (INFTY, 0, INFTY);
|
||||
|
||||
bool
|
||||
comp_cost::infinite_cost_p ()
|
||||
@@ -243,6 +243,7 @@ operator+ (comp_cost cost1, comp_cost cost2)
|
||||
if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
|
||||
return infinite_cost;
|
||||
|
||||
+ gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
|
||||
cost1.cost += cost2.cost;
|
||||
cost1.complexity += cost2.complexity;
|
||||
|
||||
@@ -256,6 +257,7 @@ operator- (comp_cost cost1, comp_cost cost2)
|
||||
return infinite_cost;
|
||||
|
||||
gcc_assert (!cost2.infinite_cost_p ());
|
||||
+ gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
|
||||
|
||||
cost1.cost -= cost2.cost;
|
||||
cost1.complexity -= cost2.complexity;
|
||||
@@ -276,6 +278,7 @@ comp_cost::operator+= (HOST_WIDE_INT c)
|
||||
if (infinite_cost_p ())
|
||||
return *this;
|
||||
|
||||
+ gcc_assert (this->cost + c < infinite_cost.cost);
|
||||
this->cost += c;
|
||||
|
||||
return *this;
|
||||
@@ -287,6 +290,7 @@ comp_cost::operator-= (HOST_WIDE_INT c)
|
||||
if (infinite_cost_p ())
|
||||
return *this;
|
||||
|
||||
+ gcc_assert (this->cost - c < infinite_cost.cost);
|
||||
this->cost -= c;
|
||||
|
||||
return *this;
|
||||
@@ -295,6 +299,7 @@ comp_cost::operator-= (HOST_WIDE_INT c)
|
||||
comp_cost
|
||||
comp_cost::operator/= (HOST_WIDE_INT c)
|
||||
{
|
||||
+ gcc_assert (c != 0);
|
||||
if (infinite_cost_p ())
|
||||
return *this;
|
||||
|
||||
@@ -309,6 +314,7 @@ comp_cost::operator*= (HOST_WIDE_INT c)
|
||||
if (infinite_cost_p ())
|
||||
return *this;
|
||||
|
||||
+ gcc_assert (this->cost * c < infinite_cost.cost);
|
||||
this->cost *= c;
|
||||
|
||||
return *this;
|
||||
@@ -638,7 +644,7 @@ struct iv_ca
|
||||
comp_cost cand_use_cost;
|
||||
|
||||
/* Total cost of candidates. */
|
||||
- unsigned cand_cost;
|
||||
+ int64_t cand_cost;
|
||||
|
||||
/* Number of times each invariant variable is used. */
|
||||
unsigned *n_inv_var_uses;
|
||||
@@ -4025,16 +4031,16 @@ get_computation_at (struct loop *loop, gimple *at,
|
||||
if we're optimizing for speed, amortize it over the per-iteration cost.
|
||||
If ROUND_UP_P is true, the result is round up rather than to zero when
|
||||
optimizing for speed. */
|
||||
-static unsigned
|
||||
-adjust_setup_cost (struct ivopts_data *data, unsigned cost,
|
||||
+static int64_t
|
||||
+adjust_setup_cost (struct ivopts_data *data, int64_t cost,
|
||||
bool round_up_p = false)
|
||||
{
|
||||
if (cost == INFTY)
|
||||
return cost;
|
||||
else if (optimize_loop_for_speed_p (data->current_loop))
|
||||
{
|
||||
- HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
|
||||
- return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
|
||||
+ int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
|
||||
+ return (cost + (round_up_p ? niters - 1 : 0)) / niters;
|
||||
}
|
||||
else
|
||||
return cost;
|
||||
@@ -4305,7 +4311,7 @@ enum ainc_type
|
||||
|
||||
struct ainc_cost_data
|
||||
{
|
||||
- unsigned costs[AINC_NONE];
|
||||
+ int64_t costs[AINC_NONE];
|
||||
};
|
||||
|
||||
static comp_cost
|
||||
@@ -4566,12 +4572,12 @@ get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
|
||||
if (scale_factor == 1)
|
||||
return cost;
|
||||
|
||||
- int scaled_cost
|
||||
+ int64_t scaled_cost
|
||||
= cost.scratch + (cost.cost - cost.scratch) * scale_factor;
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "Scaling cost based on bb prob "
|
||||
- "by %2.2f: %d (scratch: %d) -> %d\n",
|
||||
+ fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
|
||||
+ "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
|
||||
1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
|
||||
|
||||
cost.cost = scaled_cost;
|
||||
@@ -5539,7 +5545,7 @@ determine_group_iv_costs (struct ivopts_data *data)
|
||||
|| group->cost_map[j].cost.infinite_cost_p ())
|
||||
continue;
|
||||
|
||||
- fprintf (dump_file, " %d\t%d\t%d\t",
|
||||
+ fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
|
||||
group->cost_map[j].cand->id,
|
||||
group->cost_map[j].cost.cost,
|
||||
group->cost_map[j].cost.complexity);
|
||||
@@ -5569,7 +5575,7 @@ static void
|
||||
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
|
||||
{
|
||||
comp_cost cost_base;
|
||||
- unsigned cost, cost_step;
|
||||
+ int64_t cost, cost_step;
|
||||
tree base;
|
||||
|
||||
gcc_assert (cand->iv != NULL);
|
||||
@@ -6139,11 +6145,11 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
|
||||
unsigned i;
|
||||
comp_cost cost = iv_ca_cost (ivs);
|
||||
|
||||
- fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
|
||||
+ fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
|
||||
cost.complexity);
|
||||
- fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
|
||||
- ivs->cand_cost, ivs->cand_use_cost.cost,
|
||||
- ivs->cand_use_cost.complexity);
|
||||
+ fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
|
||||
+ "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
|
||||
+ ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
|
||||
bitmap_print (file, ivs->cands, " candidates: ","\n");
|
||||
|
||||
for (i = 0; i < ivs->upto; i++)
|
||||
@@ -6151,9 +6157,9 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
|
||||
struct iv_group *group = data->vgroups[i];
|
||||
struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
|
||||
if (cp)
|
||||
- fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
|
||||
- group->id, cp->cand->id, cp->cost.cost,
|
||||
- cp->cost.complexity);
|
||||
+ fprintf (file, " group:%d --> iv_cand:%d, cost=("
|
||||
+ "%" PRId64 ",%d)\n", group->id, cp->cand->id,
|
||||
+ cp->cost.cost, cp->cost.complexity);
|
||||
else
|
||||
fprintf (file, " group:%d --> ??\n", group->id);
|
||||
}
|
||||
@@ -6751,9 +6757,9 @@ find_optimal_iv_set (struct ivopts_data *data)
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
- fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
|
||||
+ fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
|
||||
origcost.cost, origcost.complexity);
|
||||
- fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
|
||||
+ fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
|
||||
cost.cost, cost.complexity);
|
||||
}
|
||||
|
||||
160
loop-finite-bugfix.patch
Normal file
160
loop-finite-bugfix.patch
Normal file
@ -0,0 +1,160 @@
|
||||
diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
|
||||
index 6b6c754ad86..58ba0948e79 100644
|
||||
--- a/gcc/c-family/c-opts.c
|
||||
+++ b/gcc/c-family/c-opts.c
|
||||
@@ -989,6 +989,10 @@ c_common_post_options (const char **pfilename)
|
||||
if (!global_options_set.x_flag_new_ttp)
|
||||
flag_new_ttp = (cxx_dialect >= cxx17);
|
||||
|
||||
+ /* C++11 guarantees forward progress. */
|
||||
+ if (!global_options_set.x_flag_finite_loops)
|
||||
+ flag_finite_loops = (optimize >= 2 && cxx_dialect >= cxx11);
|
||||
+
|
||||
if (cxx_dialect >= cxx11)
|
||||
{
|
||||
/* If we're allowing C++0x constructs, don't warn about C++98
|
||||
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
|
||||
index 1c49a8b8c2d..18b404e292f 100644
|
||||
--- a/gcc/cfgloop.h
|
||||
+++ b/gcc/cfgloop.h
|
||||
@@ -226,6 +226,10 @@ public:
|
||||
/* True if the loop is part of an oacc kernels region. */
|
||||
unsigned in_oacc_kernels_region : 1;
|
||||
|
||||
+ /* True if the loop is known to be finite. This is a localized
|
||||
+ flag_finite_loops or similar pragmas state. */
|
||||
+ unsigned finite_p : 1;
|
||||
+
|
||||
/* The number of times to unroll the loop. 0 means no information given,
|
||||
just do what we always do. A value of 1 means do not unroll the loop.
|
||||
A value of USHRT_MAX means unroll with no specific unrolling factor.
|
||||
diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c
|
||||
index c9375565f62..50c7267ec49 100644
|
||||
--- a/gcc/cfgloopmanip.c
|
||||
+++ b/gcc/cfgloopmanip.c
|
||||
@@ -1023,6 +1023,7 @@ copy_loop_info (class loop *loop, class loop *target)
|
||||
target->dont_vectorize = loop->dont_vectorize;
|
||||
target->force_vectorize = loop->force_vectorize;
|
||||
target->in_oacc_kernels_region = loop->in_oacc_kernels_region;
|
||||
+ target->finite_p = loop->finite_p;
|
||||
target->unroll = loop->unroll;
|
||||
target->owned_clique = loop->owned_clique;
|
||||
}
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index 4368910cb54..bb2ea4c905d 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -1490,7 +1490,7 @@ Common Report Var(flag_finite_math_only) Optimization SetByCombined
|
||||
Assume no NaNs or infinities are generated.
|
||||
|
||||
ffinite-loops
|
||||
-Common Report Var(flag_finite_loops) Optimization
|
||||
+Common Report Var(flag_finite_loops) Optimization Init(0)
|
||||
Assume that loops with an exit will terminate and not loop indefinitely.
|
||||
|
||||
ffixed-
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index e9e1683e9a8..e3e652ff6c1 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -10432,7 +10432,8 @@ Assume that a loop with an exit will eventually take the exit and not loop
|
||||
indefinitely. This allows the compiler to remove loops that otherwise have
|
||||
no side-effects, not considering eventual endless looping as such.
|
||||
|
||||
-This option is enabled by default at @option{-O2}.
|
||||
+This option is enabled by default at @option{-O2} for C++ with -std=c++11
|
||||
+or higher.
|
||||
|
||||
@item -ftree-dominator-opts
|
||||
@opindex ftree-dominator-opts
|
||||
diff --git a/gcc/lto-streamer-in.c b/gcc/lto-streamer-in.c
|
||||
index 9566e5ee102..244f5b8aa5c 100644
|
||||
--- a/gcc/lto-streamer-in.c
|
||||
+++ b/gcc/lto-streamer-in.c
|
||||
@@ -821,6 +821,7 @@ input_cfg (class lto_input_block *ib, class data_in *data_in,
|
||||
loop->owned_clique = streamer_read_hwi (ib);
|
||||
loop->dont_vectorize = streamer_read_hwi (ib);
|
||||
loop->force_vectorize = streamer_read_hwi (ib);
|
||||
+ loop->finite_p = streamer_read_hwi (ib);
|
||||
loop->simduid = stream_read_tree (ib, data_in);
|
||||
|
||||
place_new_loop (fn, loop);
|
||||
diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c
|
||||
index a219c1d0dd1..52ef94718db 100644
|
||||
--- a/gcc/lto-streamer-out.c
|
||||
+++ b/gcc/lto-streamer-out.c
|
||||
@@ -1950,6 +1950,7 @@ output_cfg (struct output_block *ob, struct function *fn)
|
||||
streamer_write_hwi (ob, loop->owned_clique);
|
||||
streamer_write_hwi (ob, loop->dont_vectorize);
|
||||
streamer_write_hwi (ob, loop->force_vectorize);
|
||||
+ streamer_write_hwi (ob, loop->finite_p);
|
||||
stream_write_tree (ob, loop->simduid, true);
|
||||
}
|
||||
|
||||
diff --git a/gcc/opts.c b/gcc/opts.c
|
||||
index 5dc7d65dedd..d4df8627bf7 100644
|
||||
--- a/gcc/opts.c
|
||||
+++ b/gcc/opts.c
|
||||
@@ -478,7 +478,6 @@ static const struct default_options default_options_table[] =
|
||||
{ OPT_LEVELS_2_PLUS, OPT_fdevirtualize, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_fdevirtualize_speculatively, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_fexpensive_optimizations, NULL, 1 },
|
||||
- { OPT_LEVELS_2_PLUS, OPT_ffinite_loops, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_fgcse, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_fhoist_adjacent_loads, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_findirect_inlining, NULL, 1 },
|
||||
diff --git a/gcc/testsuite/gcc.dg/torture/pr94392.c b/gcc/testsuite/gcc.dg/torture/pr94392.c
|
||||
new file mode 100644
|
||||
index 00000000000..373f18ce983
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/torture/pr94392.c
|
||||
@@ -0,0 +1,22 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-skip-if "finite loops" { *-*-* } { "-ffinite-loops" } } */
|
||||
+/* { dg-skip-if "LTO optimizes the test" { *-*-* } { "-flto" } } */
|
||||
+/* { dg-additional-options "-fdump-tree-optimized" } */
|
||||
+
|
||||
+int a, b;
|
||||
+
|
||||
+int
|
||||
+main()
|
||||
+{
|
||||
+ while (1)
|
||||
+ {
|
||||
+ /* Try really hard. */
|
||||
+ if (a != b)
|
||||
+ return 1;
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* ISO C does not guarantee forward progress like C++ does so we
|
||||
+ cannot assume the loop is finite and optimize it to return 1. */
|
||||
+/* { dg-final { scan-tree-dump "if" "optimized" } } */
|
||||
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
|
||||
index f7b817d94e6..e99fb9ff5d1 100644
|
||||
--- a/gcc/tree-cfg.c
|
||||
+++ b/gcc/tree-cfg.c
|
||||
@@ -324,6 +324,9 @@ replace_loop_annotate (void)
|
||||
/* Then look into the latch, if any. */
|
||||
if (loop->latch)
|
||||
replace_loop_annotate_in_block (loop->latch, loop);
|
||||
+
|
||||
+ /* Push the global flag_finite_loops state down to individual loops. */
|
||||
+ loop->finite_p = flag_finite_loops;
|
||||
}
|
||||
|
||||
/* Remove IFN_ANNOTATE. Safeguard for the case loop->latch == NULL. */
|
||||
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
|
||||
index 6e6df0bfdb8..7d61ef080eb 100644
|
||||
--- a/gcc/tree-ssa-loop-niter.c
|
||||
+++ b/gcc/tree-ssa-loop-niter.c
|
||||
@@ -2834,7 +2834,7 @@ finite_loop_p (class loop *loop)
|
||||
return true;
|
||||
}
|
||||
|
||||
- if (flag_finite_loops)
|
||||
+ if (loop->finite_p)
|
||||
{
|
||||
unsigned i;
|
||||
vec<edge> exits = get_loop_exit_edges (loop);
|
||||
367
loop-finite.patch
Normal file
367
loop-finite.patch
Normal file
@ -0,0 +1,367 @@
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index e1404165feb..a1544d06824 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -1437,6 +1437,10 @@ ffinite-math-only
|
||||
Common Report Var(flag_finite_math_only) Optimization SetByCombined
|
||||
Assume no NaNs or infinities are generated.
|
||||
|
||||
+ffinite-loops
|
||||
+Common Report Var(flag_finite_loops) Optimization
|
||||
+Assume that loops with an exit will terminate and not loop indefinitely.
|
||||
+
|
||||
ffixed-
|
||||
Common Joined RejectNegative Var(common_deferred_options) Defer
|
||||
-ffixed-<register> Mark <register> as being unavailable to the compiler.
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index 090d606b3ba..bf9da0f0a6e 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -413,6 +413,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-fdevirtualize-at-ltrans -fdse @gol
|
||||
-fearly-inlining -fipa-sra -fexpensive-optimizations -ffat-lto-objects @gol
|
||||
-ffast-math -ffinite-math-only -ffloat-store -fexcess-precision=@var{style} @gol
|
||||
+-ffinite-loops @gol
|
||||
-fforward-propagate -ffp-contract=@var{style} -ffunction-sections @gol
|
||||
-fgcse -fgcse-after-reload -fgcse-las -fgcse-lm -fgraphite-identity @gol
|
||||
-fgcse-sm -fhoist-adjacent-loads -fif-conversion @gol
|
||||
@@ -8303,6 +8304,7 @@ also turns on the following optimization flags:
|
||||
-fdelete-null-pointer-checks @gol
|
||||
-fdevirtualize -fdevirtualize-speculatively @gol
|
||||
-fexpensive-optimizations @gol
|
||||
+-ffinite-loops @gol
|
||||
-fgcse -fgcse-lm @gol
|
||||
-fhoist-adjacent-loads @gol
|
||||
-finline-small-functions @gol
|
||||
@@ -9524,6 +9526,15 @@ that may set @code{errno} but are otherwise free of side effects. This flag is
|
||||
enabled by default at @option{-O2} and higher if @option{-Os} is not also
|
||||
specified.
|
||||
|
||||
+@item -ffinite-loops
|
||||
+@opindex ffinite-loops
|
||||
+@opindex fno-finite-loops
|
||||
+Assume that a loop with an exit will eventually take the exit and not loop
|
||||
+indefinitely. This allows the compiler to remove loops that otherwise have
|
||||
+no side-effects, not considering eventual endless looping as such.
|
||||
+
|
||||
+This option is enabled by default at @option{-O2}.
|
||||
+
|
||||
@item -ftree-dominator-opts
|
||||
@opindex ftree-dominator-opts
|
||||
Perform a variety of simple scalar cleanups (constant/copy
|
||||
diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
|
||||
index 97ae47b3135..c8a281c6d28 100644
|
||||
--- a/gcc/omp-offload.c
|
||||
+++ b/gcc/omp-offload.c
|
||||
@@ -300,7 +300,7 @@ oacc_xform_loop (gcall *call)
|
||||
tree chunk_size = NULL_TREE;
|
||||
unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
|
||||
tree lhs = gimple_call_lhs (call);
|
||||
- tree type = TREE_TYPE (lhs);
|
||||
+ tree type = NULL_TREE;
|
||||
tree diff_type = TREE_TYPE (range);
|
||||
tree r = NULL_TREE;
|
||||
gimple_seq seq = NULL;
|
||||
@@ -308,6 +308,15 @@ oacc_xform_loop (gcall *call)
|
||||
unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
|
||||
unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
|
||||
|
||||
+ /* Skip lowering if return value of IFN_GOACC_LOOP call is not used. */
|
||||
+ if (!lhs)
|
||||
+ {
|
||||
+ gsi_replace_with_seq (&gsi, seq, true);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ type = TREE_TYPE (lhs);
|
||||
+
|
||||
#ifdef ACCEL_COMPILER
|
||||
chunk_size = gimple_call_arg (call, 4);
|
||||
if (integer_minus_onep (chunk_size) /* Force static allocation. */
|
||||
diff --git a/gcc/opts.c b/gcc/opts.c
|
||||
index 64f94ac8ffd..b38bfb15a56 100644
|
||||
--- a/gcc/opts.c
|
||||
+++ b/gcc/opts.c
|
||||
@@ -494,6 +494,7 @@ static const struct default_options default_options_table[] =
|
||||
{ OPT_LEVELS_2_PLUS, OPT_fdevirtualize, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_fdevirtualize_speculatively, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_fexpensive_optimizations, NULL, 1 },
|
||||
+ { OPT_LEVELS_2_PLUS, OPT_ffinite_loops, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_fgcse, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_fhoist_adjacent_loads, NULL, 1 },
|
||||
{ OPT_LEVELS_2_PLUS, OPT_findirect_inlining, NULL, 1 },
|
||||
diff --git a/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C
|
||||
new file mode 100644
|
||||
index 00000000000..6b1e879e6a9
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C
|
||||
@@ -0,0 +1,33 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-tree-cddce2 -ffinite-loops" } */
|
||||
+
|
||||
+#include <string>
|
||||
+#include <vector>
|
||||
+#include <list>
|
||||
+#include <set>
|
||||
+#include <map>
|
||||
+
|
||||
+using namespace std;
|
||||
+
|
||||
+int foo (vector<string> &v, list<string> &l, set<string> &s, map<int, string> &m)
|
||||
+{
|
||||
+ for (vector<string>::iterator it = v.begin (); it != v.end (); ++it)
|
||||
+ it->length();
|
||||
+
|
||||
+ for (list<string>::iterator it = l.begin (); it != l.end (); ++it)
|
||||
+ it->length();
|
||||
+
|
||||
+ for (map<int, string>::iterator it = m.begin (); it != m.end (); ++it)
|
||||
+ it->first + it->second.length();
|
||||
+
|
||||
+ for (set<string>::iterator it0 = s.begin (); it0 != s.end(); ++it0)
|
||||
+ for (vector<string>::reverse_iterator it1 = v.rbegin(); it1 != v.rend(); ++it1)
|
||||
+ {
|
||||
+ it0->length();
|
||||
+ it1->length();
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+/* { dg-final { scan-tree-dump-not "if" "cddce2"} } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/const-1.c b/gcc/testsuite/gcc.dg/const-1.c
|
||||
index a5b2b167728..2e95bd8e2ea 100644
|
||||
--- a/gcc/testsuite/gcc.dg/const-1.c
|
||||
+++ b/gcc/testsuite/gcc.dg/const-1.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile { target nonpic } } */
|
||||
-/* { dg-options "-O2 -Wsuggest-attribute=const" } */
|
||||
+/* { dg-options "-O2 -Wsuggest-attribute=const -fno-finite-loops" } */
|
||||
|
||||
extern int extern_const(int a) __attribute__ ((const));
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/graphite/graphite.exp b/gcc/testsuite/gcc.dg/graphite/graphite.exp
|
||||
index ea6144607e2..523a955e82d 100644
|
||||
--- a/gcc/testsuite/gcc.dg/graphite/graphite.exp
|
||||
+++ b/gcc/testsuite/gcc.dg/graphite/graphite.exp
|
||||
@@ -56,7 +56,7 @@ set vect_files [lsort [glob -nocomplain $srcdir/$subdir/vect-*.c ] ]
|
||||
|
||||
# Tests to be compiled.
|
||||
set dg-do-what-default compile
|
||||
-dg-runtest $scop_files "" "-O2 -fgraphite -fdump-tree-graphite-all"
|
||||
+dg-runtest $scop_files "" "-O2 -fgraphite -fdump-tree-graphite-all -fno-finite-loops"
|
||||
dg-runtest $id_files "" "-O2 -fgraphite-identity -ffast-math -fdump-tree-graphite-details"
|
||||
|
||||
# Tests to be run.
|
||||
diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-1.c b/gcc/testsuite/gcc.dg/loop-unswitch-1.c
|
||||
index f6fc41d6bcc..de2fb2c0e4b 100644
|
||||
--- a/gcc/testsuite/gcc.dg/loop-unswitch-1.c
|
||||
+++ b/gcc/testsuite/gcc.dg/loop-unswitch-1.c
|
||||
@@ -1,6 +1,6 @@
|
||||
/* For PR rtl-optimization/27735 */
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-details" } */
|
||||
+/* { dg-options "-O2 -funswitch-loops -fdump-tree-unswitch-details -fno-finite-loops" } */
|
||||
|
||||
void set_color(void);
|
||||
void xml_colorize_line(unsigned int *p, int state)
|
||||
diff --git a/gcc/testsuite/gcc.dg/predict-9.c b/gcc/testsuite/gcc.dg/predict-9.c
|
||||
index 7e5ba085ece..f491c511bd9 100644
|
||||
--- a/gcc/testsuite/gcc.dg/predict-9.c
|
||||
+++ b/gcc/testsuite/gcc.dg/predict-9.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-profile_estimate" } */
|
||||
+/* { dg-options "-O2 -fdisable-tree-evrp -fdump-tree-profile_estimate -fno-finite-loops" } */
|
||||
|
||||
extern int global;
|
||||
extern int global2;
|
||||
diff --git a/gcc/testsuite/gcc.dg/pure-2.c b/gcc/testsuite/gcc.dg/pure-2.c
|
||||
index fe6e2bce695..318cfd18630 100644
|
||||
--- a/gcc/testsuite/gcc.dg/pure-2.c
|
||||
+++ b/gcc/testsuite/gcc.dg/pure-2.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -Wsuggest-attribute=pure" } */
|
||||
+/* { dg-options "-O2 -Wsuggest-attribute=pure -fno-finite-loops" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
extern int extern_const(int a) __attribute__ ((pure));
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20040211-1.c b/gcc/testsuite/gcc.dg/tree-ssa/20040211-1.c
|
||||
index d289e5d0f55..a9bdf26931a 100644
|
||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/20040211-1.c
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/20040211-1.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -fdump-tree-cddce2" } */
|
||||
+/* { dg-options "-O2 -fdump-tree-cddce2 -fno-finite-loops" } */
|
||||
|
||||
struct rtx_def;
|
||||
typedef struct rtx_def *rtx;
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/dce-2.c b/gcc/testsuite/gcc.dg/tree-ssa/dce-2.c
|
||||
new file mode 100644
|
||||
index 00000000000..18c1ddb819e
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/dce-2.c
|
||||
@@ -0,0 +1,37 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-tree-cddce1 -ffinite-loops" } */
|
||||
+
|
||||
+typedef struct list {
|
||||
+ char pad[15];
|
||||
+ struct list *next;
|
||||
+} list;
|
||||
+
|
||||
+int data;
|
||||
+
|
||||
+list *head, *tail;
|
||||
+
|
||||
+int __attribute__((pure)) pfn (int);
|
||||
+
|
||||
+int foo (unsigned u, int s)
|
||||
+{
|
||||
+ unsigned i;
|
||||
+ list *p;
|
||||
+ int j;
|
||||
+
|
||||
+ for (i = 0; i < u; i += 2)
|
||||
+ ;
|
||||
+
|
||||
+ for (p = head; p; p = p->next)
|
||||
+ ;
|
||||
+
|
||||
+ for (j = data; j & s; j = pfn (j + 3))
|
||||
+ ;
|
||||
+
|
||||
+ for (p = head; p != tail; p = p->next)
|
||||
+ for (j = data + 1; j > s; j = pfn (j + 2))
|
||||
+ ;
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+/* { dg-final { scan-tree-dump-not "if" "cddce1"} } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-10.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-10.c
|
||||
index a29c9fb2501..3d05ad2d073 100644
|
||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-10.c
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-10.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
+/* { dg-options "-O2 -fdump-tree-optimized -fno-finite-loops" } */
|
||||
/* { dg-require-effective-target int32plus } */
|
||||
|
||||
int bar (void);
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
|
||||
index e9b4f2628d5..187c08407d5 100644
|
||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-6.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -w" } */
|
||||
+/* { dg-options "-O2 -fsplit-paths -fno-tree-cselim -fdump-tree-split-paths-details -w -fno-finite-loops" } */
|
||||
|
||||
struct __sFILE
|
||||
{
|
||||
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c
|
||||
index d829b04d177..67526762f2c 100644
|
||||
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c
|
||||
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-12.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -fdump-tree-thread2-details -fdump-tree-thread3-details -fdump-tree-thread4-details" } */
|
||||
+/* { dg-options "-O2 -fdump-tree-thread2-details -fdump-tree-thread3-details -fdump-tree-thread4-details -fno-finite-loops" } */
|
||||
/* { dg-final { scan-tree-dump "FSM" "thread2" } } */
|
||||
/* { dg-final { scan-tree-dump "FSM" "thread3" } } */
|
||||
/* { dg-final { scan-tree-dump "FSM" "thread4" { xfail *-*-* } } } */
|
||||
diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c
|
||||
index 2478219d873..a38899edd6c 100644
|
||||
--- a/gcc/tree-ssa-dce.c
|
||||
+++ b/gcc/tree-ssa-dce.c
|
||||
@@ -245,6 +245,17 @@ mark_stmt_if_obviously_necessary (gimple *stmt, bool aggressive)
|
||||
mark_stmt_necessary (stmt, true);
|
||||
return;
|
||||
}
|
||||
+ /* IFN_GOACC_LOOP calls are necessary in that they are used to
|
||||
+ represent parameter (i.e. step, bound) of a lowered OpenACC
|
||||
+ partitioned loop. But this kind of partitioned loop might not
|
||||
+ survive from aggressive loop removal for it has loop exit and
|
||||
+ is assumed to be finite. Therefore, we need to explicitly mark
|
||||
+ these calls. (An example is libgomp.oacc-c-c++-common/pr84955.c) */
|
||||
+ if (gimple_call_internal_p (stmt, IFN_GOACC_LOOP))
|
||||
+ {
|
||||
+ mark_stmt_necessary (stmt, true);
|
||||
+ return;
|
||||
+ }
|
||||
if (!gimple_call_lhs (stmt))
|
||||
return;
|
||||
break;
|
||||
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
|
||||
index 84e6e313c85..f51385900ed 100644
|
||||
--- a/gcc/tree-ssa-loop-niter.c
|
||||
+++ b/gcc/tree-ssa-loop-niter.c
|
||||
@@ -2830,6 +2830,27 @@ finite_loop_p (struct loop *loop)
|
||||
loop->num);
|
||||
return true;
|
||||
}
|
||||
+
|
||||
+ if (flag_finite_loops)
|
||||
+ {
|
||||
+ unsigned i;
|
||||
+ vec<edge> exits = get_loop_exit_edges (loop);
|
||||
+ edge ex;
|
||||
+
|
||||
+ /* If the loop has a normal exit, we can assume it will terminate. */
|
||||
+ FOR_EACH_VEC_ELT (exits, i, ex)
|
||||
+ if (!(ex->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_FAKE)))
|
||||
+ {
|
||||
+ exits.release ();
|
||||
+ if (dump_file)
|
||||
+ fprintf (dump_file, "Assume loop %i to be finite: it has an exit "
|
||||
+ "and -ffinite-loops is on.\n", loop->num);
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ exits.release ();
|
||||
+ }
|
||||
+
|
||||
return false;
|
||||
}
|
||||
|
||||
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955-1.c
|
||||
new file mode 100644
|
||||
index 00000000000..44767cd27c3
|
||||
--- /dev/null
|
||||
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr84955-1.c
|
||||
@@ -0,0 +1,31 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fdump-tree-cddce2 -ffinite-loops" } */
|
||||
+
|
||||
+int
|
||||
+f1 (void)
|
||||
+{
|
||||
+ int i, j;
|
||||
+
|
||||
+#pragma acc parallel loop tile(2,3)
|
||||
+ for (i = 1; i < 10; i++)
|
||||
+ for (j = 1; j < 10; j++)
|
||||
+ for (;;)
|
||||
+ ;
|
||||
+
|
||||
+ return i + j;
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+f2 (void)
|
||||
+{
|
||||
+ int i, j, k;
|
||||
+
|
||||
+#pragma acc parallel loop tile(2,3)
|
||||
+ for (i = 1; i < 10; i++)
|
||||
+ for (j = 1; j < 10; j++)
|
||||
+ for (k = 1; k < 10; k++)
|
||||
+ ;
|
||||
+
|
||||
+ return i + j;
|
||||
+}
|
||||
+/* { dg-final { scan-tree-dump-not "if" "cddce2"} } */
|
||||
1276
loop-split.patch
Normal file
1276
loop-split.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,13 +0,0 @@
|
||||
diff -N -urp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
--- a/gcc/config/aarch64/aarch64.md 2019-05-30 16:12:52.950606040 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64.md 2019-05-30 16:15:56.606599549 +0800
|
||||
@@ -3110,7 +3110,8 @@
|
||||
(define_insn_and_split "*compare_cstore<mode>_insn"
|
||||
[(set (match_operand:GPI 0 "register_operand" "=r")
|
||||
(EQL:GPI (match_operand:GPI 1 "register_operand" "r")
|
||||
- (match_operand:GPI 2 "aarch64_imm24" "n")))]
|
||||
+ (match_operand:GPI 2 "aarch64_imm24" "n")))
|
||||
+ (clobber (reg:CC CC_REGNUM))]
|
||||
"!aarch64_move_imm (INTVAL (operands[2]), <MODE>mode)
|
||||
&& !aarch64_plus_operand (operands[2], <MODE>mode)
|
||||
&& !reload_completed"
|
||||
@ -1,108 +0,0 @@
|
||||
diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||||
--- a/gcc/config/aarch64/aarch64.c 2018-09-19 17:11:42.583520820 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64.c 2018-09-19 17:10:22.715520820 +0800
|
||||
@@ -1260,29 +1260,32 @@ aarch64_is_long_call_p (rtx sym)
|
||||
void
|
||||
aarch64_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
|
||||
{
|
||||
- if (!TARGET_LONG_CALLS)
|
||||
+ if (flag_fentry)
|
||||
{
|
||||
- fprintf (file, "\tmov\tx9, x30\n");
|
||||
- fprintf (file, "\tbl\t__fentry__\n");
|
||||
- fprintf (file, "\tmov\tx30, x9\n");
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- if (flag_pic)
|
||||
+ if (!TARGET_LONG_CALLS)
|
||||
{
|
||||
fprintf (file, "\tmov\tx9, x30\n");
|
||||
- fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
|
||||
- fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
|
||||
- fprintf (file, "\tblr\tx10\n");
|
||||
+ fprintf (file, "\tbl\t__fentry__\n");
|
||||
fprintf (file, "\tmov\tx30, x9\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
- fprintf (file, "\tmov\tx9, x30\n");
|
||||
- fprintf (file, "\tadrp\tx10, __fentry__\n");
|
||||
- fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
|
||||
- fprintf (file, "\tblr\tx10\n");
|
||||
- fprintf (file, "\tmov\tx30, x9\n");
|
||||
+ if (flag_pic)
|
||||
+ {
|
||||
+ fprintf (file, "\tmov\tx9, x30\n");
|
||||
+ fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
|
||||
+ fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
|
||||
+ fprintf (file, "\tblr\tx10\n");
|
||||
+ fprintf (file, "\tmov\tx30, x9\n");
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ fprintf (file, "\tmov\tx9, x30\n");
|
||||
+ fprintf (file, "\tadrp\tx10, __fentry__\n");
|
||||
+ fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
|
||||
+ fprintf (file, "\tblr\tx10\n");
|
||||
+ fprintf (file, "\tmov\tx30, x9\n");
|
||||
+ }
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -12020,6 +12023,15 @@ aarch64_emit_unlikely_jump (rtx insn)
|
||||
add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
|
||||
}
|
||||
|
||||
+/* Return true, if profiling code should be emitted before
|
||||
+ prologue. Otherwise it returns false.
|
||||
+ Note: For x86 with "hotfix" it is sorried. */
|
||||
+static bool
|
||||
+aarch64_profile_before_prologue (void)
|
||||
+{
|
||||
+ return flag_fentry != 0;
|
||||
+}
|
||||
+
|
||||
/* Expand a compare and swap pattern. */
|
||||
|
||||
void
|
||||
@@ -14952,6 +14964,9 @@ aarch64_run_selftests (void)
|
||||
#undef TARGET_ASM_ALIGNED_SI_OP
|
||||
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
|
||||
|
||||
+#undef TARGET_PROFILE_BEFORE_PROLOGUE
|
||||
+#define TARGET_PROFILE_BEFORE_PROLOGUE aarch64_profile_before_prologue
|
||||
+
|
||||
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
|
||||
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
|
||||
hook_bool_const_tree_hwi_hwi_const_tree_true
|
||||
diff -N -urp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
--- a/gcc/config/aarch64/aarch64.h 2018-09-19 17:11:42.587520820 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64.h 2018-09-19 17:10:22.715520820 +0800
|
||||
@@ -850,9 +850,12 @@ typedef struct
|
||||
{ \
|
||||
rtx fun, lr; \
|
||||
const rtx_insn* tmp = get_insns (); \
|
||||
- lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
|
||||
- fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
|
||||
- emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
|
||||
+ if (!flag_fentry) \
|
||||
+ { \
|
||||
+ lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
|
||||
+ fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
|
||||
+ emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
|
||||
+ } \
|
||||
if (TARGET_LONG_CALLS) \
|
||||
{ \
|
||||
emit_insn (gen_blockage ()); \
|
||||
diff -N -urp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
|
||||
--- a/gcc/config/aarch64/aarch64.opt 2018-09-19 17:11:42.587520820 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64.opt 2018-09-19 17:10:22.715520820 +0800
|
||||
@@ -192,3 +192,7 @@ single precision and to 32 bits for doub
|
||||
mverbose-cost-dump
|
||||
Common Undocumented Var(flag_aarch64_verbose_cost)
|
||||
Enables verbose cost model dumping in the debug dump files.
|
||||
+
|
||||
+mfentry
|
||||
+Target Report Var(flag_fentry) Init(0)
|
||||
+Emit profiling counter call at function entry immediately after prologue.
|
||||
@ -1,362 +0,0 @@
|
||||
diff -N -urp a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
|
||||
--- a/gcc/config/aarch64/aarch64-protos.h 2018-11-06 10:43:27.862079389 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64-protos.h 2018-11-06 10:44:34.930081154 +0800
|
||||
@@ -353,6 +353,10 @@ bool aarch64_use_return_insn_p (void);
|
||||
const char *aarch64_mangle_builtin_type (const_tree);
|
||||
const char *aarch64_output_casesi (rtx *);
|
||||
|
||||
+extern void aarch64_pr_long_calls (struct cpp_reader *);
|
||||
+extern void aarch64_pr_no_long_calls (struct cpp_reader *);
|
||||
+extern void aarch64_pr_long_calls_off (struct cpp_reader *);
|
||||
+
|
||||
enum aarch64_symbol_type aarch64_classify_symbol (rtx, rtx);
|
||||
enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
|
||||
enum reg_class aarch64_regno_regclass (unsigned);
|
||||
@@ -384,6 +388,7 @@ void aarch64_expand_epilogue (bool);
|
||||
void aarch64_expand_mov_immediate (rtx, rtx);
|
||||
void aarch64_expand_prologue (void);
|
||||
void aarch64_expand_vector_init (rtx, rtx);
|
||||
+void aarch64_function_profiler (FILE *, int);
|
||||
void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx,
|
||||
const_tree, unsigned);
|
||||
void aarch64_init_expanders (void);
|
||||
diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||||
--- a/gcc/config/aarch64/aarch64.c 2018-11-06 10:43:27.870079389 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64.c 2018-11-06 10:44:34.934081154 +0800
|
||||
@@ -70,6 +70,9 @@
|
||||
/* This file should be included last. */
|
||||
#include "target-def.h"
|
||||
|
||||
+static void aarch64_set_default_type_attributes (tree);
|
||||
+static int aarch64_comp_type_attributes (const_tree, const_tree);
|
||||
+
|
||||
/* Defined for convenience. */
|
||||
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
|
||||
|
||||
@@ -1092,12 +1095,163 @@ aarch64_hard_regno_caller_save_mode (uns
|
||||
return choose_hard_reg_mode (regno, nregs, false);
|
||||
}
|
||||
|
||||
+/* Table of machine attributes. */
|
||||
+static const struct attribute_spec aarch64_attribute_table[] =
|
||||
+{
|
||||
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
|
||||
+ affects_type_identity }. */
|
||||
+ /* Function calls made to this symbol must be done indirectly, because
|
||||
+ it may lie outside of the 26 bit addressing range of a normal function
|
||||
+ call. */
|
||||
+ { "long_call", 0, 0, false, true, true, NULL, false },
|
||||
+ /* Whereas these functions are always known to reside within the 26 bit
|
||||
+ addressing range. */
|
||||
+ { "short_call", 0, 0, false, true, true, NULL, false },
|
||||
+ { NULL, 0, 0, false, false, false, NULL, false }
|
||||
+};
|
||||
+
|
||||
+/* Encode the current state of the #pragma [no_]long_calls. */
|
||||
+typedef enum
|
||||
+{
|
||||
+ OFF, /* No #pragma [no_]long_calls is in effect. */
|
||||
+ LONG, /* #pragma long_calls is in effect. */
|
||||
+ SHORT /* #pragma no_long_calls is in effect. */
|
||||
+} aarch64_pragma_enum;
|
||||
+
|
||||
+static aarch64_pragma_enum aarch64_pragma_long_calls = OFF;
|
||||
+
|
||||
+void
|
||||
+aarch64_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
|
||||
+{
|
||||
+ aarch64_pragma_long_calls = LONG;
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+aarch64_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
|
||||
+{
|
||||
+ aarch64_pragma_long_calls = SHORT;
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+aarch64_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
|
||||
+{
|
||||
+ aarch64_pragma_long_calls = OFF;
|
||||
+}
|
||||
+
|
||||
+/* Return 0 if the attributes for two types are incompatible, 1 if they
|
||||
+ are compatible. */
|
||||
+static int
|
||||
+aarch64_comp_type_attributes (const_tree type1, const_tree type2)
|
||||
+{
|
||||
+ int l1, l2, s1, s2;
|
||||
+
|
||||
+ /* Only FUNCTION_TYPEs are compared; any other type is treated as compatible. */
|
||||
+ if (TREE_CODE (type1) != FUNCTION_TYPE)
|
||||
+ return 1;
|
||||
+
|
||||
+ /* Check for mismatched call attributes. */
|
||||
+ l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
|
||||
+ l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
|
||||
+ s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
|
||||
+ s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
|
||||
+
|
||||
+ /* Only bother to check if an attribute is defined. */
|
||||
+ if (l1 | l2 | s1 | s2)
|
||||
+ {
|
||||
+ /* If one type has an attribute, the other
|
||||
+ must have the same attribute. */
|
||||
+ if ((l1 != l2) || (s1 != s2))
|
||||
+ {
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ /* Disallow mixed attributes. */
|
||||
+ if ((l1 && s2) || (l2 && s1))
|
||||
+ {
|
||||
+ return 0;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
+/* Assigns default attributes to newly defined type. This is used to
|
||||
+ set short_call/long_call attributes for function types of
|
||||
+ functions defined inside corresponding #pragma scopes. */
|
||||
+static void
|
||||
+aarch64_set_default_type_attributes (tree type)
|
||||
+{
|
||||
+ /* Add __attribute__ ((long_call)) to all functions, when
|
||||
+ inside #pragma long_calls or __attribute__ ((short_call)),
|
||||
+ when inside #pragma no_long_calls. */
|
||||
+ if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
|
||||
+ {
|
||||
+ tree type_attr_list = NULL;
|
||||
+ tree attr_name = NULL;
|
||||
+ type_attr_list = TYPE_ATTRIBUTES (type);
|
||||
+
|
||||
+ if (aarch64_pragma_long_calls == LONG)
|
||||
+ {
|
||||
+ attr_name = get_identifier ("long_call");
|
||||
+ }
|
||||
+ else if (aarch64_pragma_long_calls == SHORT)
|
||||
+ {
|
||||
+ attr_name = get_identifier ("short_call");
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
|
||||
+ TYPE_ATTRIBUTES (type) = type_attr_list;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Return true if DECL is known to be linked into section SECTION. */
|
||||
+static bool
|
||||
+aarch64_function_in_section_p (tree decl, section *section)
|
||||
+{
|
||||
+ /* We can only be certain about the prevailing symbol definition. */
|
||||
+ if (!decl_binds_to_current_def_p (decl))
|
||||
+ return false;
|
||||
+
|
||||
+ /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
|
||||
+ if (!DECL_SECTION_NAME (decl))
|
||||
+ {
|
||||
+ /* Make sure that we will not create a unique section for DECL. */
|
||||
+ if (flag_function_sections || DECL_COMDAT_GROUP (decl))
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ return function_section (decl) == section;
|
||||
+}
|
||||
+
|
||||
/* Return true if calls to DECL should be treated as
|
||||
long-calls (ie called via a register). */
|
||||
static bool
|
||||
-aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
|
||||
+aarch64_decl_is_long_call_p (tree decl)
|
||||
{
|
||||
- return false;
|
||||
+ tree attrs = NULL;
|
||||
+
|
||||
+ if (!decl)
|
||||
+ return TARGET_LONG_CALLS;
|
||||
+
|
||||
+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
|
||||
+ if (lookup_attribute ("short_call", attrs))
|
||||
+ return false;
|
||||
+
|
||||
+ /* For a FUNCTION_DECL, be conservative, and only cater for cases in which the
|
||||
+ whole of the current function is placed in the same section. */
|
||||
+ if (!flag_reorder_blocks_and_partition
|
||||
+ && TREE_CODE (decl) == FUNCTION_DECL
|
||||
+ && aarch64_function_in_section_p (decl, current_function_section ()))
|
||||
+ return false;
|
||||
+
|
||||
+ if (lookup_attribute ("long_call", attrs))
|
||||
+ return true;
|
||||
+
|
||||
+ return TARGET_LONG_CALLS;
|
||||
}
|
||||
|
||||
/* Return true if calls to symbol-ref SYM should be treated as
|
||||
@@ -1108,6 +1257,36 @@ aarch64_is_long_call_p (rtx sym)
|
||||
return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
|
||||
}
|
||||
|
||||
+void
|
||||
+aarch64_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
|
||||
+{
|
||||
+ if (!TARGET_LONG_CALLS)
|
||||
+ {
|
||||
+ fprintf (file, "\tmov\tx9, x30\n");
|
||||
+ fprintf (file, "\tbl\t__fentry__\n");
|
||||
+ fprintf (file, "\tmov\tx30, x9\n");
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if (flag_pic)
|
||||
+ {
|
||||
+ fprintf (file, "\tmov\tx9, x30\n");
|
||||
+ fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
|
||||
+ fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
|
||||
+ fprintf (file, "\tblr\tx10\n");
|
||||
+ fprintf (file, "\tmov\tx30, x9\n");
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ fprintf (file, "\tmov\tx9, x30\n");
|
||||
+ fprintf (file, "\tadrp\tx10, __fentry__\n");
|
||||
+ fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
|
||||
+ fprintf (file, "\tblr\tx10\n");
|
||||
+ fprintf (file, "\tmov\tx30, x9\n");
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/* Return true if calls to symbol-ref SYM should not go through
|
||||
plt stubs. */
|
||||
|
||||
@@ -15099,6 +15278,15 @@ aarch64_libgcc_floating_mode_supported_p
|
||||
#undef TARGET_SCHED_CAN_SPECULATE_INSN
|
||||
#define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn
|
||||
|
||||
+#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
|
||||
+#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES aarch64_set_default_type_attributes
|
||||
+
|
||||
+#undef TARGET_ATTRIBUTE_TABLE
|
||||
+#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table
|
||||
+
|
||||
+#undef TARGET_COMP_TYPE_ATTRIBUTES
|
||||
+#define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes
|
||||
+
|
||||
#undef TARGET_CAN_USE_DOLOOP_P
|
||||
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
|
||||
|
||||
diff -N -urp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
--- a/gcc/config/aarch64/aarch64.h 2018-11-06 10:43:27.870079389 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64.h 2018-11-06 10:49:29.574088911 +0800
|
||||
@@ -28,7 +28,6 @@
|
||||
|
||||
|
||||
|
||||
-#define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas ()
|
||||
|
||||
/* Target machine storage layout. */
|
||||
|
||||
@@ -659,6 +658,14 @@ typedef struct
|
||||
} CUMULATIVE_ARGS;
|
||||
#endif
|
||||
|
||||
+/* Register the long_calls, no_long_calls and long_calls_off pragmas, then call aarch64_register_pragmas. */
|
||||
+#define REGISTER_TARGET_PRAGMAS() do { \
|
||||
+ c_register_pragma (0, "long_calls", aarch64_pr_long_calls); \
|
||||
+ c_register_pragma (0, "no_long_calls", aarch64_pr_no_long_calls); \
|
||||
+ c_register_pragma (0, "long_calls_off", aarch64_pr_long_calls_off); \
|
||||
+ aarch64_register_pragmas (); \
|
||||
+} while (0)
|
||||
+
|
||||
#define FUNCTION_ARG_PADDING(MODE, TYPE) \
|
||||
(aarch64_pad_arg_upward (MODE, TYPE) ? upward : downward)
|
||||
|
||||
@@ -842,13 +849,20 @@ typedef struct
|
||||
#define PROFILE_HOOK(LABEL) \
|
||||
{ \
|
||||
rtx fun, lr; \
|
||||
+ const rtx_insn* tmp = get_insns (); \
|
||||
lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
|
||||
fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
|
||||
emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
|
||||
+ if (TARGET_LONG_CALLS) \
|
||||
+ { \
|
||||
+ emit_insn (gen_blockage ()); \
|
||||
+ emit_insn_after (gen_blockage (), NEXT_INSN (tmp)); \
|
||||
+ } \
|
||||
}
|
||||
|
||||
/* All the work done in PROFILE_HOOK, but still required. */
|
||||
-#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0)
|
||||
+#define FUNCTION_PROFILER(STREAM, LABELNO) \
|
||||
+ aarch64_function_profiler (STREAM, LABELNO)
|
||||
|
||||
/* For some reason, the Linux headers think they know how to define
|
||||
these macros. They don't!!! */
|
||||
diff -N -urp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
--- a/gcc/config/aarch64/aarch64.md 2018-11-06 10:43:27.874079389 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64.md 2018-11-06 10:44:34.934081154 +0800
|
||||
@@ -850,9 +850,10 @@
|
||||
{
|
||||
rtx pat;
|
||||
rtx callee = XEXP (operands[0], 0);
|
||||
- if (!REG_P (callee)
|
||||
- && ((GET_CODE (callee) != SYMBOL_REF)
|
||||
- || aarch64_is_noplt_call_p (callee)))
|
||||
+
|
||||
+ if (GET_CODE (callee) == SYMBOL_REF
|
||||
+ ? (aarch64_is_long_call_p (callee) || aarch64_is_noplt_call_p (callee))
|
||||
+ : !REG_P (callee))
|
||||
XEXP (operands[0], 0) = force_reg (Pmode, callee);
|
||||
|
||||
if (operands[2] == NULL_RTX)
|
||||
@@ -881,9 +882,10 @@
|
||||
{
|
||||
rtx pat;
|
||||
rtx callee = XEXP (operands[1], 0);
|
||||
- if (!REG_P (callee)
|
||||
- && ((GET_CODE (callee) != SYMBOL_REF)
|
||||
- || aarch64_is_noplt_call_p (callee)))
|
||||
+
|
||||
+ if (GET_CODE (callee) == SYMBOL_REF
|
||||
+ ? (aarch64_is_long_call_p (callee) || aarch64_is_noplt_call_p (callee))
|
||||
+ : !REG_P (callee))
|
||||
XEXP (operands[1], 0) = force_reg (Pmode, callee);
|
||||
|
||||
if (operands[3] == NULL_RTX)
|
||||
diff -N -urp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
|
||||
--- a/gcc/config/aarch64/aarch64.opt 2018-11-06 10:43:27.874079389 +0800
|
||||
+++ b/gcc/config/aarch64/aarch64.opt 2018-11-06 10:44:34.934081154 +0800
|
||||
@@ -80,6 +80,10 @@ mlittle-endian
|
||||
Target Report RejectNegative InverseMask(BIG_END)
|
||||
Assume target CPU is configured as little endian.
|
||||
|
||||
+mlong-calls
|
||||
+Target Report Mask(LONG_CALLS)
|
||||
+Generate call insns as indirect calls, if necessary.
|
||||
+
|
||||
mcmodel=
|
||||
Target RejectNegative Joined Enum(cmodel) Var(aarch64_cmodel_var) Init(AARCH64_CMODEL_SMALL) Save
|
||||
Specify the code model.
|
||||
diff -N -urp a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
|
||||
--- a/gcc/config/aarch64/predicates.md 2018-11-06 10:43:27.878079389 +0800
|
||||
+++ b/gcc/config/aarch64/predicates.md 2018-11-06 10:44:34.938081154 +0800
|
||||
@@ -27,8 +27,9 @@
|
||||
)
|
||||
|
||||
(define_predicate "aarch64_call_insn_operand"
|
||||
- (ior (match_code "symbol_ref")
|
||||
- (match_operand 0 "register_operand")))
|
||||
+ (ior (and (match_code "symbol_ref")
|
||||
+ (match_test "!aarch64_is_long_call_p (op)"))
|
||||
+ (match_operand 0 "register_operand")))
|
||||
|
||||
;; Return true if OP a (const_int 0) operand.
|
||||
(define_predicate "const0_operand"
|
||||
321
remove-array-index-inliner-hint.patch
Normal file
321
remove-array-index-inliner-hint.patch
Normal file
@ -0,0 +1,321 @@
|
||||
diff -uprN a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -11895,12 +11895,6 @@ of iterations of a loop known, it adds a bonus of
|
||||
@option{ipa-cp-loop-hint-bonus} to the profitability score of
|
||||
the candidate.
|
||||
|
||||
-@item ipa-cp-array-index-hint-bonus
|
||||
-When IPA-CP determines that a cloning candidate would make the index of
|
||||
-an array access known, it adds a bonus of
|
||||
-@option{ipa-cp-array-index-hint-bonus} to the profitability
|
||||
-score of the candidate.
|
||||
-
|
||||
@item ipa-max-aa-steps
|
||||
During its analysis of function bodies, IPA-CP employs alias analysis
|
||||
in order to track values pointed to by function parameters. In order
|
||||
diff -uprN a/gcc/ipa-cp.c b/gcc/ipa-cp.c
|
||||
--- a/gcc/ipa-cp.c
|
||||
+++ b/gcc/ipa-cp.c
|
||||
@@ -2607,8 +2607,6 @@ hint_time_bonus (ipa_hints hints)
|
||||
int result = 0;
|
||||
if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
|
||||
result += PARAM_VALUE (PARAM_IPA_CP_LOOP_HINT_BONUS);
|
||||
- if (hints & INLINE_HINT_array_index)
|
||||
- result += PARAM_VALUE (PARAM_IPA_CP_ARRAY_INDEX_HINT_BONUS);
|
||||
return result;
|
||||
}
|
||||
|
||||
diff -uprN a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
|
||||
--- a/gcc/ipa-fnsummary.c
|
||||
+++ b/gcc/ipa-fnsummary.c
|
||||
@@ -134,11 +134,6 @@ ipa_dump_hints (FILE *f, ipa_hints hints)
|
||||
hints &= ~INLINE_HINT_declared_inline;
|
||||
fprintf (f, " declared_inline");
|
||||
}
|
||||
- if (hints & INLINE_HINT_array_index)
|
||||
- {
|
||||
- hints &= ~INLINE_HINT_array_index;
|
||||
- fprintf (f, " array_index");
|
||||
- }
|
||||
if (hints & INLINE_HINT_known_hot)
|
||||
{
|
||||
hints &= ~INLINE_HINT_known_hot;
|
||||
@@ -549,8 +544,6 @@ ipa_fn_summary::~ipa_fn_summary ()
|
||||
edge_predicate_pool.remove (loop_iterations);
|
||||
if (loop_stride)
|
||||
edge_predicate_pool.remove (loop_stride);
|
||||
- if (array_index)
|
||||
- edge_predicate_pool.remove (array_index);
|
||||
vec_free (conds);
|
||||
vec_free (size_time_table);
|
||||
}
|
||||
@@ -703,8 +696,6 @@ ipa_fn_summary_t::duplicate (cgraph_node *src,
|
||||
possible_truths);
|
||||
remap_hint_predicate_after_duplication (&info->loop_stride,
|
||||
possible_truths);
|
||||
- remap_hint_predicate_after_duplication (&info->array_index,
|
||||
- possible_truths);
|
||||
|
||||
/* If inliner or someone after inliner will ever start producing
|
||||
non-trivial clones, we will get trouble with lack of information
|
||||
@@ -727,12 +718,6 @@ ipa_fn_summary_t::duplicate (cgraph_node *src,
|
||||
info->loop_stride = NULL;
|
||||
set_hint_predicate (&info->loop_stride, p);
|
||||
}
|
||||
- if (info->array_index)
|
||||
- {
|
||||
- predicate p = *info->array_index;
|
||||
- info->array_index = NULL;
|
||||
- set_hint_predicate (&info->array_index, p);
|
||||
- }
|
||||
}
|
||||
if (!dst->global.inlined_to)
|
||||
ipa_update_overall_fn_summary (dst);
|
||||
@@ -894,11 +879,6 @@ ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
|
||||
fprintf (f, " loop stride:");
|
||||
s->loop_stride->dump (f, s->conds);
|
||||
}
|
||||
- if (s->array_index)
|
||||
- {
|
||||
- fprintf (f, " array index:");
|
||||
- s->array_index->dump (f, s->conds);
|
||||
- }
|
||||
fprintf (f, " calls:\n");
|
||||
dump_ipa_call_summary (f, 4, node, s);
|
||||
fprintf (f, "\n");
|
||||
@@ -1824,27 +1804,6 @@ predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
|
||||
nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
|
||||
}
|
||||
|
||||
-/* Return predicate specifying when array index in access OP becomes non-constant. */
|
||||
-
|
||||
-static predicate
|
||||
-array_index_predicate (ipa_fn_summary *info,
|
||||
- vec< predicate> nonconstant_names, tree op)
|
||||
-{
|
||||
- predicate p = false;
|
||||
- while (handled_component_p (op))
|
||||
- {
|
||||
- if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF)
|
||||
- {
|
||||
- if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
|
||||
- p = p.or_with (info->conds,
|
||||
- nonconstant_names[SSA_NAME_VERSION
|
||||
- (TREE_OPERAND (op, 1))]);
|
||||
- }
|
||||
- op = TREE_OPERAND (op, 0);
|
||||
- }
|
||||
- return p;
|
||||
-}
|
||||
-
|
||||
/* For a typical usage of __builtin_expect (a<b, 1), we
|
||||
may introduce an extra relation stmt:
|
||||
With the builtin, we have
|
||||
@@ -2001,7 +1960,6 @@ analyze_function_body (struct cgraph_node *node, bool early)
|
||||
vec<predicate> nonconstant_names = vNULL;
|
||||
int nblocks, n;
|
||||
int *order;
|
||||
- predicate array_index = true;
|
||||
gimple *fix_builtin_expect_stmt;
|
||||
|
||||
gcc_assert (my_function && my_function->cfg);
|
||||
@@ -2146,26 +2104,6 @@ analyze_function_body (struct cgraph_node *node, bool early)
|
||||
this_time);
|
||||
}
|
||||
|
||||
- if (gimple_assign_load_p (stmt) && nonconstant_names.exists ())
|
||||
- {
|
||||
- predicate this_array_index;
|
||||
- this_array_index =
|
||||
- array_index_predicate (info, nonconstant_names,
|
||||
- gimple_assign_rhs1 (stmt));
|
||||
- if (this_array_index != false)
|
||||
- array_index &= this_array_index;
|
||||
- }
|
||||
- if (gimple_store_p (stmt) && nonconstant_names.exists ())
|
||||
- {
|
||||
- predicate this_array_index;
|
||||
- this_array_index =
|
||||
- array_index_predicate (info, nonconstant_names,
|
||||
- gimple_get_lhs (stmt));
|
||||
- if (this_array_index != false)
|
||||
- array_index &= this_array_index;
|
||||
- }
|
||||
-
|
||||
-
|
||||
if (is_gimple_call (stmt)
|
||||
&& !gimple_call_internal_p (stmt))
|
||||
{
|
||||
@@ -2273,14 +2211,40 @@ analyze_function_body (struct cgraph_node *node, bool early)
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " fp_expression set\n");
|
||||
}
|
||||
+ }
|
||||
|
||||
- gcc_assert (time >= 0);
|
||||
- gcc_assert (size >= 0);
|
||||
+ /* Account cost of address calculations in the statements. */
|
||||
+ for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
|
||||
+ {
|
||||
+ for (tree op = gimple_op (stmt, i);
|
||||
+ op && handled_component_p (op);
|
||||
+ op = TREE_OPERAND (op, 0))
|
||||
+ if ((TREE_CODE (op) == ARRAY_REF
|
||||
+ || TREE_CODE (op) == ARRAY_RANGE_REF)
|
||||
+ && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
|
||||
+ {
|
||||
+ predicate p = bb_predicate;
|
||||
+ if (fbi.info)
|
||||
+ p = p & will_be_nonconstant_expr_predicate
|
||||
+ (&fbi, info, TREE_OPERAND (op, 1),
|
||||
+ nonconstant_names);
|
||||
+ if (p != false)
|
||||
+ {
|
||||
+ time += freq;
|
||||
+ size += 1;
|
||||
+ if (dump_file)
|
||||
+ fprintf (dump_file,
|
||||
+ "\t\tAccounting address calculation.\n");
|
||||
+ info->account_size_time (ipa_fn_summary::size_scale,
|
||||
+ freq,
|
||||
+ bb_predicate,
|
||||
+ p);
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
+
|
||||
}
|
||||
}
|
||||
- set_hint_predicate (&ipa_fn_summaries->get_create (node)->array_index,
|
||||
- array_index);
|
||||
free (order);
|
||||
|
||||
if (nonconstant_names.exists () && !early)
|
||||
@@ -2783,9 +2747,6 @@ estimate_node_size_and_time (struct cgraph_node *node,
|
||||
if (info->loop_stride
|
||||
&& !info->loop_stride->evaluate (possible_truths))
|
||||
hints |= INLINE_HINT_loop_stride;
|
||||
- if (info->array_index
|
||||
- && !info->array_index->evaluate (possible_truths))
|
||||
- hints |= INLINE_HINT_array_index;
|
||||
if (info->scc_no)
|
||||
hints |= INLINE_HINT_in_scc;
|
||||
if (DECL_DECLARED_INLINE_P (node->decl))
|
||||
@@ -3106,9 +3067,6 @@ ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
|
||||
remap_hint_predicate (info, callee_info,
|
||||
&callee_info->loop_stride,
|
||||
operand_map, offset_map, clause, &toplev_predicate);
|
||||
- remap_hint_predicate (info, callee_info,
|
||||
- &callee_info->array_index,
|
||||
- operand_map, offset_map, clause, &toplev_predicate);
|
||||
|
||||
ipa_call_summary *s = ipa_call_summaries->get (edge);
|
||||
inline_update_callee_summaries (edge->callee, s->loop_depth);
|
||||
@@ -3366,9 +3324,6 @@ inline_read_section (struct lto_file_decl_data *file_data, const char *data,
|
||||
p.stream_in (&ib);
|
||||
if (info)
|
||||
set_hint_predicate (&info->loop_stride, p);
|
||||
- p.stream_in (&ib);
|
||||
- if (info)
|
||||
- set_hint_predicate (&info->array_index, p);
|
||||
for (e = node->callees; e; e = e->next_callee)
|
||||
read_ipa_call_summary (&ib, e, info != NULL);
|
||||
for (e = node->indirect_calls; e; e = e->next_callee)
|
||||
@@ -3517,10 +3472,6 @@ ipa_fn_summary_write (void)
|
||||
info->loop_stride->stream_out (ob);
|
||||
else
|
||||
streamer_write_uhwi (ob, 0);
|
||||
- if (info->array_index)
|
||||
- info->array_index->stream_out (ob);
|
||||
- else
|
||||
- streamer_write_uhwi (ob, 0);
|
||||
for (edge = cnode->callees; edge; edge = edge->next_callee)
|
||||
write_ipa_call_summary (ob, edge);
|
||||
for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
|
||||
diff -uprN a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h
|
||||
--- a/gcc/ipa-fnsummary.h
|
||||
+++ b/gcc/ipa-fnsummary.h
|
||||
@@ -48,11 +48,8 @@ enum ipa_hints_vals {
|
||||
if functions are in different modules, inlining may not be so important.
|
||||
Set by simple_edge_hints in ipa-inline-analysis.c. */
|
||||
INLINE_HINT_cross_module = 64,
|
||||
- /* If array indexes of loads/stores become known there may be room for
|
||||
- further optimization. */
|
||||
- INLINE_HINT_array_index = 128,
|
||||
/* We know that the callee is hot by profile. */
|
||||
- INLINE_HINT_known_hot = 256
|
||||
+ INLINE_HINT_known_hot = 128
|
||||
};
|
||||
|
||||
typedef int ipa_hints;
|
||||
@@ -97,7 +94,7 @@ public:
|
||||
fp_expressions (false), estimated_stack_size (false),
|
||||
stack_frame_offset (false), time (0), size (0), conds (NULL),
|
||||
size_time_table (NULL), loop_iterations (NULL), loop_stride (NULL),
|
||||
- array_index (NULL), growth (0), scc_no (0)
|
||||
+ growth (0), scc_no (0)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -111,7 +108,7 @@ public:
|
||||
stack_frame_offset (s.stack_frame_offset), time (s.time), size (s.size),
|
||||
conds (s.conds), size_time_table (s.size_time_table),
|
||||
loop_iterations (s.loop_iterations), loop_stride (s.loop_stride),
|
||||
- array_index (s.array_index), growth (s.growth), scc_no (s.scc_no)
|
||||
+ growth (s.growth), scc_no (s.scc_no)
|
||||
{}
|
||||
|
||||
/* Default constructor. */
|
||||
@@ -157,8 +154,6 @@ public:
|
||||
/* Predicate on when some loop in the function becomes to have known
|
||||
stride. */
|
||||
predicate * GTY((skip)) loop_stride;
|
||||
- /* Predicate on when some array indexes become constants. */
|
||||
- predicate * GTY((skip)) array_index;
|
||||
/* Estimated growth for inlining all copies of the function before start
|
||||
of small functions inlining.
|
||||
This value will get out of date as the callers are duplicated, but
|
||||
diff -uprN a/gcc/ipa-inline.c b/gcc/ipa-inline.c
|
||||
--- a/gcc/ipa-inline.c
|
||||
+++ b/gcc/ipa-inline.c
|
||||
@@ -807,7 +807,6 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report)
|
||||
|| (!(hints & (INLINE_HINT_indirect_call
|
||||
| INLINE_HINT_known_hot
|
||||
| INLINE_HINT_loop_iterations
|
||||
- | INLINE_HINT_array_index
|
||||
| INLINE_HINT_loop_stride))
|
||||
&& !(big_speedup = big_speedup_p (e)))))
|
||||
{
|
||||
@@ -833,7 +832,6 @@ want_inline_small_function_p (struct cgraph_edge *e, bool report)
|
||||
&& !(hints & INLINE_HINT_known_hot)
|
||||
&& growth >= ((hints & (INLINE_HINT_indirect_call
|
||||
| INLINE_HINT_loop_iterations
|
||||
- | INLINE_HINT_array_index
|
||||
| INLINE_HINT_loop_stride))
|
||||
? MAX (MAX_INLINE_INSNS_AUTO,
|
||||
MAX_INLINE_INSNS_SINGLE)
|
||||
@@ -1227,7 +1225,6 @@ edge_badness (struct cgraph_edge *edge, bool dump)
|
||||
badness = badness.shift (badness > 0 ? 4 : -4);
|
||||
if ((hints & (INLINE_HINT_indirect_call
|
||||
| INLINE_HINT_loop_iterations
|
||||
- | INLINE_HINT_array_index
|
||||
| INLINE_HINT_loop_stride))
|
||||
|| callee_info->growth <= 0)
|
||||
badness = badness.shift (badness > 0 ? -2 : 2);
|
||||
diff -uprN a/gcc/params.def b/gcc/params.def
|
||||
--- a/gcc/params.def
|
||||
+++ b/gcc/params.def
|
||||
@@ -1109,12 +1109,6 @@ DEFPARAM (PARAM_IPA_CP_LOOP_HINT_BONUS,
|
||||
"bounds or strides known.",
|
||||
64, 0, 0)
|
||||
|
||||
-DEFPARAM (PARAM_IPA_CP_ARRAY_INDEX_HINT_BONUS,
|
||||
- "ipa-cp-array-index-hint-bonus",
|
||||
- "Compile-time bonus IPA-CP assigns to candidates which make an array "
|
||||
- "index known.",
|
||||
- 48, 0, 0)
|
||||
-
|
||||
DEFPARAM (PARAM_IPA_MAX_AA_STEPS,
|
||||
"ipa-max-aa-steps",
|
||||
"Maximum number of statements that will be visited by IPA formal "
|
||||
@ -1,33 +0,0 @@
|
||||
diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
|
||||
index 858bb21..de18e56 100644 (file)
|
||||
--- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
|
||||
+++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc
|
||||
@@ -157,7 +157,6 @@ typedef struct user_fpregs elf_fpregset_t;
|
||||
# include <sys/procfs.h>
|
||||
#endif
|
||||
#include <sys/user.h>
|
||||
-#include <sys/ustat.h>
|
||||
#include <linux/cyclades.h>
|
||||
#include <linux/if_eql.h>
|
||||
#include <linux/if_plip.h>
|
||||
@@ -250,7 +249,19 @@ namespace __sanitizer {
|
||||
#endif // SANITIZER_LINUX || SANITIZER_FREEBSD
|
||||
|
||||
#if SANITIZER_LINUX && !SANITIZER_ANDROID
|
||||
- unsigned struct_ustat_sz = sizeof(struct ustat);
|
||||
+ // Use pre-computed size of struct ustat to avoid <sys/ustat.h> which
|
||||
+ // has been removed from glibc 2.28.
|
||||
+#if defined(__aarch64__) || defined(__s390x__) || defined (__mips64) \
|
||||
+ || defined(__powerpc64__) || defined(__arch64__) || defined(__sparcv9) \
|
||||
+ || defined(__x86_64__)
|
||||
+#define SIZEOF_STRUCT_USTAT 32
|
||||
+#elif defined(__arm__) || defined(__i386__) || defined(__mips__) \
|
||||
+ || defined(__powerpc__) || defined(__s390__)
|
||||
+#define SIZEOF_STRUCT_USTAT 20
|
||||
+#else
|
||||
+#error Unknown size of struct ustat
|
||||
+#endif
|
||||
+ unsigned struct_ustat_sz = SIZEOF_STRUCT_USTAT;
|
||||
unsigned struct_rlimit64_sz = sizeof(struct rlimit64);
|
||||
unsigned struct_statvfs64_sz = sizeof(struct statvfs64);
|
||||
#endif // SANITIZER_LINUX && !SANITIZER_ANDROID
|
||||
14
skip-debug-insns-when-computing-inline-costs.patch
Normal file
14
skip-debug-insns-when-computing-inline-costs.patch
Normal file
@ -0,0 +1,14 @@
|
||||
diff -uprN a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c
|
||||
--- a/gcc/ipa-fnsummary.c
|
||||
+++ b/gcc/ipa-fnsummary.c
|
||||
@@ -2078,8 +2078,8 @@ analyze_function_body (struct cgraph_node *node, bool early)
|
||||
|
||||
fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
|
||||
|
||||
- for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
|
||||
- gsi_next (&bsi))
|
||||
+ for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
|
||||
+ !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
|
||||
{
|
||||
gimple *stmt = gsi_stmt (bsi);
|
||||
int this_size = estimate_num_insns (stmt, &eni_size_weights);
|
||||
@ -1,11 +0,0 @@
|
||||
--- a/gcc/tree-ssa-loop-ivcanon.c 2018-12-06 05:05:43.841181211 +0800
|
||||
+++ b/gcc/tree-ssa-loop-ivcanon.c 2018-12-06 05:03:17.545185153 +0800
|
||||
@@ -726,7 +726,7 @@ try_unroll_loop_completely (struct loop
|
||||
edge_to_cancel = NULL;
|
||||
}
|
||||
|
||||
- if (!n_unroll_found)
|
||||
+ if (!n_unroll_found || SCEV_NOT_KNOWN == TREE_CODE (niter))
|
||||
return false;
|
||||
|
||||
if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
|
||||
@ -1,25 +0,0 @@
|
||||
diff -N -urp a/gcc/common/config/aarch64/aarch64-common.c b/gcc/common/config/aarch64/aarch64-common.c
|
||||
--- a/gcc/common/config/aarch64/aarch64-common.c 2019-07-02 09:28:49.798701181 +0800
|
||||
+++ b/gcc/common/config/aarch64/aarch64-common.c 2019-07-02 09:30:15.436282799 +0800
|
||||
@@ -51,6 +51,10 @@ static const struct default_options aarc
|
||||
{ OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
|
||||
/* Enable redundant extension instructions removal at -O2 and higher. */
|
||||
{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
|
||||
+#if (TARGET_DEFAULT_ASYNC_UNWIND_TABLES == 1)
|
||||
+ { OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 },
|
||||
+ { OPT_LEVELS_ALL, OPT_funwind_tables, NULL, 1},
|
||||
+#endif
|
||||
{ OPT_LEVELS_NONE, 0, NULL, 0 }
|
||||
};
|
||||
|
||||
diff -N -urp a/gcc/config.gcc b/gcc/config.gcc
|
||||
--- a/gcc/config.gcc 2019-07-02 09:28:50.114701170 +0800
|
||||
+++ b/gcc/config.gcc 2019-07-02 09:31:50.636196118 +0800
|
||||
@@ -966,6 +966,7 @@ aarch64*-*-linux*)
|
||||
tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h"
|
||||
tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-linux.h"
|
||||
tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-linux"
|
||||
+ tm_defines="${tm_defines} TARGET_DEFAULT_ASYNC_UNWIND_TABLES=1"
|
||||
case $target in
|
||||
aarch64_be-*)
|
||||
tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1"
|
||||
Loading…
x
Reference in New Issue
Block a user